author     Stephen Hines <srhines@google.com>  2014-12-01 14:51:49 -0800
committer  Stephen Hines <srhines@google.com>  2014-12-02 16:08:10 -0800
commit     37ed9c199ca639565f6ce88105f9e39e898d82d0 (patch)
tree       8fb36d3910e3ee4c4e1b7422f4f017108efc52f5 /lib
parent     d2327b22152ced7bc46dc629fc908959e8a52d03 (diff)
download   external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.zip
           external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.gz
           external_llvm-37ed9c199ca639565f6ce88105f9e39e898d82d0.tar.bz2
Update aosp/master LLVM for rebase to r222494.
Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/AliasAnalysis.cpp135
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp8
-rw-r--r--lib/Analysis/AliasSetTracker.cpp145
-rw-r--r--lib/Analysis/Analysis.cpp4
-rw-r--r--lib/Analysis/Android.mk6
-rw-r--r--lib/Analysis/AssumptionTracker.cpp110
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp259
-rw-r--r--lib/Analysis/BlockFrequencyInfoImpl.cpp23
-rw-r--r--lib/Analysis/CFG.cpp4
-rw-r--r--lib/Analysis/CFGPrinter.cpp16
-rw-r--r--lib/Analysis/CFLAliasAnalysis.cpp1013
-rw-r--r--lib/Analysis/CMakeLists.txt4
-rw-r--r--lib/Analysis/CaptureTracking.cpp89
-rw-r--r--lib/Analysis/CodeMetrics.cpp80
-rw-r--r--lib/Analysis/ConstantFolding.cpp124
-rw-r--r--lib/Analysis/DependenceAnalysis.cpp73
-rw-r--r--lib/Analysis/DominanceFrontier.cpp135
-rw-r--r--lib/Analysis/FunctionTargetTransformInfo.cpp50
-rw-r--r--lib/Analysis/IPA/CallGraph.cpp5
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp9
-rw-r--r--lib/Analysis/IPA/InlineCost.cpp35
-rw-r--r--lib/Analysis/IVUsers.cpp8
-rw-r--r--lib/Analysis/InstructionSimplify.cpp687
-rw-r--r--lib/Analysis/JumpInstrTableInfo.cpp17
-rw-r--r--lib/Analysis/LazyCallGraph.cpp10
-rw-r--r--lib/Analysis/LazyValueInfo.cpp282
-rw-r--r--lib/Analysis/LibCallSemantics.cpp4
-rw-r--r--lib/Analysis/Lint.cpp30
-rw-r--r--lib/Analysis/Loads.cpp182
-rw-r--r--lib/Analysis/LoopInfo.cpp4
-rw-r--r--lib/Analysis/LoopPass.cpp11
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp10
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp193
-rw-r--r--lib/Analysis/NoAliasAnalysis.cpp5
-rw-r--r--lib/Analysis/PHITransAddr.cpp6
-rw-r--r--lib/Analysis/PtrUseVisitor.cpp2
-rw-r--r--lib/Analysis/RegionInfo.cpp828
-rw-r--r--lib/Analysis/RegionPass.cpp4
-rw-r--r--lib/Analysis/RegionPrinter.cpp51
-rw-r--r--lib/Analysis/ScalarEvolution.cpp1061
-rw-r--r--lib/Analysis/ScalarEvolutionAliasAnalysis.cpp4
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp8
-rw-r--r--lib/Analysis/ScalarEvolutionNormalization.cpp2
-rw-r--r--lib/Analysis/ScopedNoAliasAA.cpp245
-rw-r--r--lib/Analysis/StratifiedSets.h692
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp51
-rw-r--r--lib/Analysis/TypeBasedAliasAnalysis.cpp35
-rw-r--r--lib/Analysis/ValueTracking.cpp1097
-rw-r--r--lib/AsmParser/LLLexer.cpp18
-rw-r--r--lib/AsmParser/LLLexer.h10
-rw-r--r--lib/AsmParser/LLParser.cpp472
-rw-r--r--lib/AsmParser/LLParser.h46
-rw-r--r--lib/AsmParser/LLToken.h12
-rw-r--r--lib/AsmParser/Parser.cpp44
-rw-r--r--lib/Bitcode/Reader/BitReader.cpp7
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp779
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h62
-rw-r--r--lib/Bitcode/Reader/BitstreamReader.cpp89
-rw-r--r--lib/Bitcode/Writer/BitWriter.cpp14
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp174
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp333
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h16
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp34
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h65
-rw-r--r--lib/CodeGen/AllocationOrder.h4
-rw-r--r--lib/CodeGen/Analysis.cpp51
-rw-r--r--lib/CodeGen/Android.mk7
-rw-r--r--lib/CodeGen/AntiDepBreaker.h24
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp6
-rw-r--r--lib/CodeGen/AsmPrinter/AddressPool.h4
-rw-r--r--lib/CodeGen/AsmPrinter/Android.mk1
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp364
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp34
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterHandler.h4
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp11
-rw-r--r--lib/CodeGen/AsmPrinter/ByteStreamer.h4
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt1
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp32
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h8
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.h4
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp97
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h6
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocEntry.h144
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocList.h8
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp16
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.h12
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp862
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h250
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp1128
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h203
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h37
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.cpp60
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.h45
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfStringPool.cpp8
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfStringPool.h13
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp668
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h228
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp18
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.h4
-rw-r--r--lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.cpp2
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.h52
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp167
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h4
-rw-r--r--lib/CodeGen/AtomicExpandLoadLinkedPass.cpp380
-rw-r--r--lib/CodeGen/AtomicExpandPass.cpp563
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp39
-rw-r--r--lib/CodeGen/BranchFolding.cpp92
-rw-r--r--lib/CodeGen/BranchFolding.h28
-rw-r--r--lib/CodeGen/CMakeLists.txt9
-rw-r--r--lib/CodeGen/CalcSpillWeights.cpp12
-rw-r--r--lib/CodeGen/CallingConvLower.cpp14
-rw-r--r--lib/CodeGen/CodeGen.cpp3
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp795
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp51
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h27
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp21
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp22
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp8
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp14
-rw-r--r--lib/CodeGen/ErlangGC.cpp3
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp9
-rw-r--r--lib/CodeGen/ExpandISelPseudos.cpp4
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp7
-rw-r--r--lib/CodeGen/ForwardControlFlowIntegrity.cpp374
-rw-r--r--lib/CodeGen/GCMetadata.cpp2
-rw-r--r--lib/CodeGen/GCStrategy.cpp5
-rw-r--r--lib/CodeGen/GlobalMerge.cpp5
-rw-r--r--lib/CodeGen/IfConversion.cpp30
-rw-r--r--lib/CodeGen/InlineSpiller.cpp68
-rw-r--r--lib/CodeGen/InterferenceCache.h4
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp33
-rw-r--r--lib/CodeGen/JITCodeEmitter.cpp14
-rw-r--r--lib/CodeGen/JumpInstrTables.cpp17
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp65
-rw-r--r--lib/CodeGen/LexicalScopes.cpp11
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp74
-rw-r--r--lib/CodeGen/LiveDebugVariables.h6
-rw-r--r--lib/CodeGen/LiveInterval.cpp2
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp16
-rw-r--r--lib/CodeGen/LiveRangeCalc.h4
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp7
-rw-r--r--lib/CodeGen/LiveRegMatrix.cpp3
-rw-r--r--lib/CodeGen/LiveStackAnalysis.cpp5
-rw-r--r--lib/CodeGen/LiveVariables.cpp262
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp9
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp48
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp9
-rw-r--r--lib/CodeGen/MachineCSE.cpp35
-rw-r--r--lib/CodeGen/MachineCombiner.cpp435
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp5
-rw-r--r--lib/CodeGen/MachineDominanceFrontier.cpp54
-rw-r--r--lib/CodeGen/MachineDominators.cpp2
-rw-r--r--lib/CodeGen/MachineFunction.cpp132
-rw-r--r--lib/CodeGen/MachineFunctionAnalysis.cpp3
-rw-r--r--lib/CodeGen/MachineInstr.cpp131
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp19
-rw-r--r--lib/CodeGen/MachineLICM.cpp35
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp8
-rw-r--r--lib/CodeGen/MachineRegionInfo.cpp140
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp27
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp4
-rw-r--r--lib/CodeGen/MachineScheduler.cpp43
-rw-r--r--lib/CodeGen/MachineSink.cpp177
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp76
-rw-r--r--lib/CodeGen/MachineVerifier.cpp77
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp7
-rw-r--r--lib/CodeGen/PHIElimination.cpp10
-rw-r--r--lib/CodeGen/PHIEliminationUtils.h4
-rw-r--r--lib/CodeGen/Passes.cpp26
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp795
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp64
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp5
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp47
-rw-r--r--lib/CodeGen/PrologEpilogInserter.h4
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp8
-rw-r--r--lib/CodeGen/RegAllocBase.cpp3
-rw-r--r--lib/CodeGen/RegAllocBase.h6
-rw-r--r--lib/CodeGen/RegAllocBasic.cpp3
-rw-r--r--lib/CodeGen/RegAllocFast.cpp36
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp30
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp777
-rw-r--r--lib/CodeGen/RegisterClassInfo.cpp7
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp108
-rw-r--r--lib/CodeGen/RegisterCoalescer.h50
-rw-r--r--lib/CodeGen/RegisterPressure.cpp3
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp131
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp9
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp28
-rw-r--r--lib/CodeGen/ScheduleDAGPrinter.cpp1
-rw-r--r--lib/CodeGen/ScoreboardHazardRecognizer.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp2257
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp1339
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp117
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp48
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h5
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp453
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp125
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp68
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h18
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp12
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp66
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp96
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp50
-rw-r--r--lib/CodeGen/SelectionDAG/SDNodeDbgValue.h58
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp32
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp80
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp38
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h4
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp477
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp1021
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h43
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp14
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp147
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp132
-rw-r--r--lib/CodeGen/ShadowStackGC.cpp2
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp53
-rw-r--r--lib/CodeGen/SpillPlacement.cpp54
-rw-r--r--lib/CodeGen/SpillPlacement.h13
-rw-r--r--lib/CodeGen/Spiller.cpp184
-rw-r--r--lib/CodeGen/Spiller.h9
-rw-r--r--lib/CodeGen/SplitKit.cpp33
-rw-r--r--lib/CodeGen/SplitKit.h4
-rw-r--r--lib/CodeGen/StackColoring.cpp2
-rw-r--r--lib/CodeGen/StackMapLivenessAnalysis.cpp4
-rw-r--r--lib/CodeGen/StackMaps.cpp32
-rw-r--r--lib/CodeGen/StackProtector.cpp9
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp4
-rw-r--r--lib/CodeGen/TailDuplication.cpp9
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp6
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp102
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp87
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp158
-rw-r--r--lib/CodeGen/TargetOptionsImpl.cpp7
-rw-r--r--lib/CodeGen/TargetRegisterInfo.cpp11
-rw-r--r--lib/CodeGen/TargetSchedule.cpp33
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp28
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp11
-rw-r--r--lib/CodeGen/VirtRegMap.cpp13
-rw-r--r--lib/DebugInfo/Android.mk1
-rw-r--r--lib/DebugInfo/CMakeLists.txt1
-rw-r--r--lib/DebugInfo/DIContext.cpp2
-rw-r--r--lib/DebugInfo/DWARFAbbreviationDeclaration.h4
-rw-r--r--lib/DebugInfo/DWARFAcceleratorTable.cpp133
-rw-r--r--lib/DebugInfo/DWARFAcceleratorTable.h51
-rw-r--r--lib/DebugInfo/DWARFCompileUnit.h13
-rw-r--r--lib/DebugInfo/DWARFContext.cpp244
-rw-r--r--lib/DebugInfo/DWARFContext.h91
-rw-r--r--lib/DebugInfo/DWARFDebugAbbrev.cpp4
-rw-r--r--lib/DebugInfo/DWARFDebugAbbrev.h4
-rw-r--r--lib/DebugInfo/DWARFDebugArangeSet.h4
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.h4
-rw-r--r--lib/DebugInfo/DWARFDebugFrame.cpp3
-rw-r--r--lib/DebugInfo/DWARFDebugFrame.h4
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.cpp115
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.h17
-rw-r--r--lib/DebugInfo/DWARFDebugLine.cpp38
-rw-r--r--lib/DebugInfo/DWARFDebugLine.h12
-rw-r--r--lib/DebugInfo/DWARFDebugLoc.h4
-rw-r--r--lib/DebugInfo/DWARFDebugRangeList.h4
-rw-r--r--lib/DebugInfo/DWARFFormValue.cpp44
-rw-r--r--lib/DebugInfo/DWARFRelocMap.h6
-rw-r--r--lib/DebugInfo/DWARFSection.h (renamed from lib/CodeGen/MachineCodeEmitter.cpp)18
-rw-r--r--lib/DebugInfo/DWARFTypeUnit.h13
-rw-r--r--lib/DebugInfo/DWARFUnit.cpp45
-rw-r--r--lib/DebugInfo/DWARFUnit.h104
-rw-r--r--lib/ExecutionEngine/CMakeLists.txt2
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp172
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp22
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp94
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/Interpreter/CMakeLists.txt2
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp35
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.cpp13
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h84
-rw-r--r--lib/ExecutionEngine/JIT/Android.mk17
-rw-r--r--lib/ExecutionEngine/JIT/CMakeLists.txt8
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp695
-rw-r--r--lib/ExecutionEngine/JIT/JIT.h229
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp1256
-rw-r--r--lib/ExecutionEngine/JIT/JITMemoryManager.cpp904
-rw-r--r--lib/ExecutionEngine/JIT/Makefile38
-rw-r--r--lib/ExecutionEngine/JITEventListener.cpp (renamed from lib/Target/TargetJITInfo.cpp)7
-rw-r--r--lib/ExecutionEngine/LLVMBuild.txt2
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp186
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.h47
-rw-r--r--lib/ExecutionEngine/Makefile2
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp90
-rw-r--r--lib/ExecutionEngine/RTDyldMemoryManager.cpp23
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp8
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h6
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h23
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp271
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp1348
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h76
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp175
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h10
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h38
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp919
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h174
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h405
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h277
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h261
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h136
-rw-r--r--lib/ExecutionEngine/TargetSelect.cpp5
-rw-r--r--lib/IR/Android.mk1
-rw-r--r--lib/IR/AsmWriter.cpp278
-rw-r--r--lib/IR/AsmWriter.h11
-rw-r--r--lib/IR/AttributeImpl.h26
-rw-r--r--lib/IR/Attributes.cpp110
-rw-r--r--lib/IR/AutoUpgrade.cpp211
-rw-r--r--lib/IR/BasicBlock.cpp50
-rw-r--r--lib/IR/CMakeLists.txt1
-rw-r--r--lib/IR/ConstantFold.cpp29
-rw-r--r--lib/IR/ConstantFold.h4
-rw-r--r--lib/IR/Constants.cpp509
-rw-r--r--lib/IR/ConstantsContext.h628
-rw-r--r--lib/IR/Core.cpp120
-rw-r--r--lib/IR/DIBuilder.cpp1271
-rw-r--r--lib/IR/DataLayout.cpp34
-rw-r--r--lib/IR/DebugInfo.cpp583
-rw-r--r--lib/IR/DebugLoc.cpp10
-rw-r--r--lib/IR/DiagnosticInfo.cpp29
-rw-r--r--lib/IR/DiagnosticPrinter.cpp2
-rw-r--r--lib/IR/Function.cpp122
-rw-r--r--lib/IR/GCOV.cpp13
-rw-r--r--lib/IR/Globals.cpp27
-rw-r--r--lib/IR/IRBuilder.cpp44
-rw-r--r--lib/IR/InlineAsm.cpp4
-rw-r--r--lib/IR/Instruction.cpp26
-rw-r--r--lib/IR/Instructions.cpp78
-rw-r--r--lib/IR/LLVMContext.cpp65
-rw-r--r--lib/IR/LLVMContextImpl.cpp18
-rw-r--r--lib/IR/LLVMContextImpl.h89
-rw-r--r--lib/IR/LeaksContext.h6
-rw-r--r--lib/IR/LegacyPassManager.cpp14
-rw-r--r--lib/IR/MDBuilder.cpp22
-rw-r--r--lib/IR/Mangler.cpp96
-rw-r--r--lib/IR/Metadata.cpp295
-rw-r--r--lib/IR/Module.cpp58
-rw-r--r--lib/IR/PassManager.cpp2
-rw-r--r--lib/IR/PassRegistry.cpp56
-rw-r--r--lib/IR/SymbolTableListTraitsImpl.h4
-rw-r--r--lib/IR/Type.cpp45
-rw-r--r--lib/IR/TypeFinder.cpp2
-rw-r--r--lib/IR/Use.cpp2
-rw-r--r--lib/IR/UseListOrder.cpp43
-rw-r--r--lib/IR/User.cpp3
-rw-r--r--lib/IR/Value.cpp120
-rw-r--r--lib/IR/ValueSymbolTable.cpp25
-rw-r--r--lib/IR/Verifier.cpp198
-rw-r--r--lib/IRReader/IRReader.cpp56
-rw-r--r--lib/LTO/LLVMBuild.txt2
-rw-r--r--lib/LTO/LTOCodeGenerator.cpp113
-rw-r--r--lib/LTO/LTOModule.cpp233
-rw-r--r--lib/Linker/LinkModules.cpp913
-rw-r--r--lib/MC/Android.mk4
-rw-r--r--lib/MC/CMakeLists.txt5
-rw-r--r--lib/MC/ConstantPools.cpp17
-rw-r--r--lib/MC/ELFObjectWriter.cpp98
-rw-r--r--lib/MC/LLVMBuild.txt2
-rw-r--r--lib/MC/MCAnalysis/Android.mk37
-rw-r--r--lib/MC/MCAnalysis/CMakeLists.txt8
-rw-r--r--lib/MC/MCAnalysis/LLVMBuild.txt5
-rw-r--r--lib/MC/MCAnalysis/MCAtom.cpp114
-rw-r--r--lib/MC/MCAnalysis/MCFunction.cpp76
-rw-r--r--lib/MC/MCAnalysis/MCModule.cpp142
-rw-r--r--lib/MC/MCAnalysis/MCModuleYAML.cpp464
-rw-r--r--lib/MC/MCAnalysis/MCObjectDisassembler.cpp574
-rw-r--r--lib/MC/MCAnalysis/MCObjectSymbolizer.cpp268
-rw-r--r--lib/MC/MCAsmInfo.cpp6
-rw-r--r--lib/MC/MCAsmInfoCOFF.cpp1
-rw-r--r--lib/MC/MCAsmInfoDarwin.cpp4
-rw-r--r--lib/MC/MCAsmInfoELF.cpp9
-rw-r--r--lib/MC/MCAsmStreamer.cpp24
-rw-r--r--lib/MC/MCAssembler.cpp76
-rw-r--r--lib/MC/MCContext.cpp62
-rw-r--r--lib/MC/MCDisassembler/Android.mk26
-rw-r--r--lib/MC/MCDisassembler/CMakeLists.txt3
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp65
-rw-r--r--lib/MC/MCDisassembler/Disassembler.h4
-rw-r--r--lib/MC/MCDisassembler/MCDisassembler.cpp (renamed from lib/MC/MCDisassembler.cpp)2
-rw-r--r--lib/MC/MCDisassembler/MCExternalSymbolizer.cpp (renamed from lib/MC/MCExternalSymbolizer.cpp)2
-rw-r--r--lib/MC/MCDisassembler/MCRelocationInfo.cpp (renamed from lib/MC/MCRelocationInfo.cpp)2
-rw-r--r--lib/MC/MCDwarf.cpp252
-rw-r--r--lib/MC/MCELF.cpp4
-rw-r--r--lib/MC/MCELFObjectTargetWriter.cpp3
-rw-r--r--lib/MC/MCELFStreamer.cpp57
-rw-r--r--lib/MC/MCExpr.cpp91
-rw-r--r--lib/MC/MCMachOStreamer.cpp10
-rw-r--r--lib/MC/MCNullStreamer.cpp1
-rw-r--r--lib/MC/MCObjectFileInfo.cpp59
-rw-r--r--lib/MC/MCObjectStreamer.cpp71
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp18
-rw-r--r--lib/MC/MCParser/AsmParser.cpp207
-rw-r--r--lib/MC/MCParser/COFFAsmParser.cpp4
-rw-r--r--lib/MC/MCParser/DarwinAsmParser.cpp8
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp2
-rw-r--r--lib/MC/MCParser/MCAsmLexer.cpp4
-rw-r--r--lib/MC/MCParser/MCAsmParser.cpp2
-rw-r--r--lib/MC/MCSectionCOFF.cpp18
-rw-r--r--lib/MC/MCSectionELF.cpp4
-rw-r--r--lib/MC/MCStreamer.cpp325
-rw-r--r--lib/MC/MCSubtargetInfo.cpp12
-rw-r--r--lib/MC/MCTargetOptions.cpp6
-rw-r--r--lib/MC/MCWin64EH.cpp173
-rw-r--r--lib/MC/MCWinEH.cpp84
-rw-r--r--lib/MC/MachObjectWriter.cpp82
-rw-r--r--lib/MC/Makefile2
-rw-r--r--lib/MC/StringTableBuilder.cpp45
-rw-r--r--lib/MC/SubtargetFeature.cpp18
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp338
-rw-r--r--lib/MC/WinCOFFStreamer.cpp33
-rw-r--r--lib/Object/Archive.cpp40
-rw-r--r--lib/Object/Binary.cpp37
-rw-r--r--lib/Object/COFFObjectFile.cpp1007
-rw-r--r--lib/Object/COFFYAML.cpp106
-rw-r--r--lib/Object/ELF.cpp12
-rw-r--r--lib/Object/ELFObjectFile.cpp37
-rw-r--r--lib/Object/ELFYAML.cpp159
-rw-r--r--lib/Object/Error.cpp10
-rw-r--r--lib/Object/IRObjectFile.cpp74
-rw-r--r--lib/Object/LLVMBuild.txt2
-rw-r--r--lib/Object/MachOObjectFile.cpp1379
-rw-r--r--lib/Object/MachOUniversal.cpp33
-rw-r--r--lib/Object/Object.cpp60
-rw-r--r--lib/Object/ObjectFile.cpp30
-rw-r--r--lib/Object/RecordStreamer.h4
-rw-r--r--lib/Object/SymbolicFile.cpp37
-rw-r--r--lib/Option/ArgList.cpp9
-rw-r--r--lib/Option/OptTable.cpp18
-rw-r--r--lib/Option/Option.cpp12
-rw-r--r--lib/ProfileData/Android.mk12
-rw-r--r--lib/ProfileData/CMakeLists.txt6
-rw-r--r--lib/ProfileData/CoverageMapping.cpp475
-rw-r--r--lib/ProfileData/CoverageMappingReader.cpp553
-rw-r--r--lib/ProfileData/CoverageMappingWriter.cpp187
-rw-r--r--lib/ProfileData/InstrProf.cpp6
-rw-r--r--lib/ProfileData/InstrProfIndexed.h8
-rw-r--r--lib/ProfileData/InstrProfReader.cpp100
-rw-r--r--lib/ProfileData/InstrProfWriter.cpp56
-rw-r--r--lib/ProfileData/LLVMBuild.txt2
-rw-r--r--lib/ProfileData/SampleProf.cpp51
-rw-r--r--lib/ProfileData/SampleProfReader.cpp399
-rw-r--r--lib/ProfileData/SampleProfWriter.cpp126
-rw-r--r--lib/Support/APFloat.cpp72
-rw-r--r--lib/Support/APInt.cpp63
-rw-r--r--lib/Support/Android.mk6
-rw-r--r--lib/Support/CMakeLists.txt70
-rw-r--r--lib/Support/CommandLine.cpp80
-rw-r--r--lib/Support/DataStream.cpp4
-rw-r--r--lib/Support/Debug.cpp9
-rw-r--r--lib/Support/Disassembler.cpp74
-rw-r--r--lib/Support/Dwarf.cpp105
-rw-r--r--lib/Support/ErrorHandling.cpp9
-rw-r--r--lib/Support/FileOutputBuffer.cpp19
-rw-r--r--lib/Support/FileUtilities.cpp16
-rw-r--r--lib/Support/GraphWriter.cpp5
-rw-r--r--lib/Support/Host.cpp4
-rw-r--r--lib/Support/IncludeFile.cpp20
-rw-r--r--lib/Support/LineIterator.cpp55
-rw-r--r--lib/Support/LockFileManager.cpp8
-rw-r--r--lib/Support/MD5.cpp48
-rw-r--r--lib/Support/MathExtras.cpp32
-rw-r--r--lib/Support/MemoryBuffer.cpp149
-rw-r--r--lib/Support/MemoryObject.cpp19
-rw-r--r--lib/Support/Options.cpp33
-rw-r--r--lib/Support/Path.cpp173
-rw-r--r--lib/Support/ScaledNumber.cpp3
-rw-r--r--lib/Support/SmallPtrSet.cpp27
-rw-r--r--lib/Support/SourceMgr.cpp11
-rw-r--r--lib/Support/SpecialCaseList.cpp32
-rw-r--r--lib/Support/StreamingMemoryObject.cpp (renamed from lib/Support/StreamableMemoryObject.cpp)79
-rw-r--r--lib/Support/StringRef.cpp22
-rw-r--r--lib/Support/StringRefMemoryObject.cpp29
-rw-r--r--lib/Support/TimeValue.cpp6
-rw-r--r--lib/Support/Timer.cpp8
-rw-r--r--lib/Support/ToolOutputFile.cpp15
-rw-r--r--lib/Support/Triple.cpp283
-rw-r--r--lib/Support/Unix/Host.inc6
-rw-r--r--lib/Support/Unix/Path.inc138
-rw-r--r--lib/Support/Unix/Process.inc108
-rw-r--r--lib/Support/Unix/Program.inc79
-rw-r--r--lib/Support/Unix/RWMutex.inc12
-rw-r--r--lib/Support/Unix/Signals.inc266
-rw-r--r--lib/Support/Unix/TimeValue.inc2
-rw-r--r--lib/Support/Unix/Unix.h4
-rw-r--r--lib/Support/Windows/DynamicLibrary.inc72
-rw-r--r--lib/Support/Windows/Host.inc2
-rw-r--r--lib/Support/Windows/Path.inc231
-rw-r--r--lib/Support/Windows/Process.inc101
-rw-r--r--lib/Support/Windows/Program.inc184
-rw-r--r--lib/Support/Windows/RWMutex.inc8
-rw-r--r--lib/Support/Windows/ThreadLocal.inc2
-rw-r--r--lib/Support/Windows/WindowsSupport.h3
-rw-r--r--lib/Support/Windows/explicit_symbols.inc30
-rw-r--r--lib/Support/YAMLParser.cpp60
-rw-r--r--lib/Support/YAMLTraits.cpp59
-rw-r--r--lib/Support/raw_ostream.cpp111
-rw-r--r--lib/TableGen/Main.cpp23
-rw-r--r--lib/TableGen/Record.cpp52
-rw-r--r--lib/TableGen/TGLexer.cpp3
-rw-r--r--lib/TableGen/TGLexer.h17
-rw-r--r--lib/TableGen/TGParser.cpp96
-rw-r--r--lib/TableGen/TGParser.h4
-rw-r--r--lib/Target/AArch64/AArch64.h9
-rw-r--r--lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp6
-rw-r--r--lib/Target/AArch64/AArch64AddressTypePromotion.cpp6
-rw-r--r--lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp12
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp16
-rw-r--r--lib/Target/AArch64/AArch64BranchRelaxation.cpp11
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.td41
-rw-r--r--lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp4
-rw-r--r--lib/Target/AArch64/AArch64CollectLOH.cpp52
-rw-r--r--lib/Target/AArch64/AArch64ConditionOptimizer.cpp422
-rw-r--r--lib/Target/AArch64/AArch64ConditionalCompares.cpp12
-rw-r--r--lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp9
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp16
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp4494
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp39
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h4
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp247
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp1314
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h37
-rw-r--r--lib/Target/AArch64/AArch64InstrAtomics.td9
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td72
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp952
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h40
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td450
-rw-r--r--lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp16
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp3
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.h6
-rw-r--r--lib/Target/AArch64/AArch64MachineCombinerPattern.h42
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.h6
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.cpp383
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.h38
-rw-r--r--lib/Target/AArch64/AArch64PerfectShuffle.h5
-rw-r--r--lib/Target/AArch64/AArch64PromoteConstant.cpp6
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp12
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.h6
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td5
-rw-r--r--lib/Target/AArch64/AArch64SchedA57.td371
-rw-r--r--lib/Target/AArch64/AArch64SchedA57WriteRes.td52
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp3
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.h4
-rw-r--r--lib/Target/AArch64/AArch64StorePairSuppress.cpp14
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp34
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h38
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp95
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.h34
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.h4
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp78
-rw-r--r--lib/Target/AArch64/Android.mk2
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp229
-rw-r--r--lib/Target/AArch64/CMakeLists.txt4
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp67
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.h11
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h4
-rw-r--r--lib/Target/AArch64/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp18
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h9
-rw-r--r--lib/Target/AArch64/LLVMBuild.txt2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h101
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp13
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp70
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h8
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp5
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp5
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp47
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h15
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp7
-rw-r--r--lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp13
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.cpp36
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h13
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp6
-rw-r--r--lib/Target/ARM/ARM.h9
-rw-r--r--lib/Target/ARM/ARM.td27
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp58
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h4
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp254
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h72
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp106
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h14
-rw-r--r--lib/Target/ARM/ARMCallingConv.h21
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp1909
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp41
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h4
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp7
-rw-r--r--lib/Target/ARM/ARMFPUName.def1
-rw-r--r--lib/Target/ARM/ARMFPUName.h6
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp153
-rw-r--r--lib/Target/ARM/ARMFeatures.h4
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp297
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h4
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp4
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.h6
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp86
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp636
-rw-r--r--lib/Target/ARM/ARMISelLowering.h36
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td10
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp50
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h8
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td109
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td28
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td14
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td128
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td79
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp344
-rw-r--r--lib/Target/ARM/ARMJITInfo.h177
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp273
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h34
-rw-r--r--lib/Target/ARM/ARMPerfectShuffle.h5
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.h4
-rw-r--r--lib/Target/ARM/ARMRelocations.h62
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.h4
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp139
-rw-r--r--lib/Target/ARM/ARMSubtarget.h67
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp64
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h36
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h4
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp31
-rw-r--r--lib/Target/ARM/Android.mk2
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp479
-rw-r--r--lib/Target/ARM/CMakeLists.txt5
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp409
-rw-r--r--lib/Target/ARM/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp156
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h51
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMArchName.h6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp432
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h69
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h33
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h27
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h26
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp8
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp17
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.h11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp206
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h6
-rw-r--r--lib/Target/ARM/MCTargetDesc/LLVMBuild.txt2
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp4
-rw-r--r--lib/Target/ARM/Makefile2
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp110
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.h4
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp34
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.h9
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp391
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h6
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp7
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp9
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h10
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.cpp2
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.h6
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp6
-rw-r--r--lib/Target/Android.mk1
-rw-r--r--lib/Target/CMakeLists.txt1
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h16
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt25
-rw-r--r--lib/Target/Hexagon/Disassembler/CMakeLists.txt3
-rw-r--r--lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp114
-rw-r--r--lib/Target/Hexagon/Disassembler/LLVMBuild.txt (renamed from lib/Target/Hexagon/InstPrinter/LLVMBuild.txt)6
-rw-r--r--lib/Target/Hexagon/Disassembler/Makefile (renamed from lib/MC/MCAnalysis/Makefile)30
-rw-r--r--lib/Target/Hexagon/Hexagon.h4
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.h4
-rw-r--r--lib/Target/Hexagon/HexagonCFGOptimizer.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.h4
-rw-r--r--lib/Target/Hexagon/HexagonCopyToCombine.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp18
-rw-r--r--lib/Target/Hexagon/HexagonFixupHwLoops.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp19
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h4
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp22
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp41
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h4
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td19
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp39
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h9
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td99
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td62
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td5
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV4.td9
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp10
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.h20
-rw-r--r--lib/Target/Hexagon/HexagonNewValueJump.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonPeephole.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.td53
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h26
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp3
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h31
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp5
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.h4
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp114
-rw-r--r--lib/Target/Hexagon/HexagonVarargsCallingConvention.h20
-rw-r--r--lib/Target/Hexagon/InstPrinter/CMakeLists.txt3
-rw-r--r--lib/Target/Hexagon/LLVMBuild.txt4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp74
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h15
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp62
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp (renamed from lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp)54
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h (renamed from lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h)4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp1
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp88
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h60
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp11
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h4
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp61
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h26
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt2
-rw-r--r--lib/Target/Hexagon/Makefile16
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h4
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h4
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp2
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h4
-rw-r--r--lib/Target/MSP430/MSP430.h4
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp3
-rw-r--r--lib/Target/MSP430/MSP430CallingConv.td2
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp69
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h4
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp6
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp46
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h6
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h4
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td232
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.cpp5
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.h4
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h4
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp48
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h6
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.td38
-rw-r--r--lib/Target/MSP430/MSP430SelectionDAGInfo.h4
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.cpp2
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h22
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp4
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h30
-rw-r--r--lib/Target/Mips/Android.mk4
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp1289
-rw-r--r--lib/Target/Mips/CMakeLists.txt7
-rw-r--r--lib/Target/Mips/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp308
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp22
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h5
-rw-r--r--lib/Target/Mips/LLVMBuild.txt2
-rw-r--r--lib/Target/Mips/MCTargetDesc/Android.mk1
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h15
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp64
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h46
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp129
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h36
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp5
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp9
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp92
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp341
-rw-r--r--lib/Target/Mips/Makefile2
-rw-r--r--lib/Target/Mips/MicroMipsInstrFPU.td8
-rw-r--r--lib/Target/Mips/MicroMipsInstrFormats.td196
-rw-r--r--lib/Target/Mips/MicroMipsInstrInfo.td291
-rw-r--r--lib/Target/Mips/Mips.h6
-rw-r--r--lib/Target/Mips/Mips.td10
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp8
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h4
-rw-r--r--lib/Target/Mips/Mips16HardFloat.cpp2
-rw-r--r--lib/Target/Mips/Mips16HardFloat.h7
-rw-r--r--lib/Target/Mips/Mips16HardFloatInfo.h4
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.cpp15
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.h4
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp70
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.h22
-rw-r--r--lib/Target/Mips/Mips16InstrFormats.td2
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp74
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h6
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td4
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.cpp8
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.h4
-rw-r--r--lib/Target/Mips/Mips32r6InstrFormats.td2
-rw-r--r--lib/Target/Mips/Mips32r6InstrInfo.td4
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td24
-rw-r--r--lib/Target/Mips/Mips64r6InstrInfo.td4
-rw-r--r--lib/Target/Mips/MipsABIInfo.cpp45
-rw-r--r--lib/Target/Mips/MipsABIInfo.h61
-rw-r--r--lib/Target/Mips/MipsAnalyzeImmediate.cpp3
-rw-r--r--lib/Target/Mips/MipsAnalyzeImmediate.h4
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp80
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h18
-rw-r--r--lib/Target/Mips/MipsCCState.cpp142
-rw-r--r--lib/Target/Mips/MipsCCState.h136
-rw-r--r--lib/Target/Mips/MipsCallingConv.td236
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp434
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp24
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp56
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp1118
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp7
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h7
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.h6
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1079
-rw-r--r--lib/Target/Mips/MipsISelLowering.h174
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td48
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp50
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h14
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td183
-rw-r--r--lib/Target/Mips/MipsJITInfo.cpp286
-rw-r--r--lib/Target/Mips/MipsJITInfo.h71
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp20
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h4
-rw-r--r--lib/Target/Mips/MipsMSAInstrInfo.td2
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp12
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h19
-rw-r--r--lib/Target/Mips/MipsModuleISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Mips/MipsModuleISelDAGToDAG.h12
-rw-r--r--lib/Target/Mips/MipsOptimizePICCall.cpp2
-rw-r--r--lib/Target/Mips/MipsOptionRecord.h78
-rw-r--r--lib/Target/Mips/MipsOs16.h8
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp12
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h4
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td32
-rw-r--r--lib/Target/Mips/MipsRelocations.h41
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp199
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h4
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp3
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.h4
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp186
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.h22
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp97
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h6
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp4
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.h4
-rw-r--r--lib/Target/Mips/MipsSelectionDAGInfo.h4
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp175
-rw-r--r--lib/Target/Mips/MipsSubtarget.h107
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp101
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h56
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp73
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.h23
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h71
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt24
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp4
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h4
-rw-r--r--lib/Target/NVPTX/LLVMBuild.txt2
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h15
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp4
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h8
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h4
-rw-r--r--lib/Target/NVPTX/ManagedStringPool.h4
-rw-r--r--lib/Target/NVPTX/NVPTX.h6
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.h6
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp277
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h21
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp10
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h4
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp15
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp1689
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h14
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp1529
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h324
-rw-r--r--lib/Target/NVPTX/NVPTXInstrFormats.td24
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.h4
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td37
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td3456
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.h4
-rw-r--r--lib/Target/NVPTX/NVPTXLowerStructArgs.cpp134
-rw-r--r--lib/Target/NVPTX/NVPTXMCExpr.h7
-rw-r--r--lib/Target/NVPTX/NVPTXMachineFunctionInfo.h5
-rw-r--r--lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp9
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h4
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td4
-rw-r--r--lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp276
-rw-r--r--lib/Target/NVPTX/NVPTXSection.h4
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp3
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h31
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp14
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h39
-rw-r--r--lib/Target/NVPTX/NVPTXTargetObjectFile.h10
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp115
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.cpp6
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.h4
-rw-r--r--lib/Target/NVPTX/NVPTXutil.h4
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp262
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt5
-rw-r--r--lib/Target/PowerPC/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp28
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp18
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h5
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp26
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp75
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h8
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp57
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp73
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h11
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp64
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h4
-rw-r--r--lib/Target/PowerPC/Makefile2
-rw-r--r--lib/Target/PowerPC/PPC.h23
-rw-r--r--lib/Target/PowerPC/PPC.td31
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp440
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp3
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp7
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td37
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp293
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp125
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp60
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h19
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h12
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp329
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp1466
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h82
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td27
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td245
-rw-r--r--lib/Target/PowerPC/PPCInstrBuilder.h4
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td70
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp49
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td425
-rw-r--r--lib/Target/PowerPC/PPCInstrSPE.td447
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td110
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp482
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h46
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp22
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.cpp9
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h19
-rw-r--r--lib/Target/PowerPC/PPCPerfectShuffle.h5
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp82
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h4
-rw-r--r--lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td1
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.h4
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp140
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h90
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp94
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h42
-rw-r--r--lib/Target/PowerPC/PPCTargetObjectFile.h4
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h6
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp69
-rw-r--r--lib/Target/R600/AMDGPU.h20
-rw-r--r--lib/Target/R600/AMDGPU.td43
-rw-r--r--lib/Target/R600/AMDGPUAlwaysInlinePass.cpp66
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp154
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.h24
-rw-r--r--lib/Target/R600/AMDGPUCallingConv.td32
-rw-r--r--lib/Target/R600/AMDGPUFrameLowering.h6
-rw-r--r--lib/Target/R600/AMDGPUISelDAGToDAG.cpp489
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp964
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h98
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.cpp56
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h20
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.td60
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td125
-rw-r--r--lib/Target/R600/AMDGPUIntrinsicInfo.cpp2
-rw-r--r--lib/Target/R600/AMDGPUIntrinsicInfo.h8
-rw-r--r--lib/Target/R600/AMDGPUIntrinsics.td3
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.cpp23
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.h6
-rw-r--r--lib/Target/R600/AMDGPUMachineFunction.cpp8
-rw-r--r--lib/Target/R600/AMDGPUMachineFunction.h19
-rw-r--r--lib/Target/R600/AMDGPUPromoteAlloca.cpp48
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.h6
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp79
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h80
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp95
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.h35
-rw-r--r--lib/Target/R600/AMDGPUTargetTransformInfo.cpp58
-rw-r--r--lib/Target/R600/AMDILCFGStructurizer.cpp5
-rw-r--r--lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp320
-rw-r--r--lib/Target/R600/AsmParser/CMakeLists.txt3
-rw-r--r--lib/Target/R600/AsmParser/LLVMBuild.txt (renamed from lib/ExecutionEngine/JIT/LLVMBuild.txt)9
-rw-r--r--lib/Target/R600/AsmParser/Makefile (renamed from lib/Target/Hexagon/InstPrinter/Makefile)8
-rw-r--r--lib/Target/R600/CMakeLists.txt7
-rw-r--r--lib/Target/R600/CaymanInstructions.td2
-rw-r--r--lib/Target/R600/EvergreenInstructions.td76
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp224
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h20
-rw-r--r--lib/Target/R600/LLVMBuild.txt5
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp57
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp3
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h34
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp22
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h18
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h12
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp9
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h6
-rw-r--r--lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp58
-rw-r--r--lib/Target/R600/Makefile4
-rw-r--r--lib/Target/R600/R600ClauseMergePass.cpp3
-rw-r--r--lib/Target/R600/R600ControlFlowFinalizer.cpp25
-rw-r--r--lib/Target/R600/R600Defines.h6
-rw-r--r--lib/Target/R600/R600EmitClauseMarkers.cpp3
-rw-r--r--lib/Target/R600/R600ExpandSpecialInstrs.cpp3
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp178
-rw-r--r--lib/Target/R600/R600ISelLowering.h6
-rw-r--r--lib/Target/R600/R600InstrFormats.td3
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp35
-rw-r--r--lib/Target/R600/R600InstrInfo.h13
-rw-r--r--lib/Target/R600/R600Instructions.td39
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.h6
-rw-r--r--lib/Target/R600/R600MachineScheduler.cpp33
-rw-r--r--lib/Target/R600/R600MachineScheduler.h4
-rw-r--r--lib/Target/R600/R600OptimizeVectorRegisters.cpp8
-rw-r--r--lib/Target/R600/R600Packetizer.cpp15
-rw-r--r--lib/Target/R600/R600RegisterInfo.h6
-rw-r--r--lib/Target/R600/SIDefines.h41
-rw-r--r--lib/Target/R600/SIFixSGPRCopies.cpp70
-rw-r--r--lib/Target/R600/SIFixSGPRLiveRanges.cpp147
-rw-r--r--lib/Target/R600/SIISelLowering.cpp1507
-rw-r--r--lib/Target/R600/SIISelLowering.h56
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp13
-rw-r--r--lib/Target/R600/SIInstrFormats.td376
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp1521
-rw-r--r--lib/Target/R600/SIInstrInfo.h129
-rw-r--r--lib/Target/R600/SIInstrInfo.td1161
-rw-r--r--lib/Target/R600/SIInstructions.td2761
-rw-r--r--lib/Target/R600/SIIntrinsics.td85
-rw-r--r--lib/Target/R600/SILoadStoreOptimizer.cpp417
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp70
-rw-r--r--lib/Target/R600/SILowerI1Copies.cpp45
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.cpp94
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.h39
-rw-r--r--lib/Target/R600/SIRegisterInfo.cpp351
-rw-r--r--lib/Target/R600/SIRegisterInfo.h62
-rw-r--r--lib/Target/R600/SIRegisterInfo.td44
-rw-r--r--lib/Target/R600/SIShrinkInstructions.cpp271
-rw-r--r--lib/Target/R600/SITypeRewriter.cpp2
-rw-r--r--lib/Target/README.txt84
-rw-r--r--lib/Target/Sparc/AsmParser/SparcAsmParser.cpp8
-rw-r--r--lib/Target/Sparc/CMakeLists.txt5
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp9
-rw-r--r--lib/Target/Sparc/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/Sparc/Disassembler/SparcDisassembler.cpp61
-rw-r--r--lib/Target/Sparc/InstPrinter/SparcInstPrinter.h4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp5
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp6
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h4
-rw-r--r--lib/Target/Sparc/Makefile2
-rw-r--r--lib/Target/Sparc/README.txt2
-rw-r--r--lib/Target/Sparc/Sparc.h6
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp5
-rw-r--r--lib/Target/Sparc/SparcCodeEmitter.cpp280
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp6
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h4
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp15
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp63
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h4
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h4
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td2
-rw-r--r--lib/Target/Sparc/SparcInstrVIS.td34
-rw-r--r--lib/Target/Sparc/SparcJITInfo.cpp326
-rw-r--r--lib/Target/Sparc/SparcJITInfo.h67
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.h4
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp6
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h4
-rw-r--r--lib/Target/Sparc/SparcRelocations.h56
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.h4
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h25
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp18
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h29
-rw-r--r--lib/Target/Sparc/SparcTargetObjectFile.h4
-rw-r--r--lib/Target/Sparc/SparcTargetStreamer.h4
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp8
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt2
-rw-r--r--lib/Target/SystemZ/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp21
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h4
-rw-r--r--lib/Target/SystemZ/LLVMBuild.txt2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h4
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp15
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h4
-rw-r--r--lib/Target/SystemZ/Makefile1
-rw-r--r--lib/Target/SystemZ/SystemZ.h4
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.h4
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.h4
-rw-r--r--lib/Target/SystemZ/SystemZConstantPoolValue.h4
-rw-r--r--lib/Target/SystemZ/SystemZElimCompare.cpp4
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp16
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h4
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp12
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp54
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h11
-rw-r--r--lib/Target/SystemZ/SystemZInstrBuilder.h4
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h4
-rw-r--r--lib/Target/SystemZ/SystemZLongBranch.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZMCInstLower.h4
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h4
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp8
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h4
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h4
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp2
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h22
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp5
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h28
-rw-r--r--lib/Target/Target.cpp2
-rw-r--r--lib/Target/TargetLibraryInfo.cpp16
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp28
-rw-r--r--lib/Target/TargetMachine.cpp25
-rw-r--r--lib/Target/TargetMachineC.cpp15
-rw-r--r--lib/Target/TargetSubtargetInfo.cpp13
-rw-r--r--lib/Target/X86/Android.mk3
-rw-r--r--lib/Target/X86/AsmParser/CMakeLists.txt3
-rw-r--r--lib/Target/X86/AsmParser/LLVMBuild.txt2
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp981
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.h28
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp463
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParserCommon.h10
-rw-r--r--lib/Target/X86/AsmParser/X86Operand.h23
-rw-r--r--lib/Target/X86/CMakeLists.txt3
-rw-r--r--lib/Target/X86/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp80
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h16
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp121
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h4
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h27
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp28
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h15
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp266
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.h6
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/LLVMBuild.txt2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp28
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h131
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86FixupKinds.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp24
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h6
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp120
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp14
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp13
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp13
-rw-r--r--lib/Target/X86/README.txt177
-rw-r--r--lib/Target/X86/Utils/LLVMBuild.txt2
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp177
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h31
-rw-r--r--lib/Target/X86/X86.h14
-rw-r--r--lib/Target/X86/X86.td70
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp30
-rw-r--r--lib/Target/X86/X86AsmPrinter.h78
-rw-r--r--lib/Target/X86/X86AtomicExpandPass.cpp287
-rw-r--r--lib/Target/X86/X86CallingConv.h17
-rw-r--r--lib/Target/X86/X86CallingConv.td81
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp1498
-rw-r--r--lib/Target/X86/X86FastISel.cpp950
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp13
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp392
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp244
-rw-r--r--lib/Target/X86/X86FrameLowering.h9
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp216
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp6722
-rw-r--r--lib/Target/X86/X86ISelLowering.h249
-rw-r--r--lib/Target/X86/X86InstrAVX512.td2791
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td48
-rw-r--r--lib/Target/X86/X86InstrBuilder.h4
-rw-r--r--lib/Target/X86/X86InstrCompiler.td126
-rw-r--r--lib/Target/X86/X86InstrExtension.td14
-rw-r--r--lib/Target/X86/X86InstrFPStack.td3
-rw-r--r--lib/Target/X86/X86InstrFormats.td93
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td31
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp464
-rw-r--r--lib/Target/X86/X86InstrInfo.h9
-rw-r--r--lib/Target/X86/X86InstrInfo.td33
-rw-r--r--lib/Target/X86/X86InstrMMX.td25
-rw-r--r--lib/Target/X86/X86InstrSGX.td24
-rw-r--r--lib/Target/X86/X86InstrSSE.td627
-rw-r--r--lib/Target/X86/X86InstrSystem.td12
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h320
-rw-r--r--lib/Target/X86/X86JITInfo.cpp588
-rw-r--r--lib/Target/X86/X86JITInfo.h79
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp415
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h25
-rw-r--r--lib/Target/X86/X86PadShortFunction.cpp5
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp35
-rw-r--r--lib/Target/X86/X86RegisterInfo.h4
-rw-r--r--lib/Target/X86/X86RegisterInfo.td41
-rw-r--r--lib/Target/X86/X86Relocations.h52
-rw-r--r--lib/Target/X86/X86SchedHaswell.td1885
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td1
-rw-r--r--lib/Target/X86/X86Schedule.td21
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td6
-rw-r--r--lib/Target/X86/X86ScheduleBtVer2.td341
-rw-r--r--lib/Target/X86/X86ScheduleSLM.td2
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp37
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.h9
-rw-r--r--lib/Target/X86/X86Subtarget.cpp99
-rw-r--r--lib/Target/X86/X86Subtarget.h100
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp91
-rw-r--r--lib/Target/X86/X86TargetMachine.h37
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp63
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h9
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp112
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp2
-rw-r--r--lib/Target/XCore/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/XCore/Disassembler/XCoreDisassembler.cpp57
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.h4
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp1
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h4
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp8
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h4
-rw-r--r--lib/Target/XCore/XCore.h4
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp4
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp26
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h6
-rw-r--r--lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp3
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp60
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h6
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp9
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h4
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td4
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.h4
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h6
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp17
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h4
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.cpp2
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.h4
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h22
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp11
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h27
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.cpp11
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.h7
-rw-r--r--lib/Target/XCore/XCoreTargetStreamer.h4
-rw-r--r--lib/Target/XCore/XCoreTargetTransformInfo.cpp6
-rw-r--r--lib/Transforms/Hello/CMakeLists.txt4
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp129
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp2
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp28
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp10
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp30
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp51
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp109
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp4
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp4
-rw-r--r--lib/Transforms/IPO/Inliner.cpp20
-rw-r--r--lib/Transforms/IPO/Internalize.cpp4
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp91
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp210
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp27
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h54
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp188
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp372
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp193
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp129
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp227
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp192
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp333
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp12
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp45
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp53
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp77
-rw-r--r--lib/Transforms/InstCombine/InstCombineWorklist.h4
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp252
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp361
-rw-r--r--lib/Transforms/Instrumentation/Android.mk1
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt1
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp282
-rw-r--r--lib/Transforms/Instrumentation/DebugIR.cpp8
-rw-r--r--lib/Transforms/Instrumentation/DebugIR.h6
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp14
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp1
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp140
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp294
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp42
-rw-r--r--lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h6
-rw-r--r--lib/Transforms/ObjCARC/Android.mk3
-rw-r--r--lib/Transforms/ObjCARC/CMakeLists.txt1
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.cpp10
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.h10
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.cpp1
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.h20
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp6
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h6
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCContract.cpp13
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp83
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp4
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.h6
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp92
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp2
-rw-r--r--lib/Transforms/Scalar/AlignmentFromAssumptions.cpp428
-rw-r--r--lib/Transforms/Scalar/Android.mk3
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt2
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp4
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp18
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp9
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp29
-rw-r--r--lib/Transforms/Scalar/GVN.cpp72
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp179
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp131
-rw-r--r--lib/Transforms/Scalar/LICM.cpp64
-rw-r--r--lib/Transforms/Scalar/LLVMBuild.txt2
-rw-r--r--lib/Transforms/Scalar/LoadCombine.cpp25
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp5
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp10
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp58
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp48
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp72
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp155
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp23
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp66
-rw-r--r--lib/Transforms/Scalar/MergedLoadStoreMotion.cpp604
-rw-r--r--lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp4
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp469
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp8
-rw-r--r--lib/Transforms/Scalar/SROA.cpp539
-rw-r--r--lib/Transforms/Scalar/SampleProfile.cpp570
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp19
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp35
-rw-r--r--lib/Transforms/Scalar/Scalarizer.cpp33
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp391
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp27
-rw-r--r--lib/Transforms/Scalar/Sink.cpp11
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp6
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp44
-rw-r--r--lib/Transforms/Utils/AddDiscriminators.cpp6
-rw-r--r--lib/Transforms/Utils/Android.mk2
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp29
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp24
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp56
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt7
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp9
-rw-r--r--lib/Transforms/Utils/CtorUtils.cpp71
-rw-r--r--lib/Transforms/Utils/FlattenCFG.cpp9
-rw-r--r--lib/Transforms/Utils/GlobalStatus.cpp7
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp486
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp16
-rw-r--r--lib/Transforms/Utils/Local.cpp136
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp51
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp81
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp257
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp48
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp6
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp2
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp35
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp786
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp23
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp6
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp3659
-rw-r--r--lib/Transforms/Utils/SymbolRewriter.cpp525
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp6
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp37
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp921
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp1304
1392 files changed, 99030 insertions, 55934 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 5cde979..5171a45 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -196,17 +196,21 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
if (!Arg->getType()->isPointerTy())
continue;
ModRefResult ArgMask;
- Location CS1Loc =
- getArgLocation(CS1, (unsigned) std::distance(CS1.arg_begin(), I),
- ArgMask);
- if ((getModRefInfo(CS2, CS1Loc) & ArgMask) != NoModRef) {
- R = Mask;
+ Location CS1Loc = getArgLocation(
+ CS1, (unsigned)std::distance(CS1.arg_begin(), I), ArgMask);
+ // ArgMask indicates what CS1 might do to CS1Loc; if CS1 might Mod
+ // CS1Loc, then we care about either a Mod or a Ref by CS2. If CS1
+ // might Ref, then we care only about a Mod by CS2.
+ ModRefResult ArgR = getModRefInfo(CS2, CS1Loc);
+ if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) ||
+ ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef))
+ R = ModRefResult((R | ArgMask) & Mask);
+
+ if (R == Mask)
break;
- }
}
}
- if (R == NoModRef)
- return R;
+ return R;
}
// If this is the end of the chain, don't forward.
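The per-argument mask logic in the hunk above can be read on its own: for each pointer argument of CS1, combine what CS1 itself may do to that location (ArgMask) with what CS2 may do to it. A minimal standalone sketch of that rule, using stand-in enum values rather than the LLVM AliasAnalysis types (names here are illustrative, not the real API):

#include <cstdio>

// Stand-in for AliasAnalysis::ModRefResult (bit flags).
enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

// Contribution of one pointer argument of CS1 to getModRefInfo(CS1, CS2).
// ArgMask: what CS1 may do to the argument's location.
// ArgR:    what CS2 may do to that same location.
// Mask:    upper bound already established for the overall result.
static ModRefResult argContribution(ModRefResult ArgMask, ModRefResult ArgR,
                                    ModRefResult Mask) {
  // If CS1 may modify the location, any Mod or Ref by CS2 is an interaction;
  // if CS1 may only read it, only a Mod by CS2 matters.
  if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) ||
      ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef))
    return ModRefResult(ArgMask & Mask);
  return NoModRef;
}

int main() {
  // CS1 and CS2 both only read the argument: no recorded interaction.
  std::printf("%d\n", argContribution(Ref, Ref, ModRef)); // 0
  // CS1 may write the argument and CS2 reads it: CS1 is seen as modifying it.
  std::printf("%d\n", argContribution(Mod, Ref, ModRef)); // 2
  return 0;
}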
@@ -247,61 +251,73 @@ AliasAnalysis::getModRefBehavior(const Function *F) {
//===----------------------------------------------------------------------===//
AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+ AAMDNodes AATags;
+ LI->getAAMetadata(AATags);
+
return Location(LI->getPointerOperand(),
- getTypeStoreSize(LI->getType()),
- LI->getMetadata(LLVMContext::MD_tbaa));
+ getTypeStoreSize(LI->getType()), AATags);
}
AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+ AAMDNodes AATags;
+ SI->getAAMetadata(AATags);
+
return Location(SI->getPointerOperand(),
- getTypeStoreSize(SI->getValueOperand()->getType()),
- SI->getMetadata(LLVMContext::MD_tbaa));
+ getTypeStoreSize(SI->getValueOperand()->getType()), AATags);
}
AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
- return Location(VI->getPointerOperand(),
- UnknownSize,
- VI->getMetadata(LLVMContext::MD_tbaa));
+ AAMDNodes AATags;
+ VI->getAAMetadata(AATags);
+
+ return Location(VI->getPointerOperand(), UnknownSize, AATags);
}
AliasAnalysis::Location
AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) {
+ AAMDNodes AATags;
+ CXI->getAAMetadata(AATags);
+
return Location(CXI->getPointerOperand(),
getTypeStoreSize(CXI->getCompareOperand()->getType()),
- CXI->getMetadata(LLVMContext::MD_tbaa));
+ AATags);
}
AliasAnalysis::Location
AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) {
+ AAMDNodes AATags;
+ RMWI->getAAMetadata(AATags);
+
return Location(RMWI->getPointerOperand(),
- getTypeStoreSize(RMWI->getValOperand()->getType()),
- RMWI->getMetadata(LLVMContext::MD_tbaa));
+ getTypeStoreSize(RMWI->getValOperand()->getType()), AATags);
}
-AliasAnalysis::Location
+AliasAnalysis::Location
AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
uint64_t Size = UnknownSize;
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
Size = C->getValue().getZExtValue();
- // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
- MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+ AAMDNodes AATags;
+ MTI->getAAMetadata(AATags);
- return Location(MTI->getRawSource(), Size, TBAATag);
+ return Location(MTI->getRawSource(), Size, AATags);
}
-AliasAnalysis::Location
+AliasAnalysis::Location
AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
uint64_t Size = UnknownSize;
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
Size = C->getValue().getZExtValue();
- // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
- MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
-
- return Location(MTI->getRawDest(), Size, TBAATag);
+ AAMDNodes AATags;
+ MTI->getAAMetadata(AATags);
+
+ return Location(MTI->getRawDest(), Size, AATags);
}
@@ -383,53 +399,6 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
return ModRef;
}
-namespace {
- /// Only find pointer captures which happen before the given instruction. Uses
- /// the dominator tree to determine whether one instruction is before another.
- /// Only support the case where the Value is defined in the same basic block
- /// as the given instruction and the use.
- struct CapturesBefore : public CaptureTracker {
- CapturesBefore(const Instruction *I, DominatorTree *DT)
- : BeforeHere(I), DT(DT), Captured(false) {}
-
- void tooManyUses() override { Captured = true; }
-
- bool shouldExplore(const Use *U) override {
- Instruction *I = cast<Instruction>(U->getUser());
- BasicBlock *BB = I->getParent();
- // We explore this usage only if the usage can reach "BeforeHere".
- // If use is not reachable from entry, there is no need to explore.
- if (BeforeHere != I && !DT->isReachableFromEntry(BB))
- return false;
- // If the value is defined in the same basic block as use and BeforeHere,
- // there is no need to explore the use if BeforeHere dominates use.
- // Check whether there is a path from I to BeforeHere.
- if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
- !isPotentiallyReachable(I, BeforeHere, DT))
- return false;
- return true;
- }
-
- bool captured(const Use *U) override {
- Instruction *I = cast<Instruction>(U->getUser());
- BasicBlock *BB = I->getParent();
- // Same logic as in shouldExplore.
- if (BeforeHere != I && !DT->isReachableFromEntry(BB))
- return false;
- if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
- !isPotentiallyReachable(I, BeforeHere, DT))
- return false;
- Captured = true;
- return true;
- }
-
- const Instruction *BeforeHere;
- DominatorTree *DT;
-
- bool Captured;
- };
-}
-
// FIXME: this is really just shoring-up a deficiency in alias analysis.
// BasicAA isn't willing to spend linear time determining whether an alloca
// was captured before or after this particular call, while we are. However,
@@ -449,9 +418,9 @@ AliasAnalysis::callCapturesBefore(const Instruction *I,
if (!CS.getInstruction() || CS.getInstruction() == Object)
return AliasAnalysis::ModRef;
- CapturesBefore CB(I, DT);
- llvm::PointerMayBeCaptured(Object, &CB);
- if (CB.Captured)
+ if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
+ /* StoreCaptures */ true, I, DT,
+ /* include Object */ true))
return AliasAnalysis::ModRef;
unsigned ArgNo = 0;
@@ -470,7 +439,7 @@ AliasAnalysis::callCapturesBefore(const Instruction *I,
// assume that the call could touch the pointer, even though it doesn't
// escape.
if (isNoAlias(AliasAnalysis::Location(*CI),
- AliasAnalysis::Location(Object)))
+ AliasAnalysis::Location(Object)))
continue;
if (CS.doesNotAccessMemory(ArgNo))
continue;
@@ -577,3 +546,13 @@ bool llvm::isIdentifiedObject(const Value *V) {
return A->hasNoAliasAttr() || A->hasByValAttr();
return false;
}
+
+/// isIdentifiedFunctionLocal - Return true if V is unambiguously identified
+/// at the function-level. Different IdentifiedFunctionLocals can't alias.
+/// Further, an IdentifiedFunctionLocal can not alias with any function
+/// arguments other than itself, which is not necessarily true for
+/// IdentifiedObjects.
+bool llvm::isIdentifiedFunctionLocal(const Value *V)
+{
+ return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V);
+}
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index d9fa5a5..fe4bd4c 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -43,7 +43,7 @@ static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
-static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden);
+static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden);
namespace {
class AAEval : public FunctionPass {
@@ -153,9 +153,9 @@ bool AAEval::runOnFunction(Function &F) {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
if (I->getType()->isPointerTy()) // Add all pointer instructions.
Pointers.insert(&*I);
- if (EvalTBAA && isa<LoadInst>(&*I))
+ if (EvalAAMD && isa<LoadInst>(&*I))
Loads.insert(&*I);
- if (EvalTBAA && isa<StoreInst>(&*I))
+ if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
if (CallSite CS = cast<Value>(&Inst)) {
@@ -213,7 +213,7 @@ bool AAEval::runOnFunction(Function &F) {
}
}
- if (EvalTBAA) {
+ if (EvalAAMD) {
// iterate over all pairs of load, store
for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end();
I1 != E; ++I1) {
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index a45fe23..45442b0 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -47,18 +47,21 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
// If the pointers are not a must-alias pair, this set becomes a may alias.
if (AA.alias(AliasAnalysis::Location(L->getValue(),
L->getSize(),
- L->getTBAAInfo()),
+ L->getAAInfo()),
AliasAnalysis::Location(R->getValue(),
R->getSize(),
- R->getTBAAInfo()))
+ R->getAAInfo()))
!= AliasAnalysis::MustAlias)
AliasTy = MayAlias;
}
+ bool ASHadUnknownInsts = !AS.UnknownInsts.empty();
if (UnknownInsts.empty()) { // Merge call sites...
- if (!AS.UnknownInsts.empty())
+ if (ASHadUnknownInsts) {
std::swap(UnknownInsts, AS.UnknownInsts);
- } else if (!AS.UnknownInsts.empty()) {
+ addRef();
+ }
+ } else if (ASHadUnknownInsts) {
UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
AS.UnknownInsts.clear();
}
@@ -76,6 +79,8 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
AS.PtrListEnd = &AS.PtrList;
assert(*AS.PtrListEnd == nullptr && "End of list is not null?");
}
+ if (ASHadUnknownInsts)
+ AS.dropRef(AST);
}
void AliasSetTracker::removeAliasSet(AliasSet *AS) {
@@ -92,7 +97,7 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
}
void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
- uint64_t Size, const MDNode *TBAAInfo,
+ uint64_t Size, const AAMDNodes &AAInfo,
bool KnownMustAlias) {
assert(!Entry.hasAliasSet() && "Entry already in set!");
@@ -102,17 +107,17 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
AliasAnalysis &AA = AST.getAliasAnalysis();
AliasAnalysis::AliasResult Result =
AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
- P->getTBAAInfo()),
- AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+ P->getAAInfo()),
+ AliasAnalysis::Location(Entry.getValue(), Size, AAInfo));
if (Result != AliasAnalysis::MustAlias)
AliasTy = MayAlias;
else // First entry of must alias must have maximum size!
- P->updateSizeAndTBAAInfo(Size, TBAAInfo);
+ P->updateSizeAndAAInfo(Size, AAInfo);
assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
}
Entry.setAliasSet(this);
- Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
// Add it to the end of the list...
assert(*PtrListEnd == nullptr && "End of list is not null?");
@@ -123,6 +128,8 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
}
void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
+ if (UnknownInsts.empty())
+ addRef();
UnknownInsts.push_back(I);
if (!I->mayWriteToMemory()) {
@@ -140,7 +147,7 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
/// alias one of the members in the set.
///
bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
AliasAnalysis &AA) const {
if (AliasTy == MustAlias) {
assert(UnknownInsts.empty() && "Illegal must alias set!");
@@ -151,23 +158,23 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
assert(SomePtr && "Empty must-alias set??");
return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
SomePtr->getSize(),
- SomePtr->getTBAAInfo()),
- AliasAnalysis::Location(Ptr, Size, TBAAInfo));
+ SomePtr->getAAInfo()),
+ AliasAnalysis::Location(Ptr, Size, AAInfo));
}
// If this is a may-alias set, we have to check all of the pointers in the set
// to be sure it doesn't alias the set...
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+ if (AA.alias(AliasAnalysis::Location(Ptr, Size, AAInfo),
AliasAnalysis::Location(I.getPointer(), I.getSize(),
- I.getTBAAInfo())))
+ I.getAAInfo())))
return true;
// Check the unknown instructions...
if (!UnknownInsts.empty()) {
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
if (AA.getModRefInfo(UnknownInsts[i],
- AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+ AliasAnalysis::Location(Ptr, Size, AAInfo)) !=
AliasAnalysis::NoModRef)
return true;
}
@@ -190,7 +197,7 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
for (iterator I = begin(), E = end(); I != E; ++I)
if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(),
I.getSize(),
- I.getTBAAInfo())) !=
+ I.getAAInfo())) !=
AliasAnalysis::NoModRef)
return true;
@@ -216,15 +223,16 @@ void AliasSetTracker::clear() {
///
AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
uint64_t Size,
- const MDNode *TBAAInfo) {
+ const AAMDNodes &AAInfo) {
AliasSet *FoundSet = nullptr;
- for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
if (!FoundSet) { // If this is the first alias set ptr can go into.
- FoundSet = I; // Remember it.
+ FoundSet = Cur; // Remember it.
} else { // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
}
@@ -235,25 +243,30 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
/// this alias set, false otherwise. This does not modify the AST object or
/// alias sets.
bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
- const MDNode *TBAAInfo) const {
+ const AAMDNodes &AAInfo) const {
for (const_iterator I = begin(), E = end(); I != E; ++I)
- if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, AAInfo, AA))
return true;
return false;
}
-
+bool AliasSetTracker::containsUnknown(Instruction *Inst) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesUnknownInst(Inst, AA))
+ return true;
+ return false;
+}
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = nullptr;
- for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
continue;
-
if (!FoundSet) // If this is the first alias set ptr can go into.
- FoundSet = I; // Remember it.
- else if (!I->Forward) // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ FoundSet = Cur; // Remember it.
+ else if (!Cur->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
return FoundSet;
}
@@ -264,67 +277,75 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
/// getAliasSetForPointer - Return the alias set that the specified pointer
/// lives in.
AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
bool *New) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
// Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
- Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
}
- if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, AAInfo)) {
// Add it to the alias set it aliases.
- AS->addPointer(*this, Entry, Size, TBAAInfo);
+ AS->addPointer(*this, Entry, Size, AAInfo);
return *AS;
}
if (New) *New = true;
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
- AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
+ AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
return AliasSets.back();
}
-bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
bool NewPtr;
- addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
+ addPointer(Ptr, Size, AAInfo, AliasSet::NoModRef, NewPtr);
return NewPtr;
}
bool AliasSetTracker::add(LoadInst *LI) {
if (LI->getOrdering() > Monotonic) return addUnknown(LI);
+
+ AAMDNodes AAInfo;
+ LI->getAAMetadata(AAInfo);
+
AliasSet::AccessType ATy = AliasSet::Refs;
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
AA.getTypeStoreSize(LI->getType()),
- LI->getMetadata(LLVMContext::MD_tbaa),
- ATy, NewPtr);
+ AAInfo, ATy, NewPtr);
if (LI->isVolatile()) AS.setVolatile();
return NewPtr;
}
bool AliasSetTracker::add(StoreInst *SI) {
if (SI->getOrdering() > Monotonic) return addUnknown(SI);
+
+ AAMDNodes AAInfo;
+ SI->getAAMetadata(AAInfo);
+
AliasSet::AccessType ATy = AliasSet::Mods;
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
AA.getTypeStoreSize(Val->getType()),
- SI->getMetadata(LLVMContext::MD_tbaa),
- ATy, NewPtr);
+ AAInfo, ATy, NewPtr);
if (SI->isVolatile()) AS.setVolatile();
return NewPtr;
}
bool AliasSetTracker::add(VAArgInst *VAAI) {
+ AAMDNodes AAInfo;
+ VAAI->getAAMetadata(AAInfo);
+
bool NewPtr;
addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
- VAAI->getMetadata(LLVMContext::MD_tbaa),
- AliasSet::ModRef, NewPtr);
+ AAInfo, AliasSet::ModRef, NewPtr);
return NewPtr;
}
@@ -382,7 +403,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
bool X;
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
- ASI.getTBAAInfo(),
+ ASI.getAAInfo(),
(AliasSet::AccessType)AS.AccessTy, X);
if (AS.isVolatile()) NewAS.setVolatile();
}
@@ -393,6 +414,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
/// tracker.
void AliasSetTracker::remove(AliasSet &AS) {
// Drop all call sites.
+ if (!AS.UnknownInsts.empty())
+ AS.dropRef(*this);
AS.UnknownInsts.clear();
// Clear the alias set.
@@ -419,8 +442,8 @@ void AliasSetTracker::remove(AliasSet &AS) {
}
bool
-AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
- AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
@@ -428,8 +451,11 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
bool AliasSetTracker::remove(LoadInst *LI) {
uint64_t Size = AA.getTypeStoreSize(LI->getType());
- const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
- AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
+
+ AAMDNodes AAInfo;
+ LI->getAAMetadata(AAInfo);
+
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
@@ -437,17 +463,22 @@ bool AliasSetTracker::remove(LoadInst *LI) {
bool AliasSetTracker::remove(StoreInst *SI) {
uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
- const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
- AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
+
+ AAMDNodes AAInfo;
+ SI->getAAMetadata(AAInfo);
+
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
}
bool AliasSetTracker::remove(VAArgInst *VAAI) {
+ AAMDNodes AAInfo;
+ VAAI->getAAMetadata(AAInfo);
+
AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
- AliasAnalysis::UnknownSize,
- VAAI->getMetadata(LLVMContext::MD_tbaa));
+ AliasAnalysis::UnknownSize, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
@@ -489,10 +520,10 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
if (Inst->mayReadOrWriteMemory()) {
// Scan all the alias sets to see if this call site is contained.
- for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->Forward) continue;
-
- I->removeUnknownInst(Inst);
+ for (iterator I = begin(), E = end(); I != E;) {
+ iterator Cur = I++;
+ if (!Cur->Forward)
+ Cur->removeUnknownInst(*this, Inst);
}
}
}
@@ -536,7 +567,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
I = PointerMap.find_as(From);
AliasSet *AS = I->second->getAliasSet(*this);
AS->addPointer(*this, Entry, I->second->getSize(),
- I->second->getTBAAInfo(),
+ I->second->getAAInfo(),
true);
}
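The rewritten loops above (in findAliasSetForPointer, findAliasSetForUnknownInst and deleteValue) all use the same idiom: copy the iterator and advance it before calling into code that may merge away or drop the current alias set. A small standalone sketch of the idiom with a plain std::list, assuming erase as the destructive operation:

#include <cstdio>
#include <list>

int main() {
  std::list<int> Sets = {1, 2, 3, 4, 5};

  for (auto I = Sets.begin(), E = Sets.end(); I != E;) {
    auto Cur = I++;          // grab the current element, then advance
    if (*Cur % 2 == 0)
      Sets.erase(Cur);       // invalidates only Cur; I already points past it
  }

  for (int V : Sets)
    std::printf("%d ", V);   // prints: 1 3 5
  std::printf("\n");
  return 0;
}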
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index ade940a..f64bf0e 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGPrinterPass(Registry);
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
+ initializeCFLAliasAnalysisPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
initializeDominanceFrontierPass(Registry);
@@ -57,7 +58,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializePostDominatorTreePass(Registry);
- initializeRegionInfoPass(Registry);
+ initializeRegionInfoPassPass(Registry);
initializeRegionViewerPass(Registry);
initializeRegionPrinterPass(Registry);
initializeRegionOnlyViewerPass(Registry);
@@ -66,6 +67,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeScalarEvolutionAliasAnalysisPass(Registry);
initializeTargetTransformInfoAnalysisGroup(Registry);
initializeTypeBasedAliasAnalysisPass(Registry);
+ initializeScopedNoAliasAAPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index 4e435a1..8770fa7 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -7,12 +7,15 @@ analysis_SRC_FILES := \
AliasDebugger.cpp \
AliasSetTracker.cpp \
Analysis.cpp \
+ AssumptionTracker.cpp \
BasicAliasAnalysis.cpp \
BlockFrequencyInfo.cpp \
BlockFrequencyInfoImpl.cpp \
BranchProbabilityInfo.cpp \
CFG.cpp \
CFGPrinter.cpp \
+ CFLAliasAnalysis.cpp \
+ CGSCCPassManager.cpp \
CaptureTracking.cpp \
CodeMetrics.cpp \
ConstantFolding.cpp \
@@ -21,7 +24,7 @@ analysis_SRC_FILES := \
DependenceAnalysis.cpp \
DomPrinter.cpp \
DominanceFrontier.cpp \
- CGSCCPassManager.cpp \
+ FunctionTargetTransformInfo.cpp \
IVUsers.cpp \
InstCount.cpp \
InstructionSimplify.cpp \
@@ -51,6 +54,7 @@ analysis_SRC_FILES := \
ScalarEvolutionAliasAnalysis.cpp \
ScalarEvolutionExpander.cpp \
ScalarEvolutionNormalization.cpp \
+ ScopedNoAliasAA.cpp \
SparsePropagation.cpp \
TargetTransformInfo.cpp \
Trace.cpp \
diff --git a/lib/Analysis/AssumptionTracker.cpp b/lib/Analysis/AssumptionTracker.cpp
new file mode 100644
index 0000000..775ce1d
--- /dev/null
+++ b/lib/Analysis/AssumptionTracker.cpp
@@ -0,0 +1,110 @@
+//===- AssumptionTracker.cpp - Track @llvm.assume -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that keeps track of @llvm.assume intrinsics in
+// the functions of a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void AssumptionTracker::FunctionCallbackVH::deleted() {
+ AT->forgetCachedAssumptions(cast<Function>(getValPtr()));
+ // 'this' now dangles!
+}
+
+void AssumptionTracker::forgetCachedAssumptions(Function *F) {
+ auto I = CachedAssumeCalls.find_as(F);
+ if (I != CachedAssumeCalls.end())
+ CachedAssumeCalls.erase(I);
+}
+
+void AssumptionTracker::CallCallbackVH::deleted() {
+ assert(F && "delete callback called on dummy handle");
+ FunctionCallsMap::iterator I = AT->CachedAssumeCalls.find_as(F);
+ assert(I != AT->CachedAssumeCalls.end() &&
+ "Function cleared from the map without removing the values?");
+
+ I->second->erase(*this);
+ // 'this' now dangles!
+}
+
+AssumptionTracker::FunctionCallsMap::iterator
+AssumptionTracker::scanFunction(Function *F) {
+ auto IP = CachedAssumeCalls.insert(std::make_pair(
+ FunctionCallbackVH(F, this), llvm::make_unique<CallHandleSet>()));
+ assert(IP.second && "Scanning function already in the map?");
+
+ FunctionCallsMap::iterator I = IP.first;
+
+ // Go through all instructions in all blocks, add all calls to @llvm.assume
+ // to our cache.
+ for (BasicBlock &B : *F)
+ for (Instruction &II : B)
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+ I->second->insert(CallCallbackVH(&II, this));
+
+ return I;
+}
+
+void AssumptionTracker::verifyAnalysis() const {
+#ifndef NDEBUG
+ for (const auto &I : CachedAssumeCalls) {
+ for (const BasicBlock &B : cast<Function>(*I.first))
+ for (const Instruction &II : B) {
+ if (match(&II, m_Intrinsic<Intrinsic::assume>())) {
+ assert(I.second->find_as(&II) != I.second->end() &&
+ "Assumption in scanned function not in cache");
+ }
+ }
+ }
+#endif
+}
+
+void AssumptionTracker::registerAssumption(CallInst *CI) {
+ assert(match(CI, m_Intrinsic<Intrinsic::assume>()) &&
+ "Registered call does not call @llvm.assume");
+ assert(CI->getParent() &&
+ "Cannot register @llvm.assume call not in a basic block");
+
+ Function *F = CI->getParent()->getParent();
+ assert(F && "Cannot register @llvm.assume call not in a function");
+
+ FunctionCallsMap::iterator I = CachedAssumeCalls.find_as(F);
+ if (I == CachedAssumeCalls.end()) {
+ // If this function has not already been scanned, then don't do anything
+ // here. This intrinsic will be found, if it still exists, if the list of
+ // assumptions in this function is requested at some later point. This
+ // maintains the following invariant: if a function is present in the
+ // cache, then its list of assumption intrinsic calls is complete.
+ return;
+ }
+
+ I->second->insert(CallCallbackVH(CI, this));
+}
+
+AssumptionTracker::AssumptionTracker() : ImmutablePass(ID) {
+ initializeAssumptionTrackerPass(*PassRegistry::getPassRegistry());
+}
+
+AssumptionTracker::~AssumptionTracker() {}
+
+INITIALIZE_PASS(AssumptionTracker, "assumption-tracker", "Assumption Tracker",
+ false, true)
+char AssumptionTracker::ID = 0;
+
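The new pass builds its per-function list of @llvm.assume calls lazily: scanFunction populates the cache on first request, and registerAssumption only updates functions that are already cached, which preserves the stated invariant that a cached function's list is complete. A simplified stand-in for that scheme, with plain std::string/std::set types and a caller-supplied scan callback in place of the LLVM classes:

#include <cstdio>
#include <functional>
#include <map>
#include <set>
#include <string>

class AssumeCache {
  std::map<std::string, std::set<int>> Cached;   // function name -> assume ids
  std::function<std::set<int>(const std::string &)> ScanFn;

public:
  explicit AssumeCache(std::function<std::set<int>(const std::string &)> S)
      : ScanFn(std::move(S)) {}

  const std::set<int> &assumptions(const std::string &F) {
    auto I = Cached.find(F);
    if (I == Cached.end())                 // first request: do the full scan
      I = Cached.emplace(F, ScanFn(F)).first;
    return I->second;
  }

  void registerAssumption(const std::string &F, int Id) {
    auto I = Cached.find(F);
    if (I == Cached.end())
      return;                              // not scanned yet; the scan will find it
    I->second.insert(Id);                  // keep an already-complete list complete
  }
};

int main() {
  AssumeCache C([](const std::string &) { return std::set<int>{1, 2}; });
  C.registerAssumption("f", 3);                      // ignored: "f" never scanned
  std::printf("%zu\n", C.assumptions("f").size());   // 2
  C.registerAssumption("f", 3);                      // now recorded
  std::printf("%zu\n", C.assumptions("f").size());   // 3
  return 0;
}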
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index c50dd4a..9aba0d3 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -156,17 +157,6 @@ static bool isObjectSize(const Value *V, uint64_t Size,
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
}
-/// isIdentifiedFunctionLocal - Return true if V is umabigously identified
-/// at the function-level. Different IdentifiedFunctionLocals can't alias.
-/// Further, an IdentifiedFunctionLocal can not alias with any function
-/// arguments other than itself, which is not necessarily true for
-/// IdentifiedObjects.
-static bool isIdentifiedFunctionLocal(const Value *V)
-{
- return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V);
-}
-
-
//===----------------------------------------------------------------------===//
// GetElementPtr Instruction Decomposition and Analysis
//===----------------------------------------------------------------------===//
@@ -205,7 +195,9 @@ namespace {
/// represented in the result.
static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
ExtensionKind &Extension,
- const DataLayout &DL, unsigned Depth) {
+ const DataLayout &DL, unsigned Depth,
+ AssumptionTracker *AT,
+ DominatorTree *DT) {
assert(V->getType()->isIntegerTy() && "Not an integer value");
// Limit our recursion depth.
@@ -215,6 +207,14 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
return V;
}
+ if (ConstantInt *Const = dyn_cast<ConstantInt>(V)) {
+ // if it's a constant, just convert it to an offset
+ // and remove the variable.
+ Offset += Const->getValue();
+ assert(Scale == 0 && "Constant values don't have a scale");
+ return V;
+ }
+
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
@@ -222,23 +222,24 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL))
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0,
+ AT, BOp, DT))
break;
// FALL THROUGH.
case Instruction::Add:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1);
+ DL, Depth+1, AT, DT);
Offset += RHSC->getValue();
return V;
case Instruction::Mul:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1);
+ DL, Depth+1, AT, DT);
Offset *= RHSC->getValue();
Scale *= RHSC->getValue();
return V;
case Instruction::Shl:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1);
+ DL, Depth+1, AT, DT);
Offset <<= RHSC->getValue().getLimitedValue();
Scale <<= RHSC->getValue().getLimitedValue();
return V;
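The Add/Mul/Shl cases above implement a small rewrite system: an integer expression built from one variable and constants is folded into the form Scale * V + Offset, with the constants threaded through the recursion. A minimal stand-in using a plain expression struct instead of the llvm::Value/APInt machinery:

#include <cstdint>
#include <cstdio>

struct Expr {
  enum Kind { Leaf, Add, Mul, Shl } K;
  int64_t C;        // constant operand (unused for Leaf)
  const Expr *Op;   // sub-expression (unused for Leaf)
};

// Returns the leaf V such that the whole expression equals Scale * V + Offset.
static const Expr *decompose(const Expr *E, int64_t &Scale, int64_t &Offset) {
  switch (E->K) {
  case Expr::Leaf:
    Scale = 1;
    Offset = 0;
    return E;
  case Expr::Add: {                 // (inner) + C  ->  Offset += C
    const Expr *V = decompose(E->Op, Scale, Offset);
    Offset += E->C;
    return V;
  }
  case Expr::Mul: {                 // (inner) * C  ->  scale both parts by C
    const Expr *V = decompose(E->Op, Scale, Offset);
    Offset *= E->C;
    Scale *= E->C;
    return V;
  }
  case Expr::Shl: {                 // (inner) << C ->  scale both parts by 2^C
    const Expr *V = decompose(E->Op, Scale, Offset);
    Offset <<= E->C;
    Scale <<= E->C;
    return V;
  }
  }
  return E;
}

int main() {
  // (x + 3) << 2  decomposes to  4*x + 12.
  Expr X{Expr::Leaf, 0, nullptr};
  Expr Plus3{Expr::Add, 3, &X};
  Expr Shl2{Expr::Shl, 2, &Plus3};

  int64_t Scale = 0, Offset = 0;
  decompose(&Shl2, Scale, Offset);
  std::printf("Scale=%lld Offset=%lld\n", (long long)Scale, (long long)Offset);
  return 0;
}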
@@ -259,9 +260,12 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
- DL, Depth+1);
+ DL, Depth+1, AT, DT);
Scale = Scale.zext(OldWidth);
- Offset = Offset.zext(OldWidth);
+
+ // We have to sign-extend even if Extension == EK_ZeroExt as we can't
+ // decompose a sign extension (i.e. zext(x - 1) != zext(x) - zext(-1)).
+ Offset = Offset.sext(OldWidth);
return Result;
}
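The new comment is easy to check with concrete numbers: zero-extension does not distribute over the subtraction hidden in an expression like x - 1, which is why the accumulated Offset is now sign-extended. A tiny numeric demonstration with 8-bit and 16-bit integers:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t x = 0;
  // zext16(x - 1): the subtraction wraps to 8 bits first, then widens.
  uint16_t lhs = (uint16_t)(uint8_t)(x - 1);                       // 0x00FF
  // zext16(x) - zext16(-1): widen first, then subtract in 16 bits.
  uint16_t rhs = (uint16_t)((uint16_t)x - (uint16_t)(uint8_t)-1);  // 0xFF01
  std::printf("0x%04X vs 0x%04X\n", (unsigned)lhs, (unsigned)rhs); // they differ
  return 0;
}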
@@ -289,7 +293,8 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
static const Value *
DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
SmallVectorImpl<VariableGEPIndex> &VarIndices,
- bool &MaxLookupReached, const DataLayout *DL) {
+ bool &MaxLookupReached, const DataLayout *DL,
+ AssumptionTracker *AT, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
MaxLookupReached = false;
@@ -309,7 +314,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
return V;
}
- if (Op->getOpcode() == Instruction::BitCast) {
+ if (Op->getOpcode() == Instruction::BitCast ||
+ Op->getOpcode() == Instruction::AddrSpaceCast) {
V = Op->getOperand(0);
continue;
}
@@ -319,7 +325,10 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// If it's not a GEP, hand it off to SimplifyInstruction to see if it
// can come up with something. This matches what GetUnderlyingObject does.
if (const Instruction *I = dyn_cast<Instruction>(V))
- // TODO: Get a DominatorTree and use it here.
+ // TODO: Get a DominatorTree and AssumptionTracker and use them here
+ // (these are both now available in this function, but this should be
+ // updated when GetUnderlyingObject is updated). TLI should be
+ // provided also.
if (const Value *Simplified =
SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
V = Simplified;
@@ -378,7 +387,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
- *DL, 0);
+ *DL, 0, AT, DT);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -459,6 +468,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -466,8 +476,8 @@ namespace {
assert(AliasCache.empty() && "AliasCache must be cleared after use!");
assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
- AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
- LocB.Ptr, LocB.Size, LocB.TBAATag);
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags,
+ LocB.Ptr, LocB.Size, LocB.AATags);
// AliasCache rarely has more than 1 or 2 elements, always use
// shrink_and_clear so it quickly returns to the inline capacity of the
// SmallDenseMap if it ever grows larger.
@@ -481,10 +491,7 @@ namespace {
const Location &Loc) override;
ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- // The AliasAnalysis base class has some smarts, lets use them.
- return AliasAnalysis::getModRefInfo(CS1, CS2);
- }
+ ImmutableCallSite CS2) override;
/// pointsToConstantMemory - Chase pointers until we find a (constant
/// global) or not.
@@ -554,28 +561,28 @@ namespace {
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
// instruction against another.
AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
- const MDNode *V1TBAAInfo,
+ const AAMDNodes &V1AAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo,
+ const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2);
// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
// instruction against another.
AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
- const MDNode *PNTBAAInfo,
+ const AAMDNodes &PNAAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo);
+ const AAMDNodes &V2AAInfo);
/// aliasSelect - Disambiguate a Select instruction against another value.
AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
- const MDNode *SITBAAInfo,
+ const AAMDNodes &SIAAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo);
+ const AAMDNodes &V2AAInfo);
AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
- const MDNode *V1TBAATag,
+ AAMDNodes V1AATag,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAATag);
+ AAMDNodes V2AATag);
};
} // End of anonymous namespace
@@ -584,6 +591,7 @@ char BasicAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
@@ -606,7 +614,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
Worklist.push_back(Loc.Ptr);
do {
const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
- if (!Visited.insert(V)) {
+ if (!Visited.insert(V).second) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
@@ -798,6 +806,14 @@ BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
return Loc;
}
+static bool isAssumeIntrinsic(ImmutableCallSite CS) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II && II->getIntrinsicID() == Intrinsic::assume)
+ return true;
+
+ return false;
+}
+
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
@@ -850,10 +866,29 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
return NoModRef;
}
+ // While the assume intrinsic is marked as arbitrarily writing so that
+ // proper control dependencies will be maintained, it never aliases any
+ // particular memory location.
+ if (isAssumeIntrinsic(CS))
+ return NoModRef;
+
// The AliasAnalysis base class has some smarts, lets use them.
return AliasAnalysis::getModRefInfo(CS, Loc);
}
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // While the assume intrinsic is marked as arbitrarily writing so that
+ // proper control dependencies will be maintained, it never aliases any
+ // particular memory location.
+ if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
+ return NoModRef;
+
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -861,30 +896,35 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
///
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
- const MDNode *V1TBAAInfo,
+ const AAMDNodes &V1AAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo,
+ const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1,
const Value *UnderlyingV2) {
int64_t GEP1BaseOffset;
bool GEP1MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
// If we have two gep instructions with must-alias or not-alias'ing base
// pointers, figure out if the indexes to the GEP tell us anything about the
// derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, nullptr,
- UnderlyingV2, UnknownSize, nullptr);
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, AAMDNodes(),
+ UnderlyingV2, UnknownSize, AAMDNodes());
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
- V1TBAAInfo, UnderlyingV2,
- V2Size, V2TBAAInfo);
+ V1AAInfo, UnderlyingV2,
+ V2Size, V2AAInfo);
if (PreciseBaseAlias == NoAlias) {
// See if the computed offset from the common pointer tells us about the
// relation of the resulting pointer.
@@ -893,10 +933,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL);
+ GEP2MaxLookupReached, DL, AT, DT);
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL);
+ GEP1MaxLookupReached, DL, AT, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -925,14 +965,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL);
+ GEP1MaxLookupReached, DL, AT, DT);
int64_t GEP2BaseOffset;
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL);
+ GEP2MaxLookupReached, DL, AT, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -959,8 +999,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
if (V1Size == UnknownSize && V2Size == UnknownSize)
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, nullptr,
- V2, V2Size, V2TBAAInfo);
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, AAMDNodes(),
+ V2, V2Size, V2AAInfo);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -971,7 +1011,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL);
+ GEP1MaxLookupReached, DL, AT, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -1022,12 +1062,45 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
}
}
- // Try to distinguish something like &A[i][1] against &A[42][0].
- // Grab the least significant bit set in any of the scales.
if (!GEP1VariableIndices.empty()) {
uint64_t Modulo = 0;
- for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
- Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+ bool AllPositive = true;
+ for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
+
+ // Try to distinguish something like &A[i][1] against &A[42][0].
+ // Grab the least significant bit set in any of the scales. We
+ // don't need std::abs here (even if the scale's negative) as we'll
+ // be ^'ing Modulo with itself later.
+ Modulo |= (uint64_t) GEP1VariableIndices[i].Scale;
+
+ if (AllPositive) {
+ // If the Value could change between cycles, then any reasoning about
+ // the Value this cycle may not hold in the next cycle. We'll just
+ // give up if we can't determine conditions that hold for every cycle:
+ const Value *V = GEP1VariableIndices[i].V;
+
+ bool SignKnownZero, SignKnownOne;
+ ComputeSignBit(
+ const_cast<Value *>(V),
+ SignKnownZero, SignKnownOne,
+ DL, 0, AT, nullptr, DT);
+
+ // Zero-extension widens the variable, and so forces the sign
+ // bit to zero.
+ bool IsZExt = GEP1VariableIndices[i].Extension == EK_ZeroExt;
+ SignKnownZero |= IsZExt;
+ SignKnownOne &= !IsZExt;
+
+ // If the variable begins with a zero then we know it's
+ // positive, regardless of whether the value is signed or
+ // unsigned.
+ int64_t Scale = GEP1VariableIndices[i].Scale;
+ AllPositive =
+ (SignKnownZero && Scale >= 0) ||
+ (SignKnownOne && Scale < 0);
+ }
+ }
+
Modulo = Modulo ^ (Modulo & (Modulo - 1));
// We can compute the difference between the two addresses
@@ -1037,6 +1110,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
if (V1Size != UnknownSize && V2Size != UnknownSize &&
ModOffset >= V2Size && V1Size <= Modulo - ModOffset)
return NoAlias;
+
+ // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
+ // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
+ // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
+ if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t) GEP1BaseOffset)
+ return NoAlias;
}
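The Modulo ^ (Modulo & (Modulo - 1)) expression above isolates the lowest set bit across the variable-index scales, so the variable part of the address is always a multiple of that power of two, and a known constant offset can then rule out overlap. A minimal, self-contained C++ sketch of that reasoning, with sizes and offsets invented purely for illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Lowest-set-bit trick: clearing the lowest set bit (Modulo & (Modulo - 1))
      // and XOR-ing with the original leaves exactly that bit.
      uint64_t Modulo = 12 | 8;                  // OR of two scales -> 0b1100
      Modulo = Modulo ^ (Modulo & (Modulo - 1)); // -> 0b0100
      assert(Modulo == 4); // every variable contribution is a multiple of 4

      // With a known constant offset between the pointers, accesses can only
      // land at BaseOffset modulo Modulo; if the second access fits entirely
      // in the remaining gap, the two ranges cannot overlap.
      uint64_t BaseOffset = 2, V1Size = 2, V2Size = 2; // made-up values
      uint64_t ModOffset = BaseOffset & (Modulo - 1);
      assert(ModOffset >= V2Size && V1Size <= Modulo - ModOffset); // NoAlias
      return 0;
    }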
// Statically, we can see that the base objects are the same, but the
@@ -1066,33 +1145,33 @@ MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
/// instruction against another.
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
- const MDNode *SITBAAInfo,
+ const AAMDNodes &SIAAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo) {
+ const AAMDNodes &V2AAInfo) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
AliasResult Alias =
- aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
- SI2->getTrueValue(), V2Size, V2TBAAInfo);
+ aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
+ SI2->getTrueValue(), V2Size, V2AAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
- SI2->getFalseValue(), V2Size, V2TBAAInfo);
+ aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
+ SI2->getFalseValue(), V2Size, V2AAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
AliasResult Alias =
- aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1100,9 +1179,9 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
// against another.
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
- const MDNode *PNTBAAInfo,
+ const AAMDNodes &PNAAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo) {
+ const AAMDNodes &V2AAInfo) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1112,8 +1191,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
// on corresponding edges.
if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
if (PN2->getParent() == PN->getParent()) {
- LocPair Locs(Location(PN, PNSize, PNTBAAInfo),
- Location(V2, V2Size, V2TBAAInfo));
+ LocPair Locs(Location(PN, PNSize, PNAAInfo),
+ Location(V2, V2Size, V2AAInfo));
if (PN > V2)
std::swap(Locs.first, Locs.second);
// Analyse the PHIs' inputs under the assumption that the PHIs are
@@ -1131,9 +1210,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
- aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
- V2Size, V2TBAAInfo);
+ V2Size, V2AAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1156,12 +1235,12 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
// sides are PHI nodes. In which case, this is O(m x n) time where 'm'
// and 'n' are the number of PHI sources.
return MayAlias;
- if (UniqueSrc.insert(PV1))
+ if (UniqueSrc.insert(PV1).second)
V1Srcs.push_back(PV1);
}
- AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
- V1Srcs[0], PNSize, PNTBAAInfo);
+ AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo,
+ V1Srcs[0], PNSize, PNAAInfo);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
if (Alias == MayAlias)
@@ -1172,8 +1251,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
- AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
- V, PNSize, PNTBAAInfo);
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo,
+ V, PNSize, PNAAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1187,9 +1266,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
//
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
- const MDNode *V1TBAAInfo,
+ AAMDNodes V1AAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo) {
+ AAMDNodes V2AAInfo) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
@@ -1269,8 +1348,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
- LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
- Location(V2, V2Size, V2TBAAInfo));
+ LocPair Locs(Location(V1, V1Size, V1AAInfo),
+ Location(V2, V2Size, V2AAInfo));
if (V1 > V2)
std::swap(Locs.first, Locs.second);
std::pair<AliasCacheTy::iterator, bool> Pair =
@@ -1284,32 +1363,32 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
std::swap(V1, V2);
std::swap(V1Size, V2Size);
std::swap(O1, O2);
- std::swap(V1TBAAInfo, V2TBAAInfo);
+ std::swap(V1AAInfo, V2AAInfo);
}
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
- AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2);
+ AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
- std::swap(V1TBAAInfo, V2TBAAInfo);
+ std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
- V2, V2Size, V2TBAAInfo);
+ AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
+ V2, V2Size, V2AAInfo);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
- std::swap(V1TBAAInfo, V2TBAAInfo);
+ std::swap(V1AAInfo, V2AAInfo);
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
- AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
- V2, V2Size, V2TBAAInfo);
+ AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo,
+ V2, V2Size, V2AAInfo);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
@@ -1322,8 +1401,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
return AliasCache[Locs] = PartialAlias;
AliasResult Result =
- AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
- Location(V2, V2Size, V2TBAAInfo));
+ AliasAnalysis::alias(Location(V1, V1Size, V1AAInfo),
+ Location(V2, V2Size, V2AAInfo));
return AliasCache[Locs] = Result;
}
@@ -1348,10 +1427,8 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
// Make sure that the visited phis cannot reach the Value. This ensures that
// the Values cannot come from different iterations of a potential cycle the
// phi nodes could be involved in.
- for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
- PE = VisitedPhiBBs.end();
- PI != PE; ++PI)
- if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
+ for (auto *P : VisitedPhiBBs)
+ if (isPotentiallyReachable(P->begin(), Inst, DT, LI))
return false;
return true;
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 4fd2c11..06b8acd 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -14,18 +14,12 @@
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Support/raw_ostream.h"
-#include <deque>
using namespace llvm;
using namespace llvm::bfi_detail;
#define DEBUG_TYPE "block-freq"
-//===----------------------------------------------------------------------===//
-//
-// BlockMass implementation.
-//
-//===----------------------------------------------------------------------===//
ScaledNumber<uint64_t> BlockMass::toScaled() const {
if (isFull())
return ScaledNumber<uint64_t>(1, 0);
@@ -46,11 +40,6 @@ raw_ostream &BlockMass::print(raw_ostream &OS) const {
return OS;
}
-//===----------------------------------------------------------------------===//
-//
-// BlockFrequencyInfoImpl implementation.
-//
-//===----------------------------------------------------------------------===//
namespace {
typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
@@ -87,7 +76,8 @@ struct DitheringDistributer {
BlockMass takeMass(uint32_t Weight);
};
-}
+
+} // end namespace
DitheringDistributer::DitheringDistributer(Distribution &Dist,
const BlockMass &Mass) {
@@ -121,11 +111,7 @@ void Distribution::add(const BlockNode &Node, uint64_t Amount,
Total = NewTotal;
// Save the weight.
- Weight W;
- W.TargetNode = Node;
- W.Amount = Amount;
- W.Type = Type;
- Weights.push_back(W);
+ Weights.push_back(Weight(Type, Node, Amount));
}
static void combineWeight(Weight &W, const Weight &OtherW) {
@@ -615,7 +601,8 @@ static void findIrreducibleHeaders(
break;
}
}
- assert(Headers.size() >= 2 && "Should be irreducible");
+ assert(Headers.size() >= 2 &&
+ "Expected irreducible CFG; -loop-info is likely invalid");
if (Headers.size() == InSCC.size()) {
// Every block is a header.
std::sort(Headers.begin(), Headers.end());
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 8ef5302..25e7bc0 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -45,7 +45,7 @@ void llvm::FindFunctionBackedges(const Function &F,
bool FoundNew = false;
while (I != succ_end(ParentBB)) {
BB = *I++;
- if (Visited.insert(BB)) {
+ if (Visited.insert(BB).second) {
FoundNew = true;
break;
}
@@ -141,7 +141,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
SmallSet<const BasicBlock*, 64> Visited;
do {
BasicBlock *BB = Worklist.pop_back_val();
- if (!Visited.insert(BB))
+ if (!Visited.insert(BB).second)
continue;
if (BB == StopBB)
return true;
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index c2c19d6..89787f82 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -79,11 +79,11 @@ namespace {
bool runOnFunction(Function &F) override {
std::string Filename = "cfg." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
-
- std::string ErrorInfo;
- raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text);
- if (ErrorInfo.empty())
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
WriteGraph(File, (const Function*)&F);
else
errs() << " error opening file for writing!";
@@ -114,10 +114,10 @@ namespace {
std::string Filename = "cfg." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
- std::string ErrorInfo;
- raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text);
-
- if (ErrorInfo.empty())
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
WriteGraph(File, (const Function*)&F, true);
else
errs() << " error opening file for writing!";
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
new file mode 100644
index 0000000..5f1b3d3
--- /dev/null
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -0,0 +1,1013 @@
+//===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a CFL-based context-insensitive alias analysis
+// algorithm. It does not depend on types. The algorithm is a mixture of the one
+// described in "Demand-driven alias analysis for C" by Xin Zheng and Radu
+// Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to
+// Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the
+// papers, we build a graph of the uses of a variable, where each node is a
+// memory location, and each edge is an action that happened on that memory
+// location. The "actions" can be one of Dereference, Reference, Assign, or
+// Assign.
+//
+// Two variables are considered as aliasing iff you can reach one value's node
+// from the other value's node and the language formed by concatenating all of
+// the edge labels (actions) conforms to a context-free grammar.
+//
+// Because this algorithm requires a graph search on each query, we execute the
+// algorithm outlined in "Fast algorithms..." (mentioned above)
+// in order to transform the graph into sets of variables that may alias in
+// ~nlogn time (n = number of variables), which makes queries take constant
+// time.
+//===----------------------------------------------------------------------===//
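As a rough illustration of the graph of uses described above (a toy model, not the pass's actual data structures): a load yields a Reference edge and a store yields a Dereference edge, the two labels that have to pair up, in the balanced-parentheses sense the cited papers formalize, for a path to witness aliasing.

    #include <cstdio>
    #include <string>
    #include <vector>

    // Toy edge labels mirroring the three actions described above.
    enum class Action { Assign, Dereference, Reference };

    struct ToyEdge { std::string From, To; Action Label; };

    int main() {
      std::vector<ToyEdge> Edges;
      // %x = load %p   -> the loaded value %x references what %p points at.
      Edges.push_back({"%x", "%p", Action::Reference});
      // store %x, %q   -> %q now holds %x behind one level of indirection.
      Edges.push_back({"%q", "%x", Action::Dereference});

      for (const ToyEdge &E : Edges)
        std::printf("%s -%d-> %s\n", E.From.c_str(),
                    static_cast<int>(E.Label), E.To.c_str());
      return 0;
    }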
+
+#include "StratifiedSets.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/None.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <forward_list>
+#include <tuple>
+
+using namespace llvm;
+
+// Try to go from a Value* to a Function*. Never returns nullptr.
+static Optional<Function *> parentFunctionOfValue(Value *);
+
+// Returns possible functions called by the Inst* into the given
+// SmallVectorImpl. Returns true if targets found, false otherwise.
+// This is templated because InvokeInst/CallInst give us the same
+// set of functions that we care about, and I don't like repeating
+// myself.
+template <typename Inst>
+static bool getPossibleTargets(Inst *, SmallVectorImpl<Function *> &);
+
+// Some instructions need to have their users tracked. Instructions like
+// `add` require you to get the users of the Instruction* itself, other
+// instructions like `store` require you to get the users of the first
+// operand. This function gets the "proper" value to track for each
+// type of instruction we support.
+static Optional<Value *> getTargetValue(Instruction *);
+
+// There are certain instructions (i.e. FenceInst, etc.) that we ignore.
+// This notes that we should ignore those.
+static bool hasUsefulEdges(Instruction *);
+
+const StratifiedIndex StratifiedLink::SetSentinel =
+ std::numeric_limits<StratifiedIndex>::max();
+
+namespace {
+// StratifiedInfo Attribute things.
+typedef unsigned StratifiedAttr;
+LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
+LLVM_CONSTEXPR unsigned AttrAllIndex = 0;
+LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1;
+LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 2;
+LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
+LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
+
+LLVM_CONSTEXPR StratifiedAttr AttrNone = 0;
+LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone;
+
+// \brief StratifiedSets call for knowledge of "direction", so this is how we
+// represent that locally.
+enum class Level { Same, Above, Below };
+
+// \brief Edges can be one of four "weights" -- each weight must have an inverse
+// weight (Assign has Assign; Reference has Dereference).
+enum class EdgeType {
+ // The weight assigned when assigning from or to a value. For example, in:
+ // %b = getelementptr %a, 0
+ // ...The relationships are %b assign %a, and %a assign %b. This used to be
+ // two edges, but having a distinction bought us nothing.
+ Assign,
+
+ // The edge used when we have an edge going from some handle to a Value.
+ // Examples of this include:
+ // %b = load %a (%b Dereference %a)
+ // %b = extractelement %a, 0 (%a Dereference %b)
+ Dereference,
+
+ // The edge used when our edge goes from a value to a handle that may have
+ // contained it at some point. Examples:
+ // %b = load %a (%a Reference %b)
+ // %b = extractelement %a, 0 (%b Reference %a)
+ Reference
+};
+
+// \brief Encodes the notion of a "use"
+struct Edge {
+ // \brief Which value the edge is coming from
+ Value *From;
+
+ // \brief Which value the edge is pointing to
+ Value *To;
+
+ // \brief Edge weight
+ EdgeType Weight;
+
+ // \brief Whether we aliased any external values along the way that may be
+ // invisible to the analysis (i.e. landingpad for exceptions, calls for
+ // interprocedural analysis, etc.)
+ StratifiedAttrs AdditionalAttrs;
+
+ Edge(Value *From, Value *To, EdgeType W, StratifiedAttrs A)
+ : From(From), To(To), Weight(W), AdditionalAttrs(A) {}
+};
+
+// \brief Information we have about a function and would like to keep around
+struct FunctionInfo {
+ StratifiedSets<Value *> Sets;
+ // Lots of functions have < 4 returns. Adjust as necessary.
+ SmallVector<Value *, 4> ReturnedValues;
+
+ FunctionInfo(StratifiedSets<Value *> &&S,
+ SmallVector<Value *, 4> &&RV)
+ : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+};
+
+struct CFLAliasAnalysis;
+
+struct FunctionHandle : public CallbackVH {
+ FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA)
+ : CallbackVH(Fn), CFLAA(CFLAA) {
+ assert(Fn != nullptr);
+ assert(CFLAA != nullptr);
+ }
+
+ virtual ~FunctionHandle() {}
+
+ void deleted() override { removeSelfFromCache(); }
+ void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
+
+private:
+ CFLAliasAnalysis *CFLAA;
+
+ void removeSelfFromCache();
+};
+
+struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis {
+private:
+ /// \brief Cached mapping of Functions to their StratifiedSets.
+ /// If a function's sets are currently being built, it is marked
+ /// in the cache as an Optional without a value. This way, if we
+ /// have any kind of recursion, it is discernable from a function
+ /// that simply has empty sets.
+ DenseMap<Function *, Optional<FunctionInfo>> Cache;
+ std::forward_list<FunctionHandle> Handles;
+
+public:
+ static char ID;
+
+ CFLAliasAnalysis() : ImmutablePass(ID) {
+ initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual ~CFLAliasAnalysis() {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AliasAnalysis::getAnalysisUsage(AU);
+ }
+
+ void *getAdjustedAnalysisPointer(const void *ID) override {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis *)this;
+ return this;
+ }
+
+ /// \brief Inserts the given Function into the cache.
+ void scan(Function *Fn);
+
+ void evict(Function *Fn) { Cache.erase(Fn); }
+
+ /// \brief Ensures that the given function is available in the cache.
+ /// Returns the appropriate entry from the cache.
+ const Optional<FunctionInfo> &ensureCached(Function *Fn) {
+ auto Iter = Cache.find(Fn);
+ if (Iter == Cache.end()) {
+ scan(Fn);
+ Iter = Cache.find(Fn);
+ assert(Iter != Cache.end());
+ assert(Iter->second.hasValue());
+ }
+ return Iter->second;
+ }
+
+ AliasResult query(const Location &LocA, const Location &LocB);
+
+ AliasResult alias(const Location &LocA, const Location &LocB) override {
+ if (LocA.Ptr == LocB.Ptr) {
+ if (LocA.Size == LocB.Size) {
+ return MustAlias;
+ } else {
+ return PartialAlias;
+ }
+ }
+
+ // Comparisons between global variables and other constants should be
+ // handled by BasicAA.
+ if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
+ return MayAlias;
+ }
+
+ return query(LocA, LocB);
+ }
+
+ void initializePass() override { InitializeAliasAnalysis(this); }
+};
+
+void FunctionHandle::removeSelfFromCache() {
+ assert(CFLAA != nullptr);
+ auto *Val = getValPtr();
+ CFLAA->evict(cast<Function>(Val));
+ setValPtr(nullptr);
+}
+
+// \brief Gets the edges our graph should have, based on an Instruction*
+class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
+ CFLAliasAnalysis &AA;
+ SmallVectorImpl<Edge> &Output;
+
+public:
+ GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl<Edge> &Output)
+ : AA(AA), Output(Output) {}
+
+ void visitInstruction(Instruction &) {
+ llvm_unreachable("Unsupported instruction encountered");
+ }
+
+ void visitCastInst(CastInst &Inst) {
+ Output.push_back(Edge(&Inst, Inst.getOperand(0), EdgeType::Assign,
+ AttrNone));
+ }
+
+ void visitBinaryOperator(BinaryOperator &Inst) {
+ auto *Op1 = Inst.getOperand(0);
+ auto *Op2 = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Op1, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Op2, EdgeType::Assign, AttrNone));
+ }
+
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getNewValOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getValOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitPHINode(PHINode &Inst) {
+ for (unsigned I = 0, E = Inst.getNumIncomingValues(); I != E; ++I) {
+ Value *Val = Inst.getIncomingValue(I);
+ Output.push_back(Edge(&Inst, Val, EdgeType::Assign, AttrNone));
+ }
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &Inst) {
+ auto *Op = Inst.getPointerOperand();
+ Output.push_back(Edge(&Inst, Op, EdgeType::Assign, AttrNone));
+ for (auto I = Inst.idx_begin(), E = Inst.idx_end(); I != E; ++I)
+ Output.push_back(Edge(&Inst, *I, EdgeType::Assign, AttrNone));
+ }
+
+ void visitSelectInst(SelectInst &Inst) {
+ auto *Condition = Inst.getCondition();
+ Output.push_back(Edge(&Inst, Condition, EdgeType::Assign, AttrNone));
+ auto *TrueVal = Inst.getTrueValue();
+ Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone));
+ auto *FalseVal = Inst.getFalseValue();
+ Output.push_back(Edge(&Inst, FalseVal, EdgeType::Assign, AttrNone));
+ }
+
+ void visitAllocaInst(AllocaInst &) {}
+
+ void visitLoadInst(LoadInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitStoreInst(StoreInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getValueOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitVAArgInst(VAArgInst &Inst) {
+ // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it does
+ // two things:
+ // 1. Loads a value from *((T*)*Ptr).
+ // 2. Increments (stores to) *Ptr by some target-specific amount.
+ // For now, we'll handle this like a landingpad instruction (by placing the
+ // result in its own group, and having that group alias externals).
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Val, EdgeType::Assign, AttrAll));
+ }
+
+ static bool isFunctionExternal(Function *Fn) {
+ return Fn->isDeclaration() || !Fn->hasLocalLinkage();
+ }
+
+ // Gets whether the sets at Index1 are above, below, or equal to the sets at
+ // Index2. Returns None if they are not in the same set chain.
+ static Optional<Level> getIndexRelation(const StratifiedSets<Value *> &Sets,
+ StratifiedIndex Index1,
+ StratifiedIndex Index2) {
+ if (Index1 == Index2)
+ return Level::Same;
+
+ const auto *Current = &Sets.getLink(Index1);
+ while (Current->hasBelow()) {
+ if (Current->Below == Index2)
+ return Level::Below;
+ Current = &Sets.getLink(Current->Below);
+ }
+
+ Current = &Sets.getLink(Index1);
+ while (Current->hasAbove()) {
+ if (Current->Above == Index2)
+ return Level::Above;
+ Current = &Sets.getLink(Current->Above);
+ }
+
+ return NoneType();
+ }
+
+ bool
+ tryInterproceduralAnalysis(const SmallVectorImpl<Function *> &Fns,
+ Value *FuncValue,
+ const iterator_range<User::op_iterator> &Args) {
+ const unsigned ExpectedMaxArgs = 8;
+ const unsigned MaxSupportedArgs = 50;
+ assert(Fns.size() > 0);
+
+ // I put this here to give us an upper bound on time taken by IPA. Is it
+ // really (realistically) needed? Keep in mind that we do have an n^2 algo.
+ if (std::distance(Args.begin(), Args.end()) > (int) MaxSupportedArgs)
+ return false;
+
+ // Exit early if we'll fail anyway
+ for (auto *Fn : Fns) {
+ if (isFunctionExternal(Fn) || Fn->isVarArg())
+ return false;
+ auto &MaybeInfo = AA.ensureCached(Fn);
+ if (!MaybeInfo.hasValue())
+ return false;
+ }
+
+ SmallVector<Value *, ExpectedMaxArgs> Arguments(Args.begin(), Args.end());
+ SmallVector<StratifiedInfo, ExpectedMaxArgs> Parameters;
+ for (auto *Fn : Fns) {
+ auto &Info = *AA.ensureCached(Fn);
+ auto &Sets = Info.Sets;
+ auto &RetVals = Info.ReturnedValues;
+
+ Parameters.clear();
+ for (auto &Param : Fn->args()) {
+ auto MaybeInfo = Sets.find(&Param);
+ // Did a new parameter somehow get added to the function/slip by?
+ if (!MaybeInfo.hasValue())
+ return false;
+ Parameters.push_back(*MaybeInfo);
+ }
+
+ // Adding an edge from argument -> return value for each parameter that
+ // may alias the return value
+ for (unsigned I = 0, E = Parameters.size(); I != E; ++I) {
+ auto &ParamInfo = Parameters[I];
+ auto &ArgVal = Arguments[I];
+ bool AddEdge = false;
+ StratifiedAttrs Externals;
+ for (unsigned X = 0, XE = RetVals.size(); X != XE; ++X) {
+ auto MaybeInfo = Sets.find(RetVals[X]);
+ if (!MaybeInfo.hasValue())
+ return false;
+
+ auto &RetInfo = *MaybeInfo;
+ auto RetAttrs = Sets.getLink(RetInfo.Index).Attrs;
+ auto ParamAttrs = Sets.getLink(ParamInfo.Index).Attrs;
+ auto MaybeRelation =
+ getIndexRelation(Sets, ParamInfo.Index, RetInfo.Index);
+ if (MaybeRelation.hasValue()) {
+ AddEdge = true;
+ Externals |= RetAttrs | ParamAttrs;
+ }
+ }
+ if (AddEdge)
+ Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign,
+ StratifiedAttrs().flip()));
+ }
+
+ if (Parameters.size() != Arguments.size())
+ return false;
+
+ // Adding edges between arguments for arguments that may end up aliasing
+ // each other. This is necessary for functions such as
+ // void foo(int** a, int** b) { *a = *b; }
+ // (Technically, the proper sets for this would be those below
+ // Arguments[I] and Arguments[X], but our algorithm will produce
+ // extremely similar, and equally correct, results either way)
+ for (unsigned I = 0, E = Arguments.size(); I != E; ++I) {
+ auto &MainVal = Arguments[I];
+ auto &MainInfo = Parameters[I];
+ auto &MainAttrs = Sets.getLink(MainInfo.Index).Attrs;
+ for (unsigned X = I + 1; X != E; ++X) {
+ auto &SubInfo = Parameters[X];
+ auto &SubVal = Arguments[X];
+ auto &SubAttrs = Sets.getLink(SubInfo.Index).Attrs;
+ auto MaybeRelation =
+ getIndexRelation(Sets, MainInfo.Index, SubInfo.Index);
+
+ if (!MaybeRelation.hasValue())
+ continue;
+
+ auto NewAttrs = SubAttrs | MainAttrs;
+ Output.push_back(Edge(MainVal, SubVal, EdgeType::Assign, NewAttrs));
+ }
+ }
+ }
+ return true;
+ }
+
+ template <typename InstT> void visitCallLikeInst(InstT &Inst) {
+ SmallVector<Function *, 4> Targets;
+ if (getPossibleTargets(&Inst, Targets)) {
+ if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands()))
+ return;
+ // Cleanup from interprocedural analysis
+ Output.clear();
+ }
+
+ for (Value *V : Inst.arg_operands())
+ Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll));
+ }
+
+ void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); }
+
+ void visitInvokeInst(InvokeInst &Inst) { visitCallLikeInst(Inst); }
+
+ // Because vectors/aggregates are immutable and unaddressable,
+ // there's nothing we can do to coax a value out of them, other
+ // than calling Extract{Element,Value}. We can effectively treat
+ // them as pointers to arbitrary memory locations we can store in
+ // and load from.
+ void visitExtractElementInst(ExtractElementInst &Inst) {
+ auto *Ptr = Inst.getVectorOperand();
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitInsertElementInst(InsertElementInst &Inst) {
+ auto *Vec = Inst.getOperand(0);
+ auto *Val = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Vec, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitLandingPadInst(LandingPadInst &Inst) {
+ // Exceptions come from "nowhere", from our analysis' perspective.
+ // So we place the instruction in its own group, noting that said group may
+ // alias externals
+ Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll));
+ }
+
+ void visitInsertValueInst(InsertValueInst &Inst) {
+ auto *Agg = Inst.getOperand(0);
+ auto *Val = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Agg, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitExtractValueInst(ExtractValueInst &Inst) {
+ auto *Ptr = Inst.getAggregateOperand();
+ Output.push_back(Edge(&Inst, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
+ auto *From1 = Inst.getOperand(0);
+ auto *From2 = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, From1, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, From2, EdgeType::Assign, AttrNone));
+ }
+};
+
+// For a given instruction, we need to know which Value* to get the
+// users of in order to build our graph. In some cases (i.e. add),
+// we simply need the Instruction*. In other cases (i.e. store),
+// finding the users of the Instruction* is useless; we need to find
+// the users of the first operand. This handles determining which
+// value to follow for us.
+//
+// Note: we *need* to keep this in sync with GetEdgesVisitor. Add
+// something to GetEdgesVisitor, add it here -- remove something from
+// GetEdgesVisitor, remove it here.
+class GetTargetValueVisitor
+ : public InstVisitor<GetTargetValueVisitor, Value *> {
+public:
+ Value *visitInstruction(Instruction &Inst) { return &Inst; }
+
+ Value *visitStoreInst(StoreInst &Inst) { return Inst.getPointerOperand(); }
+
+ Value *visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
+ return Inst.getPointerOperand();
+ }
+
+ Value *visitAtomicRMWInst(AtomicRMWInst &Inst) {
+ return Inst.getPointerOperand();
+ }
+
+ Value *visitInsertElementInst(InsertElementInst &Inst) {
+ return Inst.getOperand(0);
+ }
+
+ Value *visitInsertValueInst(InsertValueInst &Inst) {
+ return Inst.getAggregateOperand();
+ }
+};
+
+// Set building requires a weighted bidirectional graph.
+template <typename EdgeTypeT> class WeightedBidirectionalGraph {
+public:
+ typedef std::size_t Node;
+
+private:
+ const static Node StartNode = Node(0);
+
+ struct Edge {
+ EdgeTypeT Weight;
+ Node Other;
+
+ Edge(const EdgeTypeT &W, const Node &N)
+ : Weight(W), Other(N) {}
+
+ bool operator==(const Edge &E) const {
+ return Weight == E.Weight && Other == E.Other;
+ }
+
+ bool operator!=(const Edge &E) const { return !operator==(E); }
+ };
+
+ struct NodeImpl {
+ std::vector<Edge> Edges;
+ };
+
+ std::vector<NodeImpl> NodeImpls;
+
+ bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); }
+
+ const NodeImpl &getNode(Node N) const { return NodeImpls[N]; }
+ NodeImpl &getNode(Node N) { return NodeImpls[N]; }
+
+public:
+ // ----- Various Edge iterators for the graph ----- //
+
+ // \brief Iterator for edges. Because this graph is bidirected, we don't
+ // allow modification of the edges using this iterator. Additionally, the
+ // iterator becomes invalid if you add edges to or from the node you're
+ // getting the edges of.
+ struct EdgeIterator : public std::iterator<std::forward_iterator_tag,
+ std::tuple<EdgeTypeT, Node>> {
+ EdgeIterator(const typename std::vector<Edge>::const_iterator &Iter)
+ : Current(Iter) {}
+
+ EdgeIterator(NodeImpl &Impl) : Current(Impl.Edges.begin()) {}
+
+ EdgeIterator &operator++() {
+ ++Current;
+ return *this;
+ }
+
+ EdgeIterator operator++(int) {
+ EdgeIterator Copy(Current);
+ operator++();
+ return Copy;
+ }
+
+ std::tuple<EdgeTypeT, Node> &operator*() {
+ Store = std::make_tuple(Current->Weight, Current->Other);
+ return Store;
+ }
+
+ bool operator==(const EdgeIterator &Other) const {
+ return Current == Other.Current;
+ }
+
+ bool operator!=(const EdgeIterator &Other) const {
+ return !operator==(Other);
+ }
+
+ private:
+ typename std::vector<Edge>::const_iterator Current;
+ std::tuple<EdgeTypeT, Node> Store;
+ };
+
+ // Wrapper for EdgeIterator with begin()/end() calls.
+ struct EdgeIterable {
+ EdgeIterable(const std::vector<Edge> &Edges)
+ : BeginIter(Edges.begin()), EndIter(Edges.end()) {}
+
+ EdgeIterator begin() { return EdgeIterator(BeginIter); }
+
+ EdgeIterator end() { return EdgeIterator(EndIter); }
+
+ private:
+ typename std::vector<Edge>::const_iterator BeginIter;
+ typename std::vector<Edge>::const_iterator EndIter;
+ };
+
+ // ----- Actual graph-related things ----- //
+
+ WeightedBidirectionalGraph() {}
+
+ WeightedBidirectionalGraph(WeightedBidirectionalGraph<EdgeTypeT> &&Other)
+ : NodeImpls(std::move(Other.NodeImpls)) {}
+
+ WeightedBidirectionalGraph<EdgeTypeT> &
+ operator=(WeightedBidirectionalGraph<EdgeTypeT> &&Other) {
+ NodeImpls = std::move(Other.NodeImpls);
+ return *this;
+ }
+
+ Node addNode() {
+ auto Index = NodeImpls.size();
+ auto NewNode = Node(Index);
+ NodeImpls.push_back(NodeImpl());
+ return NewNode;
+ }
+
+ void addEdge(Node From, Node To, const EdgeTypeT &Weight,
+ const EdgeTypeT &ReverseWeight) {
+ assert(inbounds(From));
+ assert(inbounds(To));
+ auto &FromNode = getNode(From);
+ auto &ToNode = getNode(To);
+ FromNode.Edges.push_back(Edge(Weight, To));
+ ToNode.Edges.push_back(Edge(ReverseWeight, From));
+ }
+
+ EdgeIterable edgesFor(const Node &N) const {
+ const auto &Node = getNode(N);
+ return EdgeIterable(Node.Edges);
+ }
+
+ bool empty() const { return NodeImpls.empty(); }
+ std::size_t size() const { return NodeImpls.size(); }
+
+ // \brief Gets an arbitrary node in the graph as a starting point for
+ // traversal.
+ Node getEntryNode() {
+ assert(inbounds(StartNode));
+ return StartNode;
+ }
+};
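The essence of the weighted bidirectional graph above is that every edge is stored twice, once in each endpoint's adjacency list, carrying the forward weight on one copy and the reverse weight on the other. A pared-down, standalone stand-in (not the real template) showing just that bookkeeping:

    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    struct MiniGraph {
      using Node = std::size_t;
      // Adj[N] holds (weight, other-node) pairs for every edge touching N.
      std::vector<std::vector<std::pair<int, Node>>> Adj;

      Node addNode() {
        Adj.emplace_back();
        return Adj.size() - 1;
      }

      // Record the edge on both endpoints so it can be walked either way.
      void addEdge(Node From, Node To, int Weight, int ReverseWeight) {
        Adj[From].push_back({Weight, To});
        Adj[To].push_back({ReverseWeight, From});
      }
    };

    int main() {
      MiniGraph G;
      MiniGraph::Node A = G.addNode();
      MiniGraph::Node B = G.addNode();
      G.addEdge(A, B, /*Weight=*/1, /*ReverseWeight=*/-1);
      assert(G.Adj[A].size() == 1 && G.Adj[A][0].second == B);
      assert(G.Adj[B].size() == 1 && G.Adj[B][0].first == -1);
      return 0;
    }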
+
+typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;
+typedef DenseMap<Value *, GraphT::Node> NodeMapT;
+}
+
+// -- Setting up/registering CFLAA pass -- //
+char CFLAliasAnalysis::ID = 0;
+
+INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa",
+ "CFL-Based AA implementation", false, true, false)
+
+ImmutablePass *llvm::createCFLAliasAnalysisPass() {
+ return new CFLAliasAnalysis();
+}
+
+//===----------------------------------------------------------------------===//
+// Function declarations that require types defined in the namespace above
+//===----------------------------------------------------------------------===//
+
+// Given an argument number, returns the appropriate Attr index to set.
+static StratifiedAttr argNumberToAttrIndex(StratifiedAttr);
+
+// Given a Value, potentially return which AttrIndex it maps to.
+static Optional<StratifiedAttr> valueToAttrIndex(Value *Val);
+
+// Gets the inverse of a given EdgeType.
+static EdgeType flipWeight(EdgeType);
+
+// Gets edges of the given Instruction*, writing them to the SmallVector*.
+static void argsToEdges(CFLAliasAnalysis &, Instruction *,
+ SmallVectorImpl<Edge> &);
+
+// Gets the "Level" that one should travel in StratifiedSets
+// given an EdgeType.
+static Level directionOfEdgeType(EdgeType);
+
+// Builds the graph needed for constructing the StratifiedSets for the
+// given function
+static void buildGraphFrom(CFLAliasAnalysis &, Function *,
+ SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
+
+// Builds the graph + StratifiedSets for a function.
+static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
+
+static Optional<Function *> parentFunctionOfValue(Value *Val) {
+ if (auto *Inst = dyn_cast<Instruction>(Val)) {
+ auto *Bb = Inst->getParent();
+ return Bb->getParent();
+ }
+
+ if (auto *Arg = dyn_cast<Argument>(Val))
+ return Arg->getParent();
+ return NoneType();
+}
+
+template <typename Inst>
+static bool getPossibleTargets(Inst *Call,
+ SmallVectorImpl<Function *> &Output) {
+ if (auto *Fn = Call->getCalledFunction()) {
+ Output.push_back(Fn);
+ return true;
+ }
+
+ // TODO: If the call is indirect, we might be able to enumerate all potential
+ // targets of the call and return them, rather than just failing.
+ return false;
+}
+
+static Optional<Value *> getTargetValue(Instruction *Inst) {
+ GetTargetValueVisitor V;
+ return V.visit(Inst);
+}
+
+static bool hasUsefulEdges(Instruction *Inst) {
+ bool IsNonInvokeTerminator =
+ isa<TerminatorInst>(Inst) && !isa<InvokeInst>(Inst);
+ return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && !IsNonInvokeTerminator;
+}
+
+static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
+ if (isa<GlobalValue>(Val))
+ return AttrGlobalIndex;
+
+ if (auto *Arg = dyn_cast<Argument>(Val))
+ if (!Arg->hasNoAliasAttr())
+ return argNumberToAttrIndex(Arg->getArgNo());
+ return NoneType();
+}
+
+static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) {
+ if (ArgNum > AttrMaxNumArgs)
+ return AttrAllIndex;
+ return ArgNum + AttrFirstArgIndex;
+}
+
+static EdgeType flipWeight(EdgeType Initial) {
+ switch (Initial) {
+ case EdgeType::Assign:
+ return EdgeType::Assign;
+ case EdgeType::Dereference:
+ return EdgeType::Reference;
+ case EdgeType::Reference:
+ return EdgeType::Dereference;
+ }
+ llvm_unreachable("Incomplete coverage of EdgeType enum");
+}
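The EdgeType comment earlier promises that every weight has an inverse (Assign is self-inverse; Reference and Dereference invert each other), which makes flipWeight an involution. A standalone copy of the mapping (re-declared here only so the sketch compiles on its own) checks that property:

    #include <cassert>
    #include <initializer_list>

    enum class EdgeType { Assign, Dereference, Reference };

    // Same mapping as flipWeight above.
    static EdgeType flip(EdgeType E) {
      switch (E) {
      case EdgeType::Assign:      return EdgeType::Assign;
      case EdgeType::Dereference: return EdgeType::Reference;
      case EdgeType::Reference:   return EdgeType::Dereference;
      }
      return EdgeType::Assign; // not reached; keeps compilers quiet
    }

    int main() {
      for (EdgeType E : {EdgeType::Assign, EdgeType::Dereference,
                         EdgeType::Reference})
        assert(flip(flip(E)) == E); // flipping twice is the identity
      return 0;
    }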
+
+static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
+ SmallVectorImpl<Edge> &Output) {
+ GetEdgesVisitor v(Analysis, Output);
+ v.visit(Inst);
+}
+
+static Level directionOfEdgeType(EdgeType Weight) {
+ switch (Weight) {
+ case EdgeType::Reference:
+ return Level::Above;
+ case EdgeType::Dereference:
+ return Level::Below;
+ case EdgeType::Assign:
+ return Level::Same;
+ }
+ llvm_unreachable("Incomplete switch coverage");
+}
+
+// Aside: We may remove graph construction entirely, because it doesn't really
+// buy us much that we don't already have. I'd like to add interprocedural
+// analysis prior to this however, in case that somehow requires the graph
+// produced by this for efficient execution
+static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
+ const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
+ auto Pair = Map.insert(std::make_pair(Val, GraphT::Node()));
+ auto &Iter = Pair.first;
+ if (Pair.second) {
+ auto NewNode = Graph.addNode();
+ Iter->second = NewNode;
+ }
+ return Iter->second;
+ };
+
+ SmallVector<Edge, 8> Edges;
+ for (auto &Bb : Fn->getBasicBlockList()) {
+ for (auto &Inst : Bb.getInstList()) {
+ // We don't want the edges of most "return" instructions, but we *do* want
+ // to know what can be returned.
+ if (auto *Ret = dyn_cast<ReturnInst>(&Inst))
+ ReturnedValues.push_back(Ret);
+
+ if (!hasUsefulEdges(&Inst))
+ continue;
+
+ Edges.clear();
+ argsToEdges(Analysis, &Inst, Edges);
+
+ // In the case of an unused alloca (or similar), edges may be empty. Note
+ // that it exists so we can potentially answer NoAlias.
+ if (Edges.empty()) {
+ auto MaybeVal = getTargetValue(&Inst);
+ assert(MaybeVal.hasValue());
+ auto *Target = *MaybeVal;
+ findOrInsertNode(Target);
+ continue;
+ }
+
+ for (const Edge &E : Edges) {
+ auto To = findOrInsertNode(E.To);
+ auto From = findOrInsertNode(E.From);
+ auto FlippedWeight = flipWeight(E.Weight);
+ auto Attrs = E.AdditionalAttrs;
+ Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
+ std::make_pair(FlippedWeight, Attrs));
+ }
+ }
+ }
+}
+
+static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
+ NodeMapT Map;
+ GraphT Graph;
+ SmallVector<Value *, 4> ReturnedValues;
+
+ buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph);
+
+ DenseMap<GraphT::Node, Value *> NodeValueMap;
+ NodeValueMap.resize(Map.size());
+ for (const auto &Pair : Map)
+ NodeValueMap.insert(std::make_pair(Pair.second, Pair.first));
+
+ const auto findValueOrDie = [&NodeValueMap](GraphT::Node Node) {
+ auto ValIter = NodeValueMap.find(Node);
+ assert(ValIter != NodeValueMap.end());
+ return ValIter->second;
+ };
+
+ StratifiedSetsBuilder<Value *> Builder;
+
+ SmallVector<GraphT::Node, 16> Worklist;
+ for (auto &Pair : Map) {
+ Worklist.clear();
+
+ auto *Value = Pair.first;
+ Builder.add(Value);
+ auto InitialNode = Pair.second;
+ Worklist.push_back(InitialNode);
+ while (!Worklist.empty()) {
+ auto Node = Worklist.pop_back_val();
+ auto *CurValue = findValueOrDie(Node);
+ if (isa<Constant>(CurValue) && !isa<GlobalValue>(CurValue))
+ continue;
+
+ for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
+ auto Weight = std::get<0>(EdgeTuple);
+ auto Label = Weight.first;
+ auto &OtherNode = std::get<1>(EdgeTuple);
+ auto *OtherValue = findValueOrDie(OtherNode);
+
+ if (isa<Constant>(OtherValue) && !isa<GlobalValue>(OtherValue))
+ continue;
+
+ bool Added;
+ switch (directionOfEdgeType(Label)) {
+ case Level::Above:
+ Added = Builder.addAbove(CurValue, OtherValue);
+ break;
+ case Level::Below:
+ Added = Builder.addBelow(CurValue, OtherValue);
+ break;
+ case Level::Same:
+ Added = Builder.addWith(CurValue, OtherValue);
+ break;
+ }
+
+ if (Added) {
+ auto Aliasing = Weight.second;
+ if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
+ Aliasing.set(*MaybeCurIndex);
+ if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
+ Aliasing.set(*MaybeOtherIndex);
+ Builder.noteAttributes(CurValue, Aliasing);
+ Builder.noteAttributes(OtherValue, Aliasing);
+ Worklist.push_back(OtherNode);
+ }
+ }
+ }
+ }
+
+ // There are times when we end up with parameters not in our graph (i.e. if
+ // it's only used as the condition of a branch). Other bits of code depend on
+ // things that were present during construction being present in the graph.
+ // So, we add all present arguments here.
+ for (auto &Arg : Fn->args()) {
+ Builder.add(&Arg);
+ }
+
+ return FunctionInfo(Builder.build(), std::move(ReturnedValues));
+}
+
+void CFLAliasAnalysis::scan(Function *Fn) {
+ auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
+ (void)InsertPair;
+ assert(InsertPair.second &&
+ "Trying to scan a function that has already been cached");
+
+ FunctionInfo Info(buildSetsFrom(*this, Fn));
+ Cache[Fn] = std::move(Info);
+ Handles.push_front(FunctionHandle(Fn, this));
+}
+
+AliasAnalysis::AliasResult
+CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
+ const AliasAnalysis::Location &LocB) {
+ auto *ValA = const_cast<Value *>(LocA.Ptr);
+ auto *ValB = const_cast<Value *>(LocB.Ptr);
+
+ Function *Fn = nullptr;
+ auto MaybeFnA = parentFunctionOfValue(ValA);
+ auto MaybeFnB = parentFunctionOfValue(ValB);
+ if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
+ llvm_unreachable("Don't know how to extract the parent function "
+ "from values A or B");
+ }
+
+ if (MaybeFnA.hasValue()) {
+ Fn = *MaybeFnA;
+ assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
+ "Interprocedural queries not supported");
+ } else {
+ Fn = *MaybeFnB;
+ }
+
+ assert(Fn != nullptr);
+ auto &MaybeInfo = ensureCached(Fn);
+ assert(MaybeInfo.hasValue());
+
+ auto &Sets = MaybeInfo->Sets;
+ auto MaybeA = Sets.find(ValA);
+ if (!MaybeA.hasValue())
+ return AliasAnalysis::MayAlias;
+
+ auto MaybeB = Sets.find(ValB);
+ if (!MaybeB.hasValue())
+ return AliasAnalysis::MayAlias;
+
+ auto SetA = *MaybeA;
+ auto SetB = *MaybeB;
+
+ if (SetA.Index == SetB.Index)
+ return AliasAnalysis::PartialAlias;
+
+ auto AttrsA = Sets.getLink(SetA.Index).Attrs;
+ auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+ // Stratified set attributes are used as markers to signify whether a member
+ // of a StratifiedSet (or a member of a set above the current set) has
+ // interacted with either arguments or globals. "Interacted with" meaning
+ // its value may be different depending on the value of an argument or
+ // global. The thought behind this is that, because arguments and globals
+ // may alias each other, if AttrsA and AttrsB have touched args/globals,
+ // we must conservatively say that they alias. However, if at least one of
+ // the sets has no values that could legally be altered by changing the value
+ // of an argument or global, then we don't have to be as conservative.
+ if (AttrsA.any() && AttrsB.any())
+ return AliasAnalysis::MayAlias;
+
+ return AliasAnalysis::NoAlias;
+}
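The closing attribute check in query() boils down to: if both stratified sets have ever interacted with an argument or a global, stay conservative. A tiny standalone sketch of that decision, with std::bitset standing in for StratifiedAttrs and the attribute indices chosen arbitrarily for illustration:

    #include <bitset>
    #include <cassert>
    #include <cstddef>

    constexpr std::size_t NumAttrs = 32;   // stand-in for NumStratifiedAttrs
    constexpr std::size_t AttrGlobal = 1;
    constexpr std::size_t AttrFirstArg = 2;

    enum class Result { NoAlias, MayAlias };

    // Two values in different stratified sets may still alias if both sets
    // have touched arguments/globals, since those can alias one another.
    static Result decide(const std::bitset<NumAttrs> &A,
                         const std::bitset<NumAttrs> &B) {
      return (A.any() && B.any()) ? Result::MayAlias : Result::NoAlias;
    }

    int main() {
      std::bitset<NumAttrs> TouchedGlobal, TouchedArg, Clean;
      TouchedGlobal.set(AttrGlobal);
      TouchedArg.set(AttrFirstArg);
      assert(decide(TouchedGlobal, TouchedArg) == Result::MayAlias);
      assert(decide(TouchedGlobal, Clean) == Result::NoAlias);
      return 0;
    }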
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index d1632fd..4e9664f 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -5,12 +5,14 @@ add_llvm_library(LLVMAnalysis
AliasDebugger.cpp
AliasSetTracker.cpp
Analysis.cpp
+ AssumptionTracker.cpp
BasicAliasAnalysis.cpp
BlockFrequencyInfo.cpp
BlockFrequencyInfoImpl.cpp
BranchProbabilityInfo.cpp
CFG.cpp
CFGPrinter.cpp
+ CFLAliasAnalysis.cpp
CGSCCPassManager.cpp
CaptureTracking.cpp
CostModel.cpp
@@ -20,6 +22,7 @@ add_llvm_library(LLVMAnalysis
DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
+ FunctionTargetTransformInfo.cpp
IVUsers.cpp
InstCount.cpp
InstructionSimplify.cpp
@@ -53,6 +56,7 @@ add_llvm_library(LLVMAnalysis
TargetTransformInfo.cpp
Trace.cpp
TypeBasedAliasAnalysis.cpp
+ ScopedNoAliasAA.cpp
ValueTracking.cpp
)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 3708e60..a271729 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -20,8 +20,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
@@ -49,6 +51,65 @@ namespace {
bool Captured;
};
+
+ /// Only find pointer captures which happen before the given instruction. Uses
+ /// the dominator tree to determine whether one instruction is before another.
+ /// Only supports the case where the Value is defined in the same basic block
+ /// as the given instruction and the use.
+ struct CapturesBefore : public CaptureTracker {
+ CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT,
+ bool IncludeI)
+ : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
+ IncludeI(IncludeI), Captured(false) {}
+
+ void tooManyUses() override { Captured = true; }
+
+ bool shouldExplore(const Use *U) override {
+ Instruction *I = cast<Instruction>(U->getUser());
+ if (BeforeHere == I && !IncludeI)
+ return false;
+
+ BasicBlock *BB = I->getParent();
+ // We explore this usage only if the usage can reach "BeforeHere".
+ // If use is not reachable from entry, there is no need to explore.
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+ return false;
+ // If the value is defined in the same basic block as use and BeforeHere,
+ // there is no need to explore the use if BeforeHere dominates use.
+ // Check whether there is a path from I to BeforeHere.
+ if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+ !isPotentiallyReachable(I, BeforeHere, DT))
+ return false;
+ return true;
+ }
+
+ bool captured(const Use *U) override {
+ if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
+ return false;
+
+ Instruction *I = cast<Instruction>(U->getUser());
+ if (BeforeHere == I && !IncludeI)
+ return false;
+
+ BasicBlock *BB = I->getParent();
+ // Same logic as in shouldExplore.
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+ return false;
+ if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+ !isPotentiallyReachable(I, BeforeHere, DT))
+ return false;
+ Captured = true;
+ return true;
+ }
+
+ const Instruction *BeforeHere;
+ DominatorTree *DT;
+
+ bool ReturnCaptures;
+ bool IncludeI;
+
+ bool Captured;
+ };
}
/// PointerMayBeCaptured - Return true if this pointer value may be captured
@@ -74,6 +135,32 @@ bool llvm::PointerMayBeCaptured(const Value *V,
return SCT.Captured;
}
+/// PointerMayBeCapturedBefore - Return true if this pointer value may be
+/// captured by the enclosing function (which is required to exist). If a
+/// DominatorTree is provided, only captures which happen before the given
+/// instruction are considered. This routine can be expensive, so consider
+/// caching the results. The boolean ReturnCaptures specifies whether
+/// returning the value (or part of it) from the function counts as capturing
+/// it or not. The boolean StoreCaptures specifies whether storing the value
+/// (or part of it) into memory anywhere automatically counts as capturing it
+/// or not.
+bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
+ bool StoreCaptures, const Instruction *I,
+ DominatorTree *DT, bool IncludeI) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+
+ if (!DT)
+ return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures);
+
+ // TODO: See comment in PointerMayBeCaptured regarding what could be done
+ // with StoreCaptures.
+
+ CapturesBefore CB(ReturnCaptures, I, DT, IncludeI);
+ PointerMayBeCaptured(V, &CB);
+ return CB.Captured;
+}
+
/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
/// a cache. Then we can move the code from BasicAliasAnalysis into
/// that path, and remove this threshold.
@@ -152,7 +239,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
if (Count++ >= Threshold)
return Tracker->tooManyUses();
- if (Visited.insert(&UU))
+ if (Visited.insert(&UU).second)
if (Tracker->shouldExplore(&UU))
Worklist.push_back(&UU);
}
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 4c8a093..f29e4a2 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -11,23 +11,101 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "code-metrics"
using namespace llvm;
+static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallPtrSet<const Value *, 32> Visited;
+
+ // Make sure that all of the items in WorkSet are in our EphValues set.
+ EphValues.insert(WorkSet.begin(), WorkSet.end());
+
+ // Note: We don't speculate PHIs here, so we'll miss instruction chains kept
+ // alive only by ephemeral values.
+
+ while (!WorkSet.empty()) {
+ const Value *V = WorkSet.front();
+ WorkSet.erase(WorkSet.begin());
+
+ if (!Visited.insert(V).second)
+ continue;
+
+ // If all uses of this value are ephemeral, then so is this value.
+ bool FoundNEUse = false;
+ for (const User *I : V->users())
+ if (!EphValues.count(I)) {
+ FoundNEUse = true;
+ break;
+ }
+
+ if (FoundNEUse)
+ continue;
+
+ EphValues.insert(V);
+ DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
+
+ if (const User *U = dyn_cast<User>(V))
+ for (const Value *J : U->operands()) {
+ if (isSafeToSpeculativelyExecute(J))
+ WorkSet.push_back(J);
+ }
+ }
+}
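The worklist above computes a simple closure: a candidate value becomes ephemeral once every one of its users already is. A self-contained rendition of just that rule over a toy use-graph (the graph, the value names, and the single-pass ordering are invented for illustration; the real code additionally walks operands and checks speculatability):

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Users[V] lists the values that use V (a toy stand-in for IR use lists).
      std::map<std::string, std::vector<std::string>> Users = {
          {"cmp", {"assume"}},     // only feeds the assume -> ephemeral
          {"x",   {"cmp", "ret"}}, // also feeds a real use -> not ephemeral
      };

      std::set<std::string> Eph = {"assume"};          // seed: the assume call
      std::vector<std::string> WorkSet = {"cmp", "x"}; // candidate operands

      // A candidate is ephemeral only if all of its users already are.
      for (const std::string &V : WorkSet) {
        bool AllUsersEphemeral = true;
        for (const std::string &U : Users[V])
          if (!Eph.count(U)) { AllUsersEphemeral = false; break; }
        if (AllUsersEphemeral)
          Eph.insert(V);
      }

      assert(Eph.count("cmp") && !Eph.count("x"));
      return 0;
    }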
+
+// Find all ephemeral values.
+void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &I : AT->assumptions(L->getHeader()->getParent())) {
+ // Filter out call sites outside of the loop so we don't do a function's
+ // worth of work for each of its loops (and, in the common case, ephemeral
+ // values in the loop are likely due to @llvm.assume calls in the loop).
+ if (!L->contains(I->getParent()))
+ continue;
+
+ WorkSet.push_back(I);
+ }
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
+void CodeMetrics::collectEphemeralValues(const Function *F, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &I : AT->assumptions(const_cast<Function*>(F)))
+ WorkSet.push_back(I);
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const Value*> &EphValues) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
+ // Skip ephemeral values.
+ if (EphValues.count(II))
+ continue;
+
// Special handling for calls.
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
ImmutableCallSite CS(cast<Instruction>(II));
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index eb3e2c6..fd8f2ae 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -47,15 +47,16 @@ using namespace llvm;
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
-/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
-/// DataLayout. This always returns a non-null constant, but it may be a
+/// Constant fold bitcast, symbolically evaluating it with DataLayout.
+/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
static Constant *FoldBitCast(Constant *C, Type *DestTy,
const DataLayout &TD) {
// Catch the obvious splat cases.
if (C->isNullValue() && !DestTy->isX86_MMXTy())
return Constant::getNullValue(DestTy);
- if (C->isAllOnesValue() && !DestTy->isX86_MMXTy())
+ if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+ !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
return Constant::getAllOnesValue(DestTy);
// Handle a vector->integer cast.
@@ -197,7 +198,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+ unsigned DstBitSize = TD.getTypeSizeInBits(DstEltTy);
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
@@ -213,6 +214,15 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+ // Truncate the element to an integer with the same pointer size and
+ // convert the element back to a pointer using an inttoptr.
+ if (DstEltTy->isPointerTy()) {
+ IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
+ Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
+ Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
+ continue;
+ }
+
// Truncate and remember this piece.
Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
}
@@ -222,9 +232,8 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
}
-/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
-/// from a global, return the global and the constant. Because of
-/// constantexprs, this function is recursive.
+/// If this constant is a constant offset from a global, return the global and
+/// the constant. Because of constantexprs, this function is recursive.
static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt &Offset, const DataLayout &TD) {
// Trivial case, constant is the global.
@@ -240,7 +249,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
// Look through ptr->int and ptr->ptr casts.
if (CE->getOpcode() == Instruction::PtrToInt ||
- CE->getOpcode() == Instruction::BitCast)
+ CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::AddrSpaceCast)
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
@@ -263,10 +273,10 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return true;
}
-/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
-/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the
-/// pointer to copy results into and BytesLeft is the number of bytes left in
-/// the CurPtr buffer. TD is the target data.
+/// Recursive helper to read bits out of a global. C is the constant being copied
+/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
+/// results into and BytesLeft is the number of bytes left in
+/// the CurPtr buffer. TD is the target data.
static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
unsigned char *CurPtr, unsigned BytesLeft,
const DataLayout &TD) {
@@ -517,9 +527,8 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
return nullptr;
}
-/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
-/// produce if it is constant and determinable. If this is not determinable,
-/// return null.
+/// Return the value that a load from C would produce if it is constant and
+/// determinable. If this is not determinable, return null.
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
const DataLayout *TD) {
// First, try the easy cases:
@@ -609,7 +618,7 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
return nullptr;
}
-/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together. If target data info is available, it is provided as DL,
/// otherwise DL is null.
@@ -666,9 +675,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
return nullptr;
}
-/// CastGEPIndices - If array indices are not pointer-sized integers,
-/// explicitly cast them so that they aren't implicitly casted by the
-/// getelementptr.
+/// If array indices are not pointer-sized integers, explicitly cast them so
+/// that they aren't implicitly casted by the getelementptr.
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
@@ -723,8 +731,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
return Ptr;
}
-/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
-/// constant expression, do so.
+/// If we can symbolically evaluate the GEP constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
@@ -886,7 +893,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Constant Folding public APIs
//===----------------------------------------------------------------------===//
-/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// Try to constant fold the specified instruction.
/// If successful, the constant result is returned, if not, null is returned.
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
@@ -966,7 +973,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
static Constant *
ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
const TargetLibraryInfo *TLI,
- SmallPtrSet<ConstantExpr *, 4> &FoldedOps) {
+ SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
SmallVector<Constant *, 8> Ops;
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
++i) {
@@ -974,7 +981,7 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
// Recursively fold the ConstantExpr's operands. If we have already folded
// a ConstantExpr, we don't have to process it again.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
- if (FoldedOps.insert(NewCE))
+ if (FoldedOps.insert(NewCE).second)
NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
}
Ops.push_back(NewC);
@@ -986,7 +993,7 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
}
-/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// Attempt to fold the constant expression
/// using the specified DataLayout. If successful, the constant result is
/// returned, if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
@@ -996,7 +1003,7 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
}
-/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned, if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
@@ -1101,10 +1108,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
}
}
-/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// Attempt to constant fold a compare
/// instruction (icmp/fcmp) with the specified operands. If it fails, it
/// returns a constant expression of the specified operands.
-///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
const DataLayout *TD,
@@ -1191,9 +1197,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
}
-/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
-/// getelementptr constantexpr, return the constant value being addressed by the
-/// constant expression, or null if something is funny and we can't decide.
+/// Given a constant and a getelementptr constantexpr, return the constant value
+/// being addressed by the constant expression, or null if something is funny
+/// and we can't decide.
Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
ConstantExpr *CE) {
if (!CE->getOperand(1)->isNullValue())
@@ -1209,10 +1215,9 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
return C;
}
-/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
-/// indices (with an *implied* zero pointer index that is not in the list),
-/// return the constant value being addressed by a virtual load, or null if
-/// something is funny and we can't decide.
+/// Given a constant and getelementptr indices (with an *implied* zero pointer
+/// index that is not in the list), return the constant value being addressed by
+/// a virtual load, or null if something is funny and we can't decide.
Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
ArrayRef<Constant*> Indices) {
// Loop over all of the operands, tracking down which value we are
@@ -1230,11 +1235,12 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// Constant Folding for Calls
//
-/// canConstantFoldCallTo - Return true if its even possible to fold a call to
-/// the specified function.
+/// Return true if it's even possible to fold a call to the specified function.
bool llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
case Intrinsic::log:
case Intrinsic::log2:
case Intrinsic::log10:
@@ -1320,7 +1326,7 @@ static Constant *GetConstantFoldFPValue(double V, Type *Ty) {
}
namespace {
-/// llvm_fenv_clearexcept - Clear the floating-point exception state.
+/// Clear the floating-point exception state.
static inline void llvm_fenv_clearexcept() {
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
feclearexcept(FE_ALL_EXCEPT);
@@ -1328,7 +1334,7 @@ static inline void llvm_fenv_clearexcept() {
errno = 0;
}
-/// llvm_fenv_testexcept - Test if a floating-point exception was raised.
+/// Test if a floating-point exception was raised.
static inline bool llvm_fenv_testexcept() {
int errno_val = errno;
if (errno_val == ERANGE || errno_val == EDOM)
@@ -1365,14 +1371,13 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
return GetConstantFoldFPValue(V, Ty);
}
-/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer
-/// conversion of a constant floating point. If roundTowardZero is false, the
-/// default IEEE rounding is used (toward nearest, ties to even). This matches
-/// the behavior of the non-truncating SSE instructions in the default rounding
-/// mode. The desired integer type Ty is used to select how many bits are
-/// available for the result. Returns null if the conversion cannot be
-/// performed, otherwise returns the Constant value resulting from the
-/// conversion.
+/// Attempt to fold an SSE floating point to integer conversion of a constant
+/// floating point. If roundTowardZero is false, the default IEEE rounding is
+/// used (toward nearest, ties to even). This matches the behavior of the
+/// non-truncating SSE instructions in the default rounding mode. The desired
+/// integer type Ty is used to select how many bits are available for the
+/// result. Returns null if the conversion cannot be performed, otherwise
+/// returns the Constant value resulting from the conversion.
static Constant *ConstantFoldConvertToInt(const APFloat &Val,
bool roundTowardZero, Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64bits.
@@ -1519,8 +1524,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
(Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
if (V >= -0.0)
return ConstantFoldFP(sqrt, V, Ty);
- else // Undefined
- return Constant::getNullValue(Ty);
+ else {
+ // Unlike the sqrt definitions in C/C++, POSIX, and IEEE-754 - which
+ // all guarantee or favor returning NaN - the square root of a
+ // negative number is not defined for the LLVM sqrt intrinsic.
+ // This is because the intrinsic should only be emitted in place of
+ // libm's sqrt function when using "no-nans-fp-math".
+ return UndefValue::get(Ty);
+ }
}
break;
case 's':
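
For contrast with the comment above: the libm/IEEE behaviour that the intrinsic deliberately does not inherit is returning a NaN for negative inputs. A tiny standalone check of that libm behaviour (plain C++, not LLVM code):

#include <cassert>
#include <cmath>

int main() {
  // C/POSIX sqrt of a negative value yields a NaN (and may set errno to EDOM);
  // the folder above instead produces undef for llvm.sqrt, since the intrinsic
  // is only emitted in place of libm sqrt under "no-nans-fp-math".
  double V = std::sqrt(-1.0);
  assert(std::isnan(V));
  return 0;
}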
@@ -1626,6 +1637,19 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
V1.copySign(V2);
return ConstantFP::get(Ty->getContext(), V1);
}
+
+ if (IntrinsicID == Intrinsic::minnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
+ }
+
+ if (IntrinsicID == Intrinsic::maxnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
+ }
+
if (!TLI)
return nullptr;
if (Name == "pow" && TLI->has(LibFunc::pow))
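
The minnum/maxnum folds rely on APFloat's minnum/maxnum, which follow the IEEE-754 minNum/maxNum rule that a quiet NaN loses to a number. std::fmin/std::fmax implement the same rule, so the expected results can be sketched standalone (plain C++, illustrative values only):

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double QNaN = std::numeric_limits<double>::quiet_NaN();

  // Ordinary operands: the smaller/larger value is chosen.
  assert(std::fmin(1.0, 2.0) == 1.0);
  assert(std::fmax(1.0, 2.0) == 2.0);

  // One NaN operand: the non-NaN operand wins (IEEE-754 minNum/maxNum).
  assert(std::fmin(QNaN, 2.0) == 2.0);
  assert(std::fmax(1.0, QNaN) == 1.0);
  return 0;
}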
@@ -1761,7 +1785,7 @@ static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
return ConstantVector::get(Result);
}
-/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index d0784f1..092df5c 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -163,16 +163,15 @@ void dumpExampleDependence(raw_ostream &OS, Function *F,
DstI != DstE; ++DstI) {
if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
OS << "da analyze - ";
- if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) {
+ if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
D->dump(OS);
for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
if (D->isSplitable(Level)) {
OS << "da analyze - split level = " << Level;
- OS << ", iteration = " << *DA->getSplitIteration(D, Level);
+ OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
OS << "!\n";
}
}
- delete D;
}
else
OS << "none!\n";
@@ -782,6 +781,25 @@ void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
}
}
+void DependenceAnalysis::unifySubscriptType(Subscript *Pair) {
+ const SCEV *Src = Pair->Src;
+ const SCEV *Dst = Pair->Dst;
+ IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
+ IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
+ if (SrcTy == nullptr || DstTy == nullptr) {
+ assert(SrcTy == DstTy && "This function only unifies integer types and "
+ "expects Src and Dst to share the same type "
+ "otherwise.");
+ return;
+ }
+ if (SrcTy->getBitWidth() > DstTy->getBitWidth()) {
+ // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst).
+ Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy);
+ } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) {
+ // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src).
+ Pair->Src = SE->getSignExtendExpr(Src, DstTy);
+ }
+}
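
unifySubscriptType widens the narrower of the two subscript expressions by sign-extension so that both sides of a dependence test use one integer type. A standalone sketch of why sign-extension (rather than zero-extension) is the right widening for signed subscripts (plain C++, illustrative values):

#include <cassert>
#include <cstdint>

int main() {
  int32_t Src = -4;                      // narrower subscript, e.g. i32
  int64_t Dst = -4;                      // wider subscript, e.g. i64

  // Sign-extending the narrow value preserves its signed meaning, so the
  // widened pair can be compared directly.
  int64_t SrcExt = static_cast<int64_t>(Src);            // sign-extend
  assert(SrcExt == Dst);

  // Zero-extension would not: it reinterprets -4 as a large positive value.
  int64_t SrcZext = static_cast<int64_t>(static_cast<uint32_t>(Src));
  assert(SrcZext != Dst && SrcZext == 4294967292LL);
  return 0;
}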
// removeMatchingExtensions - Examines a subscript pair.
// If the source and destination are identically sign (or zero)
@@ -794,9 +812,11 @@ void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
(isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
- if (SrcCast->getType() == DstCast->getType()) {
- Pair->Src = SrcCast->getOperand();
- Pair->Dst = DstCast->getOperand();
+ const SCEV *SrcCastOp = SrcCast->getOperand();
+ const SCEV *DstCastOp = DstCast->getOperand();
+ if (SrcCastOp->getType() == DstCastOp->getType()) {
+ Pair->Src = SrcCastOp;
+ Pair->Dst = DstCastOp;
}
}
}
@@ -2957,15 +2977,11 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
AddRec->getNoWrapFlags());
}
if (SE->isLoopInvariant(AddRec, TargetLoop))
- return SE->getAddRecExpr(AddRec,
- Value,
- TargetLoop,
- SCEV::FlagAnyWrap);
- return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
- TargetLoop, Value),
- AddRec->getStepRecurrence(*SE),
- AddRec->getLoop(),
- AddRec->getNoWrapFlags());
+ return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
+ return SE->getAddRecExpr(
+ addToCoefficient(AddRec->getStart(), TargetLoop, Value),
+ AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
}
@@ -3183,7 +3199,7 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
const SCEV *DstSCEV,
SmallVectorImpl<Subscript> &Pair,
- const SCEV *ElementSize) const {
+ const SCEV *ElementSize) {
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV));
const SCEVUnknown *DstBase =
@@ -3238,6 +3254,7 @@ bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
for (int i = 0; i < size; ++i) {
Pair[i].Src = SrcSubscripts[i];
Pair[i].Dst = DstSubscripts[i];
+ unifySubscriptType(&Pair[i]);
// FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
// delinearization has found, and add these constraints to the dependence
@@ -3277,9 +3294,9 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
//
// Care is required to keep the routine below, getSplitIteration(),
// up to date with respect to this routine.
-Dependence *DependenceAnalysis::depends(Instruction *Src,
- Instruction *Dst,
- bool PossiblyLoopIndependent) {
+std::unique_ptr<Dependence>
+DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
+ bool PossiblyLoopIndependent) {
if (Src == Dst)
PossiblyLoopIndependent = false;
@@ -3291,7 +3308,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
DEBUG(dbgs() << "can only handle simple loads and stores\n");
- return new Dependence(Src, Dst);
+ return make_unique<Dependence>(Src, Dst);
}
Value *SrcPtr = getPointerOperand(Src);
@@ -3302,7 +3319,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
case AliasAnalysis::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
DEBUG(dbgs() << "can't analyze may or partial alias\n");
- return new Dependence(Src, Dst);
+ return make_unique<Dependence>(Src, Dst);
case AliasAnalysis::NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
DEBUG(dbgs() << "no alias\n");
@@ -3346,6 +3363,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
++SrcIdx, ++DstIdx, ++P) {
Pair[P].Src = SE->getSCEV(*SrcIdx);
Pair[P].Dst = SE->getSCEV(*DstIdx);
+ unifySubscriptType(&Pair[P]);
}
}
else {
@@ -3675,9 +3693,9 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
return nullptr;
}
- FullDependence *Final = new FullDependence(Result);
+ auto Final = make_unique<FullDependence>(Result);
Result.DV = nullptr;
- return Final;
+ return std::move(Final);
}
@@ -3729,13 +3747,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
//
// breaks the dependence and allows us to vectorize/parallelize
// both loops.
-const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
unsigned SplitLevel) {
- assert(Dep && "expected a pointer to a Dependence");
- assert(Dep->isSplitable(SplitLevel) &&
+ assert(Dep.isSplitable(SplitLevel) &&
"Dep should be splitable at SplitLevel");
- Instruction *Src = Dep->getSrc();
- Instruction *Dst = Dep->getDst();
+ Instruction *Src = Dep.getSrc();
+ Instruction *Dst = Dep.getDst();
assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
assert(isLoadOrStore(Src));
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 74594f8..7ba91bc 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -8,133 +8,50 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DominanceFrontier.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/DominanceFrontierImpl.h"
+
using namespace llvm;
+namespace llvm {
+template class DominanceFrontierBase<BasicBlock>;
+template class ForwardDominanceFrontierBase<BasicBlock>;
+}
+
char DominanceFrontier::ID = 0;
+
INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
"Dominance Frontier Construction", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
"Dominance Frontier Construction", true, true)
-namespace {
- class DFCalculateWorkObject {
- public:
- DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
- const DomTreeNode *N,
- const DomTreeNode *PN)
- : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
- BasicBlock *currentBB;
- BasicBlock *parentBB;
- const DomTreeNode *Node;
- const DomTreeNode *parentNode;
- };
+DominanceFrontier::DominanceFrontier()
+ : FunctionPass(ID),
+ Base() {
+ initializeDominanceFrontierPass(*PassRegistry::getPassRegistry());
}
-void DominanceFrontier::anchor() { }
-
-const DominanceFrontier::DomSetType &
-DominanceFrontier::calculate(const DominatorTree &DT,
- const DomTreeNode *Node) {
- BasicBlock *BB = Node->getBlock();
- DomSetType *Result = nullptr;
-
- std::vector<DFCalculateWorkObject> workList;
- SmallPtrSet<BasicBlock *, 32> visited;
-
- workList.push_back(DFCalculateWorkObject(BB, nullptr, Node, nullptr));
- do {
- DFCalculateWorkObject *currentW = &workList.back();
- assert (currentW && "Missing work object.");
-
- BasicBlock *currentBB = currentW->currentBB;
- BasicBlock *parentBB = currentW->parentBB;
- const DomTreeNode *currentNode = currentW->Node;
- const DomTreeNode *parentNode = currentW->parentNode;
- assert (currentBB && "Invalid work object. Missing current Basic Block");
- assert (currentNode && "Invalid work object. Missing current Node");
- DomSetType &S = Frontiers[currentBB];
-
- // Visit each block only once.
- if (visited.count(currentBB) == 0) {
- visited.insert(currentBB);
-
- // Loop over CFG successors to calculate DFlocal[currentNode]
- for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
- SI != SE; ++SI) {
- // Does Node immediately dominate this successor?
- if (DT[*SI]->getIDom() != currentNode)
- S.insert(*SI);
- }
- }
-
- // At this point, S is DFlocal. Now we union in DFup's of our children...
- // Loop through and visit the nodes that Node immediately dominates (Node's
- // children in the IDomTree)
- bool visitChild = false;
- for (DomTreeNode::const_iterator NI = currentNode->begin(),
- NE = currentNode->end(); NI != NE; ++NI) {
- DomTreeNode *IDominee = *NI;
- BasicBlock *childBB = IDominee->getBlock();
- if (visited.count(childBB) == 0) {
- workList.push_back(DFCalculateWorkObject(childBB, currentBB,
- IDominee, currentNode));
- visitChild = true;
- }
- }
-
- // If all children are visited or there is any child then pop this block
- // from the workList.
- if (!visitChild) {
-
- if (!parentBB) {
- Result = &S;
- break;
- }
-
- DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
- DomSetType &parentSet = Frontiers[parentBB];
- for (; CDFI != CDFE; ++CDFI) {
- if (!DT.properlyDominates(parentNode, DT[*CDFI]))
- parentSet.insert(*CDFI);
- }
- workList.pop_back();
- }
+void DominanceFrontier::releaseMemory() {
+ Base.releaseMemory();
+}
- } while (!workList.empty());
+bool DominanceFrontier::runOnFunction(Function &) {
+ releaseMemory();
+ Base.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+ return false;
+}
- return *Result;
+void DominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<DominatorTreeWrapperPass>();
}
-void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- OS << " DomFrontier for BB ";
- if (I->first)
- I->first->printAsOperand(OS, false);
- else
- OS << " <<exit node>>";
- OS << " is:\t";
-
- const std::set<BasicBlock*> &BBs = I->second;
-
- for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
- I != E; ++I) {
- OS << ' ';
- if (*I)
- (*I)->printAsOperand(OS, false);
- else
- OS << "<<exit node>>";
- }
- OS << "\n";
- }
+void DominanceFrontier::print(raw_ostream &OS, const Module *) const {
+ Base.print(OS);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void DominanceFrontierBase::dump() const {
+void DominanceFrontier::dump() const {
print(dbgs());
}
#endif
-
diff --git a/lib/Analysis/FunctionTargetTransformInfo.cpp b/lib/Analysis/FunctionTargetTransformInfo.cpp
new file mode 100644
index 0000000..a686bec
--- /dev/null
+++ b/lib/Analysis/FunctionTargetTransformInfo.cpp
@@ -0,0 +1,50 @@
+//===- llvm/Analysis/FunctionTargetTransformInfo.cpp ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass wraps a TargetTransformInfo in a FunctionPass so that it can
+// forward along the current Function so that we can make target specific
+// decisions based on the particular subtarget specified for each Function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/FunctionTargetTransformInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "function-tti"
+static const char ftti_name[] = "Function TargetTransformInfo";
+INITIALIZE_PASS_BEGIN(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
+char FunctionTargetTransformInfo::ID = 0;
+
+namespace llvm {
+FunctionPass *createFunctionTargetTransformInfoPass() {
+ return new FunctionTargetTransformInfo();
+}
+}
+
+FunctionTargetTransformInfo::FunctionTargetTransformInfo()
+ : FunctionPass(ID), Fn(nullptr), TTI(nullptr) {
+ initializeFunctionTargetTransformInfoPass(*PassRegistry::getPassRegistry());
+}
+
+void FunctionTargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetTransformInfo>();
+}
+
+void FunctionTargetTransformInfo::releaseMemory() {}
+
+bool FunctionTargetTransformInfo::runOnFunction(Function &F) {
+ Fn = &F;
+ TTI = &getAnalysis<TargetTransformInfo>();
+ return false;
+}
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index caec253..67cf7f8 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -267,7 +267,7 @@ INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction",
char CallGraphWrapperPass::ID = 0;
-void CallGraphWrapperPass::releaseMemory() { G.reset(nullptr); }
+void CallGraphWrapperPass::releaseMemory() { G.reset(); }
void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
if (!G) {
@@ -282,6 +282,3 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
#endif
-
-// Enuse that users of CallGraph.h also link with this file
-DEFINING_FILE_FOR(CallGraph)
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index c27edbf..665aa7f 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -243,7 +243,14 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
assert(!CallSites.count(I->first) &&
"Call site occurs in node multiple times");
- CallSites.insert(std::make_pair(I->first, I->second));
+
+ CallSite CS(I->first);
+ if (CS) {
+ Function *Callee = CS.getCalledFunction();
+ // Ignore intrinsics because they're not really function calls.
+ if (!Callee || !(Callee->isIntrinsic()))
+ CallSites.insert(std::make_pair(I->first, I->second));
+ }
++I;
}
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 8807529..85db278 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -17,7 +17,9 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
@@ -49,6 +51,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
+ /// The cache of @llvm.assume intrinsics.
+ AssumptionTracker *AT;
+
// The called function.
Function &F;
@@ -104,7 +109,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
// Custom analysis routines.
- bool analyzeBlock(BasicBlock *BB);
+ bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
@@ -141,8 +146,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
- Function &Callee, int Threshold)
- : DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
+ AssumptionTracker *AT, Function &Callee, int Threshold)
+ : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
@@ -778,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
@@ -881,7 +886,8 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
@@ -893,6 +899,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
+ // Skip ephemeral values.
+ if (EphValues.count(I))
+ continue;
+
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@@ -967,7 +977,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
break;
}
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- } while (Visited.insert(V));
+ } while (Visited.insert(V).second);
Type *IntPtrTy = DL->getIntPtrType(V->getContext());
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
@@ -1096,6 +1106,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
NumAllocaArgs = SROAArgValues.size();
+ // FIXME: If a caller has multiple calls to a callee, we end up recomputing
+ // the ephemeral values multiple times (and they're completely determined by
+ // the callee, so this is purely duplicate work).
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(&F, AT, EphValues);
+
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
// particular call site in order to get more accurate cost estimates. This
@@ -1129,7 +1145,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
- if (!analyzeBlock(BB)) {
+ if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr)
return false;
@@ -1217,6 +1233,7 @@ void CallAnalyzer::dump() {
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
@@ -1228,12 +1245,14 @@ InlineCostAnalysis::~InlineCostAnalysis() {}
void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetTransformInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
TTI = &getAnalysis<TargetTransformInfo>();
+ AT = &getAnalysis<AssumptionTracker>();
return false;
}
@@ -1290,7 +1309,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(Callee->getDataLayout(), *TTI, *Callee, Threshold);
+ CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 24655aa..6b5f370 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -84,7 +84,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
/// form.
static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
const LoopInfo *LI,
- SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
Loop *NearestLoop = nullptr;
for (DomTreeNode *Rung = DT->getNode(BB);
Rung; Rung = Rung->getIDom()) {
@@ -112,10 +112,10 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
bool IVUsers::AddUsersImpl(Instruction *I,
- SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
// Add this IV user to the Processed set before returning false to ensure that
// all IV users are members of the set. See IVUsers::isIVUserOrOperand.
- if (!Processed.insert(I))
+ if (!Processed.insert(I).second)
return true; // Instruction already handled.
if (!SE->isSCEVable(I->getType()))
@@ -145,7 +145,7 @@ bool IVUsers::AddUsersImpl(Instruction *I,
SmallPtrSet<Instruction *, 4> UniqueUsers;
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
- if (!UniqueUsers.insert(User))
+ if (!UniqueUsers.insert(User).second)
continue;
// Do not infinitely recurse on PHI nodes.
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index bd42af1..f151a3a 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -41,14 +41,20 @@ enum { RecursionLimit = 3 };
STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumReassoc, "Number of reassociations");
+namespace {
struct Query {
const DataLayout *DL;
const TargetLibraryInfo *TLI;
const DominatorTree *DT;
+ AssumptionTracker *AT;
+ const Instruction *CxtI;
Query(const DataLayout *DL, const TargetLibraryInfo *tli,
- const DominatorTree *dt) : DL(DL), TLI(tli), DT(dt) {}
+ const DominatorTree *dt, AssumptionTracker *at = nullptr,
+ const Instruction *cxti = nullptr)
+ : DL(DL), TLI(tli), DT(dt), AT(at), CxtI(cxti) {}
};
+} // end anonymous namespace
static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
@@ -575,9 +581,10 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
- RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW,
+ Query (DL, TLI, DT, AT, CxtI), RecursionLimit);
}
/// \brief Compute the base pointer and cumulative constant offsets for V.
@@ -624,7 +631,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
}
assert(V->getType()->getScalarType()->isPointerTy() &&
"Unexpected operand type!");
- } while (Visited.insert(V));
+ } while (Visited.insert(V).second);
Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
if (V->getType()->isVectorTy())
@@ -676,6 +683,18 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
+ // X - (0 - Y) -> X if the second sub is NUW.
+ // If Y != 0, 0 - Y is a poison value.
+ // If Y == 0, 0 - Y simplifies to 0.
+ if (BinaryOperator::isNeg(Op1)) {
+ if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) {
+ assert(BO->getOpcode() == Instruction::Sub &&
+ "Expected a subtraction operator!");
+ if (BO->hasNoUnsignedWrap())
+ return Op0;
+ }
+ }
+
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
Value *X = nullptr, *Y = nullptr, *Z = Op1;
@@ -769,9 +788,10 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
- RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW,
+ Query (DL, TLI, DT, AT, CxtI), RecursionLimit);
}
/// Given operands for an FAdd, see if we can fold the result. If not, this
@@ -947,28 +967,37 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
FastMathFlags FMF,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
@@ -1028,6 +1057,16 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
(!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
return Constant::getNullValue(Op0->getType());
+ // (X /u C1) /u C2 -> 0 if C1 * C2 overflow
+ ConstantInt *C1, *C2;
+ if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) &&
+ match(Op1, m_ConstantInt(C2))) {
+ bool Overflow;
+ C1->getValue().umul_ov(C2->getValue(), Overflow);
+ if (Overflow)
+ return Constant::getNullValue(Op0->getType());
+ }
+
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
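
A brute-force standalone check of the new fold: when C1 * C2 does not fit in the type, (X /u C1) /u C2 is zero for every X, because any X is smaller than C1 * C2. Plain C++ over i8 with illustrative constants:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C1 = 16, C2 = 32;        // 16 * 32 = 512 overflows i8
  assert(static_cast<unsigned>(C1) * C2 > 255);

  for (unsigned X = 0; X <= 255; ++X) {
    uint8_t V = static_cast<uint8_t>(X);
    assert(((V / C1) / C2) == 0);        // (X /u C1) /u C2 == 0 for all X
  }
  return 0;
}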
@@ -1055,8 +1094,11 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
@@ -1071,8 +1113,11 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
@@ -1090,8 +1135,11 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
/// SimplifyRem - Given operands for an SRem or URem, see if we can
@@ -1133,6 +1181,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
+ // (X % Y) % Y -> X % Y
+ if ((Opcode == Instruction::SRem &&
+ match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (Opcode == Instruction::URem &&
+ match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Op0;
+
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
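
The rem fold mirrors the existing div fold above: taking the remainder twice by the same divisor changes nothing, since X % Y already lies in [0, Y) in the unsigned case (the signed case is analogous with |X srem Y| < |Y|). A brute-force standalone check of the unsigned case over i8 (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X <= 255; ++X)
    for (unsigned Y = 1; Y <= 255; ++Y)     // Y == 0 is undefined, skip it
      assert(((X % Y) % Y) == (X % Y));     // (X % Y) % Y -> X % Y
  return 0;
}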
@@ -1160,8 +1215,11 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
/// SimplifyURemInst - Given operands for a URem, see if we can
@@ -1176,8 +1234,11 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
@@ -1195,8 +1256,11 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
/// isUndefShift - Returns true if a shift by \c Amount always yields undef.
@@ -1264,6 +1328,32 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
return nullptr;
}
+/// \brief Given operands for an LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
+ bool isExact, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // X >> X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // The low bit cannot be shifted out of an exact shift if it is set.
+ if (isExact) {
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ APInt Op0KnownZero(BitWidth, 0);
+ APInt Op0KnownOne(BitWidth, 0);
+ computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AT, Q.CxtI,
+ Q.DT);
+ if (Op0KnownOne[0])
+ return Op0;
+ }
+
+ return nullptr;
+}
+
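The new exact-shift case reasons as follows: an exact lshr/ashr may shift out only zero bits, so if the low bit of Op0 is known to be one, the only legal shift amount is zero and the shift is the identity. A brute-force standalone check of that reasoning over i8 (plain C++; "exact" is modelled as (X >> S) << S == X):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X <= 255; ++X) {
    for (unsigned S = 0; S < 8; ++S) {
      uint8_t V = static_cast<uint8_t>(X);
      bool IsExact = static_cast<uint8_t>((V >> S) << S) == V; // no set bits lost
      if ((V & 1) && IsExact)
        assert(S == 0 && (V >> S) == V);   // exact shift of an odd value is a no-op
    }
  }
  return 0;
}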
/// SimplifyShlInst - Given operands for an Shl, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -1284,8 +1374,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT, AT, CxtI),
RecursionLimit);
}
@@ -1293,12 +1384,9 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// fold the result. If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const Query &Q, unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse))
- return V;
-
- // X >> X -> 0
- if (Op0 == Op1)
- return Constant::getNullValue(Op0->getType());
+ if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
+ MaxRecurse))
+ return V;
// undef >>l X -> 0
if (match(Op0, m_Undef()))
@@ -1306,8 +1394,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
// (X << A) >> A -> X
Value *X;
- if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
- cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+ if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
return X;
return nullptr;
@@ -1316,8 +1403,10 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT),
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, AT, CxtI),
RecursionLimit);
}
@@ -1325,13 +1414,10 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
/// fold the result. If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const Query &Q, unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
+ MaxRecurse))
return V;
- // X >> X -> 0
- if (Op0 == Op1)
- return Constant::getNullValue(Op0->getType());
-
// all ones >>a X -> all ones
if (match(Op0, m_AllOnes()))
return Op0;
@@ -1342,21 +1428,75 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
// (X << A) >> A -> X
Value *X;
- if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
- cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
return X;
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AT, Q.CxtI, Q.DT);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return Op0;
+
return nullptr;
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT),
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, AT, CxtI),
RecursionLimit);
}
+// Simplify (and (icmp ...) (icmp ...)) to false when we can tell that the two
+// ranges of possible values cannot both be satisfied.
+static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ ICmpInst::Predicate Pred0, Pred1;
+ ConstantInt *CI1, *CI2;
+ Value *V;
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
+ m_ConstantInt(CI2))))
+ return nullptr;
+
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
+
+ auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ bool isNSW = AddInst->hasNoSignedWrap();
+ bool isNUW = AddInst->hasNoUnsignedWrap();
+
+ const APInt &CI1V = CI1->getValue();
+ const APInt &CI2V = CI2->getValue();
+ const APInt Delta = CI2V - CI1V;
+ if (CI1V.isStrictlyPositive()) {
+ if (Delta == 2) {
+ if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT)
+ return getFalse(ITy);
+ if (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT && isNSW)
+ return getFalse(ITy);
+ }
+ if (Delta == 1) {
+ if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_SGT)
+ return getFalse(ITy);
+ if (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGT && isNSW)
+ return getFalse(ITy);
+ }
+ }
+ if (CI1V.getBoolValue() && isNUW) {
+ if (Delta == 2)
+ if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Delta == 1)
+ if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ }
+
+ return nullptr;
+}
+
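One way to sanity-check the first of these cases: with CI1 = 1 and CI2 = 3 (so Delta == 2), (X + 1) u< 3 restricts X to {-1, 0, 1} while X s> 1 requires X >= 2, so the conjunction can never hold. A brute-force standalone check over i8 (plain C++, illustrative constants only):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t CI1 = 1, CI2 = 3;                     // Delta = CI2 - CI1 = 2
  for (unsigned X = 0; X <= 255; ++X) {
    uint8_t U = static_cast<uint8_t>(X);
    int8_t S = static_cast<int8_t>(X);                // same bits, signed view
    bool Cmp0 = static_cast<uint8_t>(U + CI1) < CI2;  // icmp ult (add X, 1), 3
    bool Cmp1 = S > static_cast<int8_t>(CI1);         // icmp sgt X, 1
    assert(!(Cmp0 && Cmp1));                          // the 'and' folds to false
  }
  return 0;
}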
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
@@ -1407,12 +1547,21 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true))
+ if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true, 0, Q.AT, Q.CxtI, Q.DT))
return Op0;
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true))
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true, 0, Q.AT, Q.CxtI, Q.DT))
return Op1;
}
+ if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
+ if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
+ if (Value *V = SimplifyAndOfICmps(ICILHS, ICIRHS))
+ return V;
+ if (Value *V = SimplifyAndOfICmps(ICIRHS, ICILHS))
+ return V;
+ }
+ }
+
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
MaxRecurse))
@@ -1447,8 +1596,58 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
+}
+
+// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
+// contains all possible values.
+static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ ICmpInst::Predicate Pred0, Pred1;
+ ConstantInt *CI1, *CI2;
+ Value *V;
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
+ m_ConstantInt(CI2))))
+ return nullptr;
+
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
+
+ auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ bool isNSW = AddInst->hasNoSignedWrap();
+ bool isNUW = AddInst->hasNoUnsignedWrap();
+
+ const APInt &CI1V = CI1->getValue();
+ const APInt &CI2V = CI2->getValue();
+ const APInt Delta = CI2V - CI1V;
+ if (CI1V.isStrictlyPositive()) {
+ if (Delta == 2) {
+ if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
+ return getTrue(ITy);
+ if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW)
+ return getTrue(ITy);
+ }
+ if (Delta == 1) {
+ if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE)
+ return getTrue(ITy);
+ if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW)
+ return getTrue(ITy);
+ }
+ }
+ if (CI1V.getBoolValue() && isNUW) {
+ if (Delta == 2)
+ if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ if (Delta == 1)
+ if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ return nullptr;
}
/// SimplifyOrInst - Given operands for an Or, see if we can
@@ -1508,6 +1707,15 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
(A == Op0 || B == Op0))
return Constant::getAllOnesValue(Op0->getType());
+ if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
+ if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
+ if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS))
+ return V;
+ if (Value *V = SimplifyOrOfICmps(ICIRHS, ICILHS))
+ return V;
+ }
+ }
+
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
@@ -1540,18 +1748,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
match(A, m_Add(m_Value(V1), m_Value(V2)))) {
// Add commutes, try both ways.
- if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue(), Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT))
return A;
- if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ if (V2 == B && MaskedValueIsZero(V1, C2->getValue(), Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT))
return A;
}
// Or commutes, try both ways.
if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
match(B, m_Add(m_Value(V1), m_Value(V2)))) {
// Add commutes, try both ways.
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ if (V1 == A && MaskedValueIsZero(V2, C1->getValue(), Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT))
return B;
- if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ if (V2 == A && MaskedValueIsZero(V1, C1->getValue(), Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT))
return B;
}
}
@@ -1568,8 +1780,10 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
/// SimplifyXorInst - Given operands for a Xor, see if we can
@@ -1623,8 +1837,10 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
static Type *GetCompareTy(Value *Op) {
@@ -1878,40 +2094,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AT, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AT, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getTrue(ITy);
if (LHSKnownNonNegative)
return getFalse(ITy);
break;
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getTrue(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL))
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getFalse(ITy);
if (LHSKnownNonNegative)
return getTrue(ITy);
break;
case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getFalse(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL))
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
}
@@ -1958,13 +2180,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Lower = (-Upper) + 1;
}
} else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
- // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
APInt IntMin = APInt::getSignedMinValue(Width);
APInt IntMax = APInt::getSignedMaxValue(Width);
- APInt Val = CI2->getValue().abs();
- if (!Val.isMinValue()) {
+ APInt Val = CI2->getValue();
+ if (Val.isAllOnesValue()) {
+ // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
+ // (x = INT_MIN is excluded because INT_MIN sdiv -1 overflows)
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (Val.countLeadingZeros() < Width - 1) {
+ // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]
+ // where CI2 != -1 and CI2 != 0 and CI2 != 1
Lower = IntMin.sdiv(Val);
- Upper = IntMax.sdiv(Val) + 1;
+ Upper = IntMax.sdiv(Val);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
+ }
+ } else if (match(LHS, m_NUWShl(m_ConstantInt(CI2), m_Value()))) {
+ // 'shl nuw CI2, x' produces [CI2, CI2 << CLZ(CI2)]
+ Lower = CI2->getValue();
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (match(LHS, m_NSWShl(m_ConstantInt(CI2), m_Value()))) {
+ if (CI2->isNegative()) {
+ // 'shl nsw CI2, x' produces [CI2 << CLO(CI2)-1, CI2]
+ unsigned ShiftAmount = CI2->getValue().countLeadingOnes() - 1;
+ Lower = CI2->getValue().shl(ShiftAmount);
+ Upper = CI2->getValue() + 1;
+ } else {
+ // 'shl nsw CI2, x' produces [CI2, CI2 << CLZ(CI2)-1]
+ unsigned ShiftAmount = CI2->getValue().countLeadingZeros() - 1;
+ Lower = CI2->getValue();
+ Upper = CI2->getValue().shl(ShiftAmount) + 1;
}
} else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
// 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
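
The sdiv branch above can be sanity-checked outside of LLVM. Below is a minimal sketch in plain C++ (fixed 32-bit width instead of APInt; the helper name sdivRange is made up for illustration) that mirrors the new range logic:

#include <cstdint>
#include <cstdio>
#include <limits>
#include <utility>

// Inclusive signed range of `x sdiv C` over all 32-bit x, mirroring the hunk above.
static void sdivRange(int32_t C, int64_t &Lo, int64_t &Hi) {
  const int64_t IntMin = std::numeric_limits<int32_t>::min();
  const int64_t IntMax = std::numeric_limits<int32_t>::max();
  if (C == -1) {
    // INT_MIN sdiv -1 overflows, so the result never reaches INT_MIN.
    Lo = IntMin + 1;
    Hi = IntMax;
  } else if (C != 0 && C != 1) {
    Lo = IntMin / C;
    Hi = IntMax / C;
    if (Lo > Hi)           // a negative divisor flips the endpoints
      std::swap(Lo, Hi);
  } else {
    Lo = IntMin;           // C == 0 or C == 1: no useful narrowing
    Hi = IntMax;
  }
}

int main() {
  int64_t Lo, Hi;
  sdivRange(-2, Lo, Hi);   // yields [-1073741823, 1073741824]
  // Hence an icmp such as `(x sdiv -2) sgt 1073741824` can fold to false.
  std::printf("[%lld, %lld]\n", (long long)Lo, (long long)Hi);
  return 0;
}
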
@@ -2174,25 +2422,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
- // If a bit is known to be zero for A and known to be one for B,
- // then A and B cannot be equal.
- if (ICmpInst::isEquality(Pred)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- uint32_t BitWidth = CI->getBitWidth();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
- if (((LHSKnownOne & RHSKnownZero) != 0) ||
- ((LHSKnownZero & RHSKnownOne) != 0))
- return (Pred == ICmpInst::ICMP_EQ)
- ? ConstantInt::getFalse(CI->getContext())
- : ConstantInt::getTrue(CI->getContext());
- }
- }
-
// Special logic for binary operators.
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
@@ -2286,7 +2515,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2296,7 +2526,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2315,7 +2546,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2325,7 +2557,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL,
+ 0, Q.AT, Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2345,6 +2578,41 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
}
+ // handle:
+ // CI2 << X == CI
+ // CI2 << X != CI
+ //
+ // where CI2 is a power of 2 and CI isn't
+ if (auto *CI = dyn_cast<ConstantInt>(RHS)) {
+ const APInt *CI2Val, *CIVal = &CI->getValue();
+ if (LBO && match(LBO, m_Shl(m_APInt(CI2Val), m_Value())) &&
+ CI2Val->isPowerOf2()) {
+ if (!CIVal->isPowerOf2()) {
+ // CI2 << X can equal zero in some circumstances, so this
+ // simplification is unsafe if CI is zero.
+ //
+ // We know it is safe if:
+ // - The shift is nsw, we can't shift out the one bit.
+ // - The shift is nuw, we can't shift out the one bit.
+ // - CI2 is one
+ // - CI isn't zero
+ if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() ||
+ *CI2Val == 1 || !CI->isZero()) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(RHS->getContext());
+ if (Pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(RHS->getContext());
+ }
+ }
+ if (CIVal->isSignBit() && *CI2Val == 1) {
+ if (Pred == ICmpInst::ICMP_UGT)
+ return ConstantInt::getFalse(RHS->getContext());
+ if (Pred == ICmpInst::ICMP_ULE)
+ return ConstantInt::getTrue(RHS->getContext());
+ }
+ }
+ }
+
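
The shl-vs-constant fold above rests on a simple invariant that a standalone check makes concrete: a power of two shifted left either stays a power of two or becomes zero, so it can never equal a non-zero constant that is not a power of two. Plain C++ sketch; CI2 == 4 and CI == 7 are hypothetical values chosen only for illustration:

#include <cassert>
#include <cstdint>

static bool isPowerOfTwo(uint32_t V) { return V != 0 && (V & (V - 1)) == 0; }

int main() {
  for (uint32_t X = 0; X < 32; ++X) {
    uint32_t Shifted = 4u << X;  // CI2 == 4, a power of two
    assert(Shifted == 0 || isPowerOfTwo(Shifted));
    assert(Shifted != 7);        // CI == 7: `icmp eq` folds to false, `icmp ne` to true
  }
  return 0;
}
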
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
@@ -2592,6 +2860,23 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // If a bit is known to be zero for A and known to be one for B,
+ // then A and B cannot be equal.
+ if (ICmpInst::isEquality(Pred)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ uint32_t BitWidth = CI->getBitWidth();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AT,
+ Q.CxtI, Q.DT);
+ const APInt &RHSVal = CI->getValue();
+ if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0))
+ return Pred == ICmpInst::ICMP_EQ
+ ? ConstantInt::getFalse(CI->getContext())
+ : ConstantInt::getTrue(CI->getContext());
+ }
+ }
+
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
@@ -2610,8 +2895,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ Instruction *CxtI) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI),
RecursionLimit);
}
@@ -2707,8 +2994,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI),
RecursionLimit);
}
@@ -2746,9 +3035,11 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (DL, TLI, DT),
- RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal,
+ Query (DL, TLI, DT, AT, CxtI), RecursionLimit);
}
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
@@ -2756,29 +3047,72 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// The type of the GEP pointer operand.
PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType());
+ unsigned AS = PtrTy->getAddressSpace();
// getelementptr P -> P.
if (Ops.size() == 1)
return Ops[0];
- if (isa<UndefValue>(Ops[0])) {
- // Compute the (pointer) type returned by the GEP instruction.
- Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
- Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
- if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
- GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+ // Compute the (pointer) type returned by the GEP instruction.
+ Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
+ Type *GEPTy = PointerType::get(LastType, AS);
+ if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
+ GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+
+ if (isa<UndefValue>(Ops[0]))
return UndefValue::get(GEPTy);
- }
if (Ops.size() == 2) {
// getelementptr P, 0 -> P.
if (match(Ops[1], m_Zero()))
return Ops[0];
- // getelementptr P, N -> P if P points to a type of zero size.
- if (Q.DL) {
- Type *Ty = PtrTy->getElementType();
- if (Ty->isSized() && Q.DL->getTypeAllocSize(Ty) == 0)
+
+ Type *Ty = PtrTy->getElementType();
+ if (Q.DL && Ty->isSized()) {
+ Value *P;
+ uint64_t C;
+ uint64_t TyAllocSize = Q.DL->getTypeAllocSize(Ty);
+ // getelementptr P, N -> P if P points to a type of zero size.
+ if (TyAllocSize == 0)
return Ops[0];
+
+ // The following transforms are only safe if the ptrtoint cast
+ // doesn't truncate the pointers.
+ if (Ops[1]->getType()->getScalarSizeInBits() ==
+ Q.DL->getPointerSizeInBits(AS)) {
+ auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
+ if (match(P, m_Zero()))
+ return Constant::getNullValue(GEPTy);
+ Value *Temp;
+ if (match(P, m_PtrToInt(m_Value(Temp))))
+ if (Temp->getType() == GEPTy)
+ return Temp;
+ return nullptr;
+ };
+
+ // getelementptr V, (sub P, V) -> P if P points to a type of size 1.
+ if (TyAllocSize == 1 &&
+ match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0])))))
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
+
+ // getelementptr V, (ashr (sub P, V), C) -> Q
+ // if P points to a type of size 1 << C.
+ if (match(Ops[1],
+ m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))),
+ m_ConstantInt(C))) &&
+ TyAllocSize == 1ULL << C)
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
+
+ // getelementptr V, (sdiv (sub P, V), C) -> Q
+ // if P points to a type of size C.
+ if (match(Ops[1],
+ m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))),
+ m_SpecificInt(TyAllocSize))))
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
+ }
}
}
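
The new pointer folds read more naturally as the C-level identity V + (P - V) == P when the pointee size is 1; the ashr and sdiv forms only divide the byte difference by the element size first. A plain C++ sketch of that identity (the buffer and offsets are arbitrary illustrative values):

#include <cassert>
#include <cstdint>

int main() {
  char Buf[16];
  char *V = &Buf[2];
  char *P = &Buf[11];
  // sub (ptrtoint P), (ptrtoint V)
  std::intptr_t Diff = reinterpret_cast<std::intptr_t>(P) -
                       reinterpret_cast<std::intptr_t>(V);
  // getelementptr V, Diff -> P when the element type has size 1
  assert(V + Diff == P);
  return 0;
}
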
@@ -2792,8 +3126,9 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT, AT, CxtI), RecursionLimit);
}
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
@@ -2829,8 +3164,11 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (DL, TLI, DT),
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs,
+ Query (DL, TLI, DT, AT, CxtI),
RecursionLimit);
}
@@ -2877,8 +3215,11 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT,
+ AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
//=== Helper functions for higher up the class hierarchy.
@@ -2950,8 +3291,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT, AT, CxtI),
+ RecursionLimit);
}
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
@@ -2965,8 +3308,9 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI),
RecursionLimit);
}
@@ -3041,23 +3385,26 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
User::op_iterator ArgEnd, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT),
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AT, CxtI),
RecursionLimit);
}
Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyCall(V, Args.begin(), Args.end(), Query(DL, TLI, DT),
- RecursionLimit);
+ const DominatorTree *DT, AssumptionTracker *AT,
+ const Instruction *CxtI) {
+ return ::SimplifyCall(V, Args.begin(), Args.end(),
+ Query(DL, TLI, DT, AT, CxtI), RecursionLimit);
}
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
+ const DominatorTree *DT,
+ AssumptionTracker *AT) {
Value *Result;
switch (I->getOpcode()) {
@@ -3066,109 +3413,122 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
break;
case Instruction::FAdd:
Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT);
+ I->getFastMathFlags(), DL, TLI, DT, AT, I);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT);
+ DL, TLI, DT, AT, I);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT);
+ I->getFastMathFlags(), DL, TLI, DT, AT, I);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT);
+ DL, TLI, DT, AT, I);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT);
+ I->getFastMathFlags(), DL, TLI, DT, AT, I);
break;
case Instruction::Mul:
- Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT);
+ DL, TLI, DT, AT, I);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- DL, TLI, DT);
+ DL, TLI, DT, AT, I);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- DL, TLI, DT);
+ DL, TLI, DT, AT, I);
break;
case Instruction::And:
- Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::Or:
- Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AT, I);
break;
case Instruction::Xor:
- Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::ICmp:
Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::FCmp:
Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ I->getOperand(0), I->getOperand(1),
+ DL, TLI, DT, AT, I);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), DL, TLI, DT);
+ I->getOperand(2), DL, TLI, DT, AT, I);
break;
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
- Result = SimplifyGEPInst(Ops, DL, TLI, DT);
+ Result = SimplifyGEPInst(Ops, DL, TLI, DT, AT, I);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
IV->getInsertedValueOperand(),
- IV->getIndices(), DL, TLI, DT);
+ IV->getIndices(), DL, TLI, DT, AT, I);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT));
+ Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT, AT, I));
break;
case Instruction::Call: {
CallSite CS(cast<CallInst>(I));
Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
- DL, TLI, DT);
+ DL, TLI, DT, AT, I);
break;
}
case Instruction::Trunc:
- Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT);
+ Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT,
+ AT, I);
break;
}
@@ -3192,7 +3552,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
+ const DominatorTree *DT,
+ AssumptionTracker *AT) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
@@ -3219,7 +3580,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
I = Worklist[Idx];
// See if this instruction simplifies.
- SimpleV = SimplifyInstruction(I, DL, TLI, DT);
+ SimpleV = SimplifyInstruction(I, DL, TLI, DT, AT);
if (!SimpleV)
continue;
@@ -3245,15 +3606,17 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
bool llvm::recursivelySimplifyInstruction(Instruction *I,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT);
+ const DominatorTree *DT,
+ AssumptionTracker *AT) {
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AT);
}
bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
+ const DominatorTree *DT,
+ AssumptionTracker *AT) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
- return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT);
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AT);
}
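
Taken together, the InstructionSimplify changes thread an AssumptionTracker and a context instruction through every Simplify* entry point. A hedged sketch of an updated caller (the wrapper trySimplify is illustrative; only the SimplifyInstruction signature comes from this change):

#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Replace I with a simpler value if one exists. I itself serves as the
// context instruction, so only @llvm.assume calls that are valid at I apply.
static bool trySimplify(Instruction *I, const DataLayout *DL,
                        const TargetLibraryInfo *TLI, const DominatorTree *DT,
                        AssumptionTracker *AT) {
  if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AT)) {
    I->replaceAllUsesWith(V);
    I->eraseFromParent();
    return true;
  }
  return false;
}
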
diff --git a/lib/Analysis/JumpInstrTableInfo.cpp b/lib/Analysis/JumpInstrTableInfo.cpp
index b5b4265..7aae2a5 100644
--- a/lib/Analysis/JumpInstrTableInfo.cpp
+++ b/lib/Analysis/JumpInstrTableInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -28,7 +29,21 @@ ImmutablePass *llvm::createJumpInstrTableInfoPass() {
return new JumpInstrTableInfo();
}
-JumpInstrTableInfo::JumpInstrTableInfo() : ImmutablePass(ID), Tables() {
+ModulePass *llvm::createJumpInstrTableInfoPass(unsigned Bound) {
+ // This cast is always safe, since the range of unsigned is a subset of uint64_t's.
+ uint64_t B = static_cast<uint64_t>(Bound);
+ return new JumpInstrTableInfo(B);
+}
+
+JumpInstrTableInfo::JumpInstrTableInfo(uint64_t ByteAlign)
+ : ImmutablePass(ID), Tables(), ByteAlignment(ByteAlign) {
+ if (!llvm::isPowerOf2_64(ByteAlign)) {
+ // Note that we don't explicitly handle overflow here: NextPowerOf2 returns
+ // 0 on overflow, and the 0 case is handled explicitly when a caller
+ // actually tries to create jumptable entries.
+ ByteAlignment = llvm::NextPowerOf2(ByteAlign);
+ }
+
initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry());
}
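
The constructor above rounds a non-power-of-two alignment up via llvm::NextPowerOf2. A standalone sketch of that rounding, with a local helper in place of the MathExtras function so it runs by itself (the inputs 12 and 16 are illustrative):

#include <cassert>
#include <cstdint>

// The usual bit trick: smear the highest set bit downward, then add one.
static uint64_t nextPowerOf2(uint64_t A) {
  A |= (A >> 1);  A |= (A >> 2);  A |= (A >> 4);
  A |= (A >> 8);  A |= (A >> 16); A |= (A >> 32);
  return A + 1;
}

int main() {
  assert(nextPowerOf2(12) == 16);  // a requested byte alignment of 12 becomes 16
  assert(nextPowerOf2(16) == 32);  // "next" is strictly greater, which is fine here:
                                   // the ctor only calls it for non-powers of two
  return 0;
}
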
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index e073616..767da4e 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -48,7 +48,7 @@ static void findCallees(
}
for (Value *Op : C->operand_values())
- if (Visited.insert(cast<Constant>(Op)))
+ if (Visited.insert(cast<Constant>(Op)).second)
Worklist.push_back(cast<Constant>(Op));
}
}
@@ -66,7 +66,7 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
for (Instruction &I : BB)
for (Value *Op : I.operand_values())
if (Constant *C = dyn_cast<Constant>(Op))
- if (Visited.insert(C))
+ if (Visited.insert(C).second)
Worklist.push_back(C);
// We've collected all the constant (and thus potentially function or
@@ -113,7 +113,7 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
SmallPtrSet<Constant *, 16> Visited;
for (GlobalVariable &GV : M.globals())
if (GV.hasInitializer())
- if (Visited.insert(GV.getInitializer()))
+ if (Visited.insert(GV.getInitializer()).second)
Worklist.push_back(GV.getInitializer());
DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
@@ -688,7 +688,7 @@ static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N,
SmallPtrSetImpl<LazyCallGraph::Node *> &Printed) {
// Recurse depth first through the nodes.
for (LazyCallGraph::Node &ChildN : N)
- if (Printed.insert(&ChildN))
+ if (Printed.insert(&ChildN).second)
printNodes(OS, ChildN, Printed);
OS << " Call edges in function: " << N.getFunction().getName() << "\n";
@@ -717,7 +717,7 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M,
SmallPtrSet<LazyCallGraph::Node *, 16> Printed;
for (LazyCallGraph::Node &N : G)
- if (Printed.insert(&N))
+ if (Printed.insert(&N).second)
printNodes(OS, N, Printed);
for (LazyCallGraph::SCC &SCC : G.postorder_sccs())
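
These edits (and the matching ones in Lint.cpp below) adapt to SmallPtrSet::insert returning std::pair<iterator, bool>, so the membership test becomes .second. A minimal usage sketch (collectUnique and the int* element type are illustrative):

#include "llvm/ADT/SmallPtrSet.h"
#include <vector>

static void collectUnique(const std::vector<int *> &Ptrs,
                          std::vector<int *> &Worklist) {
  llvm::SmallPtrSet<int *, 16> Visited;
  for (int *P : Ptrs)
    if (Visited.insert(P).second)  // .second is true only on first insertion
      Worklist.push_back(P);
}
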
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 9f919f7..c712c9f 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -15,12 +15,14 @@
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -38,6 +40,7 @@ using namespace PatternMatch;
char LazyValueInfo::ID = 0;
INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
@@ -338,6 +341,13 @@ namespace {
/// during a query. It basically emulates the callstack of the naive
/// recursive value lookup process.
std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+ /// A pointer to the cache of @llvm.assume calls.
+ AssumptionTracker *AT;
+ /// An optional DL pointer.
+ const DataLayout *DL;
+ /// An optional DT pointer.
+ DominatorTree *DT;
friend struct LVIValueHandle;
@@ -364,7 +374,8 @@ namespace {
LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
- LVILatticeVal &Result);
+ LVILatticeVal &Result,
+ Instruction *CxtI = nullptr);
bool hasBlockValue(Value *Val, BasicBlock *BB);
// These methods process one work item and may add more. A false value
@@ -377,6 +388,8 @@ namespace {
PHINode *PN, BasicBlock *BB);
bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
Instruction *BBI, BasicBlock *BB);
+ void mergeAssumeBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV,
+ Instruction *BBI);
void solve();
@@ -387,11 +400,18 @@ namespace {
public:
/// getValueInBlock - This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
- LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB,
+ Instruction *CxtI = nullptr);
+
+ /// getValueAt - This is the query interface to determine the lattice
+ /// value for the specified Value* at the specified instruction (generally
+ /// from an assume intrinsic).
+ LVILatticeVal getValueAt(Value *V, Instruction *CxtI);
/// getValueOnEdge - This is the query interface to determine the lattice
/// value for the specified Value* that is true on the specified edge.
- LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
+ Instruction *CxtI = nullptr);
/// threadEdge - This is the update interface to inform the cache that an
/// edge from PredBB to OldSucc has been threaded to be from PredBB to
@@ -408,6 +428,10 @@ namespace {
ValueCache.clear();
OverDefinedCache.clear();
}
+
+ LazyValueInfoCache(AssumptionTracker *AT,
+ const DataLayout *DL = nullptr,
+ DominatorTree *DT = nullptr) : AT(AT), DL(DL), DT(DT) {}
};
} // end anonymous namespace
@@ -500,7 +524,6 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
// cache needs updating, i.e. if we have solve a new value or not.
OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
- // If we've already computed this block's value, return it.
if (!BBLV.isUndefined()) {
DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
@@ -669,7 +692,10 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
BasicBlock *PhiBB = PN->getIncomingBlock(i);
Value *PhiVal = PN->getIncomingValue(i);
LVILatticeVal EdgeResult;
- EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+ // Note that we can provide PN as the context value to getEdgeValue, even
+ // though the results will be cached, because PN is the value being used as
+ // the cache key in the caller.
+ EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN);
if (EdgesMissing)
continue;
@@ -694,6 +720,36 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
return true;
}
+static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
+ LVILatticeVal &Result,
+ bool isTrueDest = true);
+
+// If we can determine a constant range for the value Val in the context
+// provided by the instruction BBI (e.g. from an applicable @llvm.assume),
+// merge it into BBLV.
+void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(
+ Value *Val, LVILatticeVal &BBLV, Instruction *BBI) {
+ BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
+ if (!BBI)
+ return;
+
+ for (auto &I : AT->assumptions(BBI->getParent()->getParent())) {
+ if (!isValidAssumeForContext(I, BBI, DL, DT))
+ continue;
+
+ Value *C = I->getArgOperand(0);
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(C)) {
+ LVILatticeVal Result;
+ if (getValueFromFromCondition(Val, ICI, Result)) {
+ if (BBLV.isOverdefined())
+ BBLV = Result;
+ else
+ BBLV.mergeIn(Result);
+ }
+ }
+ }
+}
+
bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
Instruction *BBI,
BasicBlock *BB) {
@@ -704,6 +760,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
}
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
+ mergeAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
if (!LHSVal.isConstantRange()) {
BBLV.markOverdefined();
return true;
@@ -775,6 +832,47 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
return true;
}
+bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
+ LVILatticeVal &Result, bool isTrueDest) {
+ if (ICI && isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality() && ICI->getOperand(0) == Val) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ else
+ Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ return true;
+ }
+
+ // Recognize the range checking idiom that InstCombine produces.
+ // (X-C1) u< C2 --> [C1, C1+C2)
+ ConstantInt *NegOffset = nullptr;
+ if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
+ match(ICI->getOperand(0), m_Add(m_Specific(Val),
+ m_ConstantInt(NegOffset)));
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
+ if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
+ // Calculate the range of values that would satisfy the comparison.
+ ConstantRange CmpRange(CI->getValue());
+ ConstantRange TrueValues =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+ if (NegOffset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(NegOffset->getValue());
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ Result = LVILatticeVal::getRange(TrueValues);
+ return true;
+ }
+ }
+
+ return false;
+}
+
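
The range-check idiom recognized above, (X - C1) u< C2, is equivalent to X lying in the half-open interval [C1, C1 + C2). A brute-force check of that equivalence in plain C++ (C1 = 100 and C2 = 50 are arbitrary, chosen so C1 + C2 does not wrap; the sweep is sparse to keep it fast):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 100, C2 = 50;
  for (uint64_t X = 0; X <= UINT32_MAX; X += 7919) {
    bool Idiom   = (uint32_t)(X - C1) < C2;            // (X - C1) u< C2
    bool InRange = X >= C1 && X < (uint64_t)C1 + C2;   // X in [C1, C1 + C2)
    assert(Idiom == InRange);
  }
  return 0;
}
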
/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
/// Val is not constrained on the edge.
static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
@@ -801,41 +899,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
- if (ICI && isa<Constant>(ICI->getOperand(1))) {
- if (ICI->isEquality() && ICI->getOperand(0) == Val) {
- // We know that V has the RHS constant if this is a true SETEQ or
- // false SETNE.
- if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
- Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
- else
- Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
- return true;
- }
-
- // Recognize the range checking idiom that InstCombine produces.
- // (X-C1) u< C2 --> [C1, C1+C2)
- ConstantInt *NegOffset = nullptr;
- if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
- match(ICI->getOperand(0), m_Add(m_Specific(Val),
- m_ConstantInt(NegOffset)));
-
- ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
- if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
- // Calculate the range of values that would satisfy the comparison.
- ConstantRange CmpRange(CI->getValue());
- ConstantRange TrueValues =
- ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
-
- if (NegOffset) // Apply the offset from above.
- TrueValues = TrueValues.subtract(NegOffset->getValue());
-
- // If we're interested in the false dest, invert the condition.
- if (!isTrueDest) TrueValues = TrueValues.inverse();
-
- Result = LVILatticeVal::getRange(TrueValues);
- return true;
- }
- }
+ if (getValueFromFromCondition(Val, ICI, Result, isTrueDest))
+ return true;
}
}
@@ -869,7 +934,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
/// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at
/// the basic block if the edge does not constrain Val.
bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
- BasicBlock *BBTo, LVILatticeVal &Result) {
+ BasicBlock *BBTo, LVILatticeVal &Result,
+ Instruction *CxtI) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val)) {
Result = LVILatticeVal::get(VC);
@@ -891,6 +957,10 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// Try to intersect ranges of the BB and the constraint on the edge.
LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ mergeAssumeBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator());
+ // See note on the use of the CxtI with mergeAssumeBlockValueConstantRange,
+ // and caching, below.
+ mergeAssumeBlockValueConstantRange(Val, InBlock, CxtI);
if (!InBlock.isConstantRange())
return true;
@@ -907,30 +977,54 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// if we couldn't compute the value on the edge, use the value from the BB
Result = getBlockValue(Val, BBFrom);
+ mergeAssumeBlockValueConstantRange(Val, Result, BBFrom->getTerminator());
+ // We can use the context instruction (generically the ultimate instruction
+ // the calling pass is trying to simplify) here, even though the result of
+ // this function is generally cached when called from the solve* functions
+ // (and that cached result might be used with queries using a different
+ // context instruction), because when this function is called from the solve*
+ // functions, the context instruction is not provided. When called from
+ // LazyValueInfoCache::getValueOnEdge, the context instruction is provided,
+ // but then the result is not cached.
+ mergeAssumeBlockValueConstantRange(Val, Result, CxtI);
return true;
}
-LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
+ Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
BlockValueStack.push(std::make_pair(BB, V));
solve();
LVILatticeVal Result = getBlockValue(V, BB);
+ mergeAssumeBlockValueConstantRange(V, Result, CxtI);
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
+ DEBUG(dbgs() << "LVI Getting value " << *V << " at '"
+ << CxtI->getName() << "'\n");
+
+ LVILatticeVal Result;
+ mergeAssumeBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
LVILatticeVal LazyValueInfoCache::
-getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
+ Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
<< FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
LVILatticeVal Result;
- if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+ if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) {
solve();
- bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+ bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result, CxtI);
(void)WasFastQuery;
assert(WasFastQuery && "More work to do after problem solved?");
}
@@ -1004,39 +1098,51 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
//===----------------------------------------------------------------------===//
/// getCache - This lazily constructs the LazyValueInfoCache.
-static LazyValueInfoCache &getCache(void *&PImpl) {
+static LazyValueInfoCache &getCache(void *&PImpl,
+ AssumptionTracker *AT,
+ const DataLayout *DL = nullptr,
+ DominatorTree *DT = nullptr) {
if (!PImpl)
- PImpl = new LazyValueInfoCache();
+ PImpl = new LazyValueInfoCache(AT, DL, DT);
return *static_cast<LazyValueInfoCache*>(PImpl);
}
bool LazyValueInfo::runOnFunction(Function &F) {
- if (PImpl)
- getCache(PImpl).clear();
+ AT = &getAnalysis<AssumptionTracker>();
+
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
+ if (PImpl)
+ getCache(PImpl, AT, DL, DT).clear();
+
// Fully lazy.
return false;
}
void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetLibraryInfo>();
}
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl);
+ delete &getCache(PImpl, AT);
PImpl = nullptr;
}
}
-Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
- LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
+ Instruction *CxtI) {
+ LVILatticeVal Result =
+ getCache(PImpl, AT, DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1051,8 +1157,10 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
/// getConstantOnEdge - Determine whether the specified value is known to be a
/// constant on the specified edge. Return null if not.
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
- BasicBlock *ToBB) {
- LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+ BasicBlock *ToBB,
+ Instruction *CxtI) {
+ LVILatticeVal Result =
+ getCache(PImpl, AT, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1064,51 +1172,47 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
return nullptr;
}
-/// getPredicateOnEdge - Determine whether the specified value comparison
-/// with a constant is known to be true or false on the specified CFG edge.
-/// Pred is a CmpInst predicate.
-LazyValueInfo::Tristate
-LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
- BasicBlock *FromBB, BasicBlock *ToBB) {
- LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
-
+static LazyValueInfo::Tristate
+getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result,
+ const DataLayout *DL, TargetLibraryInfo *TLI) {
+
// If we know the value is a constant, evaluate the conditional.
Constant *Res = nullptr;
if (Result.isConstant()) {
Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
TLI);
if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
- return ResCI->isZero() ? False : True;
- return Unknown;
+ return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
+ return LazyValueInfo::Unknown;
}
if (Result.isConstantRange()) {
ConstantInt *CI = dyn_cast<ConstantInt>(C);
- if (!CI) return Unknown;
+ if (!CI) return LazyValueInfo::Unknown;
ConstantRange CR = Result.getConstantRange();
if (Pred == ICmpInst::ICMP_EQ) {
if (!CR.contains(CI->getValue()))
- return False;
+ return LazyValueInfo::False;
if (CR.isSingleElement() && CR.contains(CI->getValue()))
- return True;
+ return LazyValueInfo::True;
} else if (Pred == ICmpInst::ICMP_NE) {
if (!CR.contains(CI->getValue()))
- return True;
+ return LazyValueInfo::True;
if (CR.isSingleElement() && CR.contains(CI->getValue()))
- return False;
+ return LazyValueInfo::False;
}
// Handle more complex predicates.
ConstantRange TrueValues =
ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
if (TrueValues.contains(CR))
- return True;
+ return LazyValueInfo::True;
if (TrueValues.inverse().contains(CR))
- return False;
- return Unknown;
+ return LazyValueInfo::False;
+ return LazyValueInfo::Unknown;
}
if (Result.isNotConstant()) {
@@ -1120,26 +1224,48 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
Result.getNotConstant(), C, DL,
TLI);
if (Res->isNullValue())
- return False;
+ return LazyValueInfo::False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
Result.getNotConstant(), C, DL,
TLI);
if (Res->isNullValue())
- return True;
+ return LazyValueInfo::True;
}
- return Unknown;
+ return LazyValueInfo::Unknown;
}
- return Unknown;
+ return LazyValueInfo::Unknown;
+}
+
+/// getPredicateOnEdge - Determine whether the specified value comparison
+/// with a constant is known to be true or false on the specified CFG edge.
+/// Pred is a CmpInst predicate.
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB,
+ Instruction *CxtI) {
+ LVILatticeVal Result =
+ getCache(PImpl, AT, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+
+ return getPredicateResult(Pred, C, Result, DL, TLI);
+}
+
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
+ Instruction *CxtI) {
+ LVILatticeVal Result =
+ getCache(PImpl, AT, DL, DT).getValueAt(V, CxtI);
+
+ return getPredicateResult(Pred, C, Result, DL, TLI);
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+ if (PImpl) getCache(PImpl, AT, DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
- if (PImpl) getCache(PImpl).eraseBlock(BB);
+ if (PImpl) getCache(PImpl, AT, DL, DT).eraseBlock(BB);
}
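
The headline addition in LazyValueInfo is the context-sensitive query getPredicateAt, which folds in facts from applicable @llvm.assume calls. A hedged usage sketch (divisorKnownNonZero is an illustrative helper, not part of this patch):

#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// True when LVI, fed by assumptions valid at I, proves the divisor of I
// is non-zero at I itself.
static bool divisorKnownNonZero(LazyValueInfo &LVI, BinaryOperator &I) {
  Value *Divisor = I.getOperand(1);
  Constant *Zero = Constant::getNullValue(Divisor->getType());
  return LVI.getPredicateAt(ICmpInst::ICMP_NE, Divisor, Zero, &I) ==
         LazyValueInfo::True;
}
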
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index 7d4e254..23639e7 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -18,7 +18,7 @@
#include "llvm/IR/Function.h"
using namespace llvm;
-/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This
+/// This impl pointer in ~LibCallInfo is actually a StringMap. This
/// helper does the cast.
static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
@@ -38,7 +38,7 @@ const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
}
-/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+/// Return the LibCallFunctionInfo object corresponding to
/// the specified function if we have it. If not, return null.
const LibCallFunctionInfo *
LibCallInfo::getFunctionInfo(const Function *F) const {
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index b14f329..8ee9b8a 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -37,6 +37,7 @@
#include "llvm/Analysis/Lint.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -96,11 +97,12 @@ namespace {
Value *findValue(Value *V, bool OffsetOk) const;
Value *findValueImpl(Value *V, bool OffsetOk,
- SmallPtrSet<Value *, 4> &Visited) const;
+ SmallPtrSetImpl<Value *> &Visited) const;
public:
Module *Mod;
AliasAnalysis *AA;
+ AssumptionTracker *AT;
DominatorTree *DT;
const DataLayout *DL;
TargetLibraryInfo *TLI;
@@ -118,6 +120,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetLibraryInfo>();
AU.addRequired<DominatorTreeWrapperPass>();
}
@@ -151,6 +154,7 @@ namespace {
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -175,6 +179,7 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
bool Lint::runOnFunction(Function &F) {
Mod = F.getParent();
AA = &getAnalysis<AliasAnalysis>();
+ AT = &getAnalysis<AssumptionTracker>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
@@ -504,7 +509,8 @@ void Lint::visitShl(BinaryOperator &I) {
"Undefined result: Shift count out of range", &I);
}
-static bool isZero(Value *V, const DataLayout *DL) {
+static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
+ AssumptionTracker *AT) {
// Assume undef could be zero.
if (isa<UndefValue>(V))
return true;
@@ -513,7 +519,8 @@ static bool isZero(Value *V, const DataLayout *DL) {
if (!VecTy) {
unsigned BitWidth = V->getType()->getIntegerBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL);
+ computeKnownBits(V, KnownZero, KnownOne, DL,
+ 0, AT, dyn_cast<Instruction>(V), DT);
return KnownZero.isAllOnesValue();
}
@@ -543,22 +550,22 @@ static bool isZero(Value *V, const DataLayout *DL) {
}
void Lint::visitSDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AT),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitUDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AT),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitSRem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AT),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitURem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AT),
"Undefined behavior: Division by zero", &I);
}
@@ -622,9 +629,9 @@ Value *Lint::findValue(Value *V, bool OffsetOk) const {
/// findValueImpl - Implementation helper for findValue.
Value *Lint::findValueImpl(Value *V, bool OffsetOk,
- SmallPtrSet<Value *, 4> &Visited) const {
+ SmallPtrSetImpl<Value *> &Visited) const {
// Detect self-referential values.
- if (!Visited.insert(V))
+ if (!Visited.insert(V).second)
return UndefValue::get(V->getType());
// TODO: Look through sext or zext cast, when the result is known to
@@ -638,7 +645,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
BasicBlock *BB = L->getParent();
SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
for (;;) {
- if (!VisitedBlocks.insert(BB)) break;
+ if (!VisitedBlocks.insert(BB).second)
+ break;
if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
BB, BBI, 6, AA))
return findValueImpl(U, OffsetOk, Visited);
@@ -678,7 +686,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT))
+ if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AT))
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
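
Lint's division checks now feed assumptions and the dominator tree into computeKnownBits. A minimal sketch of the same known-bits zero test outside of Lint (isDefinitelyZero is an illustrative name; the computeKnownBits overload is the one this patch threads through):

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool isDefinitelyZero(Value *V, const DataLayout *DL,
                             DominatorTree *DT, AssumptionTracker *AT) {
  unsigned BitWidth = V->getType()->getIntegerBitWidth();
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  computeKnownBits(V, KnownZero, KnownOne, DL, /*Depth=*/0, AT,
                   dyn_cast<Instruction>(V), DT);
  // If every bit is known to be zero, the value must be zero.
  return KnownZero.isAllOnesValue();
}
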
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 005d309..bb0d60e 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -22,25 +22,29 @@
#include "llvm/IR/Operator.h"
using namespace llvm;
-/// AreEquivalentAddressValues - Test if A and B will obviously have the same
-/// value. This includes recognizing that %t0 and %t1 will have the same
+/// \brief Test if A and B will obviously have the same value.
+///
+/// This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
+/// \code
/// %t0 = getelementptr \@a, 0, 3
/// store i32 0, i32* %t0
/// %t1 = getelementptr \@a, 0, 3
/// %t2 = load i32* %t1
+/// \endcode
///
static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
// Test if the values are trivially equivalent.
- if (A == B) return true;
+ if (A == B)
+ return true;
// Test if the values come from identical arithmetic instructions.
// Use isIdenticalToWhenDefined instead of isIdenticalTo because
// this function is only used when one address use dominates the
// other, which means that they'll always either have the same
// value or one of them will have an undefined value.
- if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
- isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) || isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
if (const Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
@@ -49,15 +53,19 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
-/// isSafeToLoadUnconditionally - Return true if we know that executing a load
-/// from this value cannot trap. If it is not obviously safe to load from the
-/// specified pointer, we do a quick local scan of the basic block containing
-/// ScanFrom, to determine if the address is already accessed.
+/// \brief Check if executing a load of this pointer value cannot trap.
+///
+/// If it is not obviously safe to load from the specified pointer, we do
+/// a quick local scan of the basic block containing \c ScanFrom, to determine
+/// if the address is already accessed.
+///
+/// This uses the pointee type to determine how many bytes need to be safe to
+/// load from the pointer.
bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
- unsigned Align, const DataLayout *TD) {
+ unsigned Align, const DataLayout *DL) {
int64_t ByteOffset = 0;
Value *Base = V;
- Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD);
+ Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
if (ByteOffset < 0) // out of bounds
return false;
@@ -69,26 +77,29 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
BaseType = AI->getAllocatedType();
BaseAlign = AI->getAlignment();
} else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
- // Global variables are safe to load from but their size cannot be
- // guaranteed if they are overridden.
+ // Global variables are not necessarily safe to load from if they are
+ // overridden. Their size may change or they may be weak and require a test
+ // to determine if they were in fact provided.
if (!GV->mayBeOverridden()) {
BaseType = GV->getType()->getElementType();
BaseAlign = GV->getAlignment();
}
}
- if (BaseType && BaseType->isSized()) {
- if (TD && BaseAlign == 0)
- BaseAlign = TD->getPrefTypeAlignment(BaseType);
+ PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
- if (Align <= BaseAlign) {
- if (!TD)
- return true; // Loading directly from an alloca or global is OK.
+ // If we found a base allocated type from either an alloca or global variable,
+ // try to see if we are definitively within the allocated region. We need to
+ // know the size of the base type and the loaded type to do anything in this
+ // case, so only try this when we have the DataLayout available.
+ if (BaseType && BaseType->isSized() && DL) {
+ if (BaseAlign == 0)
+ BaseAlign = DL->getPrefTypeAlignment(BaseType);
+ if (Align <= BaseAlign) {
// Check if the load is within the bounds of the underlying object.
- PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
- if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+ if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
return true;
}
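
The in-bounds test above is plain arithmetic; a tiny worked instance (all sizes and offsets are illustrative):

#include <cassert>
#include <cstdint>

static bool loadInBounds(uint64_t ByteOffset, uint64_t LoadSize,
                         uint64_t AllocSize, uint64_t Align) {
  return ByteOffset + LoadSize <= AllocSize &&
         (Align == 0 || ByteOffset % Align == 0);
}

int main() {
  assert(loadInBounds(12, 4, 16, 4));   // a 4-byte load at offset 12 of a 16-byte object fits
  assert(!loadInBounds(14, 4, 16, 2));  // at offset 14 it runs 2 bytes past the end
  return 0;
}
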
@@ -101,6 +112,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
// the load entirely).
BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+ // We can at least always strip pointer casts even though we can't use the
+ // base here.
+ V = V->stripPointerCasts();
+
while (BBI != E) {
--BBI;
@@ -110,46 +125,62 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
!isa<DbgInfoIntrinsic>(BBI))
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
- }
+ Value *AccessedPtr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ AccessedPtr = LI->getPointerOperand();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ AccessedPtr = SI->getPointerOperand();
+ else
+ continue;
+
+ // Handle trivial cases even w/o DataLayout or other work.
+ if (AccessedPtr == V)
+ return true;
+
+ if (!DL)
+ continue;
+
+ auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
+ if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
+ LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
+ return true;
}
return false;
}
-/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
-/// instruction before ScanFrom) checking to see if we have the value at the
+/// \brief Scan the ScanBB block backwards to see if we have the value at the
/// memory address *Ptr locally available within a small number of instructions.
-/// If the value is available, return it.
///
-/// If not, return the iterator for the last validated instruction that the
-/// value would be live through. If we scanned the entire block and didn't find
-/// something that invalidates *Ptr or provides it, ScanFrom would be left at
-/// begin() and this returns null. ScanFrom could also be left
+/// The scan starts from \c ScanFrom. \c MaxInstsToScan specifies the maximum
+/// number of instructions to scan in the block. If it is set to \c 0, the
+/// whole block will be scanned.
+///
+/// If the value is available, this function returns it. If not, it returns the
+/// iterator for the last validated instruction that the value would be live
+/// through. If we scanned the entire block and didn't find something that
+/// invalidates \c *Ptr or provides it, \c ScanFrom is left at the last
+/// instruction processed and this returns null.
///
-/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
-/// it is set to 0, it will scan the whole block. You can also optionally
-/// specify an alias analysis implementation, which makes this more precise.
+/// You can also optionally specify an alias analysis implementation, which
+/// makes this more precise.
///
-/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
-/// load or store is recorded there. If there is no TBAA tag or if no access
-/// is found, it is left unmodified.
+/// If \c AATags is non-null and a load or store is found, the AA tags from the
+/// load or store are recorded there. If there are no AA tags or if no access is
+/// found, it is left unmodified.
Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA,
- MDNode **TBAATag) {
- if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+ AliasAnalysis *AA, AAMDNodes *AATags) {
+ if (MaxInstsToScan == 0)
+ MaxInstsToScan = ~0U;
+
+ Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
// If we're using alias analysis to disambiguate get the size of *Ptr.
- uint64_t AccessSize = 0;
- if (AA) {
- Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- AccessSize = AA->getTypeStoreSize(AccessTy);
- }
-
+ uint64_t AccessSize = AA ? AA->getTypeStoreSize(AccessTy) : 0;
+
+ Value *StrippedPtr = Ptr->stripPointerCasts();
+
while (ScanFrom != ScanBB->begin()) {
// We must ignore debug info directives when counting (otherwise they
// would affect codegen).
@@ -159,62 +190,71 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// Restore ScanFrom to expected value in case next test succeeds
ScanFrom++;
-
+
// Don't scan huge blocks.
- if (MaxInstsToScan-- == 0) return nullptr;
-
+ if (MaxInstsToScan-- == 0)
+ return nullptr;
+
--ScanFrom;
// If this is a load of Ptr, the loaded value is available.
// (This is true even if the load is volatile or atomic, although
// those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
- if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
+ if (AreEquivalentAddressValues(
+ LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+ CastInst::isBitCastable(LI->getType(), AccessTy)) {
+ if (AATags)
+ LI->getAAMetadata(*AATags);
return LI;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
// If this is a store through Ptr, the value is available!
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
- if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa);
+ if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
+ CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) {
+ if (AATags)
+ SI->getAAMetadata(*AATags);
return SI->getOperand(0);
}
-
- // If Ptr is an alloca and this is a store to a different alloca, ignore
- // the store. This is a trivial form of alias analysis that is important
- // for reg2mem'd code.
- if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
- (isa<AllocaInst>(SI->getOperand(1)) ||
- isa<GlobalVariable>(SI->getOperand(1))))
+
+ // If both StrippedPtr and StorePtr reach all the way to an alloca or
+ // global and they are different, ignore the store. This is a trivial form
+ // of alias analysis that is important for reg2mem'd code.
+ if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
+ (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)) &&
+ StrippedPtr != StorePtr)
continue;
-
+
// If we have alias analysis and it says the store won't modify the loaded
// value, ignore the store.
if (AA &&
- (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
+ AliasAnalysis::Mod) == 0)
continue;
-
+
      // Otherwise the store may or may not alias the pointer; bail out.
++ScanFrom;
return nullptr;
}
-
+
// If this is some other instruction that may clobber Ptr, bail out.
if (Inst->mayWriteToMemory()) {
// If alias analysis claims that it really won't modify the load,
// ignore it.
if (AA &&
- (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
+ AliasAnalysis::Mod) == 0)
continue;
-
+
// May modify the pointer, bail out.
++ScanFrom;
return nullptr;
}
}
-
+
// Got to the start of the block, we didn't find it, but are done for this
// block.
return nullptr;
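
The hunk above also changes the FindAvailableLoadedValue signature: the old MDNode **TBAATag out-parameter becomes an AAMDNodes *AATags collector. A minimal caller sketch under that signature; the function name tryFold and the scan limit are illustrative only, not part of the patch:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Try to replace LoadToFold with a value made available by an earlier
// load or store in the same block.
static Value *tryFold(LoadInst *LoadToFold, AliasAnalysis *AA) {
  BasicBlock *BB = LoadToFold->getParent();
  BasicBlock::iterator ScanFrom = LoadToFold; // scan backwards from here
  AAMDNodes AATags;                           // filled in if an access is found
  if (Value *Avail = FindAvailableLoadedValue(LoadToFold->getPointerOperand(),
                                              BB, ScanFrom,
                                              /*MaxInstsToScan=*/6, AA, &AATags))
    return Avail; // the earlier value, with AATags describing its AA metadata
  return nullptr;
}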
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 46c0eaa..b1f62c4 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -307,7 +308,8 @@ bool Loop::isAnnotatedParallel() const {
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
// itself so we can check both cases with the same routine.
- MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access");
+ MDNode *loopIdMD =
+ II->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
if (!loopIdMD)
return false;
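
The change above replaces the string form of the metadata lookup with the enumerated fixed kind ID, which is what the new LLVMContext.h include is for. A small sketch of the equivalence; the helper name is illustrative:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include <cassert>
using namespace llvm;

static MDNode *parallelLoopAccessMD(const Instruction *I) {
  // The enumerated form uses the kind ID pre-registered on the LLVMContext,
  // avoiding a string hash on every lookup.
  MDNode *ById = I->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  // The string form returns the same node for this fixed kind.
  assert(ById == I->getMetadata("llvm.mem.parallel_loop_access"));
  return ById;
}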
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 7bd866e..190abc7 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -76,6 +76,9 @@ void LPPassManager::deleteLoopFromQueue(Loop *L) {
LI->updateUnloop(L);
+ // Notify passes that the loop is being deleted.
+ deleteSimpleAnalysisLoop(L);
+
// If L is current loop then skip rest of the passes and let
// runOnFunction remove L from LQ. Otherwise, remove L from LQ now
// and continue applying other passes on CurrentLoop.
@@ -164,6 +167,14 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
}
}
+/// Invoke deleteAnalysisLoop hook for all passes.
+void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->deleteAnalysisLoop(L);
+ }
+}
+
// Recurse through all subloops and all loops into LQ.
static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
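
The new deleteSimpleAnalysisLoop hook above lets each contained LoopPass drop per-loop state before deleteLoopFromQueue removes the loop. A hedged sketch of a pass using the corresponding deleteAnalysisLoop override; the pass and its cache are invented for illustration:

#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/LoopPass.h"
using namespace llvm;

namespace {
// Hypothetical loop pass that keeps a per-loop cache.
struct LoopCachePass : public LoopPass {
  static char ID;
  DenseMap<const Loop *, unsigned> Cache;

  LoopCachePass() : LoopPass(ID) {}
  bool runOnLoop(Loop *, LPPassManager &) override { return false; }

  // Invoked through LPPassManager::deleteSimpleAnalysisLoop when L is deleted.
  void deleteAnalysisLoop(Loop *L) override { Cache.erase(L); }
};
char LoopCachePass::ID = 0;
}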
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 64d339f..08b41fe 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -332,7 +332,11 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
TLIFn == LibFunc::ZdlPv || // operator delete(void*)
TLIFn == LibFunc::ZdaPv) // operator delete[](void*)
ExpectedNumParams = 1;
- else if (TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint)
+ TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong)
+ TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint)
+ TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong)
TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow)
ExpectedNumParams = 2;
else
@@ -412,7 +416,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If we have already seen this instruction, bail out. Cycles can happen in
// unreachable code after constant propagation.
- if (!SeenInsts.insert(I))
+ if (!SeenInsts.insert(I).second)
return unknown();
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
@@ -648,7 +652,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
// Record the pointers that were handled in this run, so that they can be
// cleaned later if something fails. We also use this set to break cycles that
// can occur in dead code.
- if (!SeenVals.insert(V)) {
+ if (!SeenVals.insert(V).second) {
Result = unknown();
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
Result = visitGEPOperator(*GEP);
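
Both hunks above track an ADT change: SmallPtrSet::insert now returns a std::pair<iterator, bool> instead of a bool, so the "already seen?" test becomes .second. The idiom in isolation; names are illustrative:

#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;

static bool markVisited(SmallPtrSet<const void *, 16> &Seen, const void *V) {
  // insert() yields {iterator, inserted}; .second is true only on first insert.
  return Seen.insert(V).second;
}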
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 9eaf109..187eada 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
@@ -48,13 +49,17 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
// Limit for the number of instructions to scan in a block.
-static const int BlockScanLimit = 100;
+static const unsigned int BlockScanLimit = 100;
+
+// Limit on the number of memdep results to process.
+static const unsigned int NumResultsLimit = 100;
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -83,11 +88,13 @@ void MemoryDependenceAnalysis::releaseMemory() {
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequiredTransitive<AliasAnalysis>();
}
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
+ AT = &getAnalysis<AssumptionTracker>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
@@ -158,29 +165,32 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
return AliasAnalysis::Mod;
}
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ AAMDNodes AAInfo;
+
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
+ II->getAAMetadata(AAInfo);
Loc = AliasAnalysis::Location(II->getArgOperand(1),
cast<ConstantInt>(II->getArgOperand(0))
- ->getZExtValue(),
- II->getMetadata(LLVMContext::MD_tbaa));
+ ->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return AliasAnalysis::Mod;
case Intrinsic::invariant_end:
+ II->getAAMetadata(AAInfo);
Loc = AliasAnalysis::Location(II->getArgOperand(2),
cast<ConstantInt>(II->getArgOperand(1))
- ->getZExtValue(),
- II->getMetadata(LLVMContext::MD_tbaa));
+ ->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return AliasAnalysis::Mod;
default:
break;
}
+ }
// Otherwise, just do the coarse-grained thing that always works.
if (Inst->mayWriteToMemory())
@@ -367,6 +377,36 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
int64_t MemLocOffset = 0;
unsigned Limit = BlockScanLimit;
bool isInvariantLoad = false;
+
+ // We must be careful with atomic accesses, as they may allow another thread
+  // to touch this location, clobbering it. We are conservative: if the
+ // QueryInst is not a simple (non-atomic) memory access, we automatically
+ // return getClobber.
+ // If it is simple, we know based on the results of
+ // "Compiler testing via a theory of sound optimisations in the C11/C++11
+ // memory model" in PLDI 2013, that a non-atomic location can only be
+ // clobbered between a pair of a release and an acquire action, with no
+ // access to the location in between.
+  // Here is an example giving the general intuition behind this rule.
+ // In the following code:
+ // store x 0;
+ // release action; [1]
+ // acquire action; [4]
+ // %val = load x;
+ // It is unsafe to replace %val by 0 because another thread may be running:
+ // acquire action; [2]
+ // store x 42;
+ // release action; [3]
+ // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
+  // being 42. A key property of this program, however, is that if either
+  // 1 or 4 were missing, there would be a race between the store of 42 and
+  // either the store of 0 or the load (making the whole program racy).
+  // The paper mentioned above shows that the same property is respected
+ // by every program that can detect any optimisation of that kind: either
+ // it is racy (undefined) or there is a release followed by an acquire
+ // between the pair of accesses under consideration.
+ bool HasSeenAcquire = false;
+
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
@@ -404,10 +444,37 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Values depend on loads if the pointers are must aliased. This means that
// a load depends on another must aliased load from the same value.
+ // One exception is atomic loads: a value can depend on an atomic load that it
+ // does not alias with when this atomic load indicates that another thread may
+ // be accessing the location.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Atomic loads have complications involved.
+ // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
+ // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
+ // release store will know to return getClobber.
// FIXME: This is overly conservative.
- if (!LI->isUnordered())
+ if (!LI->isUnordered()) {
+ if (!QueryInst)
+ return MemDepResult::getClobber(LI);
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
+ if (!QueryLI->isSimple())
+ return MemDepResult::getClobber(LI);
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
+ if (!QuerySI->isSimple())
+ return MemDepResult::getClobber(LI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(LI);
+ }
+
+ if (isAtLeastAcquire(LI->getOrdering()))
+ HasSeenAcquire = true;
+ }
+
+ // FIXME: this is overly conservative.
+    // While volatile accesses cannot be eliminated, they do not have to clobber
+ // non-aliasing locations, as normal accesses can for example be reordered
+ // with volatile accesses.
+ if (LI->isVolatile())
return MemDepResult::getClobber(LI);
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
@@ -466,8 +533,32 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
+ // A Monotonic store is OK if the query inst is itself not atomic.
+ // A Release (or higher) store further requires that no acquire load
+ // has been seen.
// FIXME: This is overly conservative.
- if (!SI->isUnordered())
+ if (!SI->isUnordered()) {
+ if (!QueryInst)
+ return MemDepResult::getClobber(SI);
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
+ if (!QueryLI->isSimple())
+ return MemDepResult::getClobber(SI);
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
+ if (!QuerySI->isSimple())
+ return MemDepResult::getClobber(SI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(SI);
+ }
+
+ if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
+ return MemDepResult::getClobber(SI);
+ }
+
+ // FIXME: this is overly conservative.
+    // While volatile accesses cannot be eliminated, they do not have to clobber
+ // non-aliasing locations, as normal accesses can for example be reordered
+ // with volatile accesses.
+ if (SI->isVolatile())
return MemDepResult::getClobber(SI);
// If alias analysis can tell that this store is guaranteed to not modify
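
The load and store cases above now share one guard: unless the query instruction is a simple (non-atomic, non-volatile) load or store, an ordered atomic access is reported as a clobber. A compressed sketch of that guard; the helper name is invented:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// True when the ordered atomic access being scanned must conservatively be
// treated as clobbering the query instruction.
static bool mustClobberQuery(const Instruction *QueryInst) {
  if (!QueryInst)
    return true;
  if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
    return !QueryLI->isSimple();
  if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
    return !QuerySI->isSimple();
  return QueryInst->mayReadOrWriteMemory();
}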
@@ -685,7 +776,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.pop_back();
// Already processed this block?
- if (!Visited.insert(DirtyBB))
+ if (!Visited.insert(DirtyBB).second)
continue;
// Do a binary search to see if we already have an entry for this block in
@@ -775,7 +866,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
"Can't get pointer deps of a non-pointer!");
Result.clear();
- PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL);
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AT);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
@@ -861,7 +952,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
return Dep;
}
-/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
+/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered. This is
/// optimized for the case when only a few entries are added.
static void
@@ -922,10 +1013,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Set up a temporary NLPI value. If the map doesn't yet have an entry for
// CacheKey, this value will be inserted as the associated value. Otherwise,
// it'll be ignored, and we'll have to check to see if the cached size and
- // tbaa tag are consistent with the current query.
+ // aa tags are consistent with the current query.
NonLocalPointerInfo InitialNLPI;
InitialNLPI.Size = Loc.Size;
- InitialNLPI.TBAATag = Loc.TBAATag;
+ InitialNLPI.AATags = Loc.AATags;
// Get the NLPI for CacheKey, inserting one into the map if it doesn't
// already have one.
@@ -955,21 +1046,21 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SkipFirstBlock);
}
- // If the query's TBAATag is inconsistent with the cached one,
+ // If the query's AATags are inconsistent with the cached one,
// conservatively throw out the cached data and restart the query with
// no tag if needed.
- if (CacheInfo->TBAATag != Loc.TBAATag) {
- if (CacheInfo->TBAATag) {
+ if (CacheInfo->AATags != Loc.AATags) {
+ if (CacheInfo->AATags) {
CacheInfo->Pair = BBSkipFirstBlockPair();
- CacheInfo->TBAATag = nullptr;
+ CacheInfo->AATags = AAMDNodes();
for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
if (Instruction *Inst = DI->getResult().getInst())
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
CacheInfo->NonLocalDeps.clear();
}
- if (Loc.TBAATag)
- return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+ if (Loc.AATags)
+ return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
}
@@ -1045,6 +1136,25 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
+    // If we process a large number of blocks it becomes very expensive and
+    // likely isn't worth worrying about.
+ if (Result.size() > NumResultsLimit) {
+ Worklist.clear();
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the
+ // cache value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ NumSortedEntries = Cache->size();
+ }
+ // Since we bail out, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ return true;
+ }
+
// Skip the first block if we have it.
if (!SkipFirstBlock) {
// Analyze the dependency of *Pointer in FromBB. See if we already have
@@ -1369,14 +1479,11 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
- SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
// RemInst can't be the terminator if it has local stuff depending on it.
- assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
"Nothing can locally depend on a terminator");
- for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
- E = ReverseDeps.end(); I != E; ++I) {
- Instruction *InstDependingOnRemInst = *I;
+ for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
assert(InstDependingOnRemInst != RemInst &&
"Already removed our local dep info");
@@ -1402,12 +1509,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseNonLocalDeps.end()) {
- SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
- for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
- I != E; ++I) {
- assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+ for (Instruction *I : ReverseDepIt->second) {
+ assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
- PerInstNLInfo &INLD = NonLocalDeps[*I];
+ PerInstNLInfo &INLD = NonLocalDeps[I];
// The information is now dirty!
INLD.second = true;
@@ -1419,7 +1524,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
DI->setResult(NewDirtyVal);
if (Instruction *NextI = NewDirtyVal.getInst())
- ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
}
}
@@ -1438,12 +1543,9 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
ReverseNonLocalPtrDeps.find(RemInst);
if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
- SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
- for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
- E = Set.end(); I != E; ++I) {
- ValueIsLoadPair P = *I;
+ for (ValueIsLoadPair P : ReversePtrDepIt->second) {
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
@@ -1484,8 +1586,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
DEBUG(verifyRemoved(RemInst));
}
/// verifyRemoved - Verify that the specified instruction does not occur
-/// in our internal data structures.
+/// in our internal data structures. The check is done with assertions and is
+/// compiled out in release (NDEBUG) builds.
void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+#ifndef NDEBUG
for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
E = LocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1514,18 +1618,16 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
E = ReverseLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
- for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
- EE = I->second.end(); II != EE; ++II)
- assert(*II != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
}
for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
E = ReverseNonLocalDeps.end();
I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
- for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
- EE = I->second.end(); II != EE; ++II)
- assert(*II != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
}
for (ReverseNonLocalPtrDepTy::const_iterator
@@ -1533,11 +1635,10 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in rev NLPD map");
- for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
- E = I->second.end(); II != E; ++II)
- assert(*II != ValueIsLoadPair(D, false) &&
- *II != ValueIsLoadPair(D, true) &&
+ for (ValueIsLoadPair P : I->second)
+ assert(P != ValueIsLoadPair(D, false) &&
+ P != ValueIsLoadPair(D, true) &&
"Inst occurs in ReverseNonLocalPtrDeps map");
}
-
+#endif
}
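
Several loops in removeInstruction and verifyRemoved above were converted from explicit SmallPtrSet iterator pairs to range-based for. The idiom in isolation, with illustrative names:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool occursIn(const SmallPtrSet<Instruction *, 4> &Deps,
                     const Instruction *RemInst) {
  // Same traversal as the old begin()/end() const_iterator loop, just shorter.
  for (const Instruction *I : Deps)
    if (I == RemInst)
      return true;
  return false;
}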
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 139fa38..c214d3c 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -57,8 +57,9 @@ namespace {
Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
ModRefResult &Mask) override {
Mask = ModRef;
- return Location(CS.getArgument(ArgIdx), UnknownSize,
- CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa));
+ AAMDNodes AATags;
+ CS->getAAMetadata(AATags);
+ return Location(CS.getArgument(ArgIdx), UnknownSize, AATags);
}
ModRefResult getModRefInfo(ImmutableCallSite CS,
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index bfe8642..b3d060a 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -228,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT)) {
+ if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT, AT)) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
@@ -283,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AT)) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
@@ -369,7 +369,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
SmallVectorImpl<Instruction*> &NewInsts) {
// See if we have a version of this value already available and dominating
// PredBB. If so, there is no need to insert a new instance of it.
- PHITransAddr Tmp(InVal, DL);
+ PHITransAddr Tmp(InVal, DL, AT);
if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
return Tmp.getAddr();
diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp
index 1b0f359..68c7535 100644
--- a/lib/Analysis/PtrUseVisitor.cpp
+++ b/lib/Analysis/PtrUseVisitor.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
for (Use &U : I.uses()) {
- if (VisitedUses.insert(&U)) {
+ if (VisitedUses.insert(&U).second) {
UseToVisit NewU = {
UseToVisit::UseAndIsOffsetKnownPair(&U, IsOffsetKnown),
Offset
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 7f88ae1..08ebf0d 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -25,21 +26,26 @@ using namespace llvm;
#define DEBUG_TYPE "region"
+namespace llvm {
+template class RegionBase<RegionTraits<Function>>;
+template class RegionNodeBase<RegionTraits<Function>>;
+template class RegionInfoBase<RegionTraits<Function>>;
+}
+
+STATISTIC(numRegions, "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
// Always verify if expensive checking is enabled.
-#ifdef XDEBUG
-static bool VerifyRegionInfo = true;
-#else
-static bool VerifyRegionInfo = false;
-#endif
static cl::opt<bool,true>
-VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
- cl::desc("Verify region info (time consuming)"));
+VerifyRegionInfoX(
+ "verify-region-info",
+ cl::location(RegionInfoBase<RegionTraits<Function>>::VerifyRegionInfo),
+ cl::desc("Verify region info (time consuming)"));
-STATISTIC(numRegions, "The # of regions");
-STATISTIC(numSimpleRegions, "The # of simple regions");
-static cl::opt<enum Region::PrintStyle> printStyle("print-region-style",
+static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
+ cl::location(RegionInfo::printStyle),
cl::Hidden,
cl::desc("style of printing regions"),
cl::values(
@@ -49,812 +55,110 @@ static cl::opt<enum Region::PrintStyle> printStyle("print-region-style",
clEnumValN(Region::PrintRN, "rn",
"print regions in detail with element_iterator"),
clEnumValEnd));
-//===----------------------------------------------------------------------===//
-/// Region Implementation
-Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo,
- DominatorTree *dt, Region *Parent)
- : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {}
-
-Region::~Region() {
- // Free the cached nodes.
- for (BBNodeMapT::iterator it = BBNodeMap.begin(),
- ie = BBNodeMap.end(); it != ie; ++it)
- delete it->second;
-
- // Only clean the cache for this Region. Caches of child Regions will be
- // cleaned when the child Regions are deleted.
- BBNodeMap.clear();
-}
-
-void Region::replaceEntry(BasicBlock *BB) {
- entry.setPointer(BB);
-}
-
-void Region::replaceExit(BasicBlock *BB) {
- assert(exit && "No exit to replace!");
- exit = BB;
-}
-
-void Region::replaceEntryRecursive(BasicBlock *NewEntry) {
- std::vector<Region *> RegionQueue;
- BasicBlock *OldEntry = getEntry();
-
- RegionQueue.push_back(this);
- while (!RegionQueue.empty()) {
- Region *R = RegionQueue.back();
- RegionQueue.pop_back();
-
- R->replaceEntry(NewEntry);
- for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
- if ((*RI)->getEntry() == OldEntry)
- RegionQueue.push_back(RI->get());
- }
-}
-
-void Region::replaceExitRecursive(BasicBlock *NewExit) {
- std::vector<Region *> RegionQueue;
- BasicBlock *OldExit = getExit();
-
- RegionQueue.push_back(this);
- while (!RegionQueue.empty()) {
- Region *R = RegionQueue.back();
- RegionQueue.pop_back();
-
- R->replaceExit(NewExit);
- for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
- if ((*RI)->getExit() == OldExit)
- RegionQueue.push_back(RI->get());
- }
-}
-
-bool Region::contains(const BasicBlock *B) const {
- BasicBlock *BB = const_cast<BasicBlock*>(B);
-
- if (!DT->getNode(BB))
- return false;
-
- BasicBlock *entry = getEntry(), *exit = getExit();
-
- // Toplevel region.
- if (!exit)
- return true;
-
- return (DT->dominates(entry, BB)
- && !(DT->dominates(exit, BB) && DT->dominates(entry, exit)));
-}
-
-bool Region::contains(const Loop *L) const {
- // BBs that are not part of any loop are element of the Loop
- // described by the NULL pointer. This loop is not part of any region,
- // except if the region describes the whole function.
- if (!L)
- return getExit() == nullptr;
-
- if (!contains(L->getHeader()))
- return false;
-
- SmallVector<BasicBlock *, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
-
- for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(),
- BE = ExitingBlocks.end(); BI != BE; ++BI)
- if (!contains(*BI))
- return false;
-
- return true;
-}
-
-Loop *Region::outermostLoopInRegion(Loop *L) const {
- if (!contains(L))
- return nullptr;
-
- while (L && contains(L->getParentLoop())) {
- L = L->getParentLoop();
- }
-
- return L;
-}
-
-Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
- assert(LI && BB && "LI and BB cannot be null!");
- Loop *L = LI->getLoopFor(BB);
- return outermostLoopInRegion(L);
-}
-
-BasicBlock *Region::getEnteringBlock() const {
- BasicBlock *entry = getEntry();
- BasicBlock *Pred;
- BasicBlock *enteringBlock = nullptr;
-
- for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
- ++PI) {
- Pred = *PI;
- if (DT->getNode(Pred) && !contains(Pred)) {
- if (enteringBlock)
- return nullptr;
-
- enteringBlock = Pred;
- }
- }
-
- return enteringBlock;
-}
-
-BasicBlock *Region::getExitingBlock() const {
- BasicBlock *exit = getExit();
- BasicBlock *Pred;
- BasicBlock *exitingBlock = nullptr;
-
- if (!exit)
- return nullptr;
-
- for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
- ++PI) {
- Pred = *PI;
- if (contains(Pred)) {
- if (exitingBlock)
- return nullptr;
-
- exitingBlock = Pred;
- }
- }
-
- return exitingBlock;
-}
-
-bool Region::isSimple() const {
- return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock();
-}
-
-std::string Region::getNameStr() const {
- std::string exitName;
- std::string entryName;
-
- if (getEntry()->getName().empty()) {
- raw_string_ostream OS(entryName);
-
- getEntry()->printAsOperand(OS, false);
- } else
- entryName = getEntry()->getName();
-
- if (getExit()) {
- if (getExit()->getName().empty()) {
- raw_string_ostream OS(exitName);
-
- getExit()->printAsOperand(OS, false);
- } else
- exitName = getExit()->getName();
- } else
- exitName = "<Function Return>";
-
- return entryName + " => " + exitName;
-}
-
-void Region::verifyBBInRegion(BasicBlock *BB) const {
- if (!contains(BB))
- llvm_unreachable("Broken region found!");
-
- BasicBlock *entry = getEntry(), *exit = getExit();
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (!contains(*SI) && exit != *SI)
- llvm_unreachable("Broken region found!");
-
- if (entry != BB)
- for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI)
- if (!contains(*SI))
- llvm_unreachable("Broken region found!");
-}
-
-void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const {
- BasicBlock *exit = getExit();
-
- visited->insert(BB);
-
- verifyBBInRegion(BB);
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (*SI != exit && visited->find(*SI) == visited->end())
- verifyWalk(*SI, visited);
-}
-
-void Region::verifyRegion() const {
- // Only do verification when user wants to, otherwise this expensive
- // check will be invoked by PassManager.
- if (!VerifyRegionInfo) return;
-
- std::set<BasicBlock*> visited;
- verifyWalk(getEntry(), &visited);
-}
-
-void Region::verifyRegionNest() const {
- for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
- (*RI)->verifyRegionNest();
-
- verifyRegion();
-}
-
-Region::element_iterator Region::element_begin() {
- return GraphTraits<Region*>::nodes_begin(this);
-}
-
-Region::element_iterator Region::element_end() {
- return GraphTraits<Region*>::nodes_end(this);
-}
-
-Region::const_element_iterator Region::element_begin() const {
- return GraphTraits<const Region*>::nodes_begin(this);
-}
-
-Region::const_element_iterator Region::element_end() const {
- return GraphTraits<const Region*>::nodes_end(this);
-}
-
-Region* Region::getSubRegionNode(BasicBlock *BB) const {
- Region *R = RI->getRegionFor(BB);
-
- if (!R || R == this)
- return nullptr;
-
- // If we pass the BB out of this region, that means our code is broken.
- assert(contains(R) && "BB not in current region!");
-
- while (contains(R->getParent()) && R->getParent() != this)
- R = R->getParent();
-
- if (R->getEntry() != BB)
- return nullptr;
-
- return R;
-}
-
-RegionNode* Region::getBBNode(BasicBlock *BB) const {
- assert(contains(BB) && "Can get BB node out of this region!");
-
- BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
-
- if (at != BBNodeMap.end())
- return at->second;
-
- RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB);
- BBNodeMap.insert(std::make_pair(BB, NewNode));
- return NewNode;
-}
-
-RegionNode* Region::getNode(BasicBlock *BB) const {
- assert(contains(BB) && "Can get BB node out of this region!");
- if (Region* Child = getSubRegionNode(BB))
- return Child->getNode();
-
- return getBBNode(BB);
-}
-
-void Region::transferChildrenTo(Region *To) {
- for (iterator I = begin(), E = end(); I != E; ++I) {
- (*I)->parent = To;
- To->children.push_back(std::move(*I));
- }
- children.clear();
-}
-
-void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
- assert(!SubRegion->parent && "SubRegion already has a parent!");
- assert(std::find_if(begin(), end(), [&](const std::unique_ptr<Region> &R) {
- return R.get() == SubRegion;
- }) == children.end() &&
- "Subregion already exists!");
-
- SubRegion->parent = this;
- children.push_back(std::unique_ptr<Region>(SubRegion));
-
- if (!moveChildren)
- return;
-
- assert(SubRegion->children.size() == 0
- && "SubRegions that contain children are not supported");
-
- for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
- if (!(*I)->isSubRegion()) {
- BasicBlock *BB = (*I)->getNodeAs<BasicBlock>();
-
- if (SubRegion->contains(BB))
- RI->setRegionFor(BB, SubRegion);
- }
-
- std::vector<std::unique_ptr<Region>> Keep;
- for (iterator I = begin(), E = end(); I != E; ++I)
- if (SubRegion->contains(I->get()) && I->get() != SubRegion) {
- (*I)->parent = SubRegion;
- SubRegion->children.push_back(std::move(*I));
- } else
- Keep.push_back(std::move(*I));
-
- children.clear();
- children.insert(children.begin(),
- std::move_iterator<RegionSet::iterator>(Keep.begin()),
- std::move_iterator<RegionSet::iterator>(Keep.end()));
-}
-
-
-Region *Region::removeSubRegion(Region *Child) {
- assert(Child->parent == this && "Child is not a child of this region!");
- Child->parent = nullptr;
- RegionSet::iterator I = std::find_if(
- children.begin(), children.end(),
- [&](const std::unique_ptr<Region> &R) { return R.get() == Child; });
- assert(I != children.end() && "Region does not exit. Unable to remove.");
- children.erase(children.begin()+(I-begin()));
- return Child;
-}
-
-unsigned Region::getDepth() const {
- unsigned Depth = 0;
-
- for (Region *R = parent; R != nullptr; R = R->parent)
- ++Depth;
-
- return Depth;
-}
-Region *Region::getExpandedRegion() const {
- unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
- if (NumSuccessors == 0)
- return nullptr;
-
- for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
- PI != PE; ++PI)
- if (!DT->dominates(getEntry(), *PI))
- return nullptr;
-
- Region *R = RI->getRegionFor(exit);
-
- if (R->getEntry() != exit) {
- if (exit->getTerminator()->getNumSuccessors() == 1)
- return new Region(getEntry(), *succ_begin(exit), RI, DT);
- else
- return nullptr;
- }
-
- while (R->getParent() && R->getParent()->getEntry() == exit)
- R = R->getParent();
-
- if (!DT->dominates(getEntry(), R->getExit()))
- for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
- PI != PE; ++PI)
- if (!DT->dominates(R->getExit(), *PI))
- return nullptr;
-
- return new Region(getEntry(), R->getExit(), RI, DT);
-}
-
-void Region::print(raw_ostream &OS, bool print_tree, unsigned level,
- enum PrintStyle Style) const {
- if (print_tree)
- OS.indent(level*2) << "[" << level << "] " << getNameStr();
- else
- OS.indent(level*2) << getNameStr();
-
- OS << "\n";
-
-
- if (Style != PrintNone) {
- OS.indent(level*2) << "{\n";
- OS.indent(level*2 + 2);
-
- if (Style == PrintBB) {
- for (const auto &BB : blocks())
- OS << BB->getName() << ", "; // TODO: remove the last ","
- } else if (Style == PrintRN) {
- for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
- OS << **I << ", "; // TODO: remove the last ",
- }
-
- OS << "\n";
- }
+//===----------------------------------------------------------------------===//
+// Region implementation
+//
- if (print_tree)
- for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
- (*RI)->print(OS, print_tree, level+1, Style);
+Region::Region(BasicBlock *Entry, BasicBlock *Exit,
+ RegionInfo* RI,
+ DominatorTree *DT, Region *Parent) :
+ RegionBase<RegionTraits<Function>>(Entry, Exit, RI, DT, Parent) {
- if (Style != PrintNone)
- OS.indent(level*2) << "} \n";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void Region::dump() const {
- print(dbgs(), true, getDepth(), printStyle.getValue());
-}
-#endif
-
-void Region::clearNodeCache() {
- // Free the cached nodes.
- for (BBNodeMapT::iterator I = BBNodeMap.begin(),
- IE = BBNodeMap.end(); I != IE; ++I)
- delete I->second;
-
- BBNodeMap.clear();
- for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
- (*RI)->clearNodeCache();
-}
+Region::~Region() { }
//===----------------------------------------------------------------------===//
// RegionInfo implementation
//
-bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry,
- BasicBlock *exit) const {
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (DT->dominates(entry, P) && !DT->dominates(exit, P))
- return false;
- }
- return true;
-}
-
-bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const {
- assert(entry && exit && "entry and exit must not be null!");
- typedef DominanceFrontier::DomSetType DST;
-
- DST *entrySuccs = &DF->find(entry)->second;
-
- // Exit is the header of a loop that contains the entry. In this case,
- // the dominance frontier must only contain the exit.
- if (!DT->dominates(entry, exit)) {
- for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
- SI != SE; ++SI)
- if (*SI != exit && *SI != entry)
- return false;
-
- return true;
- }
-
- DST *exitSuccs = &DF->find(exit)->second;
-
- // Do not allow edges leaving the region.
- for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
- SI != SE; ++SI) {
- if (*SI == exit || *SI == entry)
- continue;
- if (exitSuccs->find(*SI) == exitSuccs->end())
- return false;
- if (!isCommonDomFrontier(*SI, entry, exit))
- return false;
- }
-
- // Do not allow edges pointing into the region.
- for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end();
- SI != SE; ++SI)
- if (DT->properlyDominates(entry, *SI) && *SI != exit)
- return false;
+RegionInfo::RegionInfo() :
+ RegionInfoBase<RegionTraits<Function>>() {
-
- return true;
-}
-
-void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit,
- BBtoBBMap *ShortCut) const {
- assert(entry && exit && "entry and exit must not be null!");
-
- BBtoBBMap::iterator e = ShortCut->find(exit);
-
- if (e == ShortCut->end())
- // No further region at exit available.
- (*ShortCut)[entry] = exit;
- else {
- // We found a region e that starts at exit. Therefore (entry, e->second)
- // is also a region, that is larger than (entry, exit). Insert the
- // larger one.
- BasicBlock *BB = e->second;
- (*ShortCut)[entry] = BB;
- }
}
-DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N,
- BBtoBBMap *ShortCut) const {
- BBtoBBMap::iterator e = ShortCut->find(N->getBlock());
-
- if (e == ShortCut->end())
- return N->getIDom();
+RegionInfo::~RegionInfo() {
- return PDT->getNode(e->second)->getIDom();
-}
-
-bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const {
- assert(entry && exit && "entry and exit must not be null!");
-
- unsigned num_successors = succ_end(entry) - succ_begin(entry);
-
- if (num_successors <= 1 && exit == *(succ_begin(entry)))
- return true;
-
- return false;
}
void RegionInfo::updateStatistics(Region *R) {
++numRegions;
// TODO: Slow. Should only be enabled if -stats is used.
- if (R->isSimple()) ++numSimpleRegions;
-}
-
-Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) {
- assert(entry && exit && "entry and exit must not be null!");
-
- if (isTrivialRegion(entry, exit))
- return nullptr;
-
- Region *region = new Region(entry, exit, this, DT);
- BBtoRegion.insert(std::make_pair(entry, region));
-
- #ifdef XDEBUG
- region->verifyRegion();
- #else
- DEBUG(region->verifyRegion());
- #endif
-
- updateStatistics(region);
- return region;
-}
-
-void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) {
- assert(entry);
-
- DomTreeNode *N = PDT->getNode(entry);
-
- if (!N)
- return;
-
- Region *lastRegion= nullptr;
- BasicBlock *lastExit = entry;
-
- // As only a BasicBlock that postdominates entry can finish a region, walk the
- // post dominance tree upwards.
- while ((N = getNextPostDom(N, ShortCut))) {
- BasicBlock *exit = N->getBlock();
-
- if (!exit)
- break;
-
- if (isRegion(entry, exit)) {
- Region *newRegion = createRegion(entry, exit);
-
- if (lastRegion)
- newRegion->addSubRegion(lastRegion);
-
- lastRegion = newRegion;
- lastExit = exit;
- }
-
- // This can never be a region, so stop the search.
- if (!DT->dominates(entry, exit))
- break;
- }
-
- // Tried to create regions from entry to lastExit. Next time take a
- // shortcut from entry to lastExit.
- if (lastExit != entry)
- insertShortCut(entry, lastExit, ShortCut);
+ if (R->isSimple())
+ ++numSimpleRegions;
}
-void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) {
- BasicBlock *entry = &(F.getEntryBlock());
- DomTreeNode *N = DT->getNode(entry);
-
- // Iterate over the dominance tree in post order to start with the small
- // regions from the bottom of the dominance tree. If the small regions are
- // detected first, detection of bigger regions is faster, as we can jump
- // over the small regions.
- for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE;
- ++FI) {
- findRegionsWithEntry(FI->getBlock(), ShortCut);
- }
-}
+void RegionInfo::recalculate(Function &F, DominatorTree *DT_,
+ PostDominatorTree *PDT_, DominanceFrontier *DF_) {
+ DT = DT_;
+ PDT = PDT_;
+ DF = DF_;
-Region *RegionInfo::getTopMostParent(Region *region) {
- while (region->parent)
- region = region->getParent();
-
- return region;
+ TopLevelRegion = new Region(&F.getEntryBlock(), nullptr,
+ this, DT, nullptr);
+ updateStatistics(TopLevelRegion);
+ calculate(F);
}
-void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
- BasicBlock *BB = N->getBlock();
-
- // Passed region exit
- while (BB == region->getExit())
- region = region->getParent();
-
- BBtoRegionMap::iterator it = BBtoRegion.find(BB);
-
- // This basic block is a start block of a region. It is already in the
- // BBtoRegion relation. Only the child basic blocks have to be updated.
- if (it != BBtoRegion.end()) {
- Region *newRegion = it->second;
- region->addSubRegion(getTopMostParent(newRegion));
- region = newRegion;
- } else {
- BBtoRegion[BB] = region;
- }
+//===----------------------------------------------------------------------===//
+// RegionInfoPass implementation
+//
- for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI)
- buildRegionsTree(*CI, region);
+RegionInfoPass::RegionInfoPass() : FunctionPass(ID) {
+ initializeRegionInfoPassPass(*PassRegistry::getPassRegistry());
}
-void RegionInfo::releaseMemory() {
- BBtoRegion.clear();
- if (TopLevelRegion)
- delete TopLevelRegion;
- TopLevelRegion = nullptr;
-}
+RegionInfoPass::~RegionInfoPass() {
-RegionInfo::RegionInfo() : FunctionPass(ID) {
- initializeRegionInfoPass(*PassRegistry::getPassRegistry());
- TopLevelRegion = nullptr;
}
-RegionInfo::~RegionInfo() {
+bool RegionInfoPass::runOnFunction(Function &F) {
releaseMemory();
-}
-void RegionInfo::Calculate(Function &F) {
- // ShortCut a function where for every BB the exit of the largest region
- // starting with BB is stored. These regions can be threated as single BBS.
- // This improves performance on linear CFGs.
- BBtoBBMap ShortCut;
+ auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto PDT = &getAnalysis<PostDominatorTree>();
+ auto DF = &getAnalysis<DominanceFrontier>();
- scanForRegions(F, &ShortCut);
- BasicBlock *BB = &F.getEntryBlock();
- buildRegionsTree(DT->getNode(BB), TopLevelRegion);
+ RI.recalculate(F, DT, PDT, DF);
+ return false;
}
-bool RegionInfo::runOnFunction(Function &F) {
- releaseMemory();
-
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- PDT = &getAnalysis<PostDominatorTree>();
- DF = &getAnalysis<DominanceFrontier>();
-
- TopLevelRegion = new Region(&F.getEntryBlock(), nullptr, this, DT, nullptr);
- updateStatistics(TopLevelRegion);
-
- Calculate(F);
+void RegionInfoPass::releaseMemory() {
+ RI.releaseMemory();
+}
- return false;
+void RegionInfoPass::verifyAnalysis() const {
+ RI.verifyAnalysis();
}
-void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+void RegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTree>();
AU.addRequired<DominanceFrontier>();
}
-void RegionInfo::print(raw_ostream &OS, const Module *) const {
- OS << "Region tree:\n";
- TopLevelRegion->print(OS, true, 0, printStyle.getValue());
- OS << "End region tree\n";
-}
-
-void RegionInfo::verifyAnalysis() const {
- // Only do verification when user wants to, otherwise this expensive check
- // will be invoked by PMDataManager::verifyPreservedAnalysis when
- // a regionpass (marked PreservedAll) finish.
- if (!VerifyRegionInfo) return;
-
- TopLevelRegion->verifyRegionNest();
-}
-
-// Region pass manager support.
-Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
- BBtoRegionMap::const_iterator I=
- BBtoRegion.find(BB);
- return I != BBtoRegion.end() ? I->second : nullptr;
-}
-
-void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
- BBtoRegion[BB] = R;
-}
-
-Region *RegionInfo::operator[](BasicBlock *BB) const {
- return getRegionFor(BB);
+void RegionInfoPass::print(raw_ostream &OS, const Module *) const {
+ RI.print(OS);
}
-BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
- BasicBlock *Exit = nullptr;
-
- while (true) {
- // Get largest region that starts at BB.
- Region *R = getRegionFor(BB);
- while (R && R->getParent() && R->getParent()->getEntry() == BB)
- R = R->getParent();
-
- // Get the single exit of BB.
- if (R && R->getEntry() == BB)
- Exit = R->getExit();
- else if (++succ_begin(BB) == succ_end(BB))
- Exit = *succ_begin(BB);
- else // No single exit exists.
- return Exit;
-
- // Get largest region that starts at Exit.
- Region *ExitR = getRegionFor(Exit);
- while (ExitR && ExitR->getParent()
- && ExitR->getParent()->getEntry() == Exit)
- ExitR = ExitR->getParent();
-
- for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
- ++PI)
- if (!R->contains(*PI) && !ExitR->contains(*PI))
- break;
-
- // This stops infinite cycles.
- if (DT->dominates(Exit, BB))
- break;
-
- BB = Exit;
- }
-
- return Exit;
-}
-
-Region*
-RegionInfo::getCommonRegion(Region *A, Region *B) const {
- assert (A && B && "One of the Regions is NULL");
-
- if (A->contains(B)) return A;
-
- while (!B->contains(A))
- B = B->getParent();
-
- return B;
-}
-
-Region*
-RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const {
- Region* ret = Regions.back();
- Regions.pop_back();
-
- for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(),
- E = Regions.end(); I != E; ++I)
- ret = getCommonRegion(ret, *I);
-
- return ret;
-}
-
-Region*
-RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
- Region* ret = getRegionFor(BBs.back());
- BBs.pop_back();
-
- for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(),
- E = BBs.end(); I != E; ++I)
- ret = getCommonRegion(ret, getRegionFor(*I));
-
- return ret;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void RegionInfoPass::dump() const {
+ RI.dump();
}
+#endif
-void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
-{
- Region *R = getRegionFor(OldBB);
-
- setRegionFor(NewBB, R);
-
- while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
- R->replaceEntry(NewBB);
- R = R->getParent();
- }
-
- setRegionFor(OldBB, R);
-}
+char RegionInfoPass::ID = 0;
-char RegionInfo::ID = 0;
-INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
+INITIALIZE_PASS_BEGIN(RegionInfoPass, "regions",
"Detect single entry single exit regions", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
-INITIALIZE_PASS_END(RegionInfo, "regions",
+INITIALIZE_PASS_END(RegionInfoPass, "regions",
"Detect single entry single exit regions", true, true)
// Create methods available outside of this file, to use them
@@ -863,7 +167,7 @@ INITIALIZE_PASS_END(RegionInfo, "regions",
namespace llvm {
FunctionPass *createRegionInfoPass() {
- return new RegionInfo();
+ return new RegionInfoPass();
}
}
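
RegionInfo is now a plain analysis object owned by the RegionInfoPass FunctionPass, so clients request the pass and then unwrap it. A hedged sketch of a client pass; the pass itself is invented, and the getAnalysis call mirrors the RegionPass.cpp change below:

#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct RegionCounter : public FunctionPass {
  static char ID;
  RegionCounter() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<RegionInfoPass>(); // was AU.addRequired<RegionInfo>()
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &) override {
    RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
    Region *Top = RI.getTopLevelRegion();
    (void)Top; // walk the region tree from here
    return false;
  }
};
char RegionCounter::ID = 0;
}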
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 71de144..de34b72 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -45,14 +45,14 @@ static void addRegionIntoQueue(Region &R, std::deque<Region *> &RQ) {
/// Pass Manager itself does not invalidate any analysis info.
void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
- Info.addRequired<RegionInfo>();
+ Info.addRequired<RegionInfoPass>();
Info.setPreservesAll();
}
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the function, and if so, return true.
bool RGPassManager::runOnFunction(Function &F) {
- RI = &getAnalysis<RegionInfo>();
+ RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
bool Changed = false;
// Collect inherited analysis from Module level pass manager.
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 893210a..ad83113 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -56,23 +56,24 @@ struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
};
template<>
-struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
+struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
- DOTGraphTraits (bool isSimple=false)
+ DOTGraphTraits (bool isSimple = false)
: DOTGraphTraits<RegionNode*>(isSimple) {}
- static std::string getGraphName(RegionInfo *DT) {
+ static std::string getGraphName(RegionInfoPass *DT) {
return "Region Graph";
}
- std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) {
+ RegionInfo &RI = G->getRegionInfo();
return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
- G->getTopLevelRegion());
+ reinterpret_cast<RegionNode*>(RI.getTopLevelRegion()));
}
std::string getEdgeAttributes(RegionNode *srcNode,
- GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) {
-
+ GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfoPass *G) {
+ RegionInfo &RI = G->getRegionInfo();
RegionNode *destNode = *CI;
if (srcNode->isSubRegion() || destNode->isSubRegion())
@@ -82,7 +83,7 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();
BasicBlock *destBB = destNode->getNodeAs<BasicBlock>();
- Region *R = RI->getRegionFor(destBB);
+ Region *R = RI.getRegionFor(destBB);
while (R && R->getParent())
if (R->getParent()->getEntry() == destBB)
@@ -98,7 +99,8 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
// Print the cluster of the subregions. This groups the single basic blocks
// and adds a different background color for each group.
- static void printRegionCluster(const Region &R, GraphWriter<RegionInfo*> &GW,
+ static void printRegionCluster(const Region &R,
+ GraphWriter<RegionInfoPass*> &GW,
unsigned depth = 0) {
raw_ostream &O = GW.getOStream();
O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R)
@@ -119,22 +121,23 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
for (Region::const_iterator RI = R.begin(), RE = R.end(); RI != RE; ++RI)
printRegionCluster(**RI, GW, depth + 1);
- RegionInfo *RI = R.getRegionInfo();
+ const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo());
for (const auto &BB : R.blocks())
- if (RI->getRegionFor(BB) == &R)
+ if (RI.getRegionFor(BB) == &R)
O.indent(2 * (depth + 1)) << "Node"
- << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
+ << static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB))
<< ";\n";
O.indent(2 * depth) << "}\n";
}
- static void addCustomGraphFeatures(const RegionInfo* RI,
- GraphWriter<RegionInfo*> &GW) {
+ static void addCustomGraphFeatures(const RegionInfoPass* RIP,
+ GraphWriter<RegionInfoPass*> &GW) {
+ const RegionInfo &RI = RIP->getRegionInfo();
raw_ostream &O = GW.getOStream();
O << "\tcolorscheme = \"paired12\"\n";
- printRegionCluster(*RI->getTopLevelRegion(), GW, 4);
+ printRegionCluster(*RI.getTopLevelRegion(), GW, 4);
}
};
} //end namespace llvm
@@ -142,28 +145,28 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
namespace {
struct RegionViewer
- : public DOTGraphTraitsViewer<RegionInfo, false> {
+ : public DOTGraphTraitsViewer<RegionInfoPass, false> {
static char ID;
- RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){
+ RegionViewer() : DOTGraphTraitsViewer<RegionInfoPass, false>("reg", ID){
initializeRegionViewerPass(*PassRegistry::getPassRegistry());
}
};
char RegionViewer::ID = 0;
struct RegionOnlyViewer
- : public DOTGraphTraitsViewer<RegionInfo, true> {
+ : public DOTGraphTraitsViewer<RegionInfoPass, true> {
static char ID;
- RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) {
+ RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfoPass, true>("regonly", ID) {
initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
}
};
char RegionOnlyViewer::ID = 0;
struct RegionPrinter
- : public DOTGraphTraitsPrinter<RegionInfo, false> {
+ : public DOTGraphTraitsPrinter<RegionInfoPass, false> {
static char ID;
RegionPrinter() :
- DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {
+ DOTGraphTraitsPrinter<RegionInfoPass, false>("reg", ID) {
initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
}
};
@@ -175,7 +178,7 @@ INITIALIZE_PASS(RegionPrinter, "dot-regions",
INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
true, true)
-
+
INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
"View regions of function (with no function bodies)",
true, true)
@@ -183,10 +186,10 @@ INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
namespace {
struct RegionOnlyPrinter
- : public DOTGraphTraitsPrinter<RegionInfo, true> {
+ : public DOTGraphTraitsPrinter<RegionInfoPass, true> {
static char ID;
RegionOnlyPrinter() :
- DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {
+ DOTGraphTraitsPrinter<RegionInfoPass, true>("reg", ID) {
initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
}
};
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 06dbde5..68549ef 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1,4 +1,4 @@
-//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -59,9 +59,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -78,6 +80,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -113,6 +116,7 @@ VerifySCEV("verify-scev",
INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
@@ -671,7 +675,321 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
}
}
+static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::srem(A, B);
+}
+
+static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::sdiv(A, B);
+}
+
+static const APInt urem(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::urem(A, B);
+}
+
+static const APInt udiv(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::udiv(A, B);
+}
+
+namespace {
+struct FindSCEVSize {
+ int Size;
+ FindSCEVSize() : Size(0) {}
+
+ bool follow(const SCEV *S) {
+ ++Size;
+ // Keep looking at all operands of S.
+ return true;
+ }
+ bool isDone() const {
+ return false;
+ }
+};
+}
+
+// Returns the size of the SCEV S.
+static inline int sizeOfSCEV(const SCEV *S) {
+ FindSCEVSize F;
+ SCEVTraversal<FindSCEVSize> ST(F);
+ ST.visitAll(S);
+ return F.Size;
+}
+namespace {
+
+template <typename Derived>
+struct SCEVDivision : public SCEVVisitor<Derived, void> {
+public:
+ // Computes the Quotient and Remainder of the division of Numerator by
+ // Denominator.
+ static void divide(ScalarEvolution &SE, const SCEV *Numerator,
+ const SCEV *Denominator, const SCEV **Quotient,
+ const SCEV **Remainder) {
+ assert(Numerator && Denominator && "Uninitialized SCEV");
+
+ Derived D(SE, Numerator, Denominator);
+
+ // Check for the trivial case here to avoid having to check for it in the
+ // rest of the code.
+ if (Numerator == Denominator) {
+ *Quotient = D.One;
+ *Remainder = D.Zero;
+ return;
+ }
+
+ if (Numerator->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = D.Zero;
+ return;
+ }
+
+ // Split the Denominator when it is a product.
+ if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
+ const SCEV *Q, *R;
+ *Quotient = Numerator;
+ for (const SCEV *Op : T->operands()) {
+ divide(SE, *Quotient, Op, &Q, &R);
+ *Quotient = Q;
+
+ // Bail out when the Numerator is not divisible by one of the terms of
+ // the Denominator.
+ if (!R->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = Numerator;
+ return;
+ }
+ }
+ *Remainder = D.Zero;
+ return;
+ }
+
+ D.visit(Numerator);
+ *Quotient = D.Quotient;
+ *Remainder = D.Remainder;
+ }
+
+ // Except in the trivial case described above, we do not know how to divide
+  // Expr by Denominator for the following functions with empty implementations.
+ void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
+ void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
+ void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
+ void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
+ void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
+ void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+ void visitUnknown(const SCEVUnknown *Numerator) {}
+ void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+
+ void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
+ const SCEV *StartQ, *StartR, *StepQ, *StepR;
+ assert(Numerator->isAffine() && "Numerator should be affine");
+ divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
+ divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+ Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ }
+
+ void visitAddExpr(const SCEVAddExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs, Rs;
+ Type *Ty = Denominator->getType();
+
+ for (const SCEV *Op : Numerator->operands()) {
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+
+ // Bail out if types do not match.
+ if (Ty != Q->getType() || Ty != R->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ Qs.push_back(Q);
+ Rs.push_back(R);
+ }
+
+ if (Qs.size() == 1) {
+ Quotient = Qs[0];
+ Remainder = Rs[0];
+ return;
+ }
+
+ Quotient = SE.getAddExpr(Qs);
+ Remainder = SE.getAddExpr(Rs);
+ }
+
+ void visitMulExpr(const SCEVMulExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs;
+ Type *Ty = Denominator->getType();
+
+ bool FoundDenominatorTerm = false;
+ for (const SCEV *Op : Numerator->operands()) {
+ // Bail out if types do not match.
+ if (Ty != Op->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ if (FoundDenominatorTerm) {
+ Qs.push_back(Op);
+ continue;
+ }
+
+ // Check whether Denominator divides one of the product operands.
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+ if (!R->isZero()) {
+ Qs.push_back(Op);
+ continue;
+ }
+
+ // Bail out if types do not match.
+ if (Ty != Q->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ FoundDenominatorTerm = true;
+ Qs.push_back(Q);
+ }
+
+ if (FoundDenominatorTerm) {
+ Remainder = Zero;
+ if (Qs.size() == 1)
+ Quotient = Qs[0];
+ else
+ Quotient = SE.getMulExpr(Qs);
+ return;
+ }
+
+ if (!isa<SCEVUnknown>(Denominator)) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ // The Remainder is obtained by replacing Denominator by 0 in Numerator.
+ ValueToValueMap RewriteMap;
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(Zero)->getValue();
+ Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+
+ if (Remainder->isZero()) {
+ // The Quotient is obtained by replacing Denominator by 1 in Numerator.
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(One)->getValue();
+ Quotient =
+ SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+ return;
+ }
+
+ // Quotient is (Numerator - Remainder) divided by Denominator.
+ const SCEV *Q, *R;
+ const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
+ if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
+ // This SCEV does not seem to simplify: fail the division here.
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+ divide(SE, Diff, Denominator, &Q, &R);
+ assert(R == Zero &&
+ "(Numerator - Remainder) should evenly divide Denominator");
+ Quotient = Q;
+ }
+
+private:
+ SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
+ const SCEV *Denominator)
+ : SE(S), Denominator(Denominator) {
+ Zero = SE.getConstant(Denominator->getType(), 0);
+ One = SE.getConstant(Denominator->getType(), 1);
+
+ // By default, we don't know how to divide Expr by Denominator.
+ // Providing the default here simplifies the rest of the code.
+ Quotient = Zero;
+ Remainder = Numerator;
+ }
+
+ ScalarEvolution &SE;
+ const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
+
+ friend struct SCEVSDivision;
+ friend struct SCEVUDivision;
+};
+
+struct SCEVSDivision : public SCEVDivision<SCEVSDivision> {
+ SCEVSDivision(ScalarEvolution &S, const SCEV *Numerator,
+ const SCEV *Denominator)
+ : SCEVDivision(S, Numerator, Denominator) {}
+
+ void visitConstant(const SCEVConstant *Numerator) {
+ if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+ Quotient = SE.getConstant(sdiv(Numerator, D));
+ Remainder = SE.getConstant(srem(Numerator, D));
+ return;
+ }
+ }
+};
+
+struct SCEVUDivision : public SCEVDivision<SCEVUDivision> {
+ SCEVUDivision(ScalarEvolution &S, const SCEV *Numerator,
+ const SCEV *Denominator)
+ : SCEVDivision(S, Numerator, Denominator) {}
+
+ void visitConstant(const SCEVConstant *Numerator) {
+ if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+ Quotient = SE.getConstant(udiv(Numerator, D));
+ Remainder = SE.getConstant(urem(Numerator, D));
+ return;
+ }
+ }
+};
+
+}
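A minimal usage sketch of the divider defined above, for orientation only; the
names SE, Expr and Denom are placeholders rather than anything taken from the
patch:

    const SCEV *Q, *R;
    SCEVSDivision::divide(SE, Expr, Denom, &Q, &R);
    // Intended invariant: Expr == Denom * Q + R.  When the visitor does not
    // know how to divide, it falls back to Q == 0 and R == Expr.

For example, dividing the SCEV (3 + (4 * %n)) by 4 goes through visitAddExpr,
divides each operand separately, and ends with Q == %n and R == 3.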
//===----------------------------------------------------------------------===//
// Simple SCEV method implementations
@@ -2061,71 +2379,66 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, if there weren't any loop invariants to be folded, check to see if
// there are multiple AddRec's with the same loop induction variable being
// multiplied together. If so, we can fold them.
+
+ // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+ // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+ // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+ // ]]],+,...up to x=2n}.
+ // Note that the arguments to choose() are always integers with values
+ // known at compile time, never SCEV objects.
+ //
+ // The implementation avoids pointless extra computations when the two
+ // addrec's are of different length (mathematically, it's equivalent to
+ // an infinite stream of zeros on the right).
+ bool OpsModified = false;
for (unsigned OtherIdx = Idx+1;
- OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
- if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
+ const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
continue;
- // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
- // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
- // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
- // ]]],+,...up to x=2n}.
- // Note that the arguments to choose() are always integers with values
- // known at compile time, never SCEV objects.
- //
- // The implementation avoids pointless extra computations when the two
- // addrec's are of different length (mathematically, it's equivalent to
- // an infinite stream of zeros on the right).
- bool OpsModified = false;
- for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx) {
- const SCEVAddRecExpr *OtherAddRec =
- dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
- if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
- continue;
-
- bool Overflow = false;
- Type *Ty = AddRec->getType();
- bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
- SmallVector<const SCEV*, 7> AddRecOps;
- for (int x = 0, xe = AddRec->getNumOperands() +
- OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
- const SCEV *Term = getConstant(Ty, 0);
- for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
- uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
- for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
- ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
- z < ze && !Overflow; ++z) {
- uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
- uint64_t Coeff;
- if (LargerThan64Bits)
- Coeff = umul_ov(Coeff1, Coeff2, Overflow);
- else
- Coeff = Coeff1*Coeff2;
- const SCEV *CoeffTerm = getConstant(Ty, Coeff);
- const SCEV *Term1 = AddRec->getOperand(y-z);
- const SCEV *Term2 = OtherAddRec->getOperand(z);
- Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
- }
+ bool Overflow = false;
+ Type *Ty = AddRec->getType();
+ bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+ SmallVector<const SCEV*, 7> AddRecOps;
+ for (int x = 0, xe = AddRec->getNumOperands() +
+ OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
+ const SCEV *Term = getConstant(Ty, 0);
+ for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+ uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+ for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+ ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+ z < ze && !Overflow; ++z) {
+ uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+ uint64_t Coeff;
+ if (LargerThan64Bits)
+ Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+ else
+ Coeff = Coeff1*Coeff2;
+ const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+ const SCEV *Term1 = AddRec->getOperand(y-z);
+ const SCEV *Term2 = OtherAddRec->getOperand(z);
+ Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
}
- AddRecOps.push_back(Term);
- }
- if (!Overflow) {
- const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
- SCEV::FlagAnyWrap);
- if (Ops.size() == 2) return NewAddRec;
- Ops[Idx] = NewAddRec;
- Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
- OpsModified = true;
- AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
- if (!AddRec)
- break;
}
+ AddRecOps.push_back(Term);
+ }
+ if (!Overflow) {
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = NewAddRec;
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ OpsModified = true;
+ AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
+ if (!AddRec)
+ break;
}
- if (OpsModified)
- return getMulExpr(Ops);
}
+ if (OpsModified)
+ return getMulExpr(Ops);
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
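For readers working through the choose() formula in the comment added above, it
may help to specialize it by hand to two affine recurrences:

    {A,+,B}<L> * {C,+,D}<L>  =  {A*C, +, A*D + B*C + B*D, +, 2*B*D}<L>

Checking at iteration n: the left side is (A + B*n)*(C + D*n)
= A*C + (A*D + B*C)*n + B*D*n^2, and the right side evaluates to
A*C + (A*D + B*C + B*D)*n + 2*B*D*(n*(n-1)/2), which is the same value.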
@@ -3082,7 +3395,8 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
Visited.insert(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
+ if (!Visited.insert(I).second)
+ continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -3263,7 +3577,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, DL, TLI, DT))
+ if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AT))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3395,7 +3709,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT);
return Zeros.countTrailingOnes();
}
@@ -3403,6 +3717,31 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
return 0;
}
+/// GetRangeFromMetadata - Helper method to assign a range to V from
+/// metadata present in the IR.
+static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) {
+ ConstantRange TotalRange(
+ cast<IntegerType>(I->getType())->getBitWidth(), false);
+
+ unsigned NumRanges = MD->getNumOperands() / 2;
+ assert(NumRanges >= 1);
+
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower = cast<ConstantInt>(MD->getOperand(2*i + 0));
+ ConstantInt *Upper = cast<ConstantInt>(MD->getOperand(2*i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ TotalRange = TotalRange.unionWith(Range);
+ }
+
+ return TotalRange;
+ }
+ }
+
+ return None;
+}
+
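A small IR example of the metadata this helper reads, written in the assembly
syntax of this release; the values are illustrative only:

    %a = load i8* %p, !range !0
    !0 = metadata !{i8 0, i8 42}

Here GetRangeFromMetadata returns the ConstantRange [0, 42). When the metadata
lists several pairs, the loop above unions them into one conservative
ConstantRange, since a ConstantRange cannot represent disjoint sets.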
/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
///
ConstantRange
@@ -3532,9 +3871,14 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // Check if the IR explicitly contains !range metadata.
+ Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
+ if (MDRange.hasValue())
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+
// For a SCEVUnknown, ask ValueTracking.
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT);
if (Ones == ~Zeros + 1)
return setUnsignedRange(U, ConservativeResult);
return setUnsignedRange(U,
@@ -3683,10 +4027,15 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // Check if the IR explicitly contains !range metadata.
+ Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
+ if (MDRange.hasValue())
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+
// For a SCEVUnknown, ask ValueTracking.
if (!U->getValue()->getType()->isIntegerTy() && !DL)
return setSignedRange(U, ConservativeResult);
- unsigned NS = ComputeNumSignBits(U->getValue(), DL);
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AT, nullptr, DT);
if (NS <= 1)
return setSignedRange(U, ConservativeResult);
return setSignedRange(U, ConservativeResult.intersectWith(
@@ -3793,7 +4142,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL,
+ 0, AT, nullptr, DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4070,6 +4420,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
+ if (BasicBlock *ExitingBB = L->getExitingBlock())
+ return getSmallConstantTripCount(L, ExitingBB);
+
+ // No trip count information for multiple exits.
+ return 0;
+}
+
/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
/// normal unsigned value. Returns 0 if the trip count is unknown or not
/// constant. Will also return 0 if the maximum trip count is very large (>=
@@ -4080,19 +4438,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
/// before taking the branch. For loops with multiple exits, it may not be the
/// number times that the loop header executes because the loop may exit
/// prematurely via another branch.
-///
-/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of
-/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all
-/// loop exits. getExitCount() may return an exact count for this branch
-/// assuming no-signed-wrap. The number of well-defined iterations may actually
-/// be higher than this trip count if this exit test is skipped and the loop
-/// exits via a different branch. Ideally, getExitCount() would know whether it
-/// depends on a NSW assumption, and we would only fall back to a conservative
-/// trip count in that case.
-unsigned ScalarEvolution::
-getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+ BasicBlock *ExitingBlock) {
+ assert(ExitingBlock && "Must pass a non-null exiting block!");
+ assert(L->isLoopExiting(ExitingBlock) &&
+ "Exiting block must actually branch out of the loop!");
const SCEVConstant *ExitCount =
- dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
if (!ExitCount)
return 0;
@@ -4106,6 +4458,14 @@ getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
return ((unsigned)ExitConst->getZExtValue()) + 1;
}
+unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
+ if (BasicBlock *ExitingBB = L->getExitingBlock())
+ return getSmallConstantTripMultiple(L, ExitingBB);
+
+ // No trip multiple information for multiple exits.
+ return 0;
+}
+
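A rough usage sketch of the new whole-loop overloads; SE and L stand for a
ScalarEvolution reference and a Loop pointer already in hand:

    unsigned TC = SE.getSmallConstantTripCount(L);
    // TC is the constant trip count derived from the loop's single exiting
    // block, or 0 if there are several exiting blocks or the count is
    // unknown or very large.
    unsigned Multiple = SE.getSmallConstantTripMultiple(L);
    // Any actual trip count is a multiple of Multiple; per the overload
    // added above, it is 0 when the loop has no single exiting block.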
/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
/// trip count of this loop as a normal unsigned value, if possible. This
/// means that the actual trip count is always a multiple of the returned
@@ -4118,9 +4478,13 @@ getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
///
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
-unsigned ScalarEvolution::
-getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) {
- const SCEV *ExitCount = getBackedgeTakenCount(L);
+unsigned
+ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+ BasicBlock *ExitingBlock) {
+ assert(ExitingBlock && "Must pass a non-null exiting block!");
+ assert(L->isLoopExiting(ExitingBlock) &&
+ "Exiting block must actually branch out of the loop!");
+ const SCEV *ExitCount = getExitCount(L, ExitingBlock);
if (ExitCount == getCouldNotCompute())
return 1;
@@ -4230,7 +4594,8 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
+ if (!Visited.insert(I).second)
+ continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -4282,7 +4647,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
+ if (!Visited.insert(I).second)
+ continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -4316,7 +4682,8 @@ void ScalarEvolution::forgetValue(Value *V) {
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
+ if (!Visited.insert(I).second)
+ continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -4467,20 +4834,12 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// non-exiting iterations. Partition the loop exits into two kinds:
// LoopMustExits and LoopMayExits.
//
- // A LoopMustExit meets two requirements:
- //
- // (a) Its ExitLimit.MustExit flag must be set which indicates that the exit
- // test condition cannot be skipped (the tested variable has unit stride or
- // the test is less-than or greater-than, rather than a strict inequality).
- //
- // (b) It must dominate the loop latch, hence must be tested on every loop
- // iteration.
- //
- // If any computable LoopMustExit is found, then MaxBECount is the minimum
- // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is
- // conservatively the maximum EL.Max, where CouldNotCompute is considered
- // greater than any computable EL.Max.
- if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch &&
+  // If the exit dominates the loop latch, it is a LoopMustExit; otherwise it
+ // is a LoopMayExit. If any computable LoopMustExit is found, then
+ // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
+ // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
+ // considered greater than any computable EL.Max.
+ if (EL.Max != getCouldNotCompute() && Latch &&
DT->dominates(ExitBB, Latch)) {
if (!MustExitMaxBECount)
MustExitMaxBECount = EL.Max;
@@ -4567,18 +4926,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
return getCouldNotCompute();
}
+ bool IsOnlyExit = (L->getExitingBlock() != nullptr);
TerminatorInst *Term = ExitingBlock->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
// Proceed to the next level to examine the exit condition expression.
return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
BI->getSuccessor(1),
- /*IsSubExpr=*/false);
+ /*ControlsExit=*/IsOnlyExit);
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
- /*IsSubExpr=*/false);
+ /*ControlsExit=*/IsOnlyExit);
return getCouldNotCompute();
}
@@ -4587,28 +4947,27 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
///
-/// @param IsSubExpr is true if ExitCond does not directly control the exit
-/// branch. In this case, we cannot assume that the loop only exits when the
-/// condition is true and cannot infer that failing to meet the condition prior
-/// to integer wraparound results in undefined behavior.
+/// @param ControlsExit is true if ExitCond directly controls the exit
+/// branch. In this case, we can assume that the loop exits only if the
+/// condition is true and can infer that failing to meet the condition prior to
+/// integer wraparound results in undefined behavior.
ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
- bool IsSubExpr) {
+ bool ControlsExit) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
bool EitherMayExit = L->contains(TBB);
ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- IsSubExpr || EitherMayExit);
+ ControlsExit && !EitherMayExit);
ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- IsSubExpr || EitherMayExit);
+ ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- bool MustExit = false;
if (EitherMayExit) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
@@ -4623,7 +4982,6 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
MaxBECount = EL0.Max;
else
MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
- MustExit = EL0.MustExit || EL1.MustExit;
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
@@ -4632,21 +4990,19 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
MaxBECount = EL0.Max;
if (EL0.Exact == EL1.Exact)
BECount = EL0.Exact;
- MustExit = EL0.MustExit && EL1.MustExit;
}
- return ExitLimit(BECount, MaxBECount, MustExit);
+ return ExitLimit(BECount, MaxBECount);
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
bool EitherMayExit = L->contains(FBB);
ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- IsSubExpr || EitherMayExit);
+ ControlsExit && !EitherMayExit);
ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- IsSubExpr || EitherMayExit);
+ ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- bool MustExit = false;
if (EitherMayExit) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
@@ -4661,7 +5017,6 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
MaxBECount = EL0.Max;
else
MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
- MustExit = EL0.MustExit || EL1.MustExit;
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
@@ -4670,17 +5025,16 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
MaxBECount = EL0.Max;
if (EL0.Exact == EL1.Exact)
BECount = EL0.Exact;
- MustExit = EL0.MustExit && EL1.MustExit;
}
- return ExitLimit(BECount, MaxBECount, MustExit);
+ return ExitLimit(BECount, MaxBECount);
}
}
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
- return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr);
+ return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -4707,7 +5061,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
- bool IsSubExpr) {
+ bool ControlsExit) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
@@ -4759,7 +5113,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -4772,14 +5126,14 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT: { // while (X < Y)
bool IsSigned = Cond == ICmpInst::ICMP_SLT;
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr);
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT: { // while (X > Y)
bool IsSigned = Cond == ICmpInst::ICMP_SGT;
- ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr);
+ ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -4801,7 +5155,7 @@ ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
SwitchInst *Switch,
BasicBlock *ExitingBlock,
- bool IsSubExpr) {
+ bool ControlsExit) {
assert(!L->contains(ExitingBlock) && "Not an exiting block!");
// Give up if the exit is the default dest of a switch.
@@ -4814,7 +5168,7 @@ ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
// while (X != Y) --> while (X-Y != 0)
- ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
if (EL.hasAnyInfo())
return EL;
@@ -5687,7 +6041,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
/// effectively V != 0. We know and take advantage of the fact that this
/// expression is only used in a comparison-by-zero context.
ScalarEvolution::ExitLimit
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -5781,37 +6135,30 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
else
MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
: -CR.getUnsignedMin());
- return ExitLimit(Distance, MaxBECount, /*MustExit=*/true);
+ return ExitLimit(Distance, MaxBECount);
}
- // If the recurrence is known not to wraparound, unsigned divide computes the
- // back edge count. (Ideally we would have an "isexact" bit for udiv). We know
- // that the value will either become zero (and thus the loop terminates), that
- // the loop will terminate through some other exit condition first, or that
- // the loop has undefined behavior. This means we can't "miss" the exit
- // value, even with nonunit stride, and exit later via the same branch. Note
- // that we can skip this exit if loop later exits via a different
- // branch. Hence MustExit=false.
- //
- // This is only valid for expressions that directly compute the loop exit. It
- // is invalid for subexpressions in which the loop may exit through this
- // branch even if this subexpression is false. In that case, the trip count
- // computed by this udiv could be smaller than the number of well-defined
- // iterations.
- if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
+ // If the step exactly divides the distance then unsigned divide computes the
+ // backedge count.
+ const SCEV *Q, *R;
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+ SCEVUDivision::divide(SE, Distance, Step, &Q, &R);
+ if (R->isZero()) {
const SCEV *Exact =
- getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact, /*MustExit=*/false);
+ getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+ return ExitLimit(Exact, Exact);
}
- // If Step is a power of two that evenly divides Start we know that the loop
- // will always terminate. Start may not be a constant so we just have the
- // number of trailing zeros available. This is safe even in presence of
- // overflow as the recurrence will overflow to exactly 0.
- const APInt &StepV = StepC->getValue()->getValue();
- if (StepV.isPowerOf2() &&
- GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros())
- return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+ // If the condition controls loop exit (the loop exits only if the expression
+  // is true) and the addition is no-wrap, we can use unsigned divide to
+ // compute the backedge count. In this case, the step may not divide the
+ // distance, but we don't care because if the condition is "missed" the loop
+ // will have undefined behavior due to wrapping.
+ if (ControlsExit && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
+ const SCEV *Exact =
+ getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+ return ExitLimit(Exact, Exact);
+ }
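An illustrative case for the new divisibility path: in a loop shaped like
"for (i = 0; i != 3*n; i += 3)", Distance is 3*%n and Step is 3, so
SCEVUDivision yields Q == %n and R == 0, and the exact backedge-taken count %n
is returned without needing any no-wrap flags; the removed power-of-two special
case could not handle a step of 3.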
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
@@ -6309,19 +6656,30 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return true;
+ if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
+
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return false;
BranchInst *LoopContinuePredicate =
dyn_cast<BranchInst>(Latch->getTerminator());
- if (!LoopContinuePredicate ||
- LoopContinuePredicate->isUnconditional())
- return false;
+ if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
+ isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
+ return true;
+
+ // Check conditions due to any @llvm.assume intrinsics.
+ for (auto &CI : AT->assumptions(F)) {
+ if (!DT->dominates(CI, Latch->getTerminator()))
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ return true;
+ }
- return isImpliedCond(Pred, LHS, RHS,
- LoopContinuePredicate->getCondition(),
- LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+ return false;
}
/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
@@ -6335,6 +6693,8 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return false;
+ if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
+
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
@@ -6355,6 +6715,15 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
return true;
}
+ // Check conditions due to any @llvm.assume intrinsics.
+ for (auto &CI : AT->assumptions(F)) {
+ if (!DT->dominates(CI, L->getHeader()))
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ return true;
+ }
+
return false;
}
@@ -6469,6 +6838,66 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
RHS, LHS, FoundLHS, FoundRHS);
}
+ // Check if we can make progress by sharpening ranges.
+ if (FoundPred == ICmpInst::ICMP_NE &&
+ (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
+
+ const SCEVConstant *C = nullptr;
+ const SCEV *V = nullptr;
+
+ if (isa<SCEVConstant>(FoundLHS)) {
+ C = cast<SCEVConstant>(FoundLHS);
+ V = FoundRHS;
+ } else {
+ C = cast<SCEVConstant>(FoundRHS);
+ V = FoundLHS;
+ }
+
+ // The guarding predicate tells us that C != V. If the known range
+ // of V is [C, t), we can sharpen the range to [C + 1, t). The
+  // range we consider has to correspond to the same signedness as the
+ // predicate we're interested in folding.
+
+ APInt Min = ICmpInst::isSigned(Pred) ?
+ getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
+
+ if (Min == C->getValue()->getValue()) {
+ // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
+ // This is true even if (Min + 1) wraps around -- in case of
+ // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
+
+ APInt SharperMin = Min + 1;
+
+ switch (Pred) {
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE:
+ // We know V `Pred` SharperMin. If this implies LHS `Pred`
+ // RHS, we're done.
+ if (isImpliedCondOperands(Pred, LHS, RHS, V,
+ getConstant(SharperMin)))
+ return true;
+
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT:
+ // We know from the range information that (V `Pred` Min ||
+ // V == Min). We know from the guarding condition that !(V
+ // == Min). This gives us
+ //
+ // V `Pred` Min || V == Min && !(V == Min)
+ // => V `Pred` Min
+ //
+ // If V `Pred` Min implies LHS `Pred` RHS, we're done.
+
+ if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
+ return true;
+
+ default:
+ // No change
+ break;
+ }
+ }
+ }
+
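A worked instance of the sharpening above: suppose the guarding condition
established %n != 0, so FoundPred is ICMP_NE with C = 0 and V = %n, and the
predicate to prove is %n u>= 1. The unsigned minimum of the known range of %n
is 0, which equals C, so SharperMin is 1, and the ICMP_UGE case reduces the
query to whether V u>= 1 implies %n u>= 1, which holds trivially.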
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
@@ -6614,13 +7043,13 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
///
-/// @param IsSubExpr is true when the LHS < RHS condition does not directly
-/// control the branch. In this case, we can only compute an iteration count for
-/// a subexpression that cannot overflow before evaluating true.
+/// @param ControlsExit is true when the LHS < RHS condition directly controls
+/// the branch (loops exits only if condition is true). In this case, we can use
+/// NoWrapFlags to skip overflow checks.
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
- bool IsSubExpr) {
+ bool ControlsExit) {
// We handle only IV < Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
@@ -6631,7 +7060,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- bool NoWrap = !IsSubExpr &&
+ bool NoWrap = ControlsExit &&
IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
const SCEV *Stride = IV->getStepRecurrence(*this);
@@ -6651,9 +7080,19 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
: ICmpInst::ICMP_ULT;
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
- End = IsSigned ? getSMaxExpr(RHS, Start)
- : getUMaxExpr(RHS, Start);
+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) {
+ const SCEV *Diff = getMinusSCEV(RHS, Start);
+ // If we have NoWrap set, then we can assume that the increment won't
+ // overflow, in which case if RHS - Start is a constant, we don't need to
+ // do a max operation since we can just figure it out statically
+ if (NoWrap && isa<SCEVConstant>(Diff)) {
+ APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
+ if (D.isNegative())
+ End = Start;
+ } else
+ End = IsSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+ }
const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
@@ -6684,13 +7123,13 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, /*MustExit=*/true);
+ return ExitLimit(BECount, MaxBECount);
}
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
- bool IsSubExpr) {
+ bool ControlsExit) {
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
@@ -6701,7 +7140,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- bool NoWrap = !IsSubExpr &&
+ bool NoWrap = ControlsExit &&
IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
@@ -6722,9 +7161,19 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
- End = IsSigned ? getSMinExpr(RHS, Start)
- : getUMinExpr(RHS, Start);
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
+ const SCEV *Diff = getMinusSCEV(RHS, Start);
+ // If we have NoWrap set, then we can assume that the increment won't
+ // overflow, in which case if RHS - Start is a constant, we don't need to
+ // do a max operation since we can just figure it out statically
+ if (NoWrap && isa<SCEVConstant>(Diff)) {
+ APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
+ if (!D.isNegative())
+ End = Start;
+ } else
+ End = IsSigned ? getSMinExpr(RHS, Start)
+ : getUMinExpr(RHS, Start);
+ }
const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
@@ -6756,7 +7205,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, /*MustExit=*/true);
+ return ExitLimit(BECount, MaxBECount);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -6984,268 +7433,6 @@ void SCEVAddRecExpr::collectParametricTerms(
});
}
-static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue();
- APInt B = C2->getValue()->getValue();
- uint32_t ABW = A.getBitWidth();
- uint32_t BBW = B.getBitWidth();
-
- if (ABW > BBW)
- B = B.sext(ABW);
- else if (ABW < BBW)
- A = A.sext(BBW);
-
- return APIntOps::srem(A, B);
-}
-
-static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue();
- APInt B = C2->getValue()->getValue();
- uint32_t ABW = A.getBitWidth();
- uint32_t BBW = B.getBitWidth();
-
- if (ABW > BBW)
- B = B.sext(ABW);
- else if (ABW < BBW)
- A = A.sext(BBW);
-
- return APIntOps::sdiv(A, B);
-}
-
-namespace {
-struct FindSCEVSize {
- int Size;
- FindSCEVSize() : Size(0) {}
-
- bool follow(const SCEV *S) {
- ++Size;
- // Keep looking at all operands of S.
- return true;
- }
- bool isDone() const {
- return false;
- }
-};
-}
-
-// Returns the size of the SCEV S.
-static inline int sizeOfSCEV(const SCEV *S) {
- FindSCEVSize F;
- SCEVTraversal<FindSCEVSize> ST(F);
- ST.visitAll(S);
- return F.Size;
-}
-
-namespace {
-
-struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
-public:
- // Computes the Quotient and Remainder of the division of Numerator by
- // Denominator.
- static void divide(ScalarEvolution &SE, const SCEV *Numerator,
- const SCEV *Denominator, const SCEV **Quotient,
- const SCEV **Remainder) {
- assert(Numerator && Denominator && "Uninitialized SCEV");
-
- SCEVDivision D(SE, Numerator, Denominator);
-
- // Check for the trivial case here to avoid having to check for it in the
- // rest of the code.
- if (Numerator == Denominator) {
- *Quotient = D.One;
- *Remainder = D.Zero;
- return;
- }
-
- if (Numerator->isZero()) {
- *Quotient = D.Zero;
- *Remainder = D.Zero;
- return;
- }
-
- // Split the Denominator when it is a product.
- if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
- const SCEV *Q, *R;
- *Quotient = Numerator;
- for (const SCEV *Op : T->operands()) {
- divide(SE, *Quotient, Op, &Q, &R);
- *Quotient = Q;
-
- // Bail out when the Numerator is not divisible by one of the terms of
- // the Denominator.
- if (!R->isZero()) {
- *Quotient = D.Zero;
- *Remainder = Numerator;
- return;
- }
- }
- *Remainder = D.Zero;
- return;
- }
-
- D.visit(Numerator);
- *Quotient = D.Quotient;
- *Remainder = D.Remainder;
- }
-
- SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator)
- : SE(S), Denominator(Denominator) {
- Zero = SE.getConstant(Denominator->getType(), 0);
- One = SE.getConstant(Denominator->getType(), 1);
-
- // By default, we don't know how to divide Expr by Denominator.
- // Providing the default here simplifies the rest of the code.
- Quotient = Zero;
- Remainder = Numerator;
- }
-
- // Except in the trivial case described above, we do not know how to divide
- // Expr by Denominator for the following functions with empty implementation.
- void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
- void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
- void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
- void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
- void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
- void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
- void visitUnknown(const SCEVUnknown *Numerator) {}
- void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
-
- void visitConstant(const SCEVConstant *Numerator) {
- if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
- Quotient = SE.getConstant(sdiv(Numerator, D));
- Remainder = SE.getConstant(srem(Numerator, D));
- return;
- }
- }
-
- void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
- const SCEV *StartQ, *StartR, *StepQ, *StepR;
- assert(Numerator->isAffine() && "Numerator should be affine");
- divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
- divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
- Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
- Numerator->getNoWrapFlags());
- Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
- Numerator->getNoWrapFlags());
- }
-
- void visitAddExpr(const SCEVAddExpr *Numerator) {
- SmallVector<const SCEV *, 2> Qs, Rs;
- Type *Ty = Denominator->getType();
-
- for (const SCEV *Op : Numerator->operands()) {
- const SCEV *Q, *R;
- divide(SE, Op, Denominator, &Q, &R);
-
- // Bail out if types do not match.
- if (Ty != Q->getType() || Ty != R->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
-
- Qs.push_back(Q);
- Rs.push_back(R);
- }
-
- if (Qs.size() == 1) {
- Quotient = Qs[0];
- Remainder = Rs[0];
- return;
- }
-
- Quotient = SE.getAddExpr(Qs);
- Remainder = SE.getAddExpr(Rs);
- }
-
- void visitMulExpr(const SCEVMulExpr *Numerator) {
- SmallVector<const SCEV *, 2> Qs;
- Type *Ty = Denominator->getType();
-
- bool FoundDenominatorTerm = false;
- for (const SCEV *Op : Numerator->operands()) {
- // Bail out if types do not match.
- if (Ty != Op->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
-
- if (FoundDenominatorTerm) {
- Qs.push_back(Op);
- continue;
- }
-
- // Check whether Denominator divides one of the product operands.
- const SCEV *Q, *R;
- divide(SE, Op, Denominator, &Q, &R);
- if (!R->isZero()) {
- Qs.push_back(Op);
- continue;
- }
-
- // Bail out if types do not match.
- if (Ty != Q->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
-
- FoundDenominatorTerm = true;
- Qs.push_back(Q);
- }
-
- if (FoundDenominatorTerm) {
- Remainder = Zero;
- if (Qs.size() == 1)
- Quotient = Qs[0];
- else
- Quotient = SE.getMulExpr(Qs);
- return;
- }
-
- if (!isa<SCEVUnknown>(Denominator)) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
-
- // The Remainder is obtained by replacing Denominator by 0 in Numerator.
- ValueToValueMap RewriteMap;
- RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
- cast<SCEVConstant>(Zero)->getValue();
- Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
-
- if (Remainder->isZero()) {
- // The Quotient is obtained by replacing Denominator by 1 in Numerator.
- RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
- cast<SCEVConstant>(One)->getValue();
- Quotient =
- SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
- return;
- }
-
- // Quotient is (Numerator - Remainder) divided by Denominator.
- const SCEV *Q, *R;
- const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
- if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
- // This SCEV does not seem to simplify: fail the division here.
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
- divide(SE, Diff, Denominator, &Q, &R);
- assert(R == Zero &&
- "(Numerator - Remainder) should evenly divide Denominator");
- Quotient = Q;
- }
-
-private:
- ScalarEvolution &SE;
- const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
-};
-}
-
static bool findArrayDimensionsRec(ScalarEvolution &SE,
SmallVectorImpl<const SCEV *> &Terms,
SmallVectorImpl<const SCEV *> &Sizes) {
@@ -7270,7 +7457,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
for (const SCEV *&Term : Terms) {
// Normalize the terms before the next call to findArrayDimensionsRec.
const SCEV *Q, *R;
- SCEVDivision::divide(SE, Term, Step, &Q, &R);
+ SCEVSDivision::divide(SE, Term, Step, &Q, &R);
// Bail out when GCD does not evenly divide one of the terms.
if (!R->isZero())
@@ -7407,7 +7594,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
// Divide all terms by the element size.
for (const SCEV *&Term : Terms) {
const SCEV *Q, *R;
- SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+ SCEVSDivision::divide(SE, Term, ElementSize, &Q, &R);
Term = Q;
}
@@ -7454,7 +7641,7 @@ void SCEVAddRecExpr::computeAccessFunctions(
int Last = Sizes.size() - 1;
for (int i = Last; i >= 0; i--) {
const SCEV *Q, *R;
- SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
+ SCEVSDivision::divide(SE, Res, Sizes[i], &Q, &R);
DEBUG({
dbgs() << "Res: " << *Res << "\n";
@@ -7609,7 +7796,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
// that until everything else is done.
if (U == Old)
continue;
- if (!Visited.insert(U))
+ if (!Visited.insert(U).second)
continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
@@ -7638,6 +7825,7 @@ ScalarEvolution::ScalarEvolution()
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
+ AT = &getAnalysis<AssumptionTracker>();
LI = &getAnalysis<LoopInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
@@ -7678,6 +7866,7 @@ void ScalarEvolution::releaseMemory() {
void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequiredTransitive<LoopInfo>();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 6933f74..5c339ee 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -162,10 +162,10 @@ ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
if (alias(Location(AO ? AO : LocA.Ptr,
AO ? +UnknownSize : LocA.Size,
- AO ? nullptr : LocA.TBAATag),
+ AO ? AAMDNodes() : LocA.AATags),
Location(BO ? BO : LocB.Ptr,
BO ? +UnknownSize : LocB.Size,
- BO ? nullptr : LocB.TBAATag)) == NoAlias)
+ BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 8c75b0d..bee3685 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1443,8 +1443,12 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
BasicBlock *HP = *HPI;
- if (!PredSeen.insert(HP))
+ if (!PredSeen.insert(HP).second) {
+ // There must be an incoming value for each predecessor, even the
+ // duplicates!
+ CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
continue;
+ }
if (L->contains(HP)) {
// Insert a unit add instruction right before the terminator
@@ -1707,7 +1711,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT)) {
+ if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AT)) {
Phi->replaceAllUsesWith(V);
DeadInsts.push_back(Phi);
++NumElim;
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 3ccefb0..b238fe4 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -126,7 +126,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
// Normalized form: {-2,+,1,+,2}
// Denormalized form: {1,+,3,+,2}
//
- // However, denormalization would use the a different step expression than
+ // However, denormalization would use a different step expression than
// normalization (see getPostIncExpr), generating the wrong final
// expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
if (AR->isAffine() &&
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
new file mode 100644
index 0000000..f6c300a
--- /dev/null
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -0,0 +1,245 @@
+//===- ScopedNoAliasAA.cpp - Scoped No-Alias Alias Analysis ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScopedNoAlias alias-analysis pass, which implements
+// metadata-based scoped no-alias support.
+//
+// Alias-analysis scopes are defined by an id (which can be a string or some
+// other metadata node), a domain node, and an optional descriptive string.
+// A domain is defined by an id (which can be a string or some other metadata
+// node), and an optional descriptive string.
+//
+// !dom0 = metadata !{ metadata !"domain of foo()" }
+// !scope1 = metadata !{ metadata !scope1, metadata !dom0, metadata !"scope 1" }
+// !scope2 = metadata !{ metadata !scope2, metadata !dom0, metadata !"scope 2" }
+//
+// Loads and stores can be tagged with an alias-analysis scope, and also, with
+// a noalias tag for a specific scope:
+//
+// ... = load %ptr1, !alias.scope !{ !scope1 }
+// ... = load %ptr2, !alias.scope !{ !scope1, !scope2 }, !noalias !{ !scope1 }
+//
+// When evaluating an aliasing query, if one of the instructions is associated
+// with a set of noalias scopes in some domain that is a superset of the alias
+// scopes in that domain of some other instruction, then the two memory
+// accesses are assumed not to alias.
+//
+//===----------------------------------------------------------------------===//
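Applying the stated rule to the example loads above: in domain !dom0 the second
load carries the noalias scope set { !scope1 }, which is a superset of the
first load's alias scopes in that domain, also { !scope1 }, so the two accesses
are treated as NoAlias. If the first load were instead tagged with
!alias.scope !{ !scope1, !scope2 }, neither access's noalias set would cover
the other's alias scopes and the query would fall through to the next analysis.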
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// A handy option for disabling scoped no-alias functionality. The same effect
+// can also be achieved by stripping the associated metadata tags from IR, but
+// this option is sometimes more convenient.
+static cl::opt<bool>
+EnableScopedNoAlias("enable-scoped-noalias", cl::init(true));
+
+namespace {
+/// AliasScopeNode - This is a simple wrapper around an MDNode which provides
+/// a higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class AliasScopeNode {
+ const MDNode *Node;
+
+public:
+ AliasScopeNode() : Node(0) {}
+ explicit AliasScopeNode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this AliasScopeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getDomain - Get the MDNode for this AliasScopeNode's domain.
+ const MDNode *getDomain() const {
+ if (Node->getNumOperands() < 2)
+ return nullptr;
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+};
+
+/// ScopedNoAliasAA - This is a simple alias analysis
+/// implementation that uses scoped-noalias metadata to answer queries.
+class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis {
+public:
+ static char ID; // Class identification, replacement for typeinfo
+ ScopedNoAliasAA() : ImmutablePass(ID) {
+ initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry());
+ }
+
+ void initializePass() override { InitializeAliasAnalysis(this); }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ void *getAdjustedAnalysisPointer(const void *PI) override {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+protected:
+ bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
+ void collectMDInDomain(const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) const;
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ AliasResult alias(const Location &LocA, const Location &LocB) override;
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
+ ModRefBehavior getModRefBehavior(const Function *F) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) override;
+};
+} // End of anonymous namespace
+
+// Register this pass...
+char ScopedNoAliasAA::ID = 0;
+INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias",
+ "Scoped NoAlias Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createScopedNoAliasAAPass() {
+ return new ScopedNoAliasAA();
+}
+
+void
+ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+void
+ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) const {
+ for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i)
+ if (const MDNode *MD = dyn_cast<MDNode>(List->getOperand(i)))
+ if (AliasScopeNode(MD).getDomain() == Domain)
+ Nodes.insert(MD);
+}
+
+bool
+ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,
+ const MDNode *NoAlias) const {
+ if (!Scopes || !NoAlias)
+ return true;
+
+ // Collect the set of scope domains relevant to the noalias scopes.
+ SmallPtrSet<const MDNode *, 16> Domains;
+ for (unsigned i = 0, ie = NoAlias->getNumOperands(); i != ie; ++i)
+ if (const MDNode *NAMD = dyn_cast<MDNode>(NoAlias->getOperand(i)))
+ if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+ Domains.insert(Domain);
+
+ // We alias unless, for some domain, the set of noalias scopes in that domain
+ // is a superset of the set of alias scopes in that domain.
+ for (const MDNode *Domain : Domains) {
+ SmallPtrSet<const MDNode *, 16> NANodes, ScopeNodes;
+ collectMDInDomain(NoAlias, Domain, NANodes);
+ collectMDInDomain(Scopes, Domain, ScopeNodes);
+ if (!ScopeNodes.size())
+ continue;
+
+ // To not alias, all of the nodes in ScopeNodes must be in NANodes.
+ bool FoundAll = true;
+ for (const MDNode *SMD : ScopeNodes)
+ if (!NANodes.count(SMD)) {
+ FoundAll = false;
+ break;
+ }
+
+ if (FoundAll)
+ return false;
+ }
+
+ return true;
+}
+
+AliasAnalysis::AliasResult
+ScopedNoAliasAA::alias(const Location &LocA, const Location &LocB) {
+ if (!EnableScopedNoAlias)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Get the attached MDNodes.
+ const MDNode *AScopes = LocA.AATags.Scope,
+ *BScopes = LocB.AATags.Scope;
+
+ const MDNode *ANoAlias = LocA.AATags.NoAlias,
+ *BNoAlias = LocB.AATags.NoAlias;
+
+ if (!mayAliasInScopes(AScopes, BNoAlias))
+ return NoAlias;
+
+ if (!mayAliasInScopes(BScopes, ANoAlias))
+ return NoAlias;
+
+ // If they may alias, chain to the next AliasAnalysis.
+ return AliasAnalysis::alias(LocA, LocB);
+}
+
+bool ScopedNoAliasAA::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) {
+ return AliasAnalysis::getModRefBehavior(CS);
+}
+
+AliasAnalysis::ModRefBehavior
+ScopedNoAliasAA::getModRefBehavior(const Function *F) {
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ if (!EnableScopedNoAlias)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
+ LLVMContext::MD_noalias)))
+ return NoModRef;
+
+ if (!mayAliasInScopes(
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ Loc.AATags.NoAlias))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ if (!EnableScopedNoAlias)
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+ if (!mayAliasInScopes(
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return NoModRef;
+
+ if (!mayAliasInScopes(
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h
new file mode 100644
index 0000000..fd3fbc0
--- /dev/null
+++ b/lib/Analysis/StratifiedSets.h
@@ -0,0 +1,692 @@
+//===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STRATIFIEDSETS_H
+#define LLVM_ADT_STRATIFIEDSETS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include <bitset>
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+// \brief An index into Stratified Sets.
+typedef unsigned StratifiedIndex;
+// NOTE: ^ This can't be a short -- bootstrapping clang has a case where
+// ~1M sets exist.
+
+// \brief Container of information related to a value in a StratifiedSet.
+struct StratifiedInfo {
+ StratifiedIndex Index;
+ // For field sensitivity, etc. we can tack attributes on to this struct.
+};
+
+// The number of attributes that StratifiedAttrs should contain. Attributes are
+// described below, and 32 was an arbitrary choice because it fits nicely in 32
+// bits (because we use a bitset for StratifiedAttrs).
+static const unsigned NumStratifiedAttrs = 32;
+
+// These are attributes that the users of StratifiedSets/StratifiedSetBuilders
+// may use for various purposes. These also have the special property that
+// they are merged down. So, if set A is above set B, and one decides to set an
+// attribute in set A, then the attribute will automatically be set in set B.
+typedef std::bitset<NumStratifiedAttrs> StratifiedAttrs;
+
+// \brief A "link" between two StratifiedSets.
+struct StratifiedLink {
+ // \brief This is a value used to signify "does not exist" where
+ // the StratifiedIndex type is used. This is used instead of
+ // Optional<StratifiedIndex> because Optional<StratifiedIndex> would
+ // eat up a considerable amount of extra memory, after struct
+ // padding/alignment is taken into account.
+ static const StratifiedIndex SetSentinel;
+
+ // \brief The index for the set "above" current
+ StratifiedIndex Above;
+
+ // \brief The link for the set "below" current
+ StratifiedIndex Below;
+
+ // \brief Attributes for these StratifiedSets.
+ StratifiedAttrs Attrs;
+
+ StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {}
+
+ bool hasBelow() const { return Below != SetSentinel; }
+ bool hasAbove() const { return Above != SetSentinel; }
+
+ void clearBelow() { Below = SetSentinel; }
+ void clearAbove() { Above = SetSentinel; }
+};
+
+// \brief These are stratified sets, as described in "Fast algorithms for
+// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M
+// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets
+// of Value*s. If two Value*s are in the same set, or if both sets have
+// overlapping attributes, then the Value*s are said to alias.
+//
+// Sets may be related by position, meaning that one set may be considered as
+// above or below another. In CFL Alias Analysis, this gives us an indication
+// of how two variables are related; if the set of variable A is below a set
+// containing variable B, then at some point, a variable that has interacted
+// with B (or B itself) was either used in order to extract the variable A, or
+// was used as storage of variable A.
+//
+// Sets may also have attributes (as noted above). These attributes are
+// generally used for noting whether a variable in the set has interacted with
+// a variable whose origins we don't quite know (i.e. globals/arguments), or if
+// the variable may have had operations performed on it (modified in a function
+// call). All attributes that exist in a set A must exist in all sets marked as
+// below set A.
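+//
+// A rough usage sketch (illustrative only; the CFL alias-analysis pass is the
+// intended client):
+//
+//   StratifiedSets<Value *> Sets = Builder.build();
+//   auto InfoA = Sets.find(ValA), InfoB = Sets.find(ValB);
+//   if (InfoA.hasValue() && InfoB.hasValue() &&
+//       (InfoA->Index == InfoB->Index ||
+//        (Sets.getLink(InfoA->Index).Attrs &
+//         Sets.getLink(InfoB->Index).Attrs).any()))
+//     ; // ValA and ValB may alias.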
+template <typename T> class StratifiedSets {
+public:
+ StratifiedSets() {}
+
+ StratifiedSets(DenseMap<T, StratifiedInfo> Map,
+ std::vector<StratifiedLink> Links)
+ : Values(std::move(Map)), Links(std::move(Links)) {}
+
+ StratifiedSets(StratifiedSets<T> &&Other) { *this = std::move(Other); }
+
+ StratifiedSets &operator=(StratifiedSets<T> &&Other) {
+ Values = std::move(Other.Values);
+ Links = std::move(Other.Links);
+ return *this;
+ }
+
+ Optional<StratifiedInfo> find(const T &Elem) const {
+ auto Iter = Values.find(Elem);
+ if (Iter == Values.end()) {
+ return NoneType();
+ }
+ return Iter->second;
+ }
+
+ const StratifiedLink &getLink(StratifiedIndex Index) const {
+ assert(inbounds(Index));
+ return Links[Index];
+ }
+
+private:
+ DenseMap<T, StratifiedInfo> Values;
+ std::vector<StratifiedLink> Links;
+
+ bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); }
+};
+
+// \brief Generic Builder class that produces StratifiedSets instances.
+//
+// The goal of this builder is to efficiently produce correct StratifiedSets
+// instances. To this end, we use a few tricks:
+// > Set chains (A method for linking sets together)
+// > Set remaps (A method for marking a set as an alias [irony?] of another)
+//
+// ==== Set chains ====
+// This builder has a notion of some value A being above, below, or with some
+// other value B:
+// > The `A above B` relationship implies that there is a reference edge going
+// from A to B. Namely, it notes that A can store anything in B's set.
+// > The `A below B` relationship is the opposite of `A above B`. It implies
+// that there's a dereference edge going from A to B.
+// > The `A with B` relationship states that there's an assignment edge going
+// from A to B, and that A and B should be treated as equals.
+//
+// As an example, take the following code snippet:
+//
+// %a = alloca i32, align 4
+// %ap = alloca i32*, align 8
+// %app = alloca i32**, align 8
+// store %a, %ap
+// store %ap, %app
+// %aw = getelementptr %ap, 0
+//
+// Given this, the following relations exist:
+// - %a below %ap & %ap above %a
+// - %ap below %app & %app above %ap
+// - %aw with %ap & %ap with %aw
+//
+// These relations produce the following sets:
+// [{%a}, {%ap, %aw}, {%app}]
+//
+// ...Which states that the only MayAlias relationship in the above program is
+// between %ap and %aw.
+//
+// Life gets more complicated when we actually have logic in our programs. So,
+// we must either remove this logic from our programs or make concessions for
+// it in our AA algorithms. In this case, we have decided to select the latter
+// option.
+//
+// First complication: Conditionals
+// Motivation:
+// %ad = alloca int, align 4
+// %a = alloca int*, align 8
+// %b = alloca int*, align 8
+// %bp = alloca int**, align 8
+// %c = call i1 @SomeFunc()
+// %k = select %c, %a, %b
+// store %ad, %a
+// store %b, %bp
+//
+// %k has 'with' edges to both %a and %b, which ordinarily would not be linked
+// together. So, we merge the set that contains %a with the set that contains
+// %b. We then recursively merge the set above %a with the set above %b, and
+// the set below %a with the set below %b, etc. Ultimately, the sets for this
+// program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below
+// {%a, %b, %k}, which in turn is below {%bp}.
+//
+// Second complication: Arbitrary casts
+// Motivation:
+// %ip = alloca int*, align 8
+// %ipp = alloca int**, align 8
+// %i = bitcast %ipp to int
+// store %ip, %ipp
+// store %i, %ip
+//
+// This is impossible to construct with any of the rules above, because a set
+// containing both %i and %ipp is supposed to exist, the set with %i is supposed
+// to be below the set with %ip, and the set with %ip is supposed to be below
+// the set with %ipp. Because we don't allow circular relationships like this,
+// we merge all concerned sets into one. So, the above code would generate a
+// single StratifiedSet: {%ip, %ipp, %i}.
+//
+// ==== Set remaps ====
+// More of an implementation detail than anything -- when merging sets, we need
+// to update the numbers of all of the elements mapped to those sets. Rather
+// than doing this at each merge, we note in the BuilderLink structure that a
+// remap has occurred, and use this information so we can defer renumbering set
+// elements until build time.
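+//
+// For example (illustrative): if set 3 was merged into set 1, and set 1 was
+// later merged into set 0, then linksAt(3) walks the chain 3 -> 1 -> 0 and
+// then updates the remapped links so that both 3 and 1 point directly at 0.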
+template <typename T> class StratifiedSetsBuilder {
+ // \brief Represents a Stratified Set, with information about the Stratified
+ // Set above it, the set below it, and whether the current set has been
+ // remapped to another.
+ struct BuilderLink {
+ const StratifiedIndex Number;
+
+ BuilderLink(StratifiedIndex N) : Number(N) {
+ Remap = StratifiedLink::SetSentinel;
+ }
+
+ bool hasAbove() const {
+ assert(!isRemapped());
+ return Link.hasAbove();
+ }
+
+ bool hasBelow() const {
+ assert(!isRemapped());
+ return Link.hasBelow();
+ }
+
+ void setBelow(StratifiedIndex I) {
+ assert(!isRemapped());
+ Link.Below = I;
+ }
+
+ void setAbove(StratifiedIndex I) {
+ assert(!isRemapped());
+ Link.Above = I;
+ }
+
+ void clearBelow() {
+ assert(!isRemapped());
+ Link.clearBelow();
+ }
+
+ void clearAbove() {
+ assert(!isRemapped());
+ Link.clearAbove();
+ }
+
+ StratifiedIndex getBelow() const {
+ assert(!isRemapped());
+ assert(hasBelow());
+ return Link.Below;
+ }
+
+ StratifiedIndex getAbove() const {
+ assert(!isRemapped());
+ assert(hasAbove());
+ return Link.Above;
+ }
+
+ StratifiedAttrs &getAttrs() {
+ assert(!isRemapped());
+ return Link.Attrs;
+ }
+
+ void setAttr(unsigned index) {
+ assert(!isRemapped());
+ assert(index < NumStratifiedAttrs);
+ Link.Attrs.set(index);
+ }
+
+ void setAttrs(const StratifiedAttrs &other) {
+ assert(!isRemapped());
+ Link.Attrs |= other;
+ }
+
+ bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; }
+
+ // \brief For initial remapping to another set
+ void remapTo(StratifiedIndex Other) {
+ assert(!isRemapped());
+ Remap = Other;
+ }
+
+ StratifiedIndex getRemapIndex() const {
+ assert(isRemapped());
+ return Remap;
+ }
+
+ // \brief Should only be called when we're already remapped.
+ void updateRemap(StratifiedIndex Other) {
+ assert(isRemapped());
+ Remap = Other;
+ }
+
+ // \brief Prefer the above functions to calling things directly on what's
+ // returned from this -- they guard against unexpected calls when the
+ // current BuilderLink is remapped.
+ const StratifiedLink &getLink() const { return Link; }
+
+ private:
+ StratifiedLink Link;
+ StratifiedIndex Remap;
+ };
+
+ // \brief This function performs all of the set unioning/value renumbering
+ // that we've been putting off, and generates a vector<StratifiedLink> that
+ // may be placed in a StratifiedSets instance.
+ void finalizeSets(std::vector<StratifiedLink> &StratLinks) {
+ DenseMap<StratifiedIndex, StratifiedIndex> Remaps;
+ for (auto &Link : Links) {
+ if (Link.isRemapped()) {
+ continue;
+ }
+
+ StratifiedIndex Number = StratLinks.size();
+ Remaps.insert(std::make_pair(Link.Number, Number));
+ StratLinks.push_back(Link.getLink());
+ }
+
+ for (auto &Link : StratLinks) {
+ if (Link.hasAbove()) {
+ auto &Above = linksAt(Link.Above);
+ auto Iter = Remaps.find(Above.Number);
+ assert(Iter != Remaps.end());
+ Link.Above = Iter->second;
+ }
+
+ if (Link.hasBelow()) {
+ auto &Below = linksAt(Link.Below);
+ auto Iter = Remaps.find(Below.Number);
+ assert(Iter != Remaps.end());
+ Link.Below = Iter->second;
+ }
+ }
+
+ for (auto &Pair : Values) {
+ auto &Info = Pair.second;
+ auto &Link = linksAt(Info.Index);
+ auto Iter = Remaps.find(Link.Number);
+ assert(Iter != Remaps.end());
+ Info.Index = Iter->second;
+ }
+ }
+
+ // \brief There's a guarantee in StratifiedLink that all bits set in a
+ // Link.Attrs will be set in the Link.Attrs of all links "below" it.
+ static void propagateAttrs(std::vector<StratifiedLink> &Links) {
+ const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) {
+ const auto *Link = &Links[Idx];
+ while (Link->hasAbove()) {
+ Idx = Link->Above;
+ Link = &Links[Idx];
+ }
+ return Idx;
+ };
+
+ SmallSet<StratifiedIndex, 16> Visited;
+ for (unsigned I = 0, E = Links.size(); I < E; ++I) {
+ auto CurrentIndex = getHighestParentAbove(I);
+ if (!Visited.insert(CurrentIndex).second) {
+ continue;
+ }
+
+ while (Links[CurrentIndex].hasBelow()) {
+ auto &CurrentBits = Links[CurrentIndex].Attrs;
+ auto NextIndex = Links[CurrentIndex].Below;
+ auto &NextBits = Links[NextIndex].Attrs;
+ NextBits |= CurrentBits;
+ CurrentIndex = NextIndex;
+ }
+ }
+ }
+
+public:
+ // \brief Builds a StratifiedSet from the information we've been given since
+ // either construction or the prior build() call.
+ StratifiedSets<T> build() {
+ std::vector<StratifiedLink> StratLinks;
+ finalizeSets(StratLinks);
+ propagateAttrs(StratLinks);
+ Links.clear();
+ return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
+ }
+
+ std::size_t size() const { return Values.size(); }
+ std::size_t numSets() const { return Links.size(); }
+
+ bool has(const T &Elem) const { return get(Elem).hasValue(); }
+
+ bool add(const T &Main) {
+ if (get(Main).hasValue())
+ return false;
+
+ auto NewIndex = getNewUnlinkedIndex();
+ return addAtMerging(Main, NewIndex);
+ }
+
+ // \brief Restructures the stratified sets as necessary so that "ToAdd" is in
+ // a set above "Main". There are some cases where this is not possible (see
+ // above), so we merge them such that ToAdd and Main are in the same set.
+ bool addAbove(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto Index = *indexOf(Main);
+ if (!linksAt(Index).hasAbove())
+ addLinkAbove(Index);
+
+ auto Above = linksAt(Index).getAbove();
+ return addAtMerging(ToAdd, Above);
+ }
+
+ // \brief Restructures the stratified sets as necessary so that "ToAdd" is in
+ // a set below "Main". There are some cases where this is not possible (see
+ // above), so we merge them such that ToAdd and Main are in the same set.
+ bool addBelow(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto Index = *indexOf(Main);
+ if (!linksAt(Index).hasBelow())
+ addLinkBelow(Index);
+
+ auto Below = linksAt(Index).getBelow();
+ return addAtMerging(ToAdd, Below);
+ }
+
+ bool addWith(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto MainIndex = *indexOf(Main);
+ return addAtMerging(ToAdd, MainIndex);
+ }
+
+ void noteAttribute(const T &Main, unsigned AttrNum) {
+ assert(has(Main));
+ assert(AttrNum < StratifiedLink::SetSentinel);
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ Link.setAttr(AttrNum);
+ }
+
+ void noteAttributes(const T &Main, const StratifiedAttrs &NewAttrs) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ Link.setAttrs(NewAttrs);
+ }
+
+ StratifiedAttrs getAttributes(const T &Main) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto *Link = &linksAt(Info->Index);
+ auto Attrs = Link->getAttrs();
+ while (Link->hasAbove()) {
+ Link = &linksAt(Link->getAbove());
+ Attrs |= Link->getAttrs();
+ }
+
+ return Attrs;
+ }
+
+ bool getAttribute(const T &Main, unsigned AttrNum) {
+ assert(AttrNum < StratifiedLink::SetSentinel);
+ auto Attrs = getAttributes(Main);
+ return Attrs[AttrNum];
+ }
+
+ // \brief Gets the attributes that have been applied to the set that Main
+ // belongs to. It ignores attributes in any sets above the one that Main
+ // resides in.
+ StratifiedAttrs getRawAttributes(const T &Main) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ return Link.getAttrs();
+ }
+
+ // \brief Gets an attribute from the attributes that have been applied to the
+ // set that Main belongs to. It ignores attributes in any sets above the one
+ // that Main resides in.
+ bool getRawAttribute(const T &Main, unsigned AttrNum) {
+ assert(AttrNum < StratifiedLink::SetSentinel);
+ auto Attrs = getRawAttributes(Main);
+ return Attrs[AttrNum];
+ }
+
+private:
+ DenseMap<T, StratifiedInfo> Values;
+ std::vector<BuilderLink> Links;
+
+ // \brief Adds the given element at the given index, merging sets if
+ // necessary.
+ bool addAtMerging(const T &ToAdd, StratifiedIndex Index) {
+ StratifiedInfo Info = {Index};
+ auto Pair = Values.insert(std::make_pair(ToAdd, Info));
+ if (Pair.second)
+ return true;
+
+ auto &Iter = Pair.first;
+ auto &IterSet = linksAt(Iter->second.Index);
+ auto &ReqSet = linksAt(Index);
+
+ // Failed to add where we wanted to. Merge the sets.
+ if (&IterSet != &ReqSet)
+ merge(IterSet.Number, ReqSet.Number);
+
+ return false;
+ }
+
+ // \brief Gets the BuilderLink at the given index, taking set remapping into
+ // account.
+ BuilderLink &linksAt(StratifiedIndex Index) {
+ auto *Start = &Links[Index];
+ if (!Start->isRemapped())
+ return *Start;
+
+ auto *Current = Start;
+ while (Current->isRemapped())
+ Current = &Links[Current->getRemapIndex()];
+
+ auto NewRemap = Current->Number;
+
+ // Run through everything that has yet to be updated, and update them to
+ // remap to NewRemap
+ Current = Start;
+ while (Current->isRemapped()) {
+ auto *Next = &Links[Current->getRemapIndex()];
+ Current->updateRemap(NewRemap);
+ Current = Next;
+ }
+
+ return *Current;
+ }
+
+ // \brief Merges two sets into one another. Assumes that these sets are not
+ // already one and the same.
+ void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) {
+ assert(inbounds(Idx1) && inbounds(Idx2));
+ assert(&linksAt(Idx1) != &linksAt(Idx2) &&
+ "Merging a set into itself is not allowed");
+
+ // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge
+ // both the given sets, and all sets between them, into one.
+ if (tryMergeUpwards(Idx1, Idx2))
+ return;
+
+ if (tryMergeUpwards(Idx2, Idx1))
+ return;
+
+ // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`.
+ // We therefore need to merge the two chains together.
+ mergeDirect(Idx1, Idx2);
+ }
+
+ // \brief Merges two sets assuming that the set at `Idx1` is unreachable from
+ // traversing above or below the set at `Idx2`.
+ void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) {
+ assert(inbounds(Idx1) && inbounds(Idx2));
+
+ auto *LinksInto = &linksAt(Idx1);
+ auto *LinksFrom = &linksAt(Idx2);
+ // Merging everything above LinksInto then proceeding to merge everything
+ // below LinksInto becomes problematic, so we go as far "up" as possible!
+ while (LinksInto->hasAbove() && LinksFrom->hasAbove()) {
+ LinksInto = &linksAt(LinksInto->getAbove());
+ LinksFrom = &linksAt(LinksFrom->getAbove());
+ }
+
+ if (LinksFrom->hasAbove()) {
+ LinksInto->setAbove(LinksFrom->getAbove());
+ auto &NewAbove = linksAt(LinksInto->getAbove());
+ NewAbove.setBelow(LinksInto->Number);
+ }
+
+ // Merging strategy:
+ // > If neither has links below, stop.
+ // > If only `LinksInto` has links below, stop.
+ // > If only `LinksFrom` has links below, reset `LinksInto.Below` to
+ // match `LinksFrom.Below`.
+ // > If both have links below, deal with those next.
+ while (LinksInto->hasBelow() && LinksFrom->hasBelow()) {
+ auto &FromAttrs = LinksFrom->getAttrs();
+ LinksInto->setAttrs(FromAttrs);
+
+ // Remap needs to happen after getBelow(), but before
+ // assignment of LinksFrom
+ auto *NewLinksFrom = &linksAt(LinksFrom->getBelow());
+ LinksFrom->remapTo(LinksInto->Number);
+ LinksFrom = NewLinksFrom;
+ LinksInto = &linksAt(LinksInto->getBelow());
+ }
+
+ if (LinksFrom->hasBelow()) {
+ LinksInto->setBelow(LinksFrom->getBelow());
+ auto &NewBelow = linksAt(LinksInto->getBelow());
+ NewBelow.setAbove(LinksInto->Number);
+ }
+
+ LinksFrom->remapTo(LinksInto->Number);
+ }
+
+ // \brief Checks to see if lowerIndex is at a level lower than upperIndex.
+ // If so, it will merge lowerIndex with upperIndex (and all of the sets
+ // between) and return true. Otherwise, it will return false.
+ bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) {
+ assert(inbounds(LowerIndex) && inbounds(UpperIndex));
+ auto *Lower = &linksAt(LowerIndex);
+ auto *Upper = &linksAt(UpperIndex);
+ if (Lower == Upper)
+ return true;
+
+ SmallVector<BuilderLink *, 8> Found;
+ auto *Current = Lower;
+ auto Attrs = Current->getAttrs();
+ while (Current->hasAbove() && Current != Upper) {
+ Found.push_back(Current);
+ Attrs |= Current->getAttrs();
+ Current = &linksAt(Current->getAbove());
+ }
+
+ if (Current != Upper)
+ return false;
+
+ Upper->setAttrs(Attrs);
+
+ if (Lower->hasBelow()) {
+ auto NewBelowIndex = Lower->getBelow();
+ Upper->setBelow(NewBelowIndex);
+ auto &NewBelow = linksAt(NewBelowIndex);
+ NewBelow.setAbove(UpperIndex);
+ } else {
+ Upper->clearBelow();
+ }
+
+ for (const auto &Ptr : Found)
+ Ptr->remapTo(Upper->Number);
+
+ return true;
+ }
+
+ Optional<const StratifiedInfo *> get(const T &Val) const {
+ auto Result = Values.find(Val);
+ if (Result == Values.end())
+ return NoneType();
+ return &Result->second;
+ }
+
+ Optional<StratifiedInfo *> get(const T &Val) {
+ auto Result = Values.find(Val);
+ if (Result == Values.end())
+ return NoneType();
+ return &Result->second;
+ }
+
+ Optional<StratifiedIndex> indexOf(const T &Val) {
+ auto MaybeVal = get(Val);
+ if (!MaybeVal.hasValue())
+ return NoneType();
+ auto *Info = *MaybeVal;
+ auto &Link = linksAt(Info->Index);
+ return Link.Number;
+ }
+
+ StratifiedIndex addLinkBelow(StratifiedIndex Set) {
+ auto At = addLinks();
+ Links[Set].setBelow(At);
+ Links[At].setAbove(Set);
+ return At;
+ }
+
+ StratifiedIndex addLinkAbove(StratifiedIndex Set) {
+ auto At = addLinks();
+ Links[At].setBelow(Set);
+ Links[Set].setAbove(At);
+ return At;
+ }
+
+ StratifiedIndex getNewUnlinkedIndex() { return addLinks(); }
+
+ StratifiedIndex addLinks() {
+ auto Link = Links.size();
+ Links.push_back(BuilderLink(Link));
+ return Link;
+ }
+
+ bool inbounds(StratifiedIndex N) const { return N < Links.size(); }
+};
+}
+#endif // LLVM_ADT_STRATIFIEDSETS_H
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index cdb0b79..c1ffb9d 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -87,9 +87,10 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return PrevTTI->isLoweredToCall(F);
}
-void TargetTransformInfo::getUnrollingPreferences(Loop *L,
- UnrollingPreferences &UP) const {
- PrevTTI->getUnrollingPreferences(L, UP);
+void
+TargetTransformInfo::getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const {
+ PrevTTI->getUnrollingPreferences(F, L, UP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
@@ -167,15 +168,16 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return PrevTTI->getRegisterBitWidth(Vector);
}
-unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
- return PrevTTI->getMaximumUnrollFactor();
+unsigned TargetTransformInfo::getMaxInterleaveFactor() const {
+ return PrevTTI->getMaxInterleaveFactor();
}
-unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
- Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
- return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+unsigned TargetTransformInfo::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
+ return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
}
unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
@@ -230,6 +232,11 @@ unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
}
+unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys)
+ const {
+ return PrevTTI->getCostOfKeepingLiveOverCall(Tys);
+}
+
namespace {
struct NoTTI final : ImmutablePass, TargetTransformInfo {
@@ -239,7 +246,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
initializeNoTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override {
+ void initializePass() override {
// Note that this subclass is special, and must *not* call initializeTTI as
// it does not chain.
TopTTI = this;
@@ -248,7 +255,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
DL = DLP ? &DLP->getDataLayout() : nullptr;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
// Note that this subclass is special, and must *not* call
// TTI::getAnalysisUsage as it breaks the recursion.
}
@@ -257,7 +264,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;
@@ -385,6 +392,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
// FIXME: This is wrong for libc intrinsics.
return TCC_Basic;
+ case Intrinsic::annotation:
+ case Intrinsic::assume:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::invariant_start:
@@ -466,6 +475,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
// These will all likely lower to a single selection DAG node.
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
+ Name == "fmin" || Name == "fminf" || Name == "fminl" ||
+ Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
return false;
@@ -480,8 +491,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
return true;
}
- void getUnrollingPreferences(Loop *, UnrollingPreferences &) const override {
- }
+ void getUnrollingPreferences(const Function *, Loop *,
+ UnrollingPreferences &) const override {}
bool isLegalAddImmediate(int64_t Imm) const override {
return false;
@@ -558,12 +569,13 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
return 32;
}
- unsigned getMaximumUnrollFactor() const override {
+ unsigned getMaxInterleaveFactor() const override {
return 1;
}
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind) const override {
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override {
return 1;
}
@@ -612,6 +624,11 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
unsigned getReductionCost(unsigned, Type *, bool) const override {
return 1;
}
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override {
+ return 0;
+ }
+
};
} // end anonymous namespace
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index f36f6f8..f347eb5 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -454,9 +454,9 @@ TypeBasedAliasAnalysis::alias(const Location &LocA,
// Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
// be conservative.
- const MDNode *AM = LocA.TBAATag;
+ const MDNode *AM = LocA.AATags.TBAA;
if (!AM) return AliasAnalysis::alias(LocA, LocB);
- const MDNode *BM = LocB.TBAATag;
+ const MDNode *BM = LocB.AATags.TBAA;
if (!BM) return AliasAnalysis::alias(LocA, LocB);
// If they may alias, chain to the next AliasAnalysis.
@@ -472,7 +472,7 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
if (!EnableTBAA)
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
- const MDNode *M = Loc.TBAATag;
+ const MDNode *M = Loc.AATags.TBAA;
if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
// If this is an "immutable" type, we can assume the pointer is pointing
@@ -513,9 +513,9 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (!EnableTBAA)
return AliasAnalysis::getModRefInfo(CS, Loc);
- if (const MDNode *L = Loc.TBAATag)
+ if (const MDNode *L = Loc.AATags.TBAA)
if (const MDNode *M =
- CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
return NoModRef;
@@ -529,9 +529,9 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
if (const MDNode *M1 =
- CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (const MDNode *M2 =
- CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
return NoModRef;
@@ -611,3 +611,24 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
return MDNode::get(A->getContext(), Ops);
}
+
+void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
+ if (Merge)
+ N.TBAA =
+ MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
+ else
+ N.TBAA = getMetadata(LLVMContext::MD_tbaa);
+
+ if (Merge)
+ N.Scope =
+ MDNode::intersect(N.Scope, getMetadata(LLVMContext::MD_alias_scope));
+ else
+ N.Scope = getMetadata(LLVMContext::MD_alias_scope);
+
+ if (Merge)
+ N.NoAlias =
+ MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
+ else
+ N.NoAlias = getMetadata(LLVMContext::MD_noalias);
+}
+
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 5264745..e9bbf83 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -20,6 +21,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
@@ -29,6 +31,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <cstring>
using namespace llvm;
@@ -36,8 +39,8 @@ using namespace llvm::PatternMatch;
const unsigned MaxDepth = 6;
-/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
-/// unknown returns 0). For vector types, returns the element type's bitwidth.
+/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
+/// 0). For vector types, returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
@@ -45,10 +48,125 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
}
+// Many of these functions have internal versions that take an assumption
+// exclusion set. This is because of the potential for mutual recursion to
+// cause computeKnownBits to repeatedly visit the same assume intrinsic. The
+// classic case of this is assume(x = y), which will attempt to determine
+// bits in x from bits in y, which will attempt to determine bits in y from
+// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
+// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
+// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so on.
+typedef SmallPtrSet<const Value *, 8> ExclInvsSet;
+
+namespace {
+// Simplifying using an assume can only be done in a particular control-flow
+// context (the context instruction provides that context). If an assume and
+// the context instruction are not in the same block then the DT helps in
+// figuring out if we can use it.
+struct Query {
+ ExclInvsSet ExclInvs;
+ AssumptionTracker *AT;
+ const Instruction *CxtI;
+ const DominatorTree *DT;
+
+ Query(AssumptionTracker *AT = nullptr, const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr)
+ : AT(AT), CxtI(CxtI), DT(DT) {}
+
+ Query(const Query &Q, const Value *NewExcl)
+ : ExclInvs(Q.ExclInvs), AT(Q.AT), CxtI(Q.CxtI), DT(Q.DT) {
+ ExclInvs.insert(NewExcl);
+ }
+};
+} // end anonymous namespace
+
+// Given the provided Value and, potentially, a context instruction, return
+// the preferred context instruction (if any).
+static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
+ // If we've been provided with a context instruction, then use that (provided
+ // it has been inserted).
+ if (CxtI && CxtI->getParent())
+ return CxtI;
+
+ // If the value is really an already-inserted instruction, then use that.
+ CxtI = dyn_cast<Instruction>(V);
+ if (CxtI && CxtI->getParent())
+ return CxtI;
+
+ return nullptr;
+}
+
+static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q);
+
+void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ AssumptionTracker *AT, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth,
+ Query(AT, safeCxtI(V, CxtI), DT));
+}
+
+static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q);
+
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ AssumptionTracker *AT, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth,
+ Query(AT, safeCxtI(V, CxtI), DT));
+}
+
+static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+ const Query &Q);
+
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+ AssumptionTracker *AT,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
+ Query(AT, safeCxtI(V, CxtI), DT));
+}
+
+static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+ const Query &Q);
+
+bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+ AssumptionTracker *AT, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownNonZero(V, TD, Depth, Query(AT, safeCxtI(V, CxtI), DT));
+}
+
+static bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q);
+
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
+ const DataLayout *TD, unsigned Depth,
+ AssumptionTracker *AT, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::MaskedValueIsZero(V, Mask, TD, Depth,
+ Query(AT, safeCxtI(V, CxtI), DT));
+}
+
+static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
+ unsigned Depth, const Query &Q);
+
+unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
+ unsigned Depth, AssumptionTracker *AT,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::ComputeNumSignBits(V, TD, Depth, Query(AT, safeCxtI(V, CxtI), DT));
+}
+
static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth) {
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
if (!Add) {
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
// We know that the top bits of C-X are clear if X contains less bits
@@ -59,7 +177,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -75,55 +193,51 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned BitWidth = KnownZero.getBitWidth();
- // If one of the operands has trailing zeros, then the bits that the
- // other operand has in those bit positions will be preserved in the
- // result. For an add, this works with either operand. For a subtract,
- // this only works if the known zeros are in the right operand.
+ // If an initial sequence of bits in the result is not needed, the
+ // corresponding bits in the operands are not needed.
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
- unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
-
- llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
- unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
-
- // Determine which operand has more trailing zeros, and use that
- // many bits from the other operand.
- if (LHSKnownZeroOut > RHSKnownZeroOut) {
- if (Add) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
- KnownZero |= KnownZero2 & Mask;
- KnownOne |= KnownOne2 & Mask;
- } else {
- // If the known zeros are in the left operand for a subtract,
- // fall back to the minimum known zeros in both operands.
- KnownZero |= APInt::getLowBitsSet(BitWidth,
- std::min(LHSKnownZeroOut,
- RHSKnownZeroOut));
- }
- } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
- KnownZero |= LHSKnownZero & Mask;
- KnownOne |= LHSKnownOne & Mask;
+ computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
+
+ // Carry in a 1 for a subtract, rather than a 0.
+ APInt CarryIn(BitWidth, 0);
+ if (!Add) {
+ // Sum = LHS + ~RHS + 1
+ std::swap(KnownZero2, KnownOne2);
+ CarryIn.setBit(0);
}
+ APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn;
+ APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn;
+
+ // Compute known bits of the carry.
+ APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2);
+ APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2;
+
+ // Compute set of known bits (where all three relevant bits are known).
+ APInt LHSKnown = LHSKnownZero | LHSKnownOne;
+ APInt RHSKnown = KnownZero2 | KnownOne2;
+ APInt CarryKnown = CarryKnownZero | CarryKnownOne;
+ APInt Known = LHSKnown & RHSKnown & CarryKnown;
+
+ assert((PossibleSumZero & Known) == (PossibleSumOne & Known) &&
+ "known bits of sum differ");
+
+ // Compute known bits of the result.
+ KnownZero = ~PossibleSumOne & Known;
+ KnownOne = PossibleSumOne & Known;
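+ // Worked example (illustrative): for a 4-bit add where the LHS is known to
+ // be 0b01?0 and the RHS is known to be 0b0001, the sum is known to be
+ // 0b01?1; the single unknown bit cannot generate a carry because its RHS
+ // bit and the incoming carry are both known zero.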
+
// Are we still trying to solve for the sign bit?
- if (!KnownZero.isNegative() && !KnownOne.isNegative()) {
+ if (!Known.isNegative()) {
if (NSW) {
- if (Add) {
- // Adding two positive numbers can't wrap into negative
- if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // and adding two negative numbers can't wrap into positive.
- else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- } else {
- // Subtracting a negative number from a positive one can't wrap
- if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // neither can subtracting a positive number from a negative one.
- else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- }
+ // Adding two non-negative numbers, or subtracting a negative number from
+ // a non-negative one, can't wrap into negative.
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // Adding two negative numbers, or subtracting a non-negative number from
+ // a negative one, can't wrap into non-negative.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
}
}
}
@@ -131,10 +245,11 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth) {
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
- computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q);
bool isKnownNegative = false;
bool isKnownNonNegative = false;
@@ -155,9 +270,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
// negative or zero.
if (!isKnownNonNegative)
isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
- isKnownNonZero(Op0, TD, Depth)) ||
+ isKnownNonZero(Op0, TD, Depth, Q)) ||
(isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
- isKnownNonZero(Op1, TD, Depth));
+ isKnownNonZero(Op1, TD, Depth, Q));
}
}
@@ -209,6 +324,410 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
}
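+
+// Return true if E is used only, directly or through other such "ephemeral"
+// values, in computing the condition of I (typically an assume intrinsic);
+// such values must not be used to justify the assume itself.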
+static bool isEphemeralValueOf(Instruction *I, const Value *E) {
+ SmallVector<const Value *, 16> WorkSet(1, I);
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallPtrSet<const Value *, 16> EphValues;
+
+ while (!WorkSet.empty()) {
+ const Value *V = WorkSet.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ // If all uses of this value are ephemeral, then so is this value.
+ bool FoundNEUse = false;
+ for (const User *I : V->users())
+ if (!EphValues.count(I)) {
+ FoundNEUse = true;
+ break;
+ }
+
+ if (!FoundNEUse) {
+ if (V == E)
+ return true;
+
+ EphValues.insert(V);
+ if (const User *U = dyn_cast<User>(V))
+ for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
+ J != JE; ++J) {
+ if (isSafeToSpeculativelyExecute(*J))
+ WorkSet.push_back(*J);
+ }
+ }
+ }
+
+ return false;
+}
+
+// Is this an intrinsic that cannot be speculated but also cannot trap?
+static bool isAssumeLikeIntrinsic(const Instruction *I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ default: break;
+ // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
+ case Intrinsic::assume:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ return true;
+ }
+
+ return false;
+}
+
+static bool isValidAssumeForContext(Value *V, const Query &Q,
+ const DataLayout *DL) {
+ Instruction *Inv = cast<Instruction>(V);
+
+ // There are two restrictions on the use of an assume:
+ // 1. The assume must dominate the context (or the control flow must
+ // reach the assume whenever it reaches the context).
+ // 2. The context must not be in the assume's set of ephemeral values
+ // (otherwise we will use the assume to prove that the condition
+ // feeding the assume is trivially true, thus causing the removal of
+ // the assume).
+
+ if (Q.DT) {
+ if (Q.DT->dominates(Inv, Q.CxtI)) {
+ return true;
+ } else if (Inv->getParent() == Q.CxtI->getParent()) {
+ // The context comes first, but they're both in the same block. Make sure
+ // there is nothing in between that might interrupt the control flow.
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(Q.CxtI)),
+ IE(Inv); I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(I, DL) &&
+ !isAssumeLikeIntrinsic(I))
+ return false;
+
+ return !isEphemeralValueOf(Inv, Q.CxtI);
+ }
+
+ return false;
+ }
+
+ // When we don't have a DT, we do a limited search...
+ if (Inv->getParent() == Q.CxtI->getParent()->getSinglePredecessor()) {
+ return true;
+ } else if (Inv->getParent() == Q.CxtI->getParent()) {
+ // Search forward from the assume until we reach the context (or the end
+ // of the block); the common case is that the assume will come first.
+ for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
+ IE = Inv->getParent()->end(); I != IE; ++I)
+ if (I == Q.CxtI)
+ return true;
+
+ // The context must come first...
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(Q.CxtI)),
+ IE(Inv); I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(I, DL) &&
+ !isAssumeLikeIntrinsic(I))
+ return false;
+
+ return !isEphemeralValueOf(Inv, Q.CxtI);
+ }
+
+ return false;
+}
+
+bool llvm::isValidAssumeForContext(const Instruction *I,
+ const Instruction *CxtI,
+ const DataLayout *DL,
+ const DominatorTree *DT) {
+ return ::isValidAssumeForContext(const_cast<Instruction*>(I),
+ Query(nullptr, CxtI, DT), DL);
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>,
+ CmpClass_match<RHS, LHS, ICmpInst, ICmpInst::Predicate>>
+m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
+ return m_CombineOr(m_ICmp(Pred, L, R), m_ICmp(Pred, R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::And>,
+ BinaryOp_match<RHS, LHS, Instruction::And>>
+m_c_And(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_And(L, R), m_And(R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Or>,
+ BinaryOp_match<RHS, LHS, Instruction::Or>>
+m_c_Or(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_Or(L, R), m_Or(R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Xor>,
+ BinaryOp_match<RHS, LHS, Instruction::Xor>>
+m_c_Xor(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
+}
+
+static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout *DL,
+ unsigned Depth, const Query &Q) {
+ // Use of assumptions is context-sensitive. If we don't have a context, we
+ // cannot use them!
+ if (!Q.AT || !Q.CxtI)
+ return;
+
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ Function *F = const_cast<Function*>(Q.CxtI->getParent()->getParent());
+ for (auto &CI : Q.AT->assumptions(F)) {
+ CallInst *I = CI;
+ if (Q.ExclInvs.count(I))
+ continue;
+
+ if (match(I, m_Intrinsic<Intrinsic::assume>(m_Specific(V))) &&
+ isValidAssumeForContext(I, Q, DL)) {
+ assert(BitWidth == 1 && "assume operand is not i1?");
+ KnownZero.clearAllBits();
+ KnownOne.setAllBits();
+ return;
+ }
+
+ Value *A, *B;
+ auto m_V = m_CombineOr(m_Specific(V),
+ m_CombineOr(m_PtrToInt(m_Specific(V)),
+ m_BitCast(m_Specific(V))));
+
+ CmpInst::Predicate Pred;
+ ConstantInt *C;
+ // assume(v = a)
+ if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_V, m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ KnownZero |= RHSKnownZero;
+ KnownOne |= RHSKnownOne;
+ // assume(v & b = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
+ computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // known bits from the RHS to V.
+ KnownZero |= RHSKnownZero & MaskKnownOne;
+ KnownOne |= RHSKnownOne & MaskKnownOne;
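+ // For instance (illustrative): given assume((x & 255) == 42), the mask's
+ // low eight bits are known to be one, so the low byte of x becomes known
+ // to be 42.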
+ // assume(~(v & b) = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
+ m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
+ computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & MaskKnownOne;
+ KnownOne |= RHSKnownZero & MaskKnownOne;
+ // assume(v | b = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V.
+ KnownZero |= RHSKnownZero & BKnownZero;
+ KnownOne |= RHSKnownOne & BKnownZero;
+ // assume(~(v | b) = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
+ m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & BKnownZero;
+ KnownOne |= RHSKnownZero & BKnownZero;
+ // assume(v ^ b = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V. For those bits in B that are known to be one,
+ // we can propagate inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownZero & BKnownZero;
+ KnownOne |= RHSKnownOne & BKnownZero;
+ KnownZero |= RHSKnownOne & BKnownOne;
+ KnownOne |= RHSKnownZero & BKnownOne;
+ // assume(~(v ^ b) = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+ m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V. For those bits in B that are
+ // known to be one, we can propagate known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & BKnownZero;
+ KnownOne |= RHSKnownZero & BKnownZero;
+ KnownZero |= RHSKnownZero & BKnownOne;
+ KnownOne |= RHSKnownOne & BKnownOne;
+ // assume(v << c = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+ m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
+ KnownOne |= RHSKnownOne.lshr(C->getZExtValue());
+ // assume(~(v << c) = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+ m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
+ KnownOne |= RHSKnownZero.lshr(C->getZExtValue());
+ // assume(v >> c = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V,
+ m_ConstantInt(C))),
+ m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ KnownZero |= RHSKnownZero << C->getZExtValue();
+ KnownOne |= RHSKnownOne << C->getZExtValue();
+ // assume(~(v >> c) = a)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_c_ICmp(Pred, m_Not(m_CombineOr(
+ m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V, m_ConstantInt(C)))),
+ m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ KnownZero |= RHSKnownOne << C->getZExtValue();
+ KnownOne |= RHSKnownZero << C->getZExtValue();
+ // assume(v >=_s c) where c is non-negative
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_V, m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_SGE &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownZero.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v >_s c) where c is at least -1.
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_V, m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_SGT &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <=_s c) where c is negative
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_V, m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_SLE &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownOne.isNegative()) {
+ // We know that the sign bit is one.
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <_s c) where c is non-positive
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_V, m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_SLT &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) {
+ // We know that the sign bit is one.
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <=_u c)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_V, m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_ULE &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ // Whatever high bits in c are zero are known to be zero.
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ // assume(v <_u c)
+ } else if (match(I, m_Intrinsic<Intrinsic::assume>(
+ m_ICmp(Pred, m_V, m_Value(A)))) &&
+ Pred == ICmpInst::ICMP_ULT &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ // Whatever high bits in c are zero are known to be zero (if c is a power
+ // of 2, then one more).
+ if (isKnownToBeAPowerOfTwo(A, false, Depth+1, Query(Q, I)))
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
+ else
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ }
+ }
+}
+
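The bit-propagation rules above are easiest to see on concrete values. The following is a minimal standalone sketch in plain C++ (an editorial illustration, not part of the patch; the 0xF0/0x30 constants are invented for the example). It checks the assume((v & mask) == rhs) rule exhaustively over 8-bit values: wherever the mask is known to be one, v must agree with the corresponding bit of the right-hand side.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Models: __builtin_assume((v & 0xF0) == 0x30) for an 8-bit value v.
  const uint8_t Mask = 0xF0, RHS = 0x30;
  // Where Mask is one, copy the matching RHS bits into v's known bits.
  const uint8_t KnownOne  = RHS & Mask;                        // 0x30
  const uint8_t KnownZero = static_cast<uint8_t>(~RHS) & Mask; // 0xC0
  // Every v that satisfies the assumption agrees with those known bits.
  for (unsigned v = 0; v < 256; ++v) {
    if ((v & Mask) != RHS)
      continue;
    assert((v & KnownOne) == KnownOne); // known-one bits really are one
    assert((v & KnownZero) == 0);       // known-zero bits really are zero
  }
  std::printf("KnownZero=0x%02X KnownOne=0x%02X\n", KnownZero, KnownOne);
  return 0;
}

The shift cases follow the same pattern, except the RHS known bits are shifted back into position before being merged.
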
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
///
@@ -224,8 +743,9 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth) {
+void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = KnownZero.getBitWidth();
@@ -270,6 +790,17 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
return;
}
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden()) {
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ } else {
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1, Q);
+ }
+ return;
+ }
+
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
@@ -295,25 +826,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownOne.clearAllBits();
return;
}
- // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
- // the bits of its aliasee.
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden()) {
- KnownZero.clearAllBits(); KnownOne.clearAllBits();
- } else {
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
- }
- return;
- }
if (Argument *A = dyn_cast<Argument>(V)) {
- unsigned Align = 0;
+ unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
- if (A->hasByValOrInAllocaAttr()) {
- // Get alignment information off byval/inalloca arguments if specified in
- // the IR.
- Align = A->getParamAlignment();
- } else if (TD && A->hasStructRetAttr()) {
+ if (!Align && TD && A->hasStructRetAttr()) {
// An sret parameter has at least the ABI alignment of the return type.
Type *EltTy = cast<PointerType>(A->getType())->getElementType();
if (EltTy->isSized())
@@ -322,6 +839,10 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Align)
KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+
+ // Don't give up yet... there might be an assumption that provides more
+ // information...
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
return;
}
@@ -331,6 +852,9 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Depth == MaxDepth)
return; // Limit search depth.
+ // Check whether a nearby assume intrinsic can determine some known bits.
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
+
Operator *I = dyn_cast<Operator>(V);
if (!I) return;
@@ -343,8 +867,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
@@ -353,8 +877,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Or: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
@@ -363,8 +887,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Xor: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
@@ -376,19 +900,20 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Mul: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+ Depth, Q);
break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
@@ -398,9 +923,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Select:
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
- Depth+1);
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
@@ -415,6 +939,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break; // Can't work with floating point.
case Instruction::PtrToInt:
case Instruction::IntToPtr:
+ case Instruction::AddrSpaceCast: // Pointers could be different sizes.
// We can't handle these if we don't know the pointer size.
if (!TD) break;
// FALL THROUGH and handle them the same as zext/trunc.
@@ -435,7 +960,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
assert(SrcBitWidth && "SrcBitWidth can't be zero");
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero = KnownZero.zextOrTrunc(BitWidth);
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
@@ -449,7 +974,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
break;
}
break;
@@ -460,7 +985,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -476,11 +1001,10 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
- break;
}
break;
case Instruction::LShr:
@@ -490,12 +1014,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
// Unsigned shift right.
- computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
// high bits known zero.
KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- break;
}
break;
case Instruction::AShr:
@@ -505,7 +1028,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -514,21 +1037,20 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero |= HighBits;
else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
KnownOne |= HighBits;
- break;
}
break;
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth);
+ Depth, Q);
break;
}
case Instruction::Add: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth);
+ Depth, Q);
break;
}
case Instruction::SRem:
@@ -536,7 +1058,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD,
+ Depth+1, Q);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -561,7 +1084,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
- Depth+1);
+ Depth+1, Q);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(BitWidth - 1);
@@ -574,7 +1097,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD,
- Depth+1);
+ Depth+1, Q);
KnownZero |= ~LowBits;
KnownOne &= LowBits;
break;
@@ -583,8 +1106,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
@@ -608,7 +1131,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// to determine if we can prove known low zero bits.
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
- Depth+1);
+ Depth+1, Q);
unsigned TrailZ = LocalKnownZero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
@@ -644,7 +1167,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
LocalKnownZero.countTrailingOnes()));
@@ -686,11 +1209,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q);
// We need to take the minimum number of known bits
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1);
+ computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q);
KnownZero = APInt::getLowBitsSet(BitWidth,
std::min(KnownZero2.countTrailingOnes(),
@@ -722,7 +1245,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
- MaxDepth-1);
+ MaxDepth-1, Q);
KnownZero &= KnownZero2;
KnownOne &= KnownOne2;
// If all bits have been ruled out, there's no need to check
@@ -774,19 +1297,19 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Intrinsic::sadd_with_overflow:
computeKnownBitsAddSub(true, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
computeKnownBitsAddSub(false, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1),
false, KnownZero, KnownOne,
- KnownZero2, KnownOne2, TD, Depth);
+ KnownZero2, KnownOne2, TD, Depth, Q);
break;
}
}
@@ -796,10 +1319,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
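As a quick sanity check of the And/Or propagation used above, here is a small standalone C++ sketch (editorial, not from the patch; the nibble values are arbitrary). It fixes some bits of two 8-bit operands, derives the known bits of x & y and x | y with the same rules, and verifies them against every completion of the unknown bits.

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical facts: the low nibble of X is 0x5 and the low nibble of Y is
  // 0xC; the high nibbles are completely unknown.
  const uint8_t KnownOneX = 0x05, KnownZeroX = 0x0A;
  const uint8_t KnownOneY = 0x0C, KnownZeroY = 0x03;
  // The rules from the And/Or cases above.
  const uint8_t AndKnownOne  = KnownOneX & KnownOneY;   // set in both operands
  const uint8_t AndKnownZero = KnownZeroX | KnownZeroY; // clear in either operand
  const uint8_t OrKnownOne   = KnownOneX | KnownOneY;   // set in either operand
  const uint8_t OrKnownZero  = KnownZeroX & KnownZeroY; // clear in both operands
  for (unsigned hx = 0; hx < 16; ++hx)
    for (unsigned hy = 0; hy < 16; ++hy) {
      const uint8_t X = uint8_t(hx << 4) | 0x05;
      const uint8_t Y = uint8_t(hy << 4) | 0x0C;
      assert(((X & Y) & AndKnownOne) == AndKnownOne);
      assert(((X & Y) & AndKnownZero) == 0);
      assert(((X | Y) & OrKnownOne) == OrKnownOne);
      assert(((X | Y) & OrKnownZero) == 0);
    }
  return 0;
}
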
-/// ComputeSignBit - Determine whether the sign bit is known to be zero or
-/// one. Convenience wrapper around computeKnownBits.
-void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth) {
+/// Determine whether the sign bit is known to be zero or one.
+/// Convenience wrapper around computeKnownBits.
+void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
unsigned BitWidth = getBitWidth(V->getType(), TD);
if (!BitWidth) {
KnownZero = false;
@@ -808,16 +1332,17 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
}
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
- computeKnownBits(V, ZeroBits, OneBits, TD, Depth);
+ computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q);
KnownOne = OneBits[BitWidth - 1];
KnownZero = ZeroBits[BitWidth - 1];
}
-/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one
+/// Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
-/// be a power of two when defined. Supports values with integer or pointer
+/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
+bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+ const Query &Q) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -844,19 +1369,20 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
// A shift of a power of two is a power of two or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
- return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth);
+ return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth);
+ return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
if (SelectInst *SI = dyn_cast<SelectInst>(V))
- return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) &&
- isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth);
+ return
+ isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
+ isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
// A power of two and'd with anything is a power of two or zero.
- if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) ||
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth))
+ if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) ||
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q))
return true;
// X & (-X) is always a power of two or zero.
if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
@@ -871,19 +1397,19 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
- if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth))
+ if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
return true;
if (match(Y, m_And(m_Specific(X), m_Value())) ||
match(Y, m_And(m_Value(), m_Specific(X))))
- if (isKnownToBeAPowerOfTwo(X, OrZero, Depth))
+ if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
return true;
unsigned BitWidth = V->getType()->getScalarSizeInBits();
APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
- computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth);
+ computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q);
APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
- computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth);
+ computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
@@ -900,7 +1426,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
// copying a sign bit (sdiv int_min, 2).
if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
- return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, Depth);
+ return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
+ Depth, Q);
}
return false;
@@ -913,7 +1440,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
///
/// Currently this routine does not support vector GEPs.
static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
- unsigned Depth) {
+ unsigned Depth, const Query &Q) {
if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
return false;
@@ -922,7 +1449,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
// If the base pointer is non-null, we cannot walk to a null address with an
// inbounds GEP in address space zero.
- if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth))
+ if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q))
return true;
// Past this, if we don't have DataLayout, we can't do much.
@@ -965,18 +1492,36 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
if (Depth++ >= MaxDepth)
continue;
- if (isKnownNonZero(GTI.getOperand(), DL, Depth))
+ if (isKnownNonZero(GTI.getOperand(), DL, Depth, Q))
return true;
}
return false;
}
-/// isKnownNonZero - Return true if the given value is known to be non-zero
-/// when defined. For vectors return true if every element is known to be
-/// non-zero when defined. Supports values with integer or pointer type and
-/// vectors of integers.
-bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
+/// Does the 'Range' metadata (which must be a valid MD_range operand list)
+/// ensure that the value it's attached to is never equal to 'Value'?
+static bool rangeMetadataExcludesValue(MDNode* Ranges,
+ const APInt& Value) {
+ const unsigned NumRanges = Ranges->getNumOperands() / 2;
+ assert(NumRanges >= 1);
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower = cast<ConstantInt>(Ranges->getOperand(2*i + 0));
+ ConstantInt *Upper = cast<ConstantInt>(Ranges->getOperand(2*i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ if (Range.contains(Value))
+ return false;
+ }
+ return true;
+}
+
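A minimal sketch of the same range check on plain integers (editorial illustration; it assumes non-wrapping half-open [Lo, Hi) pairs, whereas the code above goes through ConstantRange, which also handles wrapped ranges):

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

static bool rangesExcludeValue(
    const std::vector<std::pair<uint64_t, uint64_t>> &Ranges, uint64_t Value) {
  for (const auto &R : Ranges)
    if (Value >= R.first && Value < R.second) // Value falls inside one range.
      return false;
  return true; // No range contains Value.
}

int main() {
  // E.g. !range !{i32 1, i32 256} on a load proves the result is never zero.
  assert(rangesExcludeValue({{1, 256}}, 0));
  assert(!rangesExcludeValue({{1, 256}}, 17));
  return 0;
}
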
+/// Return true if the given value is known to be non-zero when defined.
+/// For vectors return true if every element is known to be non-zero when
+/// defined. Supports values with integer or pointer type and vectors of
+/// integers.
+bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return false;
@@ -987,6 +1532,18 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
return false;
}
+ if (Instruction* I = dyn_cast<Instruction>(V)) {
+ if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) {
+ // If the possible ranges don't contain zero, then the value is
+ // definitely non-zero.
+ if (IntegerType* Ty = dyn_cast<IntegerType>(V->getType())) {
+ const APInt ZeroValue(Ty->getBitWidth(), 0);
+ if (rangeMetadataExcludesValue(Ranges, ZeroValue))
+ return true;
+ }
+ }
+ }
+
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ >= MaxDepth)
return false;
@@ -996,7 +1553,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
if (isKnownNonNull(V))
return true;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- if (isGEPKnownNonNull(GEP, TD, Depth))
+ if (isGEPKnownNonNull(GEP, TD, Depth, Q))
return true;
}
@@ -1005,11 +1562,12 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// X | Y != 0 if X != 0 or Y != 0.
Value *X = nullptr, *Y = nullptr;
if (match(V, m_Or(m_Value(X), m_Value(Y))))
- return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+ return isKnownNonZero(X, TD, Depth, Q) ||
+ isKnownNonZero(Y, TD, Depth, Q);
// ext X != 0 if X != 0.
if (isa<SExtInst>(V) || isa<ZExtInst>(V))
- return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth, Q);
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
// if the lowest bit is shifted off the end.
@@ -1017,11 +1575,11 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// shl nuw can't remove any non-zero bits.
OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
- return isKnownNonZero(X, TD, Depth);
+ return isKnownNonZero(X, TD, Depth, Q);
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
if (KnownOne[0])
return true;
}
@@ -1031,28 +1589,29 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// shr exact can only shift out zero bits.
PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
- return isKnownNonZero(X, TD, Depth);
+ return isKnownNonZero(X, TD, Depth, Q);
bool XKnownNonNegative, XKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
if (XKnownNegative)
return true;
}
// div exact can only produce a zero if the dividend is zero.
else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
- return isKnownNonZero(X, TD, Depth);
+ return isKnownNonZero(X, TD, Depth, Q);
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
bool XKnownNonNegative, XKnownNegative;
bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q);
// If X and Y are both non-negative (as signed values) then their sum is not
// zero unless both X and Y are zero.
if (XKnownNonNegative && YKnownNonNegative)
- if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+ if (isKnownNonZero(X, TD, Depth, Q) ||
+ isKnownNonZero(Y, TD, Depth, Q))
return true;
// If X and Y are both negative (as signed values) then their sum is not
@@ -1063,20 +1622,22 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- computeKnownBits(Y, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
}
// The sum of a non-negative number and a power of two is not zero.
- if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth))
+ if (XKnownNonNegative &&
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q))
return true;
- if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth))
+ if (YKnownNonNegative &&
+ isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q))
return true;
}
// X * Y.
@@ -1085,51 +1646,53 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
- isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth))
+ isKnownNonZero(X, TD, Depth, Q) &&
+ isKnownNonZero(Y, TD, Depth, Q))
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
- if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
- isKnownNonZero(SI->getFalseValue(), TD, Depth))
+ if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) &&
+ isKnownNonZero(SI->getFalseValue(), TD, Depth, Q))
return true;
}
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
return KnownOne != 0;
}
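One of the facts isKnownNonZero leans on above is that the sum of a non-negative number and a power of two cannot be zero. A standalone exhaustive check for i8 (editorial, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = 0; x <= 0x7F; ++x)       // x is non-negative as a signed i8
    for (int p = 1; p <= 0x80; p <<= 1) // p has exactly one bit set
      assert(uint8_t(x + p) != 0);      // the i8 sum never wraps around to zero
  return 0;
}
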
-/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
-/// this predicate to simplify operations downstream. Mask is known to be zero
-/// for bits that V cannot have.
+/// Return true if 'V & Mask' is known to be zero. We use this predicate to
+/// simplify operations downstream. Mask is known to be zero for bits that V
+/// cannot have.
///
/// This function is defined on values with integer type, values with pointer
/// type (but only if TD is non-null), and vectors of integers. In the case
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth) {
+bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
return (KnownZero & Mask) == Mask;
}
-/// ComputeNumSignBits - Return the number of times the sign bit of the
-/// register is replicated into the other bits. We know that at least 1 bit
-/// is always equal to the sign bit (itself), but other cases can give us
-/// information. For example, immediately after an "ashr X, 2", we know that
-/// the top 3 bits are all equal to each other, so we return 3.
+/// Return the number of times the sign bit of the register is replicated into
+/// the other bits. We know that at least 1 bit is always equal to the sign bit
+/// (itself), but other cases can give us information. For example, immediately
+/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
+/// other, so we return 3.
///
/// 'Op' must have a scalar integer type.
///
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
- unsigned Depth) {
+unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
+ unsigned Depth, const Query &Q) {
assert((TD || V->getType()->isIntOrIntVectorTy()) &&
"ComputeNumSignBits requires a DataLayout object to operate "
"on non-integer values!");
@@ -1150,10 +1713,10 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
default: break;
case Instruction::SExt:
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
- return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+ return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp;
case Instruction::AShr: {
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
@@ -1166,7 +1729,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
Tmp2 = ShAmt->getZExtValue();
if (Tmp2 >= TyBits || // Bad shift.
Tmp2 >= Tmp) break; // Shifted all sign bits out.
@@ -1178,9 +1741,9 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
case Instruction::Or:
case Instruction::Xor: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
@@ -1189,22 +1752,22 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
break;
case Instruction::Select:
- Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q);
return std::min(Tmp, Tmp2);
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -1217,19 +1780,19 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
return Tmp;
}
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
if (Tmp2 == 1) return 1;
return std::min(Tmp, Tmp2)-1;
case Instruction::Sub:
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
if (Tmp2 == 1) return 1;
// Handle NEG.
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
@@ -1245,7 +1808,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
@@ -1256,11 +1819,12 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q);
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
if (Tmp == 1) return Tmp;
Tmp = std::min(Tmp,
- ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+ ComputeNumSignBits(PN->getIncomingValue(i), TD,
+ Depth+1, Q));
}
return Tmp;
}
@@ -1275,7 +1839,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
APInt Mask;
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
@@ -1295,9 +1859,9 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
}
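To make the "ashr X, 2 leaves at least 3 sign bits" claim concrete, here is a small standalone C++ check over all i8 values (editorial illustration; it assumes the usual arithmetic right shift for negative signed values, which C++20 guarantees and mainstream compilers have long provided):

#include <cassert>
#include <cstdint>

// Counts how many of V's top bits are copies of the sign bit (including it).
static unsigned numSignBits(int8_t V) {
  const unsigned Sign = (uint8_t(V) >> 7) & 1;
  unsigned N = 1;
  for (int Bit = 6; Bit >= 0 && ((uint8_t(V) >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  for (int X = -128; X <= 127; ++X) {
    const int8_t Shifted = int8_t(X >> 2); // the shift copies the sign bit in twice
    assert(numSignBits(Shifted) >= 3);     // so the top 3 bits all agree
  }
  return 0;
}
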
-/// ComputeMultiple - This function computes the integer multiple of Base that
-/// equals V. If successful, it returns true and returns the multiple in
-/// Multiple. If unsuccessful, it returns false. It looks
+/// This function computes the integer multiple of Base that equals V.
+/// If successful, it returns true and returns the multiple in
+/// Multiple. If unsuccessful, it returns false. It looks
/// through SExt instructions only if LookThroughSExt is true.
bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
bool LookThroughSExt, unsigned Depth) {
@@ -1415,8 +1979,8 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
return false;
}
-/// CannotBeNegativeZero - Return true if we can prove that the specified FP
-/// value is never equal to -0.0.
+/// Return true if we can prove that the specified FP value is never equal to
+/// -0.0.
///
/// NOTE: this function will need to be revisited when we support non-default
/// rounding modes!
@@ -1469,8 +2033,8 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
-/// isBytewiseValue - If the specified value can be set by repeating the same
-/// byte in memory, return the i8 value that it is represented with. This is
+/// If the specified value can be set by repeating the same byte in memory,
+/// return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
/// byte store (e.g. i16 0x1234), return null.
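The underlying test is simply "are all bytes of the constant identical". A hypothetical standalone version for a 32-bit value (editorial illustration only; the real routine works on LLVM constants of various types, not just i32):

#include <cassert>
#include <cstdint>
#include <optional>

// Returns the repeated byte if every byte of V is identical, otherwise nullopt.
static std::optional<uint8_t> repeatedByte(uint32_t V) {
  const uint8_t B = uint8_t(V & 0xFF);
  for (int Shift = 8; Shift < 32; Shift += 8)
    if (uint8_t((V >> Shift) & 0xFF) != B)
      return std::nullopt;
  return B;
}

int main() {
  assert(repeatedByte(0x00000000) == uint8_t(0x00)); // i32 0
  assert(repeatedByte(0xFFFFFFFF) == uint8_t(0xFF)); // i32 -1
  assert(repeatedByte(0xF0F0F0F0) == uint8_t(0xF0)); // the i16 0xF0F0 pattern
  assert(!repeatedByte(0x12341234));                 // no single repeated byte
  return 0;
}
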
@@ -1618,7 +2182,7 @@ static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
}
-/// FindInsertedValue - Given an aggregrate and an sequence of indices, see if
+/// Given an aggregrate and an sequence of indices, see if
/// the scalar value indexed is already around as a register, for example if it
/// were inserted directly into the aggregrate.
///
@@ -1708,9 +2272,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
return nullptr;
}
-/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
-/// it can be expressed as a base pointer plus a constant offset. Return the
-/// base and offset to the caller.
+/// Analyze the specified pointer to see if it can be expressed as a base
+/// pointer plus a constant offset. Return the base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const DataLayout *DL) {
// Without DataLayout, conservatively assume 64-bit offsets, which is
@@ -1731,7 +2294,8 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
}
Ptr = GEP->getPointerOperand();
- } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
+ } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
+ Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
if (GA->mayBeOverridden())
@@ -1746,9 +2310,9 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
}
-/// getConstantStringInfo - This function computes the length of a
-/// null-terminated C string pointed to by V. If successful, it returns true
-/// and returns the string in Str. If unsuccessful, it returns false.
+/// This function computes the length of a null-terminated C string pointed to
+/// by V. If successful, it returns true and returns the string in Str.
+/// If unsuccessful, it returns false.
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
uint64_t Offset, bool TrimAtNul) {
assert(V);
@@ -1832,16 +2396,16 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
// nodes.
// TODO: See if we can integrate these two together.
-/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
// Look through noop bitcast instructions.
V = V->stripPointerCasts();
// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
if (PHINode *PN = dyn_cast<PHINode>(V)) {
- if (!PHIs.insert(PN))
+ if (!PHIs.insert(PN).second)
return ~0ULL; // already in the set.
// If it was new, see if all the input strings are the same length.
@@ -1881,7 +2445,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
return StrData.size()+1;
}
-/// GetStringLength - If we can compute the length of the string pointed to by
+/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
uint64_t llvm::GetStringLength(Value *V) {
if (!V->getType()->isPointerTy()) return 0;
@@ -1900,7 +2464,8 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+ Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
V = cast<Operator>(V)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (GA->mayBeOverridden())
@@ -1909,7 +2474,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
} else {
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
- // TODO: Acquire a DominatorTree and use it.
+ // TODO: Acquire a DominatorTree and AssumptionTracker and use them.
if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
V = Simplified;
continue;
@@ -1934,7 +2499,7 @@ llvm::GetUnderlyingObjects(Value *V,
Value *P = Worklist.pop_back_val();
P = GetUnderlyingObject(P, TD, MaxLookup);
- if (!Visited.insert(P))
+ if (!Visited.insert(P).second)
continue;
if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
@@ -1953,9 +2518,7 @@ llvm::GetUnderlyingObjects(Value *V,
} while (!Worklist.empty());
}
-/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
-/// are lifetime markers.
-///
+/// Return true if the only users of this pointer are lifetime markers.
bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
for (const User *U : V->users()) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
@@ -1983,23 +2546,31 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
default:
return true;
case Instruction::UDiv:
- case Instruction::URem:
- // x / y is undefined if y == 0, but calculations like x / 3 are safe.
- return isKnownNonZero(Inst->getOperand(1), TD);
+ case Instruction::URem: {
+ // x / y is undefined if y == 0.
+ const APInt *V;
+ if (match(Inst->getOperand(1), m_APInt(V)))
+ return *V != 0;
+ return false;
+ }
case Instruction::SDiv:
case Instruction::SRem: {
- Value *Op = Inst->getOperand(1);
- // x / y is undefined if y == 0
- if (!isKnownNonZero(Op, TD))
- return false;
- // x / y might be undefined if y == -1
- unsigned BitWidth = getBitWidth(Op->getType(), TD);
- if (BitWidth == 0)
- return false;
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Op, KnownZero, KnownOne, TD);
- return !!KnownZero;
+ // x / y is undefined if y == 0 or x == INT_MIN and y == -1
+ const APInt *X, *Y;
+ if (match(Inst->getOperand(1), m_APInt(Y))) {
+ if (*Y != 0) {
+ if (*Y == -1) {
+ // The numerator can't be MinSignedValue if the denominator is -1.
+ if (match(Inst->getOperand(0), m_APInt(X)))
+            return !X->isMinSignedValue();
+ // The numerator *might* be MinSignedValue.
+ return false;
+ }
+ // The denominator is not 0 or -1, it's safe to proceed.
+ return true;
+ }
+ }
+ return false;
}
case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(Inst);
@@ -2010,41 +2581,44 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return LI->getPointerOperand()->isDereferenceablePointer(TD);
}
case Instruction::Call: {
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- // These synthetic intrinsics have no side-effects and just mark
- // information about their operands.
- // FIXME: There are other no-op synthetic instructions that potentially
- // should be considered at least *safe* to speculate...
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- return true;
-
- case Intrinsic::bswap:
- case Intrinsic::ctlz:
- case Intrinsic::ctpop:
- case Intrinsic::cttz:
- case Intrinsic::objectsize:
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::umul_with_overflow:
- case Intrinsic::usub_with_overflow:
- return true;
- // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set
- // errno like libm sqrt would.
- case Intrinsic::sqrt:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- return true;
- // TODO: some fp intrinsics are marked as having the same error handling
- // as libm. They're safe to speculate when they won't error.
- // TODO: are convert_{from,to}_fp16 safe?
- // TODO: can we list target-specific intrinsics here?
- default: break;
- }
- }
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ // These synthetic intrinsics have no side-effects and just mark
+ // information about their operands.
+ // FIXME: There are other no-op synthetic instructions that potentially
+ // should be considered at least *safe* to speculate...
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ return true;
+
+ case Intrinsic::bswap:
+ case Intrinsic::ctlz:
+ case Intrinsic::ctpop:
+ case Intrinsic::cttz:
+ case Intrinsic::objectsize:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ return true;
+ // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set
+ // errno like libm sqrt would.
+ case Intrinsic::sqrt:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::fabs:
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ return true;
+ // TODO: some fp intrinsics are marked as having the same error handling
+ // as libm. They're safe to speculate when they won't error.
+ // TODO: are convert_{from,to}_fp16 safe?
+ // TODO: can we list target-specific intrinsics here?
+ default: break;
+ }
+ }
return false; // The called function could have undefined behavior or
// side-effects, even if marked readnone nounwind.
}
@@ -2067,8 +2641,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
}
}
-/// isKnownNonNull - Return true if we know that the specified value is never
-/// null.
+/// Return true if we know that the specified value is never null.
bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
@@ -2081,8 +2654,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return !GV->hasExternalWeakLinkage();
+ // A Load tagged w/nonnull metadata is never null.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V))
+ return LI->getMetadata(LLVMContext::MD_nonnull);
+
if (ImmutableCallSite CS = V)
- if (CS.paramHasAttr(0, Attribute::NonNull))
+ if (CS.isReturnNonNull())
return true;
// operator new never returns null.
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 1e5bcdd..6523bce 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -161,14 +161,10 @@ static const char *isLabelTail(const char *CurPtr) {
// Lexer definition.
//===----------------------------------------------------------------------===//
-LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
+LLLexer::LLLexer(StringRef StartBuf, SourceMgr &sm, SMDiagnostic &Err,
LLVMContext &C)
: CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
- CurPtr = CurBuf->getBufferStart();
-}
-
-std::string LLLexer::getFilename() const {
- return CurBuf->getBufferIdentifier();
+ CurPtr = CurBuf.begin();
}
int LLLexer::getNextChar() {
@@ -178,7 +174,7 @@ int LLLexer::getNextChar() {
case 0:
// A nul character in the stream is either the end of the current buffer or
// a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf->getBufferEnd())
+ if (CurPtr-1 != CurBuf.end())
return 0; // Just whitespace.
// Otherwise, return end of file.
@@ -516,8 +512,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(private);
KEYWORD(internal);
- KEYWORD(linker_private); // NOTE: deprecated, for parser compatibility
- KEYWORD(linker_private_weak); // NOTE: deprecated, for parser compatibility
KEYWORD(available_externally);
KEYWORD(linkonce);
KEYWORD(linkonce_odr);
@@ -586,6 +580,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
KEYWORD(x86_thiscallcc);
+ KEYWORD(x86_vectorcallcc);
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
@@ -612,6 +607,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(byval);
KEYWORD(inalloca);
KEYWORD(cold);
+ KEYWORD(dereferenceable);
KEYWORD(inlinehint);
KEYWORD(inreg);
KEYWORD(jumptable);
@@ -669,6 +665,10 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(x);
KEYWORD(blockaddress);
+ // Use-list order directives.
+ KEYWORD(uselistorder);
+ KEYWORD(uselistorder_bb);
+
KEYWORD(personality);
KEYWORD(cleanup);
KEYWORD(catch);
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index d42de57..219827f 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LIB_ASMPARSER_LLLEXER_H
-#define LIB_ASMPARSER_LLLEXER_H
+#ifndef LLVM_LIB_ASMPARSER_LLLEXER_H
+#define LLVM_LIB_ASMPARSER_LLLEXER_H
#include "LLToken.h"
#include "llvm/ADT/APFloat.h"
@@ -28,7 +28,7 @@ namespace llvm {
class LLLexer {
const char *CurPtr;
- MemoryBuffer *CurBuf;
+ StringRef CurBuf;
SMDiagnostic &ErrorInfo;
SourceMgr &SM;
LLVMContext &Context;
@@ -43,7 +43,7 @@ namespace llvm {
APSInt APSIntVal;
public:
- explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &,
+ explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &,
LLVMContext &C);
~LLLexer() {}
@@ -67,8 +67,6 @@ namespace llvm {
void Warning(LocTy WarningLoc, const Twine &Msg) const;
void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); }
- std::string getFilename() const;
-
private:
lltok::Kind LexToken();
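With CurBuf now a plain StringRef, the lexer neither owns nor sees a
MemoryBuffer, and end-of-input is detected against CurBuf.end(). A standalone
sketch of that check under the same convention (getNextChar() has already
advanced CurPtr past the nul it just read; the helper name is illustrative):

    #include "llvm/ADT/StringRef.h"

    // A '\0' read from the buffer only means end-of-file when it sits exactly
    // at the StringRef's end(); any other nul is treated like whitespace.
    static bool isEndOfInput(llvm::StringRef Buf, const char *CurPtr) {
      return CurPtr - 1 == Buf.end();
    }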
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index be55ac6..2c835f9 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -129,28 +130,11 @@ bool LLParser::ValidateEndOfModule() {
}
}
- // If there are entries in ForwardRefBlockAddresses at this point, they are
- // references after the function was defined. Resolve those now.
- while (!ForwardRefBlockAddresses.empty()) {
- // Okay, we are referencing an already-parsed function, resolve them now.
- Function *TheFn = nullptr;
- const ValID &Fn = ForwardRefBlockAddresses.begin()->first;
- if (Fn.Kind == ValID::t_GlobalName)
- TheFn = M->getFunction(Fn.StrVal);
- else if (Fn.UIntVal < NumberedVals.size())
- TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
-
- if (!TheFn)
- return Error(Fn.Loc, "unknown function referenced by blockaddress");
-
- // Resolve all these references.
- if (ResolveForwardRefBlockAddresses(TheFn,
- ForwardRefBlockAddresses.begin()->second,
- nullptr))
- return true;
-
- ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
- }
+ // If there are entries in ForwardRefBlockAddresses at this point, the
+ // function was never defined.
+ if (!ForwardRefBlockAddresses.empty())
+ return Error(ForwardRefBlockAddresses.begin()->first.Loc,
+ "expected function name in blockaddress");
for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
if (NumberedTypes[i].second.isValid())
@@ -193,38 +177,6 @@ bool LLParser::ValidateEndOfModule() {
return false;
}
-bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
- std::vector<std::pair<ValID, GlobalValue*> > &Refs,
- PerFunctionState *PFS) {
- // Loop over all the references, resolving them.
- for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
- BasicBlock *Res;
- if (PFS) {
- if (Refs[i].first.Kind == ValID::t_LocalName)
- Res = PFS->GetBB(Refs[i].first.StrVal, Refs[i].first.Loc);
- else
- Res = PFS->GetBB(Refs[i].first.UIntVal, Refs[i].first.Loc);
- } else if (Refs[i].first.Kind == ValID::t_LocalID) {
- return Error(Refs[i].first.Loc,
- "cannot take address of numeric label after the function is defined");
- } else {
- Res = dyn_cast_or_null<BasicBlock>(
- TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal));
- }
-
- if (!Res)
- return Error(Refs[i].first.Loc,
- "referenced value is not a basic block");
-
- // Get the BlockAddress for this and update references to use it.
- BlockAddress *BA = BlockAddress::get(TheFn, Res);
- Refs[i].second->replaceAllUsesWith(BA);
- Refs[i].second->eraseFromParent();
- }
- return false;
-}
-
-
//===----------------------------------------------------------------------===//
// Top-Level Entities
//===----------------------------------------------------------------------===//
@@ -254,8 +206,6 @@ bool LLParser::ParseTopLevelEntities() {
// ('constant'|'global') ...
case lltok::kw_private: // OptionalLinkage
case lltok::kw_internal: // OptionalLinkage
- case lltok::kw_linker_private: // Obsolete OptionalLinkage
- case lltok::kw_linker_private_weak: // Obsolete OptionalLinkage
case lltok::kw_weak: // OptionalLinkage
case lltok::kw_weak_odr: // OptionalLinkage
case lltok::kw_linkonce: // OptionalLinkage
@@ -289,6 +239,9 @@ bool LLParser::ParseTopLevelEntities() {
}
case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break;
+ case lltok::kw_uselistorder: if (ParseUseListOrder()) return true; break;
+ case lltok::kw_uselistorder_bb:
+ if (ParseUseListOrderBB()) return true; break;
}
}
}
@@ -483,10 +436,10 @@ bool LLParser::ParseUnnamedGlobal() {
parseOptionalUnnamedAddr(UnnamedAddr))
return true;
- if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ if (Lex.getKind() != lltok::kw_alias)
return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
DLLStorageClass, TLM, UnnamedAddr);
- return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM,
+ return ParseAlias(Name, NameLoc, Linkage, Visibility, DLLStorageClass, TLM,
UnnamedAddr);
}
@@ -512,10 +465,11 @@ bool LLParser::ParseNamedGlobal() {
parseOptionalUnnamedAddr(UnnamedAddr))
return true;
- if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+ if (Lex.getKind() != lltok::kw_alias)
return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
DLLStorageClass, TLM, UnnamedAddr);
- return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM,
+
+ return ParseAlias(Name, NameLoc, Linkage, Visibility, DLLStorageClass, TLM,
UnnamedAddr);
}
@@ -693,33 +647,29 @@ static bool isValidVisibilityForLinkage(unsigned V, unsigned L) {
}
/// ParseAlias:
-/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass
-/// OptionalThreadLocal OptionalUnNammedAddr 'alias'
-/// OptionalLinkage Aliasee
+/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility
+/// OptionalDLLStorageClass OptionalThreadLocal
+/// OptionalUnNammedAddr 'alias' Aliasee
///
/// Aliasee
/// ::= TypeAndValue
///
/// Everything through OptionalUnNammedAddr has already been parsed.
///
-bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
+bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L,
unsigned Visibility, unsigned DLLStorageClass,
GlobalVariable::ThreadLocalMode TLM,
bool UnnamedAddr) {
assert(Lex.getKind() == lltok::kw_alias);
Lex.Lex();
- LocTy LinkageLoc = Lex.getLoc();
- unsigned L;
- if (ParseOptionalLinkage(L))
- return true;
GlobalValue::LinkageTypes Linkage = (GlobalValue::LinkageTypes) L;
if(!GlobalAlias::isValidLinkage(Linkage))
- return Error(LinkageLoc, "invalid linkage type for alias");
+ return Error(NameLoc, "invalid linkage type for alias");
if (!isValidVisibilityForLinkage(Visibility, L))
- return Error(LinkageLoc,
+ return Error(NameLoc,
"symbol with local linkage must have default visibility");
Constant *Aliasee;
@@ -1052,6 +1002,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
"invalid use of attribute on a function");
break;
case lltok::kw_byval:
+ case lltok::kw_dereferenceable:
case lltok::kw_inalloca:
case lltok::kw_nest:
case lltok::kw_noalias:
@@ -1212,6 +1163,16 @@ bool LLParser::ParseUInt32(unsigned &Val) {
return false;
}
+/// ParseUInt64
+/// ::= uint64
+bool LLParser::ParseUInt64(uint64_t &Val) {
+ if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
+ return TokError("expected integer");
+ Val = Lex.getAPSIntVal().getLimitedValue();
+ Lex.Lex();
+ return false;
+}
+
/// ParseTLSModel
/// := 'localdynamic'
/// := 'initialexec'
@@ -1284,6 +1245,13 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
continue;
}
case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break;
+ case lltok::kw_dereferenceable: {
+ uint64_t Bytes;
+ if (ParseOptionalDereferenceableBytes(Bytes))
+ return true;
+ B.addDereferenceableAttr(Bytes);
+ continue;
+ }
case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break;
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
@@ -1341,6 +1309,13 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
switch (Token) {
default: // End of attributes.
return HaveError;
+ case lltok::kw_dereferenceable: {
+ uint64_t Bytes;
+ if (ParseOptionalDereferenceableBytes(Bytes))
+ return true;
+ B.addDereferenceableAttr(Bytes);
+ continue;
+ }
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break;
@@ -1409,10 +1384,6 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
/// ::= 'common'
/// ::= 'extern_weak'
/// ::= 'external'
-///
-/// Deprecated Values:
-/// ::= 'linker_private'
-/// ::= 'linker_private_weak'
bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
HasLinkage = false;
switch (Lex.getKind()) {
@@ -1430,15 +1401,6 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
case lltok::kw_common: Res = GlobalValue::CommonLinkage; break;
case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break;
case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break;
-
- case lltok::kw_linker_private:
- case lltok::kw_linker_private_weak:
- Lex.Warning("'" + Lex.getStrVal() + "' is deprecated, treating as"
- " PrivateLinkage");
- Lex.Lex();
- // treat linker_private and linker_private_weak as PrivateLinkage
- Res = GlobalValue::PrivateLinkage;
- return false;
}
Lex.Lex();
HasLinkage = true;
@@ -1486,6 +1448,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
/// ::= 'x86_thiscallcc'
+/// ::= 'x86_vectorcallcc'
/// ::= 'arm_apcscc'
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
@@ -1502,7 +1465,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'preserve_allcc'
/// ::= 'cc' UINT
///
-bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
+bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
switch (Lex.getKind()) {
default: CC = CallingConv::C; return false;
case lltok::kw_ccc: CC = CallingConv::C; break;
@@ -1511,6 +1474,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
+ case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break;
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
@@ -1527,12 +1491,8 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break;
case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break;
case lltok::kw_cc: {
- unsigned ArbitraryCC;
Lex.Lex();
- if (ParseUInt32(ArbitraryCC))
- return true;
- CC = static_cast<CallingConv::ID>(ArbitraryCC);
- return false;
+ return ParseUInt32(CC);
}
}
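Two changes meet here: the new x86_vectorcallcc keyword maps onto
CallingConv::X86_VectorCall, and CC is now carried as a plain unsigned so an
arbitrary 'cc N' feeds straight into ParseUInt32. A sketch of what the keyword
ultimately means on the C++ side (function and helper names are illustrative):

    #include "llvm/IR/CallingConv.h"
    #include "llvm/IR/Function.h"

    // Equivalent of spelling "x86_vectorcallcc" on a function in textual IR.
    void markVectorCall(llvm::Function *F) {
      F->setCallingConv(llvm::CallingConv::X86_VectorCall);
    }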
@@ -1606,6 +1566,26 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
return false;
}
+/// ParseOptionalDereferenceableBytes
+/// ::= /* empty */
+/// ::= 'dereferenceable' '(' 4 ')'
+bool LLParser::ParseOptionalDereferenceableBytes(uint64_t &Bytes) {
+ Bytes = 0;
+ if (!EatIfPresent(lltok::kw_dereferenceable))
+ return false;
+ LocTy ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::lparen))
+ return Error(ParenLoc, "expected '('");
+ LocTy DerefLoc = Lex.getLoc();
+ if (ParseUInt64(Bytes)) return true;
+ ParenLoc = Lex.getLoc();
+ if (!EatIfPresent(lltok::rparen))
+ return Error(ParenLoc, "expected ')'");
+ if (!Bytes)
+ return Error(DerefLoc, "dereferenceable bytes must be non-zero");
+ return false;
+}
+
/// ParseOptionalCommaAlign
/// ::=
/// ::= ',' align 4
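The byte count parsed here is handed to AttrBuilder::addDereferenceableAttr()
in the parameter- and return-attribute hunks above. A minimal sketch of
building the same attribute programmatically (the count of 8 bytes and the
helper name are just examples):

    #include "llvm/IR/Attributes.h"

    // Programmatic counterpart of writing "dereferenceable(8)" on a parameter
    // or return value in the textual IR.
    llvm::AttrBuilder makeDereferenceable8() {
      llvm::AttrBuilder B;
      B.addDereferenceableAttr(8);
      return B;
    }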
@@ -1837,7 +1817,8 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
/// Arg
/// ::= Type OptionalAttributes Value OptionalAttributes
bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
- PerFunctionState &PFS) {
+ PerFunctionState &PFS, bool IsMustTailCall,
+ bool InVarArgsFunc) {
if (ParseToken(lltok::lparen, "expected '(' in call"))
return true;
@@ -1848,6 +1829,17 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
ParseToken(lltok::comma, "expected ',' in argument list"))
return true;
+ // Parse an ellipsis if this is a musttail call in a variadic function.
+ if (Lex.getKind() == lltok::dotdotdot) {
+ const char *Msg = "unexpected ellipsis in argument list for ";
+ if (!IsMustTailCall)
+ return TokError(Twine(Msg) + "non-musttail call");
+ if (!InVarArgsFunc)
+ return TokError(Twine(Msg) + "musttail call in non-varargs function");
+ Lex.Lex(); // Lex the '...', it is purely for readability.
+ return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+ }
+
// Parse the argument.
LocTy ArgLoc;
Type *ArgTy = nullptr;
@@ -1864,6 +1856,10 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
ArgAttrs)));
}
+ if (IsMustTailCall && InVarArgsFunc)
+ return TokError("expected '...' at end of argument list for musttail call "
+ "in varargs function");
+
Lex.Lex(); // Lex the ')'.
return false;
}
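Net effect: inside a varargs function, a musttail call must now spell out
'...' at the end of its argument list to forward the variable arguments, and
that is the only position where the ellipsis is accepted. A hand-written
fragment the updated parser accepts, fed through the parseAssemblyString()
entry point introduced in lib/AsmParser/Parser.cpp further down (typed-pointer
call syntax of this release; all names are illustrative):

    #include "llvm/AsmParser/Parser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"
    #include <memory>

    static const char *MustTailIR =
        "declare i32 @f(i32, ...)\n"
        "define i32 @g(i32 %x, ...) {\n"
        "  %r = musttail call i32 (i32, ...)* @f(i32 %x, ...)\n"
        "  ret i32 %r\n"
        "}\n";

    std::unique_ptr<llvm::Module> parseMustTailExample(llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err;
      return llvm::parseAssemblyString(MustTailIR, Err, Ctx); // null on error
    }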
@@ -2159,28 +2155,6 @@ LLParser::PerFunctionState::~PerFunctionState() {
}
bool LLParser::PerFunctionState::FinishFunction() {
- // Check to see if someone took the address of labels in this block.
- if (!P.ForwardRefBlockAddresses.empty()) {
- ValID FunctionID;
- if (!F.getName().empty()) {
- FunctionID.Kind = ValID::t_GlobalName;
- FunctionID.StrVal = F.getName();
- } else {
- FunctionID.Kind = ValID::t_GlobalID;
- FunctionID.UIntVal = FunctionNumber;
- }
-
- std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >::iterator
- FRBAI = P.ForwardRefBlockAddresses.find(FunctionID);
- if (FRBAI != P.ForwardRefBlockAddresses.end()) {
- // Resolve all these references.
- if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this))
- return true;
-
- P.ForwardRefBlockAddresses.erase(FRBAI);
- }
- }
-
if (!ForwardRefVals.empty())
return P.Error(ForwardRefVals.begin()->second.second,
"use of undefined value '%" + ForwardRefVals.begin()->first +
@@ -2222,7 +2196,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
}
// Don't make placeholders with invalid type.
- if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
+ if (!Ty->isFirstClassType()) {
P.Error(Loc, "invalid use of a non-first-class type");
return nullptr;
}
@@ -2263,7 +2237,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty,
return nullptr;
}
- if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
+ if (!Ty->isFirstClassType()) {
P.Error(Loc, "invalid use of a non-first-class type");
return nullptr;
}
@@ -2566,12 +2540,56 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
return Error(Label.Loc, "expected basic block name in blockaddress");
- // Make a global variable as a placeholder for this reference.
- GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
- false, GlobalValue::InternalLinkage,
- nullptr, "");
- ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef));
- ID.ConstantVal = FwdRef;
+ // Try to find the function (but skip it if it's forward-referenced).
+ GlobalValue *GV = nullptr;
+ if (Fn.Kind == ValID::t_GlobalID) {
+ if (Fn.UIntVal < NumberedVals.size())
+ GV = NumberedVals[Fn.UIntVal];
+ } else if (!ForwardRefVals.count(Fn.StrVal)) {
+ GV = M->getNamedValue(Fn.StrVal);
+ }
+ Function *F = nullptr;
+ if (GV) {
+ // Confirm that it's actually a function with a definition.
+ if (!isa<Function>(GV))
+ return Error(Fn.Loc, "expected function name in blockaddress");
+ F = cast<Function>(GV);
+ if (F->isDeclaration())
+ return Error(Fn.Loc, "cannot take blockaddress inside a declaration");
+ }
+
+ if (!F) {
+ // Make a global variable as a placeholder for this reference.
+ GlobalValue *&FwdRef = ForwardRefBlockAddresses[Fn][Label];
+ if (!FwdRef)
+ FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), false,
+ GlobalValue::InternalLinkage, nullptr, "");
+ ID.ConstantVal = FwdRef;
+ ID.Kind = ValID::t_Constant;
+ return false;
+ }
+
+ // We found the function; now find the basic block. Don't use PFS, since we
+ // might be inside a constant expression.
+ BasicBlock *BB;
+ if (BlockAddressPFS && F == &BlockAddressPFS->getFunction()) {
+ if (Label.Kind == ValID::t_LocalID)
+ BB = BlockAddressPFS->GetBB(Label.UIntVal, Label.Loc);
+ else
+ BB = BlockAddressPFS->GetBB(Label.StrVal, Label.Loc);
+ if (!BB)
+ return Error(Label.Loc, "referenced value is not a basic block");
+ } else {
+ if (Label.Kind == ValID::t_LocalID)
+ return Error(Label.Loc, "cannot take address of numeric label after "
+ "the function is defined");
+ BB = dyn_cast_or_null<BasicBlock>(
+ F->getValueSymbolTable().lookup(Label.StrVal));
+ if (!BB)
+ return Error(Label.Loc, "referenced value is not a basic block");
+ }
+
+ ID.ConstantVal = BlockAddress::get(F, BB);
ID.Kind = ValID::t_Constant;
return false;
}
@@ -2886,7 +2904,7 @@ bool LLParser::parseOptionalComdat(Comdat *&C) {
/// ParseGlobalValueVector
/// ::= /*empty*/
/// ::= TypeAndValue (',' TypeAndValue)*
-bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts) {
// Empty list.
if (Lex.getKind() == lltok::rbrace ||
Lex.getKind() == lltok::rsquare ||
@@ -3111,7 +3129,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
unsigned Visibility;
unsigned DLLStorageClass;
AttrBuilder RetAttrs;
- CallingConv::ID CC;
+ unsigned CC;
Type *RetType = nullptr;
LocTy RetTypeLoc = Lex.getLoc();
if (ParseOptionalLinkage(Linkage) ||
@@ -3314,13 +3332,63 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
ArgList[i].Name + "'");
}
+ if (isDefine)
+ return false;
+
+ // Check the declaration has no block address forward references.
+ ValID ID;
+ if (FunctionName.empty()) {
+ ID.Kind = ValID::t_GlobalID;
+ ID.UIntVal = NumberedVals.size() - 1;
+ } else {
+ ID.Kind = ValID::t_GlobalName;
+ ID.StrVal = FunctionName;
+ }
+ auto Blocks = ForwardRefBlockAddresses.find(ID);
+ if (Blocks != ForwardRefBlockAddresses.end())
+ return Error(Blocks->first.Loc,
+ "cannot take blockaddress inside a declaration");
return false;
}
+bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() {
+ ValID ID;
+ if (FunctionNumber == -1) {
+ ID.Kind = ValID::t_GlobalName;
+ ID.StrVal = F.getName();
+ } else {
+ ID.Kind = ValID::t_GlobalID;
+ ID.UIntVal = FunctionNumber;
+ }
+
+ auto Blocks = P.ForwardRefBlockAddresses.find(ID);
+ if (Blocks == P.ForwardRefBlockAddresses.end())
+ return false;
+
+ for (const auto &I : Blocks->second) {
+ const ValID &BBID = I.first;
+ GlobalValue *GV = I.second;
+
+ assert((BBID.Kind == ValID::t_LocalID || BBID.Kind == ValID::t_LocalName) &&
+ "Expected local id or name");
+ BasicBlock *BB;
+ if (BBID.Kind == ValID::t_LocalName)
+ BB = GetBB(BBID.StrVal, BBID.Loc);
+ else
+ BB = GetBB(BBID.UIntVal, BBID.Loc);
+ if (!BB)
+ return P.Error(BBID.Loc, "referenced value is not a basic block");
+
+ GV->replaceAllUsesWith(BlockAddress::get(&F, BB));
+ GV->eraseFromParent();
+ }
+
+ P.ForwardRefBlockAddresses.erase(Blocks);
+ return false;
+}
/// ParseFunctionBody
-/// ::= '{' BasicBlock+ '}'
-///
+/// ::= '{' BasicBlock+ UseListOrderDirective* '}'
bool LLParser::ParseFunctionBody(Function &Fn) {
if (Lex.getKind() != lltok::lbrace)
return TokError("expected '{' in function body");
@@ -3331,13 +3399,24 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
PerFunctionState PFS(*this, Fn, FunctionNumber);
+ // Resolve block addresses and allow basic blocks to be forward-declared
+ // within this function.
+ if (PFS.resolveForwardRefBlockAddresses())
+ return true;
+ SaveAndRestore<PerFunctionState *> ScopeExit(BlockAddressPFS, &PFS);
+
// We need at least one basic block.
- if (Lex.getKind() == lltok::rbrace)
+ if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_uselistorder)
return TokError("function body requires at least one basic block");
- while (Lex.getKind() != lltok::rbrace)
+ while (Lex.getKind() != lltok::rbrace &&
+ Lex.getKind() != lltok::kw_uselistorder)
if (ParseBasicBlock(PFS)) return true;
+ while (Lex.getKind() != lltok::rbrace)
+ if (ParseUseListOrder(&PFS))
+ return true;
+
// Eat the }.
Lex.Lex();
@@ -3656,7 +3735,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
ParseTypeAndBasicBlock(DestBB, PFS))
return true;
- if (!SeenCases.insert(Constant))
+ if (!SeenCases.insert(Constant).second)
return Error(CondLoc, "duplicate case value in switch");
if (!isa<ConstantInt>(Constant))
return Error(CondLoc, "case value is not a constant integer");
@@ -3722,7 +3801,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
AttrBuilder RetAttrs, FnAttrs;
std::vector<unsigned> FwdRefAttrGrps;
LocTy NoBuiltinLoc;
- CallingConv::ID CC;
+ unsigned CC;
Type *RetType = nullptr;
LocTy RetTypeLoc;
ValID CalleeID;
@@ -4136,7 +4215,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
AttrBuilder RetAttrs, FnAttrs;
std::vector<unsigned> FwdRefAttrGrps;
LocTy BuiltinLoc;
- CallingConv::ID CC;
+ unsigned CC;
Type *RetType = nullptr;
LocTy RetTypeLoc;
ValID CalleeID;
@@ -4149,7 +4228,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
ParseOptionalReturnAttrs(RetAttrs) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
ParseValID(CalleeID) ||
- ParseParameterList(ArgList, PFS) ||
+ ParseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail,
+ PFS.getFunction().isVarArg()) ||
ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
BuiltinLoc))
return true;
@@ -4596,3 +4676,135 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
return false;
}
+
+//===----------------------------------------------------------------------===//
+// Use-list order directives.
+//===----------------------------------------------------------------------===//
+bool LLParser::sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes,
+ SMLoc Loc) {
+ if (V->use_empty())
+ return Error(Loc, "value has no uses");
+
+ unsigned NumUses = 0;
+ SmallDenseMap<const Use *, unsigned, 16> Order;
+ for (const Use &U : V->uses()) {
+ if (++NumUses > Indexes.size())
+ break;
+ Order[&U] = Indexes[NumUses - 1];
+ }
+ if (NumUses < 2)
+ return Error(Loc, "value only has one use");
+ if (Order.size() != Indexes.size() || NumUses > Indexes.size())
+ return Error(Loc, "wrong number of indexes, expected " +
+ Twine(std::distance(V->use_begin(), V->use_end())));
+
+ V->sortUseList([&](const Use &L, const Use &R) {
+ return Order.lookup(&L) < Order.lookup(&R);
+ });
+ return false;
+}
+
+/// ParseUseListOrderIndexes
+/// ::= '{' uint32 (',' uint32)+ '}'
+bool LLParser::ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes) {
+ SMLoc Loc = Lex.getLoc();
+ if (ParseToken(lltok::lbrace, "expected '{' here"))
+ return true;
+ if (Lex.getKind() == lltok::rbrace)
+ return Lex.Error("expected non-empty list of uselistorder indexes");
+
+ // Use Offset, Max, and IsOrdered to check consistency of indexes. The
+ // indexes should be distinct numbers in the range [0, size-1], and should
+ // not be in order.
+ unsigned Offset = 0;
+ unsigned Max = 0;
+ bool IsOrdered = true;
+ assert(Indexes.empty() && "Expected empty order vector");
+ do {
+ unsigned Index;
+ if (ParseUInt32(Index))
+ return true;
+
+ // Update consistency checks.
+ Offset += Index - Indexes.size();
+ Max = std::max(Max, Index);
+ IsOrdered &= Index == Indexes.size();
+
+ Indexes.push_back(Index);
+ } while (EatIfPresent(lltok::comma));
+
+ if (ParseToken(lltok::rbrace, "expected '}' here"))
+ return true;
+
+ if (Indexes.size() < 2)
+ return Error(Loc, "expected >= 2 uselistorder indexes");
+ if (Offset != 0 || Max >= Indexes.size())
+ return Error(Loc, "expected distinct uselistorder indexes in range [0, size)");
+ if (IsOrdered)
+ return Error(Loc, "expected uselistorder indexes to change the order");
+
+ return false;
+}
+
+/// ParseUseListOrder
+/// ::= 'uselistorder' Type Value ',' UseListOrderIndexes
+bool LLParser::ParseUseListOrder(PerFunctionState *PFS) {
+ SMLoc Loc = Lex.getLoc();
+ if (ParseToken(lltok::kw_uselistorder, "expected uselistorder directive"))
+ return true;
+
+ Value *V;
+ SmallVector<unsigned, 16> Indexes;
+ if (ParseTypeAndValue(V, PFS) ||
+ ParseToken(lltok::comma, "expected comma in uselistorder directive") ||
+ ParseUseListOrderIndexes(Indexes))
+ return true;
+
+ return sortUseListOrder(V, Indexes, Loc);
+}
+
+/// ParseUseListOrderBB
+/// ::= 'uselistorder_bb' @foo ',' %bar ',' UseListOrderIndexes
+bool LLParser::ParseUseListOrderBB() {
+ assert(Lex.getKind() == lltok::kw_uselistorder_bb);
+ SMLoc Loc = Lex.getLoc();
+ Lex.Lex();
+
+ ValID Fn, Label;
+ SmallVector<unsigned, 16> Indexes;
+ if (ParseValID(Fn) ||
+ ParseToken(lltok::comma, "expected comma in uselistorder_bb directive") ||
+ ParseValID(Label) ||
+ ParseToken(lltok::comma, "expected comma in uselistorder_bb directive") ||
+ ParseUseListOrderIndexes(Indexes))
+ return true;
+
+ // Check the function.
+ GlobalValue *GV;
+ if (Fn.Kind == ValID::t_GlobalName)
+ GV = M->getNamedValue(Fn.StrVal);
+ else if (Fn.Kind == ValID::t_GlobalID)
+ GV = Fn.UIntVal < NumberedVals.size() ? NumberedVals[Fn.UIntVal] : nullptr;
+ else
+ return Error(Fn.Loc, "expected function name in uselistorder_bb");
+ if (!GV)
+ return Error(Fn.Loc, "invalid function forward reference in uselistorder_bb");
+ auto *F = dyn_cast<Function>(GV);
+ if (!F)
+ return Error(Fn.Loc, "expected function name in uselistorder_bb");
+ if (F->isDeclaration())
+ return Error(Fn.Loc, "invalid declaration in uselistorder_bb");
+
+ // Check the basic block.
+ if (Label.Kind == ValID::t_LocalID)
+ return Error(Label.Loc, "invalid numeric label in uselistorder_bb");
+ if (Label.Kind != ValID::t_LocalName)
+ return Error(Label.Loc, "expected basic block name in uselistorder_bb");
+ Value *V = F->getValueSymbolTable().lookup(Label.StrVal);
+ if (!V)
+ return Error(Label.Loc, "invalid basic block in uselistorder_bb");
+ if (!isa<BasicBlock>(V))
+ return Error(Label.Loc, "expected basic block in uselistorder_bb");
+
+ return sortUseListOrder(V, Indexes, Loc);
+}
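The index list accepted by ParseUseListOrderIndexes must have at least two
entries, form a permutation of [0, size), and actually change the order. A
standalone check with the same intent, verifying a true permutation instead of
the cheaper Offset/Max/IsOrdered bookkeeping above (the helper name is
illustrative):

    #include <algorithm>
    #include <vector>

    bool isValidUseListOrder(const std::vector<unsigned> &Indexes) {
      if (Indexes.size() < 2)
        return false;                       // need >= 2 indexes
      std::vector<unsigned> Sorted(Indexes);
      std::sort(Sorted.begin(), Sorted.end());
      for (unsigned i = 0, e = Sorted.size(); i != e; ++i)
        if (Sorted[i] != i)
          return false;                     // duplicate or out-of-range index
      // A permutation that is already sorted is the identity: it would not
      // change the use list, so reject it.
      return !std::is_sorted(Indexes.begin(), Indexes.end());
    }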
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 2efb260..aa62bcc 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ASMPARSER_LLPARSER_H
-#define LLVM_ASMPARSER_LLPARSER_H
+#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H
+#define LLVM_LIB_ASMPARSER_LLPARSER_H
#include "LLLexer.h"
#include "llvm/ADT/DenseMap.h"
@@ -128,17 +128,21 @@ namespace llvm {
// References to blockaddress. The key is the function ValID, the value is
// a list of references to blocks in that function.
- std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
- ForwardRefBlockAddresses;
+ std::map<ValID, std::map<ValID, GlobalValue *>> ForwardRefBlockAddresses;
+ class PerFunctionState;
+ /// Reference to per-function state to allow basic blocks to be
+ /// forward-referenced by blockaddress instructions within the same
+ /// function.
+ PerFunctionState *BlockAddressPFS;
// Attribute builder reference information.
std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups;
std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
public:
- LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
- Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
- M(m) {}
+ LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *m)
+ : Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m),
+ BlockAddressPFS(nullptr) {}
bool Run();
LLVMContext &getContext() { return Context; }
@@ -202,6 +206,11 @@ namespace llvm {
Loc = Lex.getLoc();
return ParseUInt32(Val);
}
+ bool ParseUInt64(uint64_t &Val);
+ bool ParseUInt64(uint64_t &Val, LocTy &Loc) {
+ Loc = Lex.getLoc();
+ return ParseUInt64(Val);
+ }
bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
@@ -217,8 +226,9 @@ namespace llvm {
}
bool ParseOptionalVisibility(unsigned &Visibility);
bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass);
- bool ParseOptionalCallingConv(CallingConv::ID &CC);
+ bool ParseOptionalCallingConv(unsigned &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
+ bool ParseOptionalDereferenceableBytes(uint64_t &Bytes);
bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
AtomicOrdering &Ordering);
bool ParseOrdering(AtomicOrdering &Ordering);
@@ -252,8 +262,8 @@ namespace llvm {
bool HasLinkage, unsigned Visibility,
unsigned DLLStorageClass,
GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
- bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility,
- unsigned DLLStorageClass,
+ bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Linkage,
+ unsigned Visibility, unsigned DLLStorageClass,
GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
bool parseComdat();
bool ParseStandaloneMetadata();
@@ -321,6 +331,8 @@ namespace llvm {
/// unnamed. If there is an error, this returns null otherwise it returns
/// the block being defined.
BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
+
+ bool resolveForwardRefBlockAddresses();
};
bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
@@ -360,13 +372,15 @@ namespace llvm {
: Loc(loc), V(v), Attrs(attrs) {}
};
bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
- PerFunctionState &PFS);
+ PerFunctionState &PFS,
+ bool IsMustTailCall = false,
+ bool InVarArgsFunc = false);
// Constant Parsing.
bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
bool ParseGlobalValue(Type *Ty, Constant *&V);
bool ParseGlobalTypeAndValue(Constant *&V);
- bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts);
bool parseOptionalComdat(Comdat *&C);
bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
@@ -427,9 +441,11 @@ namespace llvm {
int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
- bool ResolveForwardRefBlockAddresses(Function *TheFn,
- std::vector<std::pair<ValID, GlobalValue*> > &Refs,
- PerFunctionState *PFS);
+ // Use-list order directives.
+ bool ParseUseListOrder(PerFunctionState *PFS = nullptr);
+ bool ParseUseListOrderBB();
+ bool ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
+ bool sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes, SMLoc Loc);
};
} // End llvm namespace
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 534d824..f9821f7 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LIBS_ASMPARSER_LLTOKEN_H
-#define LIBS_ASMPARSER_LLTOKEN_H
+#ifndef LLVM_LIB_ASMPARSER_LLTOKEN_H
+#define LLVM_LIB_ASMPARSER_LLTOKEN_H
namespace llvm {
namespace lltok {
@@ -39,8 +39,6 @@ namespace lltok {
kw_private,
kw_internal,
- kw_linker_private, // NOTE: deprecated, for parser compatibility
- kw_linker_private_weak, // NOTE: deprecated, for parser compatibility
kw_linkonce, kw_linkonce_odr,
kw_weak, // Used as a linkage, and a modifier for "cmpxchg".
kw_weak_odr, kw_appending,
@@ -89,7 +87,7 @@ namespace lltok {
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
kw_intel_ocl_bicc,
- kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_x86_vectorcallcc,
kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_msp430_intrcc,
kw_ptx_kernel, kw_ptx_device,
@@ -106,6 +104,7 @@ namespace lltok {
kw_byval,
kw_inalloca,
kw_cold,
+ kw_dereferenceable,
kw_inlinehint,
kw_inreg,
kw_jumptable,
@@ -181,6 +180,9 @@ namespace lltok {
kw_extractelement, kw_insertelement, kw_shufflevector,
kw_extractvalue, kw_insertvalue, kw_blockaddress,
+ // Use-list order directives.
+ kw_uselistorder, kw_uselistorder_bb,
+
// Unsigned Valued tokens (UIntVal).
GlobalID, // @42
LocalVarID, // %42
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 91bb51c..0815907 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -21,26 +21,29 @@
#include <system_error>
using namespace llvm;
-Module *llvm::ParseAssembly(MemoryBuffer *F,
- Module *M,
- SMDiagnostic &Err,
- LLVMContext &Context) {
+bool llvm::parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err) {
SourceMgr SM;
- SM.AddNewSourceBuffer(F, SMLoc());
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F, false);
+ SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
- // If we are parsing into an existing module, do it.
- if (M)
- return LLParser(F, SM, Err, M).Run() ? nullptr : M;
+ return LLParser(F.getBuffer(), SM, Err, &M).Run();
+}
+
+std::unique_ptr<Module> llvm::parseAssembly(MemoryBufferRef F,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ std::unique_ptr<Module> M =
+ make_unique<Module>(F.getBufferIdentifier(), Context);
- // Otherwise create a new module.
- std::unique_ptr<Module> M2(new Module(F->getBufferIdentifier(), Context));
- if (LLParser(F, SM, Err, M2.get()).Run())
+ if (parseAssemblyInto(F, *M, Err))
return nullptr;
- return M2.release();
+
+ return std::move(M);
}
-Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
- LLVMContext &Context) {
+std::unique_ptr<Module> llvm::parseAssemblyFile(StringRef Filename,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = FileOrErr.getError()) {
@@ -49,13 +52,12 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
return nullptr;
}
- return ParseAssembly(FileOrErr.get().release(), nullptr, Err, Context);
+ return parseAssembly(FileOrErr.get()->getMemBufferRef(), Err, Context);
}
-Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
- SMDiagnostic &Err, LLVMContext &Context) {
- MemoryBuffer *F =
- MemoryBuffer::getMemBuffer(StringRef(AsmString), "<string>");
-
- return ParseAssembly(F, M, Err, Context);
+std::unique_ptr<Module> llvm::parseAssemblyString(StringRef AsmString,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
+ MemoryBufferRef F(AsmString, "<string>");
+ return parseAssembly(F, Err, Context);
}
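The entry points now hand back std::unique_ptr<Module> and take
MemoryBufferRef/StringRef, so no raw MemoryBuffer ownership changes hands and
the caller owns whatever comes back. A minimal caller-side sketch against the
signatures introduced here (the program name and file path are placeholders):

    #include "llvm/AsmParser/Parser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>

    std::unique_ptr<llvm::Module> loadTextualIR(llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err;
      std::unique_ptr<llvm::Module> M =
          llvm::parseAssemblyFile("input.ll", Err, Ctx);
      if (!M)
        Err.print("example", llvm::errs());  // parse diagnostics land in Err
      return M;  // nullptr on failure, otherwise the caller owns the module
    }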
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index b5886c1..9b3acb5 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -31,7 +31,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
LLVMModuleRef *OutModule,
char **OutMessage) {
ErrorOr<Module *> ModuleOrErr =
- parseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef));
+ parseBitcodeFile(unwrap(MemBuf)->getMemBufferRef(), *unwrap(ContextRef));
if (std::error_code EC = ModuleOrErr.getError()) {
if (OutMessage)
*OutMessage = strdup(EC.message().c_str());
@@ -51,8 +51,11 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
LLVMModuleRef *OutM,
char **OutMessage) {
std::string Message;
+ std::unique_ptr<MemoryBuffer> Owner(unwrap(MemBuf));
+
ErrorOr<Module *> ModuleOrErr =
- getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef));
+ getLazyBitcodeModule(std::move(Owner), *unwrap(ContextRef));
+ Owner.release();
if (std::error_code EC = ModuleOrErr.getError()) {
*OutM = wrap((Module *)nullptr);
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 192f753..b2ca22c 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -25,17 +25,45 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ManagedStatic.h"
+
using namespace llvm;
enum {
SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
};
-void BitcodeReader::materializeForwardReferencedFunctions() {
- while (!BlockAddrFwdRefs.empty()) {
- Function *F = BlockAddrFwdRefs.begin()->first;
- F->Materialize();
+std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
+ if (WillMaterializeAllForwardRefs)
+ return std::error_code();
+
+ // Prevent recursion.
+ WillMaterializeAllForwardRefs = true;
+
+ while (!BasicBlockFwdRefQueue.empty()) {
+ Function *F = BasicBlockFwdRefQueue.front();
+ BasicBlockFwdRefQueue.pop_front();
+ assert(F && "Expected valid function");
+ if (!BasicBlockFwdRefs.count(F))
+ // Already materialized.
+ continue;
+
+ // Check for a function that isn't materializable to prevent an infinite
+ // loop. When parsing a blockaddress stored in a global variable, there
+ // isn't a trivial way to check if a function will have a body without a
+ // linear search through FunctionsWithBodies, so just check it here.
+ if (!F->isMaterializable())
+ return Error(BitcodeError::NeverResolvedFunctionFromBlockAddress);
+
+ // Try to materialize F.
+ if (std::error_code EC = materialize(F))
+ return EC;
}
+ assert(BasicBlockFwdRefs.empty() && "Function missing from queue");
+
+ // Reset state.
+ WillMaterializeAllForwardRefs = false;
+ return std::error_code();
}
void BitcodeReader::FreeState() {
@@ -51,7 +79,8 @@ void BitcodeReader::FreeState() {
DeferredFunctionInfo.clear();
MDKindMap.clear();
- assert(BlockAddrFwdRefs.empty() && "Unresolved blockaddress fwd references");
+ assert(BasicBlockFwdRefs.empty() && "Unresolved blockaddress fwd references");
+ BasicBlockFwdRefQueue.clear();
}
//===----------------------------------------------------------------------===//
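materializeForwardReferencedFunctions() now drains an explicit queue and
refuses functions that can never be materialized, turning a blockaddress into
a missing body into a hard error instead of an endless loop. A generic sketch
of the same worklist shape (all container, type, and callback names here are
illustrative, not the reader's own):

    #include <deque>
    #include <functional>
    #include <set>

    struct Fn {};  // stand-in for llvm::Function

    // Drain the queue once. Entries no longer pending were materialized by an
    // earlier iteration; a function that can never get a body is a fatal
    // error rather than something to retry. The Materialize callback is
    // expected to erase the function from Pending, as the reader's does.
    bool drainForwardRefs(std::deque<Fn *> Queue, std::set<Fn *> &Pending,
                          const std::function<bool(Fn *)> &IsMaterializable,
                          const std::function<bool(Fn *)> &Materialize) {
      while (!Queue.empty()) {
        Fn *F = Queue.front();
        Queue.pop_front();
        if (!Pending.count(F))
          continue;
        if (!IsMaterializable(F))
          return false;   // NeverResolvedFunctionFromBlockAddress in the reader
        if (!Materialize(F))
          return false;
      }
      return true;
    }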
@@ -487,10 +516,10 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
std::error_code BitcodeReader::ParseAttributeBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (!MAttributes.empty())
- return Error(InvalidMultipleBlocks);
+ return Error(BitcodeError::InvalidMultipleBlocks);
SmallVector<uint64_t, 64> Record;
@@ -503,7 +532,7 @@ std::error_code BitcodeReader::ParseAttributeBlock() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -519,7 +548,7 @@ std::error_code BitcodeReader::ParseAttributeBlock() {
case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...]
// FIXME: Remove in 4.0.
if (Record.size() & 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
AttrBuilder B;
@@ -588,6 +617,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
return Attribute::NonLazyBind;
case bitc::ATTR_KIND_NON_NULL:
return Attribute::NonNull;
+ case bitc::ATTR_KIND_DEREFERENCEABLE:
+ return Attribute::Dereferenceable;
case bitc::ATTR_KIND_NO_RED_ZONE:
return Attribute::NoRedZone;
case bitc::ATTR_KIND_NO_RETURN:
@@ -635,16 +666,16 @@ std::error_code BitcodeReader::ParseAttrKind(uint64_t Code,
Attribute::AttrKind *Kind) {
*Kind = GetAttrFromCode(Code);
if (*Kind == Attribute::None)
- return Error(InvalidValue);
+ return Error(BitcodeError::InvalidValue);
return std::error_code();
}
std::error_code BitcodeReader::ParseAttributeGroupBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (!MAttributeGroups.empty())
- return Error(InvalidMultipleBlocks);
+ return Error(BitcodeError::InvalidMultipleBlocks);
SmallVector<uint64_t, 64> Record;
@@ -655,7 +686,7 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -670,7 +701,7 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
break;
case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...]
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
uint64_t GrpID = Record[0];
uint64_t Idx = Record[1]; // Index of the object this attribute refers to.
@@ -683,14 +714,16 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
return EC;
B.addAttribute(Kind);
- } else if (Record[i] == 1) { // Align attribute
+ } else if (Record[i] == 1) { // Integer attribute
Attribute::AttrKind Kind;
if (std::error_code EC = ParseAttrKind(Record[++i], &Kind))
return EC;
if (Kind == Attribute::Alignment)
B.addAlignmentAttr(Record[++i]);
- else
+ else if (Kind == Attribute::StackAlignment)
B.addStackAlignmentAttr(Record[++i]);
+ else if (Kind == Attribute::Dereferenceable)
+ B.addDereferenceableAttr(Record[++i]);
} else { // String attribute
assert((Record[i] == 3 || Record[i] == 4) &&
"Invalid attribute group entry");
@@ -723,14 +756,14 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() {
std::error_code BitcodeReader::ParseTypeTable() {
if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
return ParseTypeTableBody();
}
std::error_code BitcodeReader::ParseTypeTableBody() {
if (!TypeList.empty())
- return Error(InvalidMultipleBlocks);
+ return Error(BitcodeError::InvalidMultipleBlocks);
SmallVector<uint64_t, 64> Record;
unsigned NumRecords = 0;
@@ -744,10 +777,10 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
if (NumRecords != TypeList.size())
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
@@ -759,12 +792,12 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
Type *ResultTy = nullptr;
switch (Stream.readRecord(Entry.ID, Record)) {
default:
- return Error(InvalidValue);
+ return Error(BitcodeError::InvalidValue);
case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
// TYPE_CODE_NUMENTRY contains a count of the number of types in the
// type list. This allows us to reserve space.
if (Record.size() < 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
TypeList.resize(Record[0]);
continue;
case bitc::TYPE_CODE_VOID: // VOID
@@ -799,20 +832,20 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
break;
case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
if (Record.size() < 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
ResultTy = IntegerType::get(Context, Record[0]);
break;
case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
// [pointee type, address space]
if (Record.size() < 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned AddressSpace = 0;
if (Record.size() == 2)
AddressSpace = Record[1];
ResultTy = getTypeByID(Record[0]);
if (!ResultTy)
- return Error(InvalidType);
+ return Error(BitcodeError::InvalidType);
ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
}
@@ -820,7 +853,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
// FIXME: attrid is dead, remove it in LLVM 4.0
// FUNCTION: [vararg, attrid, retty, paramty x N]
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 3, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -831,7 +864,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
ResultTy = getTypeByID(Record[2]);
if (!ResultTy || ArgTys.size() < Record.size()-3)
- return Error(InvalidType);
+ return Error(BitcodeError::InvalidType);
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
@@ -839,7 +872,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_FUNCTION: {
// FUNCTION: [vararg, retty, paramty x N]
if (Record.size() < 2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<Type*, 8> ArgTys;
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -850,14 +883,14 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
ResultTy = getTypeByID(Record[1]);
if (!ResultTy || ArgTys.size() < Record.size()-2)
- return Error(InvalidType);
+ return Error(BitcodeError::InvalidType);
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
}
case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -866,21 +899,21 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
break;
}
if (EltTys.size() != Record.size()-1)
- return Error(InvalidType);
+ return Error(BitcodeError::InvalidType);
ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N]
if (ConvertToString(Record, 0, TypeName))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
continue;
case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (NumRecords >= TypeList.size())
- return Error(InvalidTYPETable);
+ return Error(BitcodeError::InvalidTYPETable);
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
@@ -899,17 +932,17 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
break;
}
if (EltTys.size() != Record.size()-1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Res->setBody(EltTys, Record[0]);
ResultTy = Res;
break;
}
case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
if (Record.size() != 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (NumRecords >= TypeList.size())
- return Error(InvalidTYPETable);
+ return Error(BitcodeError::InvalidTYPETable);
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
@@ -924,24 +957,24 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
}
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if ((ResultTy = getTypeByID(Record[1])))
ResultTy = ArrayType::get(ResultTy, Record[0]);
else
- return Error(InvalidType);
+ return Error(BitcodeError::InvalidType);
break;
case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
if (Record.size() < 2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if ((ResultTy = getTypeByID(Record[1])))
ResultTy = VectorType::get(ResultTy, Record[0]);
else
- return Error(InvalidType);
+ return Error(BitcodeError::InvalidType);
break;
}
if (NumRecords >= TypeList.size())
- return Error(InvalidTYPETable);
+ return Error(BitcodeError::InvalidTYPETable);
assert(ResultTy && "Didn't read a type?");
assert(!TypeList[NumRecords] && "Already read type?");
TypeList[NumRecords++] = ResultTy;
@@ -950,7 +983,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() {
std::error_code BitcodeReader::ParseValueSymbolTable() {
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -962,7 +995,7 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -977,10 +1010,10 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
break;
case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
if (ConvertToString(Record, 1, ValueName))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned ValueID = Record[0];
if (ValueID >= ValueList.size() || !ValueList[ValueID])
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Value *V = ValueList[ValueID];
V->setName(StringRef(ValueName.data(), ValueName.size()));
@@ -989,10 +1022,10 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
}
case bitc::VST_CODE_BBENTRY: {
if (ConvertToString(Record, 1, ValueName))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
BasicBlock *BB = getBasicBlock(Record[0]);
if (!BB)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
BB->setName(StringRef(ValueName.data(), ValueName.size()));
ValueName.clear();
@@ -1006,7 +1039,7 @@ std::error_code BitcodeReader::ParseMetadata() {
unsigned NextMDValueNo = MDValueList.size();
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -1017,7 +1050,7 @@ std::error_code BitcodeReader::ParseMetadata() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -1048,7 +1081,7 @@ std::error_code BitcodeReader::ParseMetadata() {
for (unsigned i = 0; i != Size; ++i) {
MDNode *MD = dyn_cast_or_null<MDNode>(MDValueList.getValueFwdRef(Record[i]));
if (!MD)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
NMD->addOperand(MD);
}
break;
@@ -1058,14 +1091,14 @@ std::error_code BitcodeReader::ParseMetadata() {
// fall-through
case bitc::METADATA_NODE: {
if (Record.size() % 2 == 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned Size = Record.size();
SmallVector<Value*, 8> Elts;
for (unsigned i = 0; i != Size; i += 2) {
Type *Ty = getTypeByID(Record[i]);
if (!Ty)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (Ty->isMetadataTy())
Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
else if (!Ty->isVoidTy())
@@ -1087,14 +1120,14 @@ std::error_code BitcodeReader::ParseMetadata() {
}
case bitc::METADATA_KIND: {
if (Record.size() < 2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned Kind = Record[0];
SmallString<8> Name(Record.begin()+1, Record.end());
unsigned NewKind = TheModule->getMDKindID(Name.str());
if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
- return Error(ConflictingMETADATA_KINDRecords);
+ return Error(BitcodeError::ConflictingMETADATA_KINDRecords);
break;
}
}
@@ -1132,7 +1165,7 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
GlobalInitWorklist.back().first->setInitializer(C);
else
- return Error(ExpectedConstant);
+ return Error(BitcodeError::ExpectedConstant);
}
GlobalInitWorklist.pop_back();
}
@@ -1145,7 +1178,7 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
AliasInitWorklist.back().first->setAliasee(C);
else
- return Error(ExpectedConstant);
+ return Error(BitcodeError::ExpectedConstant);
}
AliasInitWorklist.pop_back();
}
@@ -1158,7 +1191,7 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
FunctionPrefixWorklist.back().first->setPrefixData(C);
else
- return Error(ExpectedConstant);
+ return Error(BitcodeError::ExpectedConstant);
}
FunctionPrefixWorklist.pop_back();
}
@@ -1176,7 +1209,7 @@ static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
std::error_code BitcodeReader::ParseConstants() {
if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -1189,10 +1222,10 @@ std::error_code BitcodeReader::ParseConstants() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
if (NextCstNo != ValueList.size())
- return Error(InvalidConstantReference);
+ return Error(BitcodeError::InvalidConstantReference);
// Once all the constants have been read, go through and resolve forward
// references.
@@ -1214,9 +1247,9 @@ std::error_code BitcodeReader::ParseConstants() {
break;
case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid]
if (Record.empty())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (Record[0] >= TypeList.size() || !TypeList[Record[0]])
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
CurTy = TypeList[Record[0]];
continue; // Skip the ValueList manipulation.
case bitc::CST_CODE_NULL: // NULL
@@ -1224,12 +1257,12 @@ std::error_code BitcodeReader::ParseConstants() {
break;
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
if (!CurTy->isIntegerTy() || Record.empty())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0]));
break;
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
if (!CurTy->isIntegerTy() || Record.empty())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
APInt VInt = ReadWideAPInt(Record,
cast<IntegerType>(CurTy)->getBitWidth());
@@ -1239,7 +1272,7 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
if (Record.empty())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (CurTy->isHalfTy())
V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf,
APInt(16, (uint16_t)Record[0])));
@@ -1269,7 +1302,7 @@ std::error_code BitcodeReader::ParseConstants() {
case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
if (Record.empty())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned Size = Record.size();
SmallVector<Constant*, 16> Elts;
@@ -1297,7 +1330,7 @@ std::error_code BitcodeReader::ParseConstants() {
case bitc::CST_CODE_STRING: // STRING: [values]
case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
if (Record.empty())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallString<16> Elts(Record.begin(), Record.end());
V = ConstantDataArray::getString(Context, Elts,
@@ -1306,7 +1339,7 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_DATA: {// DATA: [n x value]
if (Record.empty())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
unsigned Size = Record.size();
@@ -1351,14 +1384,14 @@ std::error_code BitcodeReader::ParseConstants() {
else
V = ConstantDataArray::get(Context, Elts);
} else {
- return Error(InvalidTypeForValue);
+ return Error(BitcodeError::InvalidTypeForValue);
}
break;
}
case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown binop.
@@ -1389,14 +1422,14 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval]
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
int Opc = GetDecodedCastOpcode(Record[0]);
if (Opc < 0) {
V = UndefValue::get(CurTy); // Unknown cast.
} else {
Type *OpTy = getTypeByID(Record[1]);
if (!OpTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
V = UpgradeBitCastExpr(Opc, Op, CurTy);
if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy);
@@ -1406,12 +1439,12 @@ std::error_code BitcodeReader::ParseConstants() {
case bitc::CST_CODE_CE_INBOUNDS_GEP:
case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
if (Record.size() & 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
Type *ElTy = getTypeByID(Record[i]);
if (!ElTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
}
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
@@ -1422,7 +1455,7 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#]
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *SelectorTy = Type::getInt1Ty(Context);
@@ -1441,22 +1474,22 @@ std::error_code BitcodeReader::ParseConstants() {
case bitc::CST_CODE_CE_EXTRACTELT
: { // CE_EXTRACTELT: [opty, opval, opty, opval]
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (!OpTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = nullptr;
if (Record.size() == 4) {
Type *IdxTy = getTypeByID(Record[2]);
if (!IdxTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy);
} else // TODO: Remove with llvm 4.0
Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
if (!Op1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
V = ConstantExpr::getExtractElement(Op0, Op1);
break;
}
@@ -1464,7 +1497,7 @@ std::error_code BitcodeReader::ParseConstants() {
: { // CE_INSERTELT: [opval, opval, opty, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || !OpTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
OpTy->getElementType());
@@ -1472,19 +1505,19 @@ std::error_code BitcodeReader::ParseConstants() {
if (Record.size() == 4) {
Type *IdxTy = getTypeByID(Record[2]);
if (!IdxTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy);
} else // TODO: Remove with llvm 4.0
Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
if (!Op2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
break;
}
case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
if (Record.size() < 3 || !OpTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
@@ -1498,7 +1531,7 @@ std::error_code BitcodeReader::ParseConstants() {
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (Record.size() < 4 || !RTy || !OpTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
Type *ShufTy = VectorType::get(Type::getInt32Ty(Context),
@@ -1509,10 +1542,10 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred]
if (Record.size() < 4)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
if (!OpTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
@@ -1526,16 +1559,16 @@ std::error_code BitcodeReader::ParseConstants() {
// FIXME: Remove with the 4.0 release.
case bitc::CST_CODE_INLINEASM_OLD: {
if (Record.size() < 2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[0] & 1;
bool IsAlignStack = Record[0] >> 1;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
@@ -1550,17 +1583,17 @@ std::error_code BitcodeReader::ParseConstants() {
// inteldialect).
case bitc::CST_CODE_INLINEASM: {
if (Record.size() < 2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
std::string AsmStr, ConstrStr;
bool HasSideEffects = Record[0] & 1;
bool IsAlignStack = (Record[0] >> 1) & 1;
unsigned AsmDialect = Record[0] >> 2;
unsigned AsmStrSize = Record[1];
if (2+AsmStrSize >= Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned ConstStrSize = Record[2+AsmStrSize];
if (3+AsmStrSize+ConstStrSize > Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
for (unsigned i = 0; i != AsmStrSize; ++i)
AsmStr += (char)Record[2+i];
@@ -1574,35 +1607,46 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_BLOCKADDRESS:{
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *FnTy = getTypeByID(Record[0]);
if (!FnTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Function *Fn =
dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
if (!Fn)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
+
+ // Don't let Fn get dematerialized.
+ BlockAddressesTaken.insert(Fn);
// If the function is already parsed we can insert the block address right
// away.
+ BasicBlock *BB;
+ unsigned BBID = Record[2];
+ if (!BBID)
+ // Invalid reference to entry block.
+ return Error(BitcodeError::InvalidID);
if (!Fn->empty()) {
Function::iterator BBI = Fn->begin(), BBE = Fn->end();
- for (size_t I = 0, E = Record[2]; I != E; ++I) {
+ for (size_t I = 0, E = BBID; I != E; ++I) {
if (BBI == BBE)
- return Error(InvalidID);
+ return Error(BitcodeError::InvalidID);
++BBI;
}
- V = BlockAddress::get(Fn, BBI);
+ BB = BBI;
} else {
// Otherwise insert a placeholder and remember it so it can be inserted
// when the function is parsed.
- GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
- Type::getInt8Ty(Context),
- false, GlobalValue::InternalLinkage,
- nullptr, "");
- BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
- V = FwdRef;
+ auto &FwdBBs = BasicBlockFwdRefs[Fn];
+ if (FwdBBs.empty())
+ BasicBlockFwdRefQueue.push_back(Fn);
+ if (FwdBBs.size() < BBID + 1)
+ FwdBBs.resize(BBID + 1);
+ if (!FwdBBs[BBID])
+ FwdBBs[BBID] = BasicBlock::Create(Context);
+ BB = FwdBBs[BBID];
}
+ V = BlockAddress::get(Fn, BB);
break;
}
}
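The new BLOCKADDRESS path above no longer emits GlobalVariable stand-ins; it records unparented placeholder blocks per function, keyed by block index. A minimal stand-alone sketch of that bookkeeping, using stand-in types rather than llvm::Function/llvm::BasicBlock (illustration only, not the reader's code):

#include <cstddef>
#include <map>
#include <memory>
#include <vector>

struct Block {};   // stand-in for llvm::BasicBlock
struct Func {};    // stand-in for llvm::Function

// Forward-referenced blocks, keyed by function and block index.
std::map<Func *, std::vector<std::unique_ptr<Block>>> FwdRefs;

Block *getPlaceholder(Func *Fn, std::size_t BBID) {
  auto &FwdBBs = FwdRefs[Fn];
  if (FwdBBs.size() < BBID + 1)
    FwdBBs.resize(BBID + 1);
  if (!FwdBBs[BBID])
    FwdBBs[BBID].reset(new Block());   // created now, adopted later
  return FwdBBs[BBID].get();
}

When the function body is eventually parsed, the DECLAREBLOCKS handler further down in this diff adopts whatever placeholders exist for the matching indexes and creates fresh blocks for the rest.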
@@ -1614,18 +1658,17 @@ std::error_code BitcodeReader::ParseConstants() {
std::error_code BitcodeReader::ParseUseLists() {
if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
- return Error(InvalidRecord);
-
- SmallVector<uint64_t, 64> Record;
+ return Error(BitcodeError::InvalidRecord);
// Read all the records.
+ SmallVector<uint64_t, 64> Record;
while (1) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -1635,14 +1678,42 @@ std::error_code BitcodeReader::ParseUseLists() {
// Read a use list record.
Record.clear();
+ bool IsBB = false;
switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: unknown type.
break;
- case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
+ case bitc::USELIST_CODE_BB:
+ IsBB = true;
+ // fallthrough
+ case bitc::USELIST_CODE_DEFAULT: {
unsigned RecordLength = Record.size();
- if (RecordLength < 1)
- return Error(InvalidRecord);
- UseListRecords.push_back(Record);
+ if (RecordLength < 3)
+ // Records should have at least an ID and two indexes.
+ return Error(BitcodeError::InvalidRecord);
+ unsigned ID = Record.back();
+ Record.pop_back();
+
+ Value *V;
+ if (IsBB) {
+ assert(ID < FunctionBBs.size() && "Basic block not found");
+ V = FunctionBBs[ID];
+ } else
+ V = ValueList[ID];
+ unsigned NumUses = 0;
+ SmallDenseMap<const Use *, unsigned, 16> Order;
+ for (const Use &U : V->uses()) {
+ if (++NumUses > Record.size())
+ break;
+ Order[&U] = Record[NumUses - 1];
+ }
+ if (Order.size() != Record.size() || NumUses > Record.size())
+ // Mismatches can happen if the functions are being materialized lazily
+ // (out-of-order), or a value has been upgraded.
+ break;
+
+ V->sortUseList([&](const Use &L, const Use &R) {
+ return Order.lookup(&L) < Order.lookup(&R);
+ });
break;
}
}
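The new use-list records replace the old opaque USELIST_CODE_ENTRY payloads: each record carries one shuffle index per use plus a trailing value (or basic-block) ID, and the reader reorders that value's use list to match. A small self-contained sketch of the reordering, with std::list standing in for the intrusive use list (node addresses stay stable across sort, which is what makes the pointer-keyed map work):

#include <cstdint>
#include <list>
#include <map>
#include <string>
#include <vector>

int main() {
  std::list<std::string> Uses = {"use0", "use1", "use2"};
  std::vector<uint64_t> Record = {2, 0, 1};   // shuffle indexes from the record

  std::map<const std::string *, unsigned> Order;
  unsigned NumUses = 0;
  for (const std::string &U : Uses)
    Order[&U] = Record[NumUses++];

  // Mirrors V->sortUseList(...): smallest recorded index comes first.
  Uses.sort([&](const std::string &L, const std::string &R) {
    return Order.at(&L) < Order.at(&R);
  });
  // Uses is now {"use1", "use2", "use0"}.
}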
@@ -1655,7 +1726,7 @@ std::error_code BitcodeReader::ParseUseLists() {
std::error_code BitcodeReader::RememberAndSkipFunctionBody() {
// Get the function we are talking about.
if (FunctionsWithBodies.empty())
- return Error(InsufficientFunctionProtos);
+ return Error(BitcodeError::InsufficientFunctionProtos);
Function *Fn = FunctionsWithBodies.back();
FunctionsWithBodies.pop_back();
@@ -1666,7 +1737,7 @@ std::error_code BitcodeReader::RememberAndSkipFunctionBody() {
// Skip over the function block for now.
if (Stream.SkipBlock())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
return std::error_code();
}
@@ -1674,7 +1745,7 @@ std::error_code BitcodeReader::GlobalCleanup() {
// Patch the initializers for globals and aliases up.
ResolveGlobalAndAliasInits();
if (!GlobalInits.empty() || !AliasInits.empty())
- return Error(MalformedGlobalInitializerSet);
+ return Error(BitcodeError::MalformedGlobalInitializerSet);
// Look for intrinsic functions which need to be upgraded at some point
for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
@@ -1703,7 +1774,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
if (Resume)
Stream.JumpToBit(NextUnreadBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<uint64_t, 64> Record;
std::vector<std::string> SectionTable;
@@ -1715,7 +1786,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return GlobalCleanup();
@@ -1723,11 +1794,11 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
break;
case bitc::BLOCKINFO_BLOCK_ID:
if (Stream.ReadBlockInfoBlock())
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
break;
case bitc::PARAMATTR_BLOCK_ID:
if (std::error_code EC = ParseAttributeBlock())
@@ -1797,12 +1868,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
if (Record.size() < 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
// Only version #0 and #1 are supported so far.
unsigned module_version = Record[0];
switch (module_version) {
default:
- return Error(InvalidValue);
+ return Error(BitcodeError::InvalidValue);
case 0:
UseRelativeIDs = false;
break;
@@ -1815,21 +1886,21 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
TheModule->setTargetTriple(S);
break;
}
case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
TheModule->setDataLayout(S);
break;
}
case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
TheModule->setModuleInlineAsm(S);
break;
}
@@ -1837,27 +1908,27 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
// FIXME: Remove in 4.0.
std::string S;
if (ConvertToString(Record, 0, S))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
// Ignore value.
break;
}
case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SectionTable.push_back(S);
break;
}
case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
GCTable.push_back(S);
break;
}
case bitc::MODULE_CODE_COMDAT: { // COMDAT: [selection_kind, name]
if (Record.size() < 2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]);
unsigned ComdatNameSize = Record[1];
std::string ComdatName;
@@ -1874,12 +1945,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
// unnamed_addr, dllstorageclass]
case bitc::MODULE_CODE_GLOBALVAR: {
if (Record.size() < 6)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (!Ty->isPointerTy())
- return Error(InvalidTypeForValue);
+ return Error(BitcodeError::InvalidTypeForValue);
unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
Ty = cast<PointerType>(Ty)->getElementType();
@@ -1889,7 +1960,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
std::string Section;
if (Record[5]) {
if (Record[5]-1 >= SectionTable.size())
- return Error(InvalidID);
+ return Error(BitcodeError::InvalidID);
Section = SectionTable[Record[5]-1];
}
GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
@@ -1942,16 +2013,16 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
// dllstorageclass]
case bitc::MODULE_CODE_FUNCTION: {
if (Record.size() < 8)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (!Ty->isPointerTy())
- return Error(InvalidTypeForValue);
+ return Error(BitcodeError::InvalidTypeForValue);
FunctionType *FTy =
dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
if (!FTy)
- return Error(InvalidTypeForValue);
+ return Error(BitcodeError::InvalidTypeForValue);
Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
"", TheModule);
@@ -1964,7 +2035,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
Func->setAlignment((1 << Record[5]) >> 1);
if (Record[6]) {
if (Record[6]-1 >= SectionTable.size())
- return Error(InvalidID);
+ return Error(BitcodeError::InvalidID);
Func->setSection(SectionTable[Record[6]-1]);
}
// Local linkage must have default visibility.
@@ -1973,7 +2044,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
Func->setVisibility(GetDecodedVisibility(Record[7]));
if (Record.size() > 8 && Record[8]) {
if (Record[8]-1 > GCTable.size())
- return Error(InvalidID);
+ return Error(BitcodeError::InvalidID);
Func->setGC(GCTable[Record[8]-1].c_str());
}
bool UnnamedAddr = false;
@@ -1999,8 +2070,10 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
// If this is a function with a body, remember the prototype we are
// creating now, so that we can match up the body with them later.
if (!isProto) {
+ Func->setIsMaterializable(true);
FunctionsWithBodies.push_back(Func);
- if (LazyStreamer) DeferredFunctionInfo[Func] = 0;
+ if (LazyStreamer)
+ DeferredFunctionInfo[Func] = 0;
}
break;
}
@@ -2008,13 +2081,13 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
// ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass]
case bitc::MODULE_CODE_ALIAS: {
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
auto *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
- return Error(InvalidTypeForValue);
+ return Error(BitcodeError::InvalidTypeForValue);
auto *NewGA =
GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
@@ -2029,9 +2102,9 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
else
UpgradeDLLImportExportLinkage(NewGA, Record[2]);
if (Record.size() > 5)
- NewGA->setThreadLocalMode(GetDecodedThreadLocalMode(Record[5]));
+ NewGA->setThreadLocalMode(GetDecodedThreadLocalMode(Record[5]));
if (Record.size() > 6)
- NewGA->setUnnamedAddr(Record[6]);
+ NewGA->setUnnamedAddr(Record[6]);
ValueList.push_back(NewGA);
AliasInits.push_back(std::make_pair(NewGA, Record[1]));
break;
@@ -2040,7 +2113,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
case bitc::MODULE_CODE_PURGEVALS:
// Trim down the value list to the specified size.
if (Record.size() < 1 || Record[0] > ValueList.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
ValueList.shrinkTo(Record[0]);
break;
}
@@ -2061,7 +2134,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
Stream.Read(4) != 0xC ||
Stream.Read(4) != 0xE ||
Stream.Read(4) != 0xD)
- return Error(InvalidBitcodeSignature);
+ return Error(BitcodeError::InvalidBitcodeSignature);
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
@@ -2074,7 +2147,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return std::error_code();
@@ -2082,12 +2155,12 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
switch (Entry.ID) {
case bitc::BLOCKINFO_BLOCK_ID:
if (Stream.ReadBlockInfoBlock())
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
break;
case bitc::MODULE_BLOCK_ID:
// Reject multiple MODULE_BLOCK's in a single bitstream.
if (TheModule)
- return Error(InvalidMultipleBlocks);
+ return Error(BitcodeError::InvalidMultipleBlocks);
TheModule = M;
if (std::error_code EC = ParseModule(false))
return EC;
@@ -2096,7 +2169,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
break;
default:
if (Stream.SkipBlock())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
break;
}
continue;
@@ -2111,14 +2184,14 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
Stream.AtEndOfStream())
return std::error_code();
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
}
}
}
ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<uint64_t, 64> Record;
@@ -2130,7 +2203,7 @@ ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return Triple;
case BitstreamEntry::Record:
@@ -2144,7 +2217,7 @@ ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
if (ConvertToString(Record, 0, S))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Triple = S;
break;
}
@@ -2165,7 +2238,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
Stream.Read(4) != 0xC ||
Stream.Read(4) != 0xE ||
Stream.Read(4) != 0xD)
- return Error(InvalidBitcodeSignature);
+ return Error(BitcodeError::InvalidBitcodeSignature);
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
@@ -2174,7 +2247,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return std::error_code();
@@ -2184,7 +2257,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
// Ignore other sub-blocks.
if (Stream.SkipBlock())
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
continue;
case BitstreamEntry::Record:
@@ -2197,7 +2270,7 @@ ErrorOr<std::string> BitcodeReader::parseTriple() {
/// ParseMetadataAttachment - Parse metadata attachments.
std::error_code BitcodeReader::ParseMetadataAttachment() {
if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<uint64_t, 64> Record;
while (1) {
@@ -2206,7 +2279,7 @@ std::error_code BitcodeReader::ParseMetadataAttachment() {
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
return std::error_code();
case BitstreamEntry::Record:
@@ -2222,14 +2295,14 @@ std::error_code BitcodeReader::ParseMetadataAttachment() {
case bitc::METADATA_ATTACHMENT: {
unsigned RecordLength = Record.size();
if (Record.empty() || (RecordLength - 1) % 2 == 1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Instruction *Inst = InstructionList[Record[0]];
for (unsigned i = 1; i != RecordLength; i = i+2) {
unsigned Kind = Record[i];
DenseMap<unsigned, unsigned>::iterator I =
MDKindMap.find(Kind);
if (I == MDKindMap.end())
- return Error(InvalidID);
+ return Error(BitcodeError::InvalidID);
Value *Node = MDValueList.getValueFwdRef(Record[i+1]);
Inst->setMetadata(I->second, cast<MDNode>(Node));
if (I->second == LLVMContext::MD_tbaa)
@@ -2244,7 +2317,7 @@ std::error_code BitcodeReader::ParseMetadataAttachment() {
/// ParseFunctionBody - Lazily parse the specified function body block.
std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
@@ -2267,7 +2340,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error(MalformedBlock);
+ return Error(BitcodeError::MalformedBlock);
case BitstreamEntry::EndBlock:
goto OutOfRecordLoop;
@@ -2275,7 +2348,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
break;
case bitc::CONSTANTS_BLOCK_ID:
if (std::error_code EC = ParseConstants())
@@ -2294,6 +2367,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (std::error_code EC = ParseMetadata())
return EC;
break;
+ case bitc::USELIST_BLOCK_ID:
+ if (std::error_code EC = ParseUseLists())
+ return EC;
+ break;
}
continue;
@@ -2308,16 +2385,41 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default: // Default behavior: reject
- return Error(InvalidValue);
- case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks]
+ return Error(BitcodeError::InvalidValue);
+ case bitc::FUNC_CODE_DECLAREBLOCKS: { // DECLAREBLOCKS: [nblocks]
if (Record.size() < 1 || Record[0] == 0)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
// Create all the basic blocks for the function.
FunctionBBs.resize(Record[0]);
- for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
- FunctionBBs[i] = BasicBlock::Create(Context, "", F);
+
+ // See if anything took the address of blocks in this function.
+ auto BBFRI = BasicBlockFwdRefs.find(F);
+ if (BBFRI == BasicBlockFwdRefs.end()) {
+ for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
+ FunctionBBs[i] = BasicBlock::Create(Context, "", F);
+ } else {
+ auto &BBRefs = BBFRI->second;
+ // Check for invalid basic block references.
+ if (BBRefs.size() > FunctionBBs.size())
+ return Error(BitcodeError::InvalidID);
+ assert(!BBRefs.empty() && "Unexpected empty array");
+ assert(!BBRefs.front() && "Invalid reference to entry block");
+ for (unsigned I = 0, E = FunctionBBs.size(), RE = BBRefs.size(); I != E;
+ ++I)
+ if (I < RE && BBRefs[I]) {
+ BBRefs[I]->insertInto(F);
+ FunctionBBs[I] = BBRefs[I];
+ } else {
+ FunctionBBs[I] = BasicBlock::Create(Context, "", F);
+ }
+
+ // Erase from the table.
+ BasicBlockFwdRefs.erase(BBFRI);
+ }
+
CurBB = FunctionBBs[0];
continue;
+ }
case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
// This record indicates that the last instruction is at the same
@@ -2332,7 +2434,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
I = &FunctionBBs[CurBBNo-1]->back();
if (!I)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I->setDebugLoc(LastLoc);
I = nullptr;
continue;
@@ -2345,7 +2447,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
!FunctionBBs[CurBBNo-1]->empty())
I = &FunctionBBs[CurBBNo-1]->back();
if (!I || Record.size() < 4)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned Line = Record[0], Col = Record[1];
unsigned ScopeID = Record[2], IAID = Record[3];
@@ -2365,11 +2467,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 > Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType());
if (Opc == -1)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
InstructionList.push_back(I);
if (OpNum < Record.size()) {
@@ -2411,12 +2513,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+2 != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *ResTy = getTypeByID(Record[OpNum]);
int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
if (Opc == -1 || !ResTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Instruction *Temp = nullptr;
if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) {
if (Temp) {
@@ -2434,13 +2536,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *BasePtr;
if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<Value*, 16> GEPIdx;
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
GEPIdx.push_back(Op);
}
@@ -2456,14 +2558,14 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Agg;
if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<unsigned, 4> EXTRACTVALIdx;
for (unsigned RecSize = Record.size();
OpNum != RecSize; ++OpNum) {
uint64_t Index = Record[OpNum];
if ((unsigned)Index != Index)
- return Error(InvalidValue);
+ return Error(BitcodeError::InvalidValue);
EXTRACTVALIdx.push_back((unsigned)Index);
}
@@ -2477,17 +2579,17 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Agg;
if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Value *Val;
if (getValueTypePair(Record, OpNum, NextValueNo, Val))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<unsigned, 4> INSERTVALIdx;
for (unsigned RecSize = Record.size();
OpNum != RecSize; ++OpNum) {
uint64_t Index = Record[OpNum];
if ((unsigned)Index != Index)
- return Error(InvalidValue);
+ return Error(BitcodeError::InvalidValue);
INSERTVALIdx.push_back((unsigned)Index);
}
@@ -2504,7 +2606,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = SelectInst::Create(Cond, TrueVal, FalseVal);
InstructionList.push_back(I);
@@ -2519,18 +2621,18 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
getValueTypePair(Record, OpNum, NextValueNo, Cond))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
// select condition can be either i1 or [N x i1]
if (VectorType* vector_type =
dyn_cast<VectorType>(Cond->getType())) {
// expect <n x i1>
if (vector_type->getElementType() != Type::getInt1Ty(Context))
- return Error(InvalidTypeForValue);
+ return Error(BitcodeError::InvalidTypeForValue);
} else {
// expect i1
if (Cond->getType() != Type::getInt1Ty(Context))
- return Error(InvalidTypeForValue);
+ return Error(BitcodeError::InvalidTypeForValue);
}
I = SelectInst::Create(Cond, TrueVal, FalseVal);
@@ -2543,7 +2645,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Vec, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
getValueTypePair(Record, OpNum, NextValueNo, Idx))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = ExtractElementInst::Create(Vec, Idx);
InstructionList.push_back(I);
break;
@@ -2556,7 +2658,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
getValueTypePair(Record, OpNum, NextValueNo, Idx))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = InsertElementInst::Create(Vec, Elt, Idx);
InstructionList.push_back(I);
break;
@@ -2567,10 +2669,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Vec1, *Vec2, *Mask;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = new ShuffleVectorInst(Vec1, Vec2, Mask);
InstructionList.push_back(I);
break;
@@ -2588,7 +2690,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
OpNum+1 != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (LHS->getType()->isFPOrFPVectorTy())
I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
@@ -2610,9 +2712,9 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Op = nullptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (OpNum != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = ReturnInst::Create(Context, Op);
InstructionList.push_back(I);
@@ -2620,10 +2722,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#]
if (Record.size() != 1 && Record.size() != 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
BasicBlock *TrueDest = getBasicBlock(Record[0]);
if (!TrueDest)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (Record.size() == 1) {
I = BranchInst::Create(TrueDest);
@@ -2634,7 +2736,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Cond = getValue(Record, 2, NextValueNo,
Type::getInt1Ty(Context));
if (!FalseDest || !Cond)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = BranchInst::Create(TrueDest, FalseDest, Cond);
InstructionList.push_back(I);
}
@@ -2654,7 +2756,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Cond = getValue(Record, 2, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[3]);
if (!OpTy || !Cond || !Default)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned NumCases = Record[4];
@@ -2706,12 +2808,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
// Old SwitchInst format without case ranges.
if (Record.size() < 3 || (Record.size() & 1) == 0)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
Value *Cond = getValue(Record, 1, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[2]);
if (!OpTy || !Cond || !Default)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned NumCases = (Record.size()-3)/2;
SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
InstructionList.push_back(SI);
@@ -2721,7 +2823,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
if (!CaseVal || !DestBB) {
delete SI;
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
}
SI->addCase(CaseVal, DestBB);
}
@@ -2730,11 +2832,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...]
if (Record.size() < 2)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
Value *Address = getValue(Record, 1, NextValueNo, OpTy);
if (!OpTy || !Address)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
unsigned NumDests = Record.size()-2;
IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests);
InstructionList.push_back(IBI);
@@ -2743,7 +2845,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
IBI->addDestination(DestBB);
} else {
delete IBI;
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
}
}
I = IBI;
@@ -2753,7 +2855,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_INVOKE: {
// INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
if (Record.size() < 4)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
AttributeSet PAL = getAttributes(Record[0]);
unsigned CCInfo = Record[1];
BasicBlock *NormalBB = getBasicBlock(Record[2]);
@@ -2762,7 +2864,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 4;
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
FunctionType *FTy = !CalleeTy ? nullptr :
@@ -2771,25 +2873,25 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
// Check that the right number of fixed parameters are here.
if (!FTy || !NormalBB || !UnwindBB ||
Record.size() < OpNum+FTy->getNumParams())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<Value*, 16> Ops;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
Ops.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
if (!Ops.back())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
}
if (!FTy->isVarArg()) {
if (Record.size() != OpNum)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
} else {
// Read type/value pairs for varargs params.
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Ops.push_back(Op);
}
}
@@ -2805,7 +2907,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned Idx = 0;
Value *Val = nullptr;
if (getValueTypePair(Record, Idx, NextValueNo, Val))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = ResumeInst::Create(Val);
InstructionList.push_back(I);
break;
@@ -2816,10 +2918,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
break;
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
if (Record.size() < 1 || ((Record.size()-1)&1))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2);
InstructionList.push_back(PN);
@@ -2835,7 +2937,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
V = getValue(Record, 1+i, NextValueNo, Ty);
BasicBlock *BB = getBasicBlock(Record[2+i]);
if (!V || !BB)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
PN->addIncoming(V, BB);
}
I = PN;
@@ -2846,13 +2948,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
// LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?]
unsigned Idx = 0;
if (Record.size() < 4)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *Ty = getTypeByID(Record[Idx++]);
if (!Ty)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Value *PersFn = nullptr;
if (getValueTypePair(Record, Idx, NextValueNo, PersFn))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
bool IsCleanup = !!Record[Idx++];
unsigned NumClauses = Record[Idx++];
@@ -2865,7 +2967,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, Idx, NextValueNo, Val)) {
delete LP;
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
}
assert((CT != LandingPadInst::Catch ||
@@ -2884,15 +2986,19 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
if (Record.size() != 4)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
PointerType *Ty =
dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
- unsigned Align = Record[3];
+ unsigned AlignRecord = Record[3];
+ bool InAlloca = AlignRecord & (1 << 5);
+ unsigned Align = AlignRecord & ((1 << 5) - 1);
if (!Ty || !Size)
- return Error(InvalidRecord);
- I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ return Error(BitcodeError::InvalidRecord);
+ AllocaInst *AI = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
+ AI->setUsedWithInAlloca(InAlloca);
+ I = AI;
InstructionList.push_back(I);
break;
}
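The ALLOCA change above starts treating the fourth record operand as a small bitfield rather than a bare alignment exponent: bit 5 carries the new inalloca flag and the low five bits keep the log2(alignment)+1 encoding. A minimal sketch of that decoding in isolation (plain C++, not the reader itself):

#include <cstdint>

struct DecodedAllocaAlign {
  bool InAlloca;
  uint64_t Alignment;   // bytes; 0 means "no alignment recorded"
};

DecodedAllocaAlign decodeAlignRecord(uint64_t AlignRecord) {
  bool InAlloca = AlignRecord & (1 << 5);
  uint64_t AlignField = AlignRecord & ((1 << 5) - 1);
  // The field stores log2(align) + 1, so shifting 1 left by the field and
  // back right by one turns 0 into "unaligned" and N+1 into 1 << N.
  return {InAlloca, (uint64_t(1) << AlignField) >> 1};
}

For example, an on-disk value of 0x24 decodes to inalloca with 8-byte alignment (bit 5 set, field value 4).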
@@ -2901,7 +3007,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+2 != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
InstructionList.push_back(I);
@@ -2913,15 +3019,14 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+4 != Record.size())
- return Error(InvalidRecord);
-
+ return Error(BitcodeError::InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Release ||
Ordering == AcquireRelease)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
if (Ordering != NotAtomic && Record[OpNum] == 0)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1,
@@ -2936,7 +3041,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+2 != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
InstructionList.push_back(I);
@@ -2950,15 +3055,15 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Acquire ||
Ordering == AcquireRelease)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
if (Ordering != NotAtomic && Record[OpNum] == 0)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1,
Ordering, SynchScope);
@@ -2976,10 +3081,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), New) ||
(Record.size() < OpNum + 3 || Record.size() > OpNum + 5))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
AtomicOrdering SuccessOrdering = GetDecodedOrdering(Record[OpNum+1]);
if (SuccessOrdering == NotAtomic || SuccessOrdering == Unordered)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]);
AtomicOrdering FailureOrdering;
@@ -3014,14 +3119,14 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
OpNum+4 != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]);
if (Operation < AtomicRMWInst::FIRST_BINOP ||
Operation > AtomicRMWInst::LAST_BINOP)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Unordered)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope);
cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
@@ -3030,11 +3135,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope]
if (2 != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
AtomicOrdering Ordering = GetDecodedOrdering(Record[0]);
if (Ordering == NotAtomic || Ordering == Unordered ||
Ordering == Monotonic)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]);
I = new FenceInst(Context, Ordering, SynchScope);
InstructionList.push_back(I);
@@ -3043,7 +3148,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_CALL: {
// CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
AttributeSet PAL = getAttributes(Record[0]);
unsigned CCInfo = Record[1];
@@ -3051,13 +3156,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 2;
Value *Callee;
if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
FunctionType *FTy = nullptr;
if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
SmallVector<Value*, 16> Args;
// Read the fixed params.
@@ -3068,18 +3173,18 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
Args.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
if (!Args.back())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
}
// Read type/value pairs for varargs params.
if (!FTy->isVarArg()) {
if (OpNum != Record.size())
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
} else {
while (OpNum != Record.size()) {
Value *Op;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Args.push_back(Op);
}
}
@@ -3099,12 +3204,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty]
if (Record.size() < 3)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
Value *Op = getValue(Record, 1, NextValueNo, OpTy);
Type *ResTy = getTypeByID(Record[2]);
if (!OpTy || !Op || !ResTy)
- return Error(InvalidRecord);
+ return Error(BitcodeError::InvalidRecord);
I = new VAArgInst(Op, ResTy);
InstructionList.push_back(I);
break;
@@ -3115,7 +3220,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
// this file.
if (!CurBB) {
delete I;
- return Error(InvalidInstructionWithNoBB);
+ return Error(BitcodeError::InvalidInstructionWithNoBB);
}
CurBB->getInstList().push_back(I);
@@ -3142,32 +3247,13 @@ OutOfRecordLoop:
delete A;
}
}
- return Error(NeverResolvedValueFoundInFunction);
+ return Error(BitcodeError::NeverResolvedValueFoundInFunction);
}
}
// FIXME: Check for unresolved forward-declared metadata references
// and clean up leaks.
- // See if anything took the address of blocks in this function. If so,
- // resolve them now.
- DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI =
- BlockAddrFwdRefs.find(F);
- if (BAFRI != BlockAddrFwdRefs.end()) {
- std::vector<BlockAddrRefTy> &RefList = BAFRI->second;
- for (unsigned i = 0, e = RefList.size(); i != e; ++i) {
- unsigned BlockIdx = RefList[i].first;
- if (BlockIdx >= FunctionBBs.size())
- return Error(InvalidID);
-
- GlobalVariable *FwdRef = RefList[i].second;
- FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx]));
- FwdRef->eraseFromParent();
- }
-
- BlockAddrFwdRefs.erase(BAFRI);
- }
-
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
MDValueList.shrinkTo(ModuleMDValueListSize);
@@ -3181,7 +3267,7 @@ std::error_code BitcodeReader::FindFunctionInStream(
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
while (DeferredFunctionInfoIterator->second == 0) {
if (Stream.AtEndOfStream())
- return Error(CouldNotFindFunctionInStream);
+ return Error(BitcodeError::CouldNotFindFunctionInStream);
// ParseModule will parse the next body in the stream and set its
// position in the DeferredFunctionInfo map.
if (std::error_code EC = ParseModule(true))
@@ -3196,15 +3282,7 @@ std::error_code BitcodeReader::FindFunctionInStream(
void BitcodeReader::releaseBuffer() { Buffer.release(); }
-bool BitcodeReader::isMaterializable(const GlobalValue *GV) const {
- if (const Function *F = dyn_cast<Function>(GV)) {
- return F->isDeclaration() &&
- DeferredFunctionInfo.count(const_cast<Function*>(F));
- }
- return false;
-}
-
-std::error_code BitcodeReader::Materialize(GlobalValue *GV) {
+std::error_code BitcodeReader::materialize(GlobalValue *GV) {
Function *F = dyn_cast<Function>(GV);
// If it's not a function or is already material, ignore the request.
if (!F || !F->isMaterializable())
@@ -3223,6 +3301,7 @@ std::error_code BitcodeReader::Materialize(GlobalValue *GV) {
if (std::error_code EC = ParseFunctionBody(F))
return EC;
+ F->setIsMaterializable(false);
// Upgrade any old intrinsic calls in the function.
for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
@@ -3236,13 +3315,21 @@ std::error_code BitcodeReader::Materialize(GlobalValue *GV) {
}
}
- return std::error_code();
+ // Bring in any functions that this function forward-referenced via
+ // blockaddresses.
+ return materializeForwardReferencedFunctions();
}
bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
const Function *F = dyn_cast<Function>(GV);
if (!F || F->isDeclaration())
return false;
+
+ // Dematerializing F would leave dangling references that wouldn't be
+ // reconnected on re-materialization.
+ if (BlockAddressesTaken.count(F))
+ return false;
+
return DeferredFunctionInfo.count(const_cast<Function*>(F));
}
@@ -3255,20 +3342,23 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) {
assert(DeferredFunctionInfo.count(F) && "No info to read function later?");
// Just forget the function body, we can remat it later.
- F->deleteBody();
+ F->dropAllReferences();
+ F->setIsMaterializable(true);
}
std::error_code BitcodeReader::MaterializeModule(Module *M) {
assert(M == TheModule &&
"Can only Materialize the Module this BitcodeReader is attached to.");
+
+ // Promise to materialize all forward references.
+ WillMaterializeAllForwardRefs = true;
+
// Iterate over the module, deserializing any functions that are still on
// disk.
for (Module::iterator F = TheModule->begin(), E = TheModule->end();
F != E; ++F) {
- if (F->isMaterializable()) {
- if (std::error_code EC = Materialize(F))
- return EC;
- }
+ if (std::error_code EC = materialize(F))
+ return EC;
}
// At this point, if there are any function bodies, the current bit is
// pointing to the END_BLOCK record after them. Now make sure the rest
@@ -3276,6 +3366,11 @@ std::error_code BitcodeReader::MaterializeModule(Module *M) {
if (NextUnreadBit)
ParseModule(true);
+ // Check that all block address forward references got resolved (as we
+ // promised above).
+ if (!BasicBlockFwdRefs.empty())
+ return Error(BitcodeError::NeverResolvedFunctionFromBlockAddress);
+
// Upgrade any intrinsic calls that slipped through (should not happen!) and
// delete the old functions to clean up. We can't do this unless the entire
// module is materialized because there could always be another function body
@@ -3312,21 +3407,17 @@ std::error_code BitcodeReader::InitStreamFromBuffer() {
const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart();
const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
- if (Buffer->getBufferSize() & 3) {
- if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
- return Error(InvalidBitcodeSignature);
- else
- return Error(BitcodeStreamInvalidSize);
- }
+ if (Buffer->getBufferSize() & 3)
+ return Error(BitcodeError::InvalidBitcodeSignature);
// If we have a wrapper header, parse it and ignore the non-bc file contents.
// The magic number is 0x0B17C0DE stored in little endian.
if (isBitcodeWrapper(BufPtr, BufEnd))
if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
- return Error(InvalidBitcodeWrapperHeader);
+ return Error(BitcodeError::InvalidBitcodeWrapperHeader);
StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
- Stream.init(*StreamFile);
+ Stream.init(&*StreamFile);
return std::error_code();
}
@@ -3336,14 +3427,14 @@ std::error_code BitcodeReader::InitLazyStream() {
// see it.
StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer);
StreamFile.reset(new BitstreamReader(Bytes));
- Stream.init(*StreamFile);
+ Stream.init(&*StreamFile);
unsigned char buf[16];
- if (Bytes->readBytes(0, 16, buf) == -1)
- return Error(BitcodeStreamInvalidSize);
+ if (Bytes->readBytes(buf, 16, 0) != 16)
+ return Error(BitcodeError::InvalidBitcodeSignature);
if (!isBitcode(buf, buf + 16))
- return Error(InvalidBitcodeSignature);
+ return Error(BitcodeError::InvalidBitcodeSignature);
if (isBitcodeWrapper(buf, buf + 4)) {
const unsigned char *bitcodeStart = buf;
@@ -3361,45 +3452,45 @@ class BitcodeErrorCategoryType : public std::error_category {
return "llvm.bitcode";
}
std::string message(int IE) const override {
- BitcodeReader::ErrorType E = static_cast<BitcodeReader::ErrorType>(IE);
+ BitcodeError E = static_cast<BitcodeError>(IE);
switch (E) {
- case BitcodeReader::BitcodeStreamInvalidSize:
- return "Bitcode stream length should be >= 16 bytes and a multiple of 4";
- case BitcodeReader::ConflictingMETADATA_KINDRecords:
+ case BitcodeError::ConflictingMETADATA_KINDRecords:
return "Conflicting METADATA_KIND records";
- case BitcodeReader::CouldNotFindFunctionInStream:
+ case BitcodeError::CouldNotFindFunctionInStream:
return "Could not find function in stream";
- case BitcodeReader::ExpectedConstant:
+ case BitcodeError::ExpectedConstant:
return "Expected a constant";
- case BitcodeReader::InsufficientFunctionProtos:
+ case BitcodeError::InsufficientFunctionProtos:
return "Insufficient function protos";
- case BitcodeReader::InvalidBitcodeSignature:
+ case BitcodeError::InvalidBitcodeSignature:
return "Invalid bitcode signature";
- case BitcodeReader::InvalidBitcodeWrapperHeader:
+ case BitcodeError::InvalidBitcodeWrapperHeader:
return "Invalid bitcode wrapper header";
- case BitcodeReader::InvalidConstantReference:
+ case BitcodeError::InvalidConstantReference:
return "Invalid ronstant reference";
- case BitcodeReader::InvalidID:
+ case BitcodeError::InvalidID:
return "Invalid ID";
- case BitcodeReader::InvalidInstructionWithNoBB:
+ case BitcodeError::InvalidInstructionWithNoBB:
return "Invalid instruction with no BB";
- case BitcodeReader::InvalidRecord:
+ case BitcodeError::InvalidRecord:
return "Invalid record";
- case BitcodeReader::InvalidTypeForValue:
+ case BitcodeError::InvalidTypeForValue:
return "Invalid type for value";
- case BitcodeReader::InvalidTYPETable:
+ case BitcodeError::InvalidTYPETable:
return "Invalid TYPE table";
- case BitcodeReader::InvalidType:
+ case BitcodeError::InvalidType:
return "Invalid type";
- case BitcodeReader::MalformedBlock:
+ case BitcodeError::MalformedBlock:
return "Malformed block";
- case BitcodeReader::MalformedGlobalInitializerSet:
+ case BitcodeError::MalformedGlobalInitializerSet:
return "Malformed global initializer set";
- case BitcodeReader::InvalidMultipleBlocks:
+ case BitcodeError::InvalidMultipleBlocks:
return "Invalid multiple blocks";
- case BitcodeReader::NeverResolvedValueFoundInFunction:
+ case BitcodeError::NeverResolvedValueFoundInFunction:
return "Never resolved value found in function";
- case BitcodeReader::InvalidValue:
+ case BitcodeError::NeverResolvedFunctionFromBlockAddress:
+ return "Never resolved function from blockaddress";
+ case BitcodeError::InvalidValue:
return "Invalid value";
}
llvm_unreachable("Unknown error type!");
@@ -3407,33 +3498,54 @@ class BitcodeErrorCategoryType : public std::error_category {
};
}
-const std::error_category &BitcodeReader::BitcodeErrorCategory() {
- static BitcodeErrorCategoryType O;
- return O;
+static ManagedStatic<BitcodeErrorCategoryType> ErrorCategory;
+
+const std::error_category &llvm::BitcodeErrorCategory() {
+ return *ErrorCategory;
}
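With the enum promoted out of BitcodeReader and the category held in a ManagedStatic, callers can build portable error codes without touching the reader class at all. A rough usage sketch, assuming BitcodeError and BitcodeErrorCategory() are exposed from the public bitcode header (as the llvm:: qualification above suggests):

#include <cassert>
#include <system_error>
// ...plus the LLVM header that declares llvm::BitcodeError and
// llvm::BitcodeErrorCategory() at this revision.

void example() {
  std::error_code EC(static_cast<int>(llvm::BitcodeError::InvalidRecord),
                     llvm::BitcodeErrorCategory());
  assert(EC.message() == "Invalid record");
}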
//===----------------------------------------------------------------------===//
// External interface
//===----------------------------------------------------------------------===//
-/// getLazyBitcodeModule - lazy function-at-a-time loading from a file.
+/// \brief Get a lazy one-at-a-time loading module from bitcode.
///
-ErrorOr<Module *> llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
- LLVMContext &Context) {
+/// This isn't always used in a lazy context. In particular, it's also used by
+/// \a parseBitcodeFile(). If this is truly lazy, then we need to eagerly pull
+/// in forward-referenced functions from block address references.
+///
+/// \param[in] WillMaterializeAll Set to \c true if the caller promises to
+/// materialize everything -- in particular, if this isn't truly lazy.
+static ErrorOr<Module *>
+getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
+ LLVMContext &Context, bool WillMaterializeAll) {
Module *M = new Module(Buffer->getBufferIdentifier(), Context);
- BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ BitcodeReader *R = new BitcodeReader(Buffer.get(), Context);
M->setMaterializer(R);
- if (std::error_code EC = R->ParseBitcodeInto(M)) {
+
+ auto cleanupOnError = [&](std::error_code EC) {
R->releaseBuffer(); // Never take ownership on error.
delete M; // Also deletes R.
return EC;
- }
+ };
+
+ if (std::error_code EC = R->ParseBitcodeInto(M))
+ return cleanupOnError(EC);
- R->materializeForwardReferencedFunctions();
+ if (!WillMaterializeAll)
+ // Resolve forward references from blockaddresses.
+ if (std::error_code EC = R->materializeForwardReferencedFunctions())
+ return cleanupOnError(EC);
+ Buffer.release(); // The BitcodeReader owns it now.
return M;
}
+ErrorOr<Module *>
+llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
+ LLVMContext &Context) {
+ return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false);
+}
Module *llvm::getStreamedBitcodeModule(const std::string &name,
DataStreamer *streamer,
@@ -3451,14 +3563,16 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name,
return M;
}
-ErrorOr<Module *> llvm::parseBitcodeFile(MemoryBuffer *Buffer,
+ErrorOr<Module *> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
LLVMContext &Context) {
- ErrorOr<Module *> ModuleOrErr = getLazyBitcodeModule(Buffer, Context);
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+ ErrorOr<Module *> ModuleOrErr =
+ getLazyBitcodeModuleImpl(std::move(Buf), Context, true);
if (!ModuleOrErr)
return ModuleOrErr;
Module *M = ModuleOrErr.get();
// Read in the entire module, and destroy the BitcodeReader.
- if (std::error_code EC = M->materializeAllPermanently(true)) {
+ if (std::error_code EC = M->materializeAllPermanently()) {
delete M;
return EC;
}
@@ -3469,12 +3583,11 @@ ErrorOr<Module *> llvm::parseBitcodeFile(MemoryBuffer *Buffer,
return M;
}
-std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer,
+std::string llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer,
LLVMContext &Context) {
- BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+ auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context);
ErrorOr<std::string> Triple = R->parseTriple();
- R->releaseBuffer();
- delete R;
if (Triple.getError())
return "";
return Triple.get();
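
Callers now hand owned buffers to the lazy path and non-owning MemoryBufferRefs to the eager path. A hedged usage sketch against the signatures shown in this patch; loadModule and its minimal error handling are illustrative, and MemoryBuffer::getMemBufferRef() is assumed to be available in this tree:

#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
#include <utility>

static llvm::Module *loadModule(const char *Path, llvm::LLVMContext &Ctx,
                                bool Lazy) {
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufOrErr =
      llvm::MemoryBuffer::getFile(Path);
  if (!BufOrErr)
    return nullptr;

  if (Lazy) {
    // On success the reader takes ownership of the buffer; on error the
    // unique_ptr still owns it and frees it here.
    llvm::ErrorOr<llvm::Module *> M =
        llvm::getLazyBitcodeModule(std::move(*BufOrErr), Ctx);
    return M ? *M : nullptr;
  }

  // parseBitcodeFile takes a non-owning MemoryBufferRef and materializes
  // the whole module before returning.
  llvm::ErrorOr<llvm::Module *> M =
      llvm::parseBitcodeFile((*BufOrErr)->getMemBufferRef(), Ctx);
  return M ? *M : nullptr;
}
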
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 1d4869a..047fef8 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef BITCODE_READER_H
-#define BITCODE_READER_H
+#ifndef LLVM_LIB_BITCODE_READER_BITCODEREADER_H
+#define LLVM_LIB_BITCODE_READER_BITCODEREADER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/Bitcode/BitstreamReader.h"
@@ -22,6 +22,7 @@
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/ValueHandle.h"
+#include <deque>
#include <system_error>
#include <vector>
@@ -138,7 +139,6 @@ class BitcodeReader : public GVMaterializer {
BitcodeReaderMDValueList MDValueList;
std::vector<Comdat *> ComdatList;
SmallVector<Instruction *, 64> InstructionList;
- SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
@@ -180,10 +180,11 @@ class BitcodeReader : public GVMaterializer {
/// stream.
DenseMap<Function*, uint64_t> DeferredFunctionInfo;
- /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These
- /// are resolved lazily when functions are loaded.
- typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
- DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+ /// These are basic blocks forward-referenced by block addresses. They are
+ /// inserted lazily into functions when they're loaded. The basic block ID is
+ /// its index into the vector.
+ DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
+ std::deque<Function *> BasicBlockFwdRefQueue;
/// UseRelativeIDs - Indicates that we are using a new encoding for
/// instruction operands where most operands in the current
@@ -194,55 +195,36 @@ class BitcodeReader : public GVMaterializer {
/// not need this flag.
bool UseRelativeIDs;
- static const std::error_category &BitcodeErrorCategory();
+ /// True if all functions will be materialized, negating the need to process
+ /// (e.g.) blockaddress forward references.
+ bool WillMaterializeAllForwardRefs;
+
+ /// Functions that have block addresses taken. This is usually empty.
+ SmallPtrSet<const Function *, 4> BlockAddressesTaken;
public:
- enum ErrorType {
- BitcodeStreamInvalidSize,
- ConflictingMETADATA_KINDRecords,
- CouldNotFindFunctionInStream,
- ExpectedConstant,
- InsufficientFunctionProtos,
- InvalidBitcodeSignature,
- InvalidBitcodeWrapperHeader,
- InvalidConstantReference,
- InvalidID, // A read identifier is not found in the table it should be in.
- InvalidInstructionWithNoBB,
- InvalidRecord, // A read record doesn't have the expected size or structure
- InvalidTypeForValue, // Type read OK, but is invalid for its use
- InvalidTYPETable,
- InvalidType, // We were unable to read a type
- MalformedBlock, // We are unable to advance in the stream.
- MalformedGlobalInitializerSet,
- InvalidMultipleBlocks, // We found multiple blocks of a kind that should
- // have only one
- NeverResolvedValueFoundInFunction,
- InvalidValue // Invalid version, inst number, attr number, etc
- };
-
- std::error_code Error(ErrorType E) {
- return std::error_code(E, BitcodeErrorCategory());
- }
+ std::error_code Error(BitcodeError E) { return make_error_code(E); }
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr),
NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
- MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {}
+ MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
+ WillMaterializeAllForwardRefs(false) {}
explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
: Context(C), TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer),
NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
- MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {}
+ MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
+ WillMaterializeAllForwardRefs(false) {}
~BitcodeReader() { FreeState(); }
- void materializeForwardReferencedFunctions();
+ std::error_code materializeForwardReferencedFunctions();
void FreeState();
- void releaseBuffer() override;
+ void releaseBuffer();
- bool isMaterializable(const GlobalValue *GV) const override;
bool isDematerializable(const GlobalValue *GV) const override;
- std::error_code Materialize(GlobalValue *GV) override;
+ std::error_code materialize(GlobalValue *GV) override;
std::error_code MaterializeModule(Module *M) override;
void Dematerialize(GlobalValue *GV) override;
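
The header now records blockaddress forward references as a per-function vector of placeholder blocks plus a queue that materializeForwardReferencedFunctions() drains. The same deferred-resolution shape, stripped of LLVM types, can be sketched like this (every name below is illustrative):

#include <deque>
#include <map>
#include <string>
#include <vector>

struct Placeholder { unsigned BlockID; };

struct ForwardRefs {
  std::map<std::string, std::vector<Placeholder>> Pending; // per function
  std::deque<std::string> Queue;                           // first-reference order

  void note(const std::string &Fn, unsigned BlockID) {
    if (Pending[Fn].empty())
      Queue.push_back(Fn); // first reference to Fn enqueues it for later
    Pending[Fn].push_back({BlockID});
  }

  // Resolve(Fn, BlockID) stands in for "materialize Fn, then patch the ref".
  template <typename ResolveFn> void drain(ResolveFn Resolve) {
    while (!Queue.empty()) {
      std::string Fn = Queue.front();
      Queue.pop_front();
      for (const Placeholder &P : Pending[Fn])
        Resolve(Fn, P.BlockID);
      Pending.erase(Fn);
    }
  }
};

Keeping the queue separate from the map preserves the order in which functions were first referenced and tolerates new entries being appended while earlier ones are still being drained.
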
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index 72451ec..5e3232e 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -15,41 +15,11 @@ using namespace llvm;
// BitstreamCursor implementation
//===----------------------------------------------------------------------===//
-void BitstreamCursor::operator=(const BitstreamCursor &RHS) {
- freeState();
-
- BitStream = RHS.BitStream;
- NextChar = RHS.NextChar;
- CurWord = RHS.CurWord;
- BitsInCurWord = RHS.BitsInCurWord;
- CurCodeSize = RHS.CurCodeSize;
-
- // Copy abbreviations, and bump ref counts.
- CurAbbrevs = RHS.CurAbbrevs;
- for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i)
- CurAbbrevs[i]->addRef();
-
- // Copy block scope and bump ref counts.
- BlockScope = RHS.BlockScope;
- for (size_t S = 0, e = BlockScope.size(); S != e; ++S) {
- std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
- for (size_t i = 0, e = Abbrevs.size(); i != e; ++i)
- Abbrevs[i]->addRef();
- }
-}
-
void BitstreamCursor::freeState() {
// Free all the Abbrevs.
- for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i)
- CurAbbrevs[i]->dropRef();
CurAbbrevs.clear();
// Free all the Abbrevs in the block scope.
- for (size_t S = 0, e = BlockScope.size(); S != e; ++S) {
- std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
- for (size_t i = 0, e = Abbrevs.size(); i != e; ++i)
- Abbrevs[i]->dropRef();
- }
BlockScope.clear();
}
@@ -63,10 +33,8 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
// Add the abbrevs specific to this block to the CurAbbrevs list.
if (const BitstreamReader::BlockInfo *Info =
BitStream->getBlockInfo(BlockID)) {
- for (size_t i = 0, e = Info->Abbrevs.size(); i != e; ++i) {
- CurAbbrevs.push_back(Info->Abbrevs[i]);
- CurAbbrevs.back()->addRef();
- }
+ CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
+ Info->Abbrevs.end());
}
// Get the codesize of this block.
@@ -82,16 +50,9 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
return false;
}
-void BitstreamCursor::readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
- SmallVectorImpl<uint64_t> &Vals) {
- assert(Op.isLiteral() && "Not a literal");
- // If the abbrev specifies the literal value to use, use it.
- Vals.push_back(Op.getLiteralValue());
-}
-
-void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op,
- SmallVectorImpl<uint64_t> &Vals) {
- assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
+static uint64_t readAbbreviatedField(BitstreamCursor &Cursor,
+ const BitCodeAbbrevOp &Op) {
+ assert(!Op.isLiteral() && "Not to be used with literals!");
// Decode the value as we are commanded.
switch (Op.getEncoding()) {
@@ -99,19 +60,18 @@ void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op,
case BitCodeAbbrevOp::Blob:
llvm_unreachable("Should not reach here");
case BitCodeAbbrevOp::Fixed:
- Vals.push_back(Read((unsigned)Op.getEncodingData()));
- break;
+ return Cursor.Read((unsigned)Op.getEncodingData());
case BitCodeAbbrevOp::VBR:
- Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData()));
- break;
+ return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
case BitCodeAbbrevOp::Char6:
- Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
- break;
+ return BitCodeAbbrevOp::DecodeChar6(Cursor.Read(6));
}
+ llvm_unreachable("invalid abbreviation encoding");
}
-void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) {
- assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
+static void skipAbbreviatedField(BitstreamCursor &Cursor,
+ const BitCodeAbbrevOp &Op) {
+ assert(!Op.isLiteral() && "Not to be used with literals!");
// Decode the value as we are commanded.
switch (Op.getEncoding()) {
@@ -119,13 +79,13 @@ void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) {
case BitCodeAbbrevOp::Blob:
llvm_unreachable("Should not reach here");
case BitCodeAbbrevOp::Fixed:
- (void)Read((unsigned)Op.getEncodingData());
+ Cursor.Read((unsigned)Op.getEncodingData());
break;
case BitCodeAbbrevOp::VBR:
- (void)ReadVBR64((unsigned)Op.getEncodingData());
+ Cursor.ReadVBR64((unsigned)Op.getEncodingData());
break;
case BitCodeAbbrevOp::Char6:
- (void)Read(6);
+ Cursor.Read(6);
break;
}
}
@@ -153,7 +113,7 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) {
if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
Op.getEncoding() != BitCodeAbbrevOp::Blob) {
- skipAbbreviatedField(Op);
+ skipAbbreviatedField(*this, Op);
continue;
}
@@ -167,7 +127,7 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Read all the elements.
for (; NumElts; --NumElts)
- skipAbbreviatedField(EltEnc);
+ skipAbbreviatedField(*this, EltEnc);
continue;
}
@@ -207,22 +167,22 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
// Read the record code first.
assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
+ unsigned Code;
if (CodeOp.isLiteral())
- readAbbreviatedLiteral(CodeOp, Vals);
+ Code = CodeOp.getLiteralValue();
else
- readAbbreviatedField(CodeOp, Vals);
- unsigned Code = (unsigned)Vals.pop_back_val();
+ Code = readAbbreviatedField(*this, CodeOp);
for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
if (Op.isLiteral()) {
- readAbbreviatedLiteral(Op, Vals);
+ Vals.push_back(Op.getLiteralValue());
continue;
}
if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
Op.getEncoding() != BitCodeAbbrevOp::Blob) {
- readAbbreviatedField(Op, Vals);
+ Vals.push_back(readAbbreviatedField(*this, Op));
continue;
}
@@ -236,7 +196,7 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
// Read all the elements.
for (; NumElts; --NumElts)
- readAbbreviatedField(EltEnc, Vals);
+ Vals.push_back(readAbbreviatedField(*this, EltEnc));
continue;
}
@@ -339,9 +299,8 @@ bool BitstreamCursor::ReadBlockInfoBlock() {
// ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
// appropriate BlockInfo.
- BitCodeAbbrev *Abbv = CurAbbrevs.back();
+ CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
CurAbbrevs.pop_back();
- CurBlockInfo->Abbrevs.push_back(Abbv);
continue;
}
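
readAbbreviatedField and skipAbbreviatedField are now static helpers, but callers still go through BitstreamCursor::advance*/readRecord. A hedged sketch of a typical scan loop over one block using that public API; the block ID and the treatment of records are placeholders, not code from this patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>

// Returns true on a malformed stream, false once the block has been read.
static bool scanBlock(llvm::BitstreamCursor &Stream, unsigned BlockID) {
  if (Stream.EnterSubBlock(BlockID))
    return true;

  llvm::SmallVector<uint64_t, 64> Record;
  while (true) {
    llvm::BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
    switch (Entry.Kind) {
    case llvm::BitstreamEntry::Error:
      return true;
    case llvm::BitstreamEntry::EndBlock:
      return false;
    case llvm::BitstreamEntry::SubBlock:
      llvm_unreachable("skipped by advanceSkippingSubblocks()");
    case llvm::BitstreamEntry::Record:
      Record.clear();
      // Literal operands are appended directly; encoded operands go through
      // the readAbbreviatedField helper defined above.
      (void)Stream.readRecord(Entry.ID, Record);
      break;
    }
  }
}
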
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 3747122..7218ea0 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -18,10 +18,10 @@ using namespace llvm;
/*===-- Operations on modules ---------------------------------------------===*/
int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
- std::string ErrorInfo;
- raw_fd_ostream OS(Path, ErrorInfo, sys::fs::F_None);
+ std::error_code EC;
+ raw_fd_ostream OS(Path, EC, sys::fs::F_None);
- if (!ErrorInfo.empty())
+ if (EC)
return -1;
WriteBitcodeToFile(unwrap(M), OS);
@@ -39,3 +39,11 @@ int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
return LLVMWriteBitcodeToFD(M, FileHandle, true, false);
}
+
+LLVMMemoryBufferRef LLVMWriteBitcodeToMemoryBuffer(LLVMModuleRef M) {
+ std::string Data;
+ raw_string_ostream OS(Data);
+
+ WriteBitcodeToFile(unwrap(M), OS);
+ return wrap(MemoryBuffer::getMemBufferCopy(OS.str()).release());
+}
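
The new LLVMWriteBitcodeToMemoryBuffer entry point serializes a module without going through the filesystem. A small usage sketch from the C API; the empty module and the printf are illustrative only:

#include "llvm-c/BitWriter.h"
#include "llvm-c/Core.h"
#include <cstdio>

int main() {
  LLVMModuleRef M = LLVMModuleCreateWithName("demo");
  LLVMMemoryBufferRef Buf = LLVMWriteBitcodeToMemoryBuffer(M);
  std::printf("bitcode size: %zu bytes\n", LLVMGetBufferSize(Buf));
  LLVMDisposeMemoryBuffer(Buf);
  LLVMDisposeModule(M);
  return 0;
}
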
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index dd9282a..6cfc357 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,12 +33,6 @@
#include <map>
using namespace llvm;
-static cl::opt<bool>
-EnablePreserveUseListOrdering("enable-bc-uselist-preserve",
- cl::desc("Turn on experimental support for "
- "use-list order preservation."),
- cl::init(false), cl::Hidden);
-
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
@@ -201,6 +196,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NON_LAZY_BIND;
case Attribute::NonNull:
return bitc::ATTR_KIND_NON_NULL;
+ case Attribute::Dereferenceable:
+ return bitc::ATTR_KIND_DEREFERENCEABLE;
case Attribute::NoRedZone:
return bitc::ATTR_KIND_NO_RED_ZONE;
case Attribute::NoReturn:
@@ -272,7 +269,7 @@ static void WriteAttributeGroupTable(const ValueEnumerator &VE,
if (Attr.isEnumAttribute()) {
Record.push_back(0);
Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
- } else if (Attr.isAlignAttribute()) {
+ } else if (Attr.isIntAttribute()) {
Record.push_back(1);
Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
Record.push_back(Attr.getValueAsInt());
@@ -713,18 +710,15 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
static uint64_t GetOptimizationFlags(const Value *V) {
uint64_t Flags = 0;
- if (const OverflowingBinaryOperator *OBO =
- dyn_cast<OverflowingBinaryOperator>(V)) {
+ if (const auto *OBO = dyn_cast<OverflowingBinaryOperator>(V)) {
if (OBO->hasNoSignedWrap())
Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
if (OBO->hasNoUnsignedWrap())
Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
- } else if (const PossiblyExactOperator *PEO =
- dyn_cast<PossiblyExactOperator>(V)) {
+ } else if (const auto *PEO = dyn_cast<PossiblyExactOperator>(V)) {
if (PEO->isExact())
Flags |= 1 << bitc::PEO_EXACT;
- } else if (const FPMathOperator *FPMO =
- dyn_cast<const FPMathOperator>(V)) {
+ } else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
if (FPMO->hasUnsafeAlgebra())
Flags |= FastMathFlags::UnsafeAlgebra;
if (FPMO->hasNoNaNs())
@@ -762,13 +756,13 @@ static void WriteMDNode(const MDNode *N,
static void WriteModuleMetadata(const Module *M,
const ValueEnumerator &VE,
BitstreamWriter &Stream) {
- const ValueEnumerator::ValueList &Vals = VE.getMDValues();
+ const auto &Vals = VE.getMDValues();
bool StartedMetadataBlock = false;
unsigned MDSAbbrev = 0;
SmallVector<uint64_t, 64> Record;
for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
- if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) {
+ if (const MDNode *N = dyn_cast<MDNode>(Vals[i])) {
if (!N->isFunctionLocal() || !N->getFunction()) {
if (!StartedMetadataBlock) {
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
@@ -776,7 +770,7 @@ static void WriteModuleMetadata(const Module *M,
}
WriteMDNode(N, VE, Stream, Record);
}
- } else if (const MDString *MDS = dyn_cast<MDString>(Vals[i].first)) {
+ } else if (const MDString *MDS = dyn_cast<MDString>(Vals[i])) {
if (!StartedMetadataBlock) {
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
@@ -854,7 +848,7 @@ static void WriteMetadataAttachment(const Function &F,
// Write metadata attachments
// METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]
- SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
@@ -1431,13 +1425,20 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
break;
}
- case Instruction::Alloca:
+ case Instruction::Alloca: {
Code = bitc::FUNC_CODE_INST_ALLOCA;
Vals.push_back(VE.getTypeID(I.getType()));
Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
- Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
+ const AllocaInst &AI = cast<AllocaInst>(I);
+ unsigned AlignRecord = Log2_32(AI.getAlignment()) + 1;
+ assert(Log2_32(Value::MaximumAlignment) + 1 < 1 << 5 &&
+ "not enough bits for maximum alignment");
+    assert(AlignRecord < 1 << 5 && "alignment record does not fit in 5 bits");
+ AlignRecord |= AI.isUsedWithInAlloca() << 5;
+ Vals.push_back(AlignRecord);
break;
+ }
case Instruction::Load:
if (cast<LoadInst>(I).isAtomic()) {
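
The Alloca case above packs Log2(alignment) + 1 into the low five bits of the record and the isUsedWithInAlloca() flag into bit 5. A standalone sketch of that packing and one plausible inverse; the decode direction is an assumption about the reader, not code copied from it:

#include <cassert>
#include <cstdint>

static uint64_t encodeAllocaAlign(uint64_t Alignment, bool UsedWithInAlloca) {
  // 0 means "no alignment recorded"; otherwise store Log2(Alignment) + 1.
  uint64_t Record = 0;
  if (Alignment) {
    unsigned Log2 = 0;
    while (Alignment > 1) { Alignment >>= 1; ++Log2; }
    Record = Log2 + 1;
  }
  assert(Record < (1u << 5) && "alignment record does not fit in 5 bits");
  Record |= uint64_t(UsedWithInAlloca) << 5;
  return Record;
}

static void decodeAllocaAlign(uint64_t Record, uint64_t &Alignment,
                              bool &UsedWithInAlloca) {
  UsedWithInAlloca = (Record >> 5) & 1;
  unsigned Low = unsigned(Record & 31);
  Alignment = Low ? (uint64_t(1) << (Low - 1)) : 0;
}
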
@@ -1598,6 +1599,39 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST,
Stream.ExitBlock();
}
+static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order,
+ BitstreamWriter &Stream) {
+ assert(Order.Shuffle.size() >= 2 && "Shuffle too small");
+ unsigned Code;
+ if (isa<BasicBlock>(Order.V))
+ Code = bitc::USELIST_CODE_BB;
+ else
+ Code = bitc::USELIST_CODE_DEFAULT;
+
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned I : Order.Shuffle)
+ Record.push_back(I);
+ Record.push_back(VE.getValueID(Order.V));
+ Stream.EmitRecord(Code, Record);
+}
+
+static void WriteUseListBlock(const Function *F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ auto hasMore = [&]() {
+ return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F;
+ };
+ if (!hasMore())
+ // Nothing to do.
+ return;
+
+ Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
+ while (hasMore()) {
+ WriteUseList(VE, std::move(VE.UseListOrders.back()), Stream);
+ VE.UseListOrders.pop_back();
+ }
+ Stream.ExitBlock();
+}
+
/// WriteFunction - Emit a function body to the module stream.
static void WriteFunction(const Function &F, ValueEnumerator &VE,
BitstreamWriter &Stream) {
@@ -1666,6 +1700,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
if (NeedsMetadataAttachment)
WriteMetadataAttachment(F, VE, Stream);
+ if (shouldPreserveBitcodeUseListOrder())
+ WriteUseListBlock(&F, VE, Stream);
VE.purgeFunction();
Stream.ExitBlock();
}
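
WriteUseList above emits one record per value whose use order changes: the permutation indices first, then the value's ID, under USELIST_CODE_BB or USELIST_CODE_DEFAULT. One plausible way to apply such a shuffle on the reading side; treating Shuffle[NewIndex] as the old position is this sketch's interpretation, not the reader's verbatim code:

#include <cstddef>
#include <vector>

// Produce the reordered use list: NewUses[I] = OldUses[Shuffle[I]].
template <typename T>
std::vector<T> applyUseListShuffle(const std::vector<T> &OldUses,
                                   const std::vector<unsigned> &Shuffle) {
  std::vector<T> NewUses(OldUses.size());
  for (std::size_t I = 0, E = Shuffle.size(); I != E; ++I)
    NewUses[I] = OldUses[Shuffle[I]];
  return NewUses;
}
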
@@ -1831,98 +1867,6 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
-// Sort the Users based on the order in which the reader parses the bitcode
-// file.
-static bool bitcodereader_order(const User *lhs, const User *rhs) {
- // TODO: Implement.
- return true;
-}
-
-static void WriteUseList(const Value *V, const ValueEnumerator &VE,
- BitstreamWriter &Stream) {
-
- // One or zero uses can't get out of order.
- if (V->use_empty() || V->hasNUses(1))
- return;
-
- // Make a copy of the in-memory use-list for sorting.
- SmallVector<const User*, 8> UserList(V->user_begin(), V->user_end());
-
- // Sort the copy based on the order read by the BitcodeReader.
- std::sort(UserList.begin(), UserList.end(), bitcodereader_order);
-
- // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
- // sorted list (i.e., the expected BitcodeReader in-memory use-list).
-
- // TODO: Emit the USELIST_CODE_ENTRYs.
-}
-
-static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
- BitstreamWriter &Stream) {
- VE.incorporateFunction(*F);
-
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI)
- WriteUseList(AI, VE, Stream);
- for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
- ++BB) {
- WriteUseList(BB, VE, Stream);
- for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
- ++II) {
- WriteUseList(II, VE, Stream);
- for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
- OI != E; ++OI) {
- if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
- isa<InlineAsm>(*OI))
- WriteUseList(*OI, VE, Stream);
- }
- }
- }
- VE.purgeFunction();
-}
-
-// Emit use-lists.
-static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
- BitstreamWriter &Stream) {
- Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
-
- // XXX: this modifies the module, but in a way that should never change the
- // behavior of any pass or codegen in LLVM. The problem is that GVs may
- // contain entries in the use_list that do not exist in the Module and are
- // not stored in the .bc file.
- for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
- I != E; ++I)
- I->removeDeadConstantUsers();
-
- // Write the global variables.
- for (Module::const_global_iterator GI = M->global_begin(),
- GE = M->global_end(); GI != GE; ++GI) {
- WriteUseList(GI, VE, Stream);
-
- // Write the global variable initializers.
- if (GI->hasInitializer())
- WriteUseList(GI->getInitializer(), VE, Stream);
- }
-
- // Write the functions.
- for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
- WriteUseList(FI, VE, Stream);
- if (!FI->isDeclaration())
- WriteFunctionUseList(FI, VE, Stream);
- if (FI->hasPrefixData())
- WriteUseList(FI->getPrefixData(), VE, Stream);
- }
-
- // Write the aliases.
- for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
- AI != AE; ++AI) {
- WriteUseList(AI, VE, Stream);
- WriteUseList(AI->getAliasee(), VE, Stream);
- }
-
- Stream.ExitBlock();
-}
-
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
@@ -1933,7 +1877,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals);
// Analyze the module, enumerating globals, functions, etc.
- ValueEnumerator VE(M);
+ ValueEnumerator VE(*M);
// Emit blockinfo, which defines the standard abbreviations etc.
WriteBlockInfo(VE, Stream);
@@ -1965,9 +1909,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit names for globals/functions etc.
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
- // Emit use-lists.
- if (EnablePreserveUseListOrdering)
- WriteModuleUseLists(M, VE, Stream);
+ // Emit module-level use-lists.
+ if (shouldPreserveBitcodeUseListOrder())
+ WriteUseListBlock(nullptr, VE, Stream);
// Emit function bodies.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 15f8034..f065c83 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -18,31 +18,280 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
+namespace {
+struct OrderMap {
+ DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
+ unsigned LastGlobalConstantID;
+ unsigned LastGlobalValueID;
+
+ OrderMap() : LastGlobalConstantID(0), LastGlobalValueID(0) {}
+
+ bool isGlobalConstant(unsigned ID) const {
+ return ID <= LastGlobalConstantID;
+ }
+ bool isGlobalValue(unsigned ID) const {
+ return ID <= LastGlobalValueID && !isGlobalConstant(ID);
+ }
+
+ unsigned size() const { return IDs.size(); }
+ std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }
+ std::pair<unsigned, bool> lookup(const Value *V) const {
+ return IDs.lookup(V);
+ }
+ void index(const Value *V) {
+ // Explicitly sequence get-size and insert-value operations to avoid UB.
+ unsigned ID = IDs.size() + 1;
+ IDs[V].first = ID;
+ }
+};
+}
+
+static void orderValue(const Value *V, OrderMap &OM) {
+ if (OM.lookup(V).first)
+ return;
+
+ if (const Constant *C = dyn_cast<Constant>(V))
+ if (C->getNumOperands() && !isa<GlobalValue>(C))
+ for (const Value *Op : C->operands())
+ if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
+ orderValue(Op, OM);
+
+ // Note: we cannot cache this lookup above, since inserting into the map
+ // changes the map's size, and thus affects the other IDs.
+ OM.index(V);
+}
+
+static OrderMap orderModule(const Module &M) {
+ // This needs to match the order used by ValueEnumerator::ValueEnumerator()
+ // and ValueEnumerator::incorporateFunction().
+ OrderMap OM;
+
+ // In the reader, initializers of GlobalValues are set *after* all the
+ // globals have been read. Rather than awkwardly modeling this behaviour
+ // directly in predictValueUseListOrderImpl(), just assign IDs to
+ // initializers of GlobalValues before GlobalValues themselves to model this
+ // implicitly.
+ for (const GlobalVariable &G : M.globals())
+ if (G.hasInitializer())
+ if (!isa<GlobalValue>(G.getInitializer()))
+ orderValue(G.getInitializer(), OM);
+ for (const GlobalAlias &A : M.aliases())
+ if (!isa<GlobalValue>(A.getAliasee()))
+ orderValue(A.getAliasee(), OM);
+ for (const Function &F : M)
+ if (F.hasPrefixData())
+ if (!isa<GlobalValue>(F.getPrefixData()))
+ orderValue(F.getPrefixData(), OM);
+ OM.LastGlobalConstantID = OM.size();
+
+ // Initializers of GlobalValues are processed in
+ // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather
+ // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
+ // by giving IDs in reverse order.
+ //
+ // Since GlobalValues never reference each other directly (just through
+ // initializers), their relative IDs only matter for determining order of
+ // uses in their initializers.
+ for (const Function &F : M)
+ orderValue(&F, OM);
+ for (const GlobalAlias &A : M.aliases())
+ orderValue(&A, OM);
+ for (const GlobalVariable &G : M.globals())
+ orderValue(&G, OM);
+ OM.LastGlobalValueID = OM.size();
+
+ for (const Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ // Here we need to match the union of ValueEnumerator::incorporateFunction()
+ // and WriteFunction(). Basic blocks are implicitly declared before
+ // anything else (by declaring their size).
+ for (const BasicBlock &BB : F)
+ orderValue(&BB, OM);
+ for (const Argument &A : F.args())
+ orderValue(&A, OM);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ for (const Value *Op : I.operands())
+ if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
+ isa<InlineAsm>(*Op))
+ orderValue(Op, OM);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ orderValue(&I, OM);
+ }
+ return OM;
+}
+
+static void predictValueUseListOrderImpl(const Value *V, const Function *F,
+ unsigned ID, const OrderMap &OM,
+ UseListOrderStack &Stack) {
+ // Predict use-list order for this one.
+ typedef std::pair<const Use *, unsigned> Entry;
+ SmallVector<Entry, 64> List;
+ for (const Use &U : V->uses())
+ // Check if this user will be serialized.
+ if (OM.lookup(U.getUser()).first)
+ List.push_back(std::make_pair(&U, List.size()));
+
+ if (List.size() < 2)
+ // We may have lost some users.
+ return;
+
+ bool IsGlobalValue = OM.isGlobalValue(ID);
+ std::sort(List.begin(), List.end(), [&](const Entry &L, const Entry &R) {
+ const Use *LU = L.first;
+ const Use *RU = R.first;
+ if (LU == RU)
+ return false;
+
+ auto LID = OM.lookup(LU->getUser()).first;
+ auto RID = OM.lookup(RU->getUser()).first;
+
+ // Global values are processed in reverse order.
+ //
+ // Moreover, initializers of GlobalValues are set *after* all the globals
+ // have been read (despite having earlier IDs). Rather than awkwardly
+ // modeling this behaviour here, orderModule() has assigned IDs to
+ // initializers of GlobalValues before GlobalValues themselves.
+ if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID))
+ return LID < RID;
+
+ // If ID is 4, then expect: 7 6 5 1 2 3.
+ if (LID < RID) {
+ if (RID <= ID)
+ if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+ return true;
+ return false;
+ }
+ if (RID < LID) {
+ if (LID <= ID)
+ if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+ return false;
+ return true;
+ }
+
+ // LID and RID are equal, so we have different operands of the same user.
+ // Assume operands are added in order for all instructions.
+ if (LID <= ID)
+ if (!IsGlobalValue) // GlobalValue uses don't get reversed.
+ return LU->getOperandNo() < RU->getOperandNo();
+ return LU->getOperandNo() > RU->getOperandNo();
+ });
+
+ if (std::is_sorted(
+ List.begin(), List.end(),
+ [](const Entry &L, const Entry &R) { return L.second < R.second; }))
+ // Order is already correct.
+ return;
+
+ // Store the shuffle.
+ Stack.emplace_back(V, F, List.size());
+ assert(List.size() == Stack.back().Shuffle.size() && "Wrong size");
+ for (size_t I = 0, E = List.size(); I != E; ++I)
+ Stack.back().Shuffle[I] = List[I].second;
+}
+
+static void predictValueUseListOrder(const Value *V, const Function *F,
+ OrderMap &OM, UseListOrderStack &Stack) {
+ auto &IDPair = OM[V];
+ assert(IDPair.first && "Unmapped value");
+ if (IDPair.second)
+ // Already predicted.
+ return;
+
+ // Do the actual prediction.
+ IDPair.second = true;
+ if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
+ predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);
+
+ // Recursive descent into constants.
+ if (const Constant *C = dyn_cast<Constant>(V))
+ if (C->getNumOperands()) // Visit GlobalValues.
+ for (const Value *Op : C->operands())
+ if (isa<Constant>(Op)) // Visit GlobalValues.
+ predictValueUseListOrder(Op, F, OM, Stack);
+}
+
+static UseListOrderStack predictUseListOrder(const Module &M) {
+ OrderMap OM = orderModule(M);
+
+ // Use-list orders need to be serialized after all the users have been added
+ // to a value, or else the shuffles will be incomplete. Store them per
+ // function in a stack.
+ //
+ // Aside from function order, the order of values doesn't matter much here.
+ UseListOrderStack Stack;
+
+ // We want to visit the functions backward now so we can list function-local
+ // constants in the last Function they're used in. Module-level constants
+ // have already been visited above.
+ for (auto I = M.rbegin(), E = M.rend(); I != E; ++I) {
+ const Function &F = *I;
+ if (F.isDeclaration())
+ continue;
+ for (const BasicBlock &BB : F)
+ predictValueUseListOrder(&BB, &F, OM, Stack);
+ for (const Argument &A : F.args())
+ predictValueUseListOrder(&A, &F, OM, Stack);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ for (const Value *Op : I.operands())
+ if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
+ predictValueUseListOrder(Op, &F, OM, Stack);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ predictValueUseListOrder(&I, &F, OM, Stack);
+ }
+
+ // Visit globals last, since the module-level use-list block will be seen
+ // before the function bodies are processed.
+ for (const GlobalVariable &G : M.globals())
+ predictValueUseListOrder(&G, nullptr, OM, Stack);
+ for (const Function &F : M)
+ predictValueUseListOrder(&F, nullptr, OM, Stack);
+ for (const GlobalAlias &A : M.aliases())
+ predictValueUseListOrder(&A, nullptr, OM, Stack);
+ for (const GlobalVariable &G : M.globals())
+ if (G.hasInitializer())
+ predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
+ for (const GlobalAlias &A : M.aliases())
+ predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
+ for (const Function &F : M)
+ if (F.hasPrefixData())
+ predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
+
+ return Stack;
+}
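
The comparator in predictValueUseListOrderImpl is what the comment "If ID is 4, then expect: 7 6 5 1 2 3" describes: users with later IDs are replayed in reverse, earlier users keep their order. A standalone worked example of just that rule; the IDs are made up, and the GlobalValue and same-user tie-breaking branches are deliberately left out:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const unsigned ValueID = 4; // the value whose uses are being ordered
  std::vector<unsigned> UserIDs = {1, 2, 3, 5, 6, 7};
  std::sort(UserIDs.begin(), UserIDs.end(),
            [ValueID](unsigned LID, unsigned RID) {
              if (LID == RID)
                return false;
              if (LID < RID)
                return RID <= ValueID; // already-seen users keep forward order
              return LID > ValueID;    // not-yet-seen users come first, reversed
            });
  for (unsigned ID : UserIDs)
    std::printf("%u ", ID); // prints: 7 6 5 1 2 3
  std::printf("\n");
  return 0;
}
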
+
static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
return V.first->getType()->isIntOrIntVectorTy();
}
-/// ValueEnumerator - Enumerate module-level information.
-ValueEnumerator::ValueEnumerator(const Module *M) {
+ValueEnumerator::ValueEnumerator(const Module &M) {
+ if (shouldPreserveBitcodeUseListOrder())
+ UseListOrders = predictUseListOrder(M);
+
// Enumerate the global variables.
- for (Module::const_global_iterator I = M->global_begin(),
- E = M->global_end(); I != E; ++I)
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
EnumerateValue(I);
// Enumerate the functions.
- for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
EnumerateValue(I);
EnumerateAttributes(cast<Function>(I)->getAttributes());
}
// Enumerate the aliases.
- for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I)
EnumerateValue(I);
@@ -50,30 +299,30 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
unsigned FirstConstant = Values.size();
// Enumerate the global variable initializers.
- for (Module::const_global_iterator I = M->global_begin(),
- E = M->global_end(); I != E; ++I)
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
if (I->hasInitializer())
EnumerateValue(I->getInitializer());
// Enumerate the aliasees.
- for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I)
EnumerateValue(I->getAliasee());
// Enumerate the prefix data constants.
- for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->hasPrefixData())
EnumerateValue(I->getPrefixData());
// Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
- EnumerateValueSymbolTable(M->getValueSymbolTable());
+ EnumerateValueSymbolTable(M.getValueSymbolTable());
EnumerateNamedMetadata(M);
- SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
// Enumerate types used by function bodies and argument lists.
- for (const Function &F : *M) {
+ for (const Function &F : M) {
for (const Argument &A : F.args())
EnumerateType(A.getType());
@@ -179,6 +428,11 @@ void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
+ if (shouldPreserveBitcodeUseListOrder())
+ // Optimizing constants makes the use-list order difficult to predict.
+ // Disable it for now when trying to preserve the order.
+ return;
+
std::stable_sort(Values.begin() + CstStart, Values.begin() + CstEnd,
[this](const std::pair<const Value *, unsigned> &LHS,
const std::pair<const Value *, unsigned> &RHS) {
@@ -209,11 +463,12 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
EnumerateValue(VI->getValue());
}
-/// EnumerateNamedMetadata - Insert all of the values referenced by
-/// named metadata in the specified module.
-void ValueEnumerator::EnumerateNamedMetadata(const Module *M) {
- for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
- E = M->named_metadata_end(); I != E; ++I)
+/// Insert all of the values referenced by named metadata in the specified
+/// module.
+void ValueEnumerator::EnumerateNamedMetadata(const Module &M) {
+ for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end();
+ I != E; ++I)
EnumerateNamedMDNode(I);
}
@@ -239,31 +494,31 @@ void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
void ValueEnumerator::EnumerateMetadata(const Value *MD) {
assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
- // Enumerate the type of this value.
- EnumerateType(MD->getType());
-
+ // Skip function-local nodes themselves, but walk their operands.
const MDNode *N = dyn_cast<MDNode>(MD);
-
- // In the module-level pass, skip function-local nodes themselves, but
- // do walk their operands.
if (N && N->isFunctionLocal() && N->getFunction()) {
EnumerateMDNodeOperands(N);
return;
}
- // Check to see if it's already in!
- unsigned &MDValueID = MDValueMap[MD];
- if (MDValueID) {
- // Increment use count.
- MDValues[MDValueID-1].second++;
+ // Insert a dummy ID to block the co-recursive call to
+ // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph.
+ //
+ // Return early if there's already an ID.
+ if (!MDValueMap.insert(std::make_pair(MD, 0)).second)
return;
- }
- MDValues.push_back(std::make_pair(MD, 1U));
- MDValueID = MDValues.size();
- // Enumerate all non-function-local operands.
+ // Enumerate the type of this value.
+ EnumerateType(MD->getType());
+
+ // Visit operands first to minimize RAUW.
if (N)
EnumerateMDNodeOperands(N);
+
+ // Replace the dummy ID inserted above with the correct one. MDValueMap may
+ // have changed by inserting operands, so we need a fresh lookup here.
+ MDValues.push_back(MD);
+ MDValueMap[MD] = MDValues.size();
}
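
EnumerateMetadata now inserts a dummy ID before walking operands so that cyclic metadata graphs terminate, and only assigns the real ID afterwards, keeping operands numbered before their users. The same trick in miniature, with a generic node type standing in for MDNode (names are illustrative):

#include <map>
#include <utility>
#include <vector>

struct Node { std::vector<const Node *> Ops; };

static void enumerate(const Node *N, std::map<const Node *, unsigned> &IDs,
                      std::vector<const Node *> &Order) {
  // Reserve a slot first: a cyclic operand edge back to N returns immediately.
  if (!IDs.insert(std::make_pair(N, 0u)).second)
    return;
  for (const Node *Op : N->Ops)
    enumerate(Op, IDs, Order);
  Order.push_back(N);
  IDs[N] = static_cast<unsigned>(Order.size()); // 1-based, like MDValues
}
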
/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
@@ -277,12 +532,10 @@ void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) {
// Check to see if it's already in!
unsigned &MDValueID = MDValueMap[N];
- if (MDValueID) {
- // Increment use count.
- MDValues[MDValueID-1].second++;
+ if (MDValueID)
return;
- }
- MDValues.push_back(std::make_pair(N, 1U));
+
+ MDValues.push_back(N);
MDValueID = MDValues.size();
  // To incorporate function-local information visit all function-local
@@ -487,7 +740,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
FnLocalMDVector.push_back(MD);
}
- SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
I->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
MDNode *N = MDs[i].second;
@@ -510,7 +763,7 @@ void ValueEnumerator::purgeFunction() {
for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
ValueMap.erase(Values[i].first);
for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i)
- MDValueMap.erase(MDValues[i].first);
+ MDValueMap.erase(MDValues[i]);
for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
ValueMap.erase(BasicBlocks[i]);
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 1c9f38e..563c214 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef VALUE_ENUMERATOR_H
-#define VALUE_ENUMERATOR_H
+#ifndef LLVM_LIB_BITCODE_WRITER_VALUEENUMERATOR_H
+#define LLVM_LIB_BITCODE_WRITER_VALUEENUMERATOR_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/UseListOrder.h"
#include <vector>
namespace llvm {
@@ -42,6 +43,9 @@ public:
// For each value, we remember its Value* and occurrence frequency.
typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+
+ UseListOrderStack UseListOrders;
+
private:
typedef DenseMap<Type*, unsigned> TypeMapType;
TypeMapType TypeMap;
@@ -54,7 +58,7 @@ private:
typedef UniqueVector<const Comdat *> ComdatSetType;
ComdatSetType Comdats;
- ValueList MDValues;
+ std::vector<const Value *> MDValues;
SmallVector<const MDNode *, 8> FunctionLocalMDs;
ValueMapType MDValueMap;
@@ -92,7 +96,7 @@ private:
ValueEnumerator(const ValueEnumerator &) LLVM_DELETED_FUNCTION;
void operator=(const ValueEnumerator &) LLVM_DELETED_FUNCTION;
public:
- ValueEnumerator(const Module *M);
+ ValueEnumerator(const Module &M);
void dump() const;
void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
@@ -130,7 +134,7 @@ public:
}
const ValueList &getValues() const { return Values; }
- const ValueList &getMDValues() const { return MDValues; }
+ const std::vector<const Value *> &getMDValues() const { return MDValues; }
const SmallVectorImpl<const MDNode *> &getFunctionLocalMDValues() const {
return FunctionLocalMDs;
}
@@ -172,7 +176,7 @@ private:
void EnumerateAttributes(AttributeSet PAL);
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
- void EnumerateNamedMetadata(const Module *M);
+ void EnumerateNamedMetadata(const Module &M);
};
} // End llvm namespace
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 0f38c64..91c1314 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -111,18 +110,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
}
-
-
-AggressiveAntiDepBreaker::
-AggressiveAntiDepBreaker(MachineFunction& MFi,
- const RegisterClassInfo &RCI,
- TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
- AntiDepBreaker(), MF(MFi),
- MRI(MF.getRegInfo()),
- TII(MF.getTarget().getInstrInfo()),
- TRI(MF.getTarget().getRegisterInfo()),
- RegClassInfo(RCI),
- State(nullptr) {
+AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
+ MachineFunction &MFi, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
+ : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
+ TII(MF.getSubtarget().getInstrInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
+ State(nullptr) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
@@ -262,11 +256,8 @@ static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
- unsigned Reg = P->getReg();
- if (RegSet.count(Reg) == 0) {
+ if (RegSet.insert(P->getReg()).second)
Edges.push_back(&*P);
- RegSet.insert(Reg);
- }
}
}
}
@@ -527,7 +518,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
BV &= RCBV;
}
- DEBUG(dbgs() << " " << RC->getName());
+ DEBUG(dbgs() << " " << TRI->getRegClassName(RC));
}
return BV;
@@ -582,7 +573,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned Reg = Regs[i];
if (Reg == SuperReg) continue;
bool IsSub = TRI->isSubRegister(SuperReg, Reg);
- assert(IsSub && "Expecting group subregister");
+ // FIXME: remove this once PR18663 has been properly fixed. For now,
+ // return a conservative answer:
+ // assert(IsSub && "Expecting group subregister");
if (!IsSub)
return false;
}
@@ -618,8 +611,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
DEBUG(dbgs() << "\tFind Registers:");
- if (RenameOrder.count(SuperRC) == 0)
- RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
unsigned OrigR = RenameOrder[SuperRC];
unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 2ab9d89..12cf95b 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
-#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
@@ -32,88 +32,83 @@
namespace llvm {
class RegisterClassInfo;
- /// Class AggressiveAntiDepState
/// Contains all the state necessary for anti-dep breaking.
class AggressiveAntiDepState {
public:
- /// RegisterReference - Information about a register reference
- /// within a liverange
+ /// Information about a register reference within a liverange
typedef struct {
- /// Operand - The registers operand
+    /// The register's operand
MachineOperand *Operand;
- /// RC - The register class
+ /// The register class
const TargetRegisterClass *RC;
} RegisterReference;
private:
- /// NumTargetRegs - Number of non-virtual target registers
- /// (i.e. TRI->getNumRegs()).
+ /// Number of non-virtual target registers (i.e. TRI->getNumRegs()).
const unsigned NumTargetRegs;
- /// GroupNodes - Implements a disjoint-union data structure to
+ /// Implements a disjoint-union data structure to
/// form register groups. A node is represented by an index into
/// the vector. A node can "point to" itself to indicate that it
/// is the parent of a group, or point to another node to indicate
/// that it is a member of the same group as that node.
std::vector<unsigned> GroupNodes;
- /// GroupNodeIndices - For each register, the index of the GroupNode
+ /// For each register, the index of the GroupNode
/// currently representing the group that the register belongs to.
/// Register 0 is always represented by the 0 group, a group
/// composed of registers that are not eligible for anti-aliasing.
std::vector<unsigned> GroupNodeIndices;
- /// RegRefs - Map registers to all their references within a live range.
+ /// Map registers to all their references within a live range.
std::multimap<unsigned, RegisterReference> RegRefs;
- /// KillIndices - The index of the most recent kill (proceding bottom-up),
+    /// The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
std::vector<unsigned> KillIndices;
- /// DefIndices - The index of the most recent complete def (proceding bottom
+    /// The index of the most recent complete def (proceeding bottom
/// up), or ~0u if the register is live.
std::vector<unsigned> DefIndices;
public:
AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
- /// GetKillIndices - Return the kill indices.
+ /// Return the kill indices.
std::vector<unsigned> &GetKillIndices() { return KillIndices; }
- /// GetDefIndices - Return the define indices.
+ /// Return the define indices.
std::vector<unsigned> &GetDefIndices() { return DefIndices; }
- /// GetRegRefs - Return the RegRefs map.
+ /// Return the RegRefs map.
std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
- // GetGroup - Get the group for a register. The returned value is
+ // Get the group for a register. The returned value is
// the index of the GroupNode representing the group.
unsigned GetGroup(unsigned Reg);
- // GetGroupRegs - Return a vector of the registers belonging to a
- // group. If RegRefs is non-NULL then only included referenced registers.
+ // Return a vector of the registers belonging to a group.
+    // If RegRefs is non-NULL then only include referenced registers.
void GetGroupRegs(
unsigned Group,
std::vector<unsigned> &Regs,
std::multimap<unsigned,
AggressiveAntiDepState::RegisterReference> *RegRefs);
- // UnionGroups - Union Reg1's and Reg2's groups to form a new
- // group. Return the index of the GroupNode representing the
- // group.
+ // Union Reg1's and Reg2's groups to form a new group.
+ // Return the index of the GroupNode representing the group.
unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
- // LeaveGroup - Remove a register from its current group and place
+ // Remove a register from its current group and place
// it alone in its own group. Return the index of the GroupNode
    // representing the register's new group.
unsigned LeaveGroup(unsigned Reg);
- /// IsLive - Return true if Reg is live
+ /// Return true if Reg is live.
bool IsLive(unsigned Reg);
};
- /// Class AggressiveAntiDepBreaker
class AggressiveAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
MachineRegisterInfo &MRI;
@@ -121,12 +116,11 @@ class RegisterClassInfo;
const TargetRegisterInfo *TRI;
const RegisterClassInfo &RegClassInfo;
- /// CriticalPathSet - The set of registers that should only be
+ /// The set of registers that should only be
/// renamed if they are on the critical path.
BitVector CriticalPathSet;
- /// State - The state used to identify and rename anti-dependence
- /// registers.
+ /// The state used to identify and rename anti-dependence registers.
AggressiveAntiDepState *State;
public:
@@ -135,11 +129,10 @@ class RegisterClassInfo;
TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
- /// Start - Initialize anti-dep breaking for a new basic block.
+ /// Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB) override;
- /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
- /// path
+    /// Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
@@ -148,24 +141,24 @@ class RegisterClassInfo;
unsigned InsertPosIndex,
DbgValueVector &DbgValues) override;
- /// Observe - Update liveness information to account for the current
+ /// Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void Observe(MachineInstr *MI, unsigned Count,
unsigned InsertPosIndex) override;
- /// Finish - Finish anti-dep breaking for a basic block.
+ /// Finish anti-dep breaking for a basic block.
void FinishBlock() override;
private:
/// Keep track of a position in the allocation order for each regclass.
typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
- /// IsImplicitDefUse - Return true if MO represents a register
+ /// Return true if MO represents a register
/// that is both implicitly used and defined in MI
bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
- /// GetPassthruRegs - If MI implicitly def/uses a register, then
+ /// If MI implicitly def/uses a register, then
/// return that register and all subregisters.
void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 64ff2a7..1e4eaa7 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
-#define LLVM_CODEGEN_ALLOCATIONORDER_H
+#ifndef LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 1bdf312..9a3b790 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -25,6 +25,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+
using namespace llvm;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
@@ -106,15 +109,16 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
}
/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+GlobalValue *llvm::ExtractTypeInfo(Value *V) {
V = V->stripPointerCasts();
- GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+ GlobalValue *GV = dyn_cast<GlobalValue>(V);
+ GlobalVariable *Var = dyn_cast<GlobalVariable>(V);
- if (GV && GV->getName() == "llvm.eh.catch.all.value") {
- assert(GV->hasInitializer() &&
+ if (Var && Var->getName() == "llvm.eh.catch.all.value") {
+ assert(Var->hasInitializer() &&
"The EH catch-all value must have an initializer");
- Value *Init = GV->getInitializer();
- GV = dyn_cast<GlobalVariable>(Init);
+ Value *Init = Var->getInitializer();
+ GV = dyn_cast<GlobalValue>(Init);
if (!GV) V = cast<ConstantPointerNull>(Init);
}
@@ -475,7 +479,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -490,8 +494,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!DAG.getTarget().Options.GuaranteedTailCallOpt ||
- !isa<UnreachableInst>(Term)))
+ (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
return false;
// If I will have a chain, make sure no other instruction that will have a
@@ -509,8 +512,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
return false;
}
- return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret,
- *DAG.getTarget().getTargetLowering());
+ return returnTypeIsEligibleForTailCall(
+ ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering());
}
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
@@ -607,3 +610,29 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
return true;
}
+
+bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
+ if (!GV->hasLinkOnceODRLinkage())
+ return false;
+
+ if (GV->hasUnnamedAddr())
+ return true;
+
+ // If it is a non constant variable, it needs to be uniqued across shared
+  // If it is a non-constant variable, it needs to be uniqued across shared
+ if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
+ if (!Var->isConstant())
+ return false;
+ }
+
+ // An alias can point to a variable. We could try to resolve the alias to
+ // decide, but for now just don't hide them.
+ if (isa<GlobalAlias>(GV))
+ return false;
+
+ GlobalStatus GS;
+ if (GlobalStatus::analyzeGlobal(GV, GS))
+ return false;
+
+ return !GS.IsCompared;
+}
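
canBeOmittedFromSymbolTable folds linkonce_odr, unnamed_addr, constness and GlobalStatus::IsCompared into a single predicate. A hedged caller-side sketch that assumes the declaration is exposed from llvm/CodeGen/Analysis.h next to this definition:

#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/Module.h"

// Count globals and functions that the rule above would allow to be hidden.
static unsigned countHideable(const llvm::Module &M) {
  unsigned N = 0;
  for (llvm::Module::const_global_iterator I = M.global_begin(),
                                           E = M.global_end();
       I != E; ++I)
    if (llvm::canBeOmittedFromSymbolTable(I))
      ++N;
  for (llvm::Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
    if (llvm::canBeOmittedFromSymbolTable(I))
      ++N;
  return N;
}
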
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 05e5c45..5cb351d 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -4,7 +4,7 @@ codegen_SRC_FILES := \
AggressiveAntiDepBreaker.cpp \
AllocationOrder.cpp \
Analysis.cpp \
- AtomicExpandLoadLinkedPass.cpp \
+ AtomicExpandPass.cpp \
BasicTargetTransformInfo.cpp \
BranchFolding.cpp \
CalcSpillWeights.cpp \
@@ -21,6 +21,7 @@ codegen_SRC_FILES := \
ExecutionDepsFix.cpp \
ExpandISelPseudos.cpp \
ExpandPostRAPseudos.cpp \
+ ForwardControlFlowIntegrity.cpp \
GCMetadata.cpp \
GCMetadataPrinter.cpp \
GCStrategy.cpp \
@@ -29,7 +30,6 @@ codegen_SRC_FILES := \
InlineSpiller.cpp \
InterferenceCache.cpp \
IntrinsicLowering.cpp \
- JITCodeEmitter.cpp \
JumpInstrTables.cpp \
LatencyPriorityQueue.cpp \
LexicalScopes.cpp \
@@ -49,7 +49,7 @@ codegen_SRC_FILES := \
MachineBlockFrequencyInfo.cpp \
MachineBlockPlacement.cpp \
MachineBranchProbabilityInfo.cpp \
- MachineCodeEmitter.cpp \
+ MachineCombiner.cpp \
MachineCopyPropagation.cpp \
MachineCSE.cpp \
MachineDominators.cpp \
@@ -97,7 +97,6 @@ codegen_SRC_FILES := \
ShadowStackGC.cpp \
SjLjEHPrepare.cpp \
SlotIndexes.cpp \
- Spiller.cpp \
SpillPlacement.cpp \
SplitKit.cpp \
StackColoring.cpp \
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index df47f98..a61a8ef 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
-#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,9 +25,8 @@
namespace llvm {
-/// AntiDepBreaker - This class works into conjunction with the
-/// post-RA scheduler to rename registers to break register
-/// anti-dependencies.
+/// This class works in conjunction with the post-RA scheduler to rename
+/// registers to break register anti-dependencies (WAR hazards).
class AntiDepBreaker {
public:
typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
@@ -35,29 +34,26 @@ public:
virtual ~AntiDepBreaker();
- /// Start - Initialize anti-dep breaking for a new basic block.
+ /// Initialize anti-dep breaking for a new basic block.
virtual void StartBlock(MachineBasicBlock *BB) =0;
- /// BreakAntiDependencies - Identifiy anti-dependencies within a
- /// basic-block region and break them by renaming registers. Return
- /// the number of anti-dependencies broken.
- ///
+ /// Identify anti-dependencies within a basic-block region and break them by
+ /// renaming registers. Return the number of anti-dependencies broken.
virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
DbgValueVector &DbgValues) = 0;
- /// Observe - Update liveness information to account for the current
+ /// Update liveness information to account for the current
/// instruction, which will not be scheduled.
- ///
virtual void Observe(MachineInstr *MI, unsigned Count,
unsigned InsertPosIndex) =0;
- /// Finish - Finish anti-dep breaking for a basic block.
+ /// Finish anti-dep breaking for a basic block.
virtual void FinishBlock() =0;
- /// UpdateDbgValue - Update DBG_VALUE if dependency breaker is updating
+ /// Update DBG_VALUE if dependency breaker is updating
/// other machine instruction to use NewReg.
void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
assert (MI->isDebugValue() && "MI is not DBG_VALUE!");
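As the reworded class comment says, the breaker removes write-after-read (WAR) hazards so the post-RA scheduler can reorder more freely. A tiny conceptual illustration, with hypothetical registers, of the renaming it performs:

  // Before breaking: B must stay after A because B overwrites R1, which A reads.
  //   A: R0 = load [R1]
  //   B: R1 = R2 + R3        // WAR anti-dependence on R1
  // After renaming B's destination (and its later uses) to a free register:
  //   A: R0 = load [R1]
  //   B: R4 = R2 + R3        // no dependence; the scheduler may now move B earlier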
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 251f5ef..66c6c63 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -108,7 +108,7 @@ void ARMException::endFunction(const MachineFunction *) {
}
void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
@@ -121,9 +121,9 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
Entry = TypeInfos.size();
}
- for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ for (std::vector<const GlobalValue *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalVariable *GV = *I;
+ const GlobalValue *GV = *I;
if (VerboseAsm)
Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index 42757d7..802e050 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
-#define CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index 083cc0d..cb8e96a 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -11,6 +11,7 @@ codegen_asmprinter_SRC_FILES := \
DIEHash.cpp \
DwarfAccelTable.cpp \
DwarfCFIException.cpp \
+ DwarfCompileUnit.cpp \
DwarfDebug.cpp \
DwarfFile.cpp \
DwarfStringPool.cpp \
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index f80fdea..8a32713 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -14,11 +14,13 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "Win64Exception.h"
#include "WinCodeViewLineTables.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -49,7 +51,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -98,11 +99,10 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
}
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
- : MachineFunctionPass(ID),
- TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()),
- OutContext(Streamer.getContext()),
- OutStreamer(Streamer),
- LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) {
+ : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
+ MII(tm.getSubtargetImpl()->getInstrInfo()),
+ OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr),
+ LastFn(0), Counter(~0U), SetCounter(0) {
DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr;
CurrentFnSym = CurrentFnSymForSize = nullptr;
GCMetadataPrinters = nullptr;
@@ -129,12 +129,12 @@ unsigned AsmPrinter::getFunctionNumber() const {
}
const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
- return TM.getTargetLowering()->getObjFileLowering();
+ return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
}
/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
- return *TM.getDataLayout();
+ return *TM.getSubtargetImpl()->getDataLayout();
}
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
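The same mechanical change recurs through the rest of this file: subtarget-dependent objects are reached through the TargetMachine's subtarget rather than from the TargetMachine itself. A condensed sketch of the accessor chain these hunks rely on (every call below appears verbatim elsewhere in the patch; the wrapper function is only for illustration):

  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetSubtargetInfo.h"

  // TM is the AsmPrinter's TargetMachine.
  static void queryCodeGenInfo(const llvm::TargetMachine &TM) {
    const llvm::TargetSubtargetInfo *STI = TM.getSubtargetImpl();
    const llvm::DataLayout *DL = STI->getDataLayout();           // was TM.getDataLayout()
    const llvm::TargetInstrInfo *TII = STI->getInstrInfo();      // was TM.getInstrInfo()
    const llvm::TargetRegisterInfo *TRI = STI->getRegisterInfo();
    const llvm::TargetLowering *TLI = STI->getTargetLowering();
    (void)DL; (void)TII; (void)TRI; (void)TLI;
  }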
@@ -173,9 +173,9 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
- OutStreamer.InitSections();
+ OutStreamer.InitSections(false);
- Mang = new Mangler(TM.getDataLayout());
+ Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
// Emit the version-min deployment target directive if needed.
//
@@ -222,14 +222,12 @@ bool AsmPrinter::doInitialization(Module &M) {
}
if (MAI->doesSupportDebugInformation()) {
- if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) {
+ if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment())
Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
DbgTimerName,
CodeViewLineTablesGroupName));
- } else {
- DD = new DwarfDebug(this, &M);
- Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
- }
+ DD = new DwarfDebug(this, &M);
+ Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
}
EHStreamer *ES = nullptr;
@@ -243,8 +241,13 @@ bool AsmPrinter::doInitialization(Module &M) {
case ExceptionHandling::ARM:
ES = new ARMException(this);
break;
- case ExceptionHandling::WinEH:
- ES = new Win64Exception(this);
+ case ExceptionHandling::ItaniumWinEH:
+ switch (MAI->getWinEHEncodingType()) {
+ default: llvm_unreachable("unsupported unwinding information encoding");
+ case WinEH::EncodingType::Itanium:
+ ES = new Win64Exception(this);
+ break;
+ }
break;
}
if (ES)
@@ -253,33 +256,10 @@ bool AsmPrinter::doInitialization(Module &M) {
}
static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {
- GlobalValue::LinkageTypes Linkage = GV->getLinkage();
- if (Linkage != GlobalValue::LinkOnceODRLinkage)
- return false;
-
if (!MAI.hasWeakDefCanBeHiddenDirective())
return false;
- if (GV->hasUnnamedAddr())
- return true;
-
- // This is only used for MachO, so right now it doesn't really matter how
- // we handle alias. Revisit this once the MachO linker implements aliases.
- if (isa<GlobalAlias>(GV))
- return false;
-
- // If it is a non constant variable, it needs to be uniqued across shared
- // objects.
- if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
- if (!Var->isConstant())
- return false;
- }
-
- GlobalStatus GS;
- if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared)
- return true;
-
- return false;
+ return canBeOmittedFromSymbolTable(GV);
}
void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
@@ -361,7 +341,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
@@ -578,20 +558,24 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
- if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI,
+ FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload\n";
}
- } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot(
+ &MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
- } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE(
+ &MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Spill\n";
}
- } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+ } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot(
+ &MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
}
@@ -605,8 +589,9 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- OutStreamer.AddComment(Twine("implicit-def: ") +
- TM.getRegisterInfo()->getName(RegNo));
+ OutStreamer.AddComment(
+ Twine("implicit-def: ") +
+ TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
OutStreamer.AddBlankLine();
}
@@ -616,7 +601,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
const MachineOperand &Op = MI->getOperand(i);
assert(Op.isReg() && "KILL instruction must have only register operands");
Str += ' ';
- Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+ Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg());
Str += (Op.isDef() ? "<def>" : "<kill>");
}
AP.OutStreamer.AddComment(Str);
@@ -627,21 +612,27 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
/// of DBG_VALUE, returning true if it was able to do so. A false return
/// means the target will need to handle MI in EmitInstruction.
static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
- // This code handles only the 3-operand target-independent form.
- if (MI->getNumOperands() != 3)
+ // This code handles only the 4-operand target-independent form.
+ if (MI->getNumOperands() != 4)
return false;
SmallString<128> Str;
raw_svector_ostream OS(Str);
OS << "DEBUG_VALUE: ";
- DIVariable V(MI->getOperand(2).getMetadata());
+ DIVariable V = MI->getDebugVariable();
if (V.getContext().isSubprogram()) {
StringRef Name = DISubprogram(V.getContext()).getDisplayName();
if (!Name.empty())
OS << Name << ":";
}
- OS << V.getName() << " <- ";
+ OS << V.getName();
+
+ DIExpression Expr = MI->getDebugExpression();
+ if (Expr.isVariablePiece())
+ OS << " [piece offset=" << Expr.getPieceOffset()
+ << " size=" << Expr.getPieceSize() << "]";
+ OS << " <- ";
// The second operand is only an offset if it's an immediate.
bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
@@ -672,7 +663,8 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
Reg = MI->getOperand(0).getReg();
} else {
assert(MI->getOperand(0).isFI() && "Unknown operand type");
- const TargetFrameLowering *TFI = AP.TM.getFrameLowering();
+ const TargetFrameLowering *TFI =
+ AP.TM.getSubtargetImpl()->getFrameLowering();
Offset += TFI->getFrameIndexReference(*AP.MF,
MI->getOperand(0).getIndex(), Reg);
Deref = true;
@@ -686,7 +678,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
if (Deref)
OS << '[';
- OS << AP.TM.getRegisterInfo()->getName(Reg);
+ OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg);
}
if (Deref)
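With the four-operand DBG_VALUE form, the variable and its expression now come from the instruction's debug metadata operands, so piece expressions show up in the asm comment. A sample of the comment this function now produces (name, sizes, and register are hypothetical; the comment marker depends on the target):

  //   DEBUG_VALUE: myFunction:val [piece offset=32 size=32] <- R3
  // i.e. scope:variable, then the piece bounds when the location covers only
  // part of the variable, then the register or frame location holding it.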
@@ -709,8 +701,8 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
}
bool AsmPrinter::needsSEHMoves() {
- return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH &&
- MF->getFunction()->needsUnwindTableEntry();
+ return MAI->getExceptionHandlingType() == ExceptionHandling::ItaniumWinEH &&
+ MF->getFunction()->needsUnwindTableEntry();
}
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
@@ -722,9 +714,6 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
if (needsCFIMoves() == CFI_M_None)
return;
- if (MMI->getCompactUnwindEncoding() != 0)
- OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
-
const MachineModuleInfo &MMI = MF->getMMI();
const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
@@ -742,12 +731,10 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
- const MachineInstr *LastMI = nullptr;
for (auto &MBB : *MF) {
// Print a label for the basic block.
EmitBasicBlockStart(MBB);
for (auto &MI : MBB) {
- LastMI = &MI;
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
@@ -804,26 +791,22 @@ void AsmPrinter::EmitFunctionBody() {
}
}
}
- }
- // If the last instruction was a prolog label, then we have a situation where
- // we emitted a prolog but no function body. This results in the ending prolog
- // label equaling the end of function label and an invalid "row" in the
- // FDE. We need to emit a noop in this situation so that the FDE's rows are
- // valid.
- bool RequiresNoop = LastMI && LastMI->isCFIInstruction();
+ EmitBasicBlockEnd(MBB);
+ }
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
- if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+ if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) {
MCInst Noop;
- TM.getInstrInfo()->getNoopForMachoTarget(Noop);
- if (Noop.getOpcode()) {
- OutStreamer.AddComment("avoids zero-length function");
+ TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop);
+ OutStreamer.AddComment("avoids zero-length function");
+
+ // Targets can opt-out of emitting the noop here by leaving the opcode
+ // unspecified.
+ if (Noop.getOpcode())
OutStreamer.EmitInstruction(Noop, getSubtargetInfo());
- } else // Target not mc-ized yet.
- OutStreamer.EmitRawText(StringRef("\tnop\n"));
}
const Function *F = MF->getFunction();
@@ -895,17 +878,18 @@ bool AsmPrinter::doFinalization(Module &M) {
unsigned Arch = Triple(getTargetTriple()).getArch();
bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
MCInst TrapInst;
- TM.getInstrInfo()->getTrap(TrapInst);
+ TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst);
+ unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment());
+
+ // Emit the right section for these functions.
+ OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
for (const auto &KV : JITI->getTables()) {
uint64_t Count = 0;
for (const auto &FunPair : KV.second) {
// Emit the function labels to make this be a function entry point.
MCSymbol *FunSym =
OutContext.GetOrCreateSymbol(FunPair.second->getName());
- OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global);
- // FIXME: JumpTableInstrInfo should store information about the required
- // alignment of table entries and the size of the padding instruction.
- EmitAlignment(3);
+ EmitAlignment(LogAlignment);
if (IsThumb)
OutStreamer.EmitThumbFunc(FunSym);
if (MAI->hasDotTypeDotSizeDirective())
@@ -920,16 +904,16 @@ bool AsmPrinter::doFinalization(Module &M) {
const MCSymbolRefExpr *TargetSymRef =
MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
OutContext);
- TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef);
+ TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch(
+ JumpToFun, TargetSymRef);
OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
++Count;
}
// Emit enough padding instructions to fill up to the next power of two.
- // This assumes that the trap instruction takes 8 bytes or fewer.
uint64_t Remaining = NextPowerOf2(Count) - Count;
for (uint64_t C = 0; C < Remaining; ++C) {
- EmitAlignment(3);
+ EmitAlignment(LogAlignment);
OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
}
@@ -976,24 +960,21 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
- if (MAI->hasSetDirective()) {
- OutStreamer.AddBlankLine();
- for (const auto &Alias : M.aliases()) {
- MCSymbol *Name = getSymbol(&Alias);
+ OutStreamer.AddBlankLine();
+ for (const auto &Alias : M.aliases()) {
+ MCSymbol *Name = getSymbol(&Alias);
- if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
- OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
- else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
- OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
- else
- assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
+ if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+ else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
+ OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
- EmitVisibility(Name, Alias.getVisibility());
+ EmitVisibility(Name, Alias.getVisibility());
- // Emit the directives as assignments aka .set:
- OutStreamer.EmitAssignment(Name,
- lowerConstant(Alias.getAliasee(), *this));
- }
+ // Emit the directives as assignments aka .set:
+ OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee(), *this));
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
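Dropping the hasSetDirective() guard means aliases are now always emitted as assignments. For a hypothetical alias @foo of @bar the streamer output looks roughly like:

  //   .globl foo            // or .weak_reference foo for weak/linkonce aliases
  //   foo = bar             // EmitAssignment; some assemblers render this as ".set foo, bar"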
@@ -1062,23 +1043,14 @@ void AsmPrinter::EmitConstantPool() {
const MachineConstantPoolEntry &CPE = CP[i];
unsigned Align = CPE.getAlignment();
- SectionKind Kind;
- switch (CPE.getRelocationInfo()) {
- default: llvm_unreachable("Unknown section kind");
- case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
- case 1:
- Kind = SectionKind::getReadOnlyWithRelLocal();
- break;
- case 0:
- switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
- case 4: Kind = SectionKind::getMergeableConst4(); break;
- case 8: Kind = SectionKind::getMergeableConst8(); break;
- case 16: Kind = SectionKind::getMergeableConst16();break;
- default: Kind = SectionKind::getMergeableConst(); break;
- }
- }
+ SectionKind Kind =
+ CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout());
+
+ const Constant *C = nullptr;
+ if (!CPE.isMachineConstantPoolEntry())
+ C = CPE.Val.ConstVal;
- const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+ const MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C);
// The number of sections are small, just do a linear search from the
// last section to the first.
@@ -1101,13 +1073,22 @@ void AsmPrinter::EmitConstantPool() {
}
// Now print stuff into the calculated sections.
+ const MCSection *CurSection = nullptr;
+ unsigned Offset = 0;
for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
- OutStreamer.SwitchSection(CPSections[i].S);
- EmitAlignment(Log2_32(CPSections[i].Alignment));
-
- unsigned Offset = 0;
for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
unsigned CPI = CPSections[i].CPEs[j];
+ MCSymbol *Sym = GetCPISymbol(CPI);
+ if (!Sym->isUndefined())
+ continue;
+
+ if (CurSection != CPSections[i].S) {
+ OutStreamer.SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+ CurSection = CPSections[i].S;
+ Offset = 0;
+ }
+
MachineConstantPoolEntry CPE = CP[CPI];
// Emit inter-object padding for alignment.
@@ -1116,9 +1097,10 @@ void AsmPrinter::EmitConstantPool() {
OutStreamer.EmitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
- Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
- OutStreamer.EmitLabel(GetCPISymbol(CPI));
+ Offset = NewOffset +
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty);
+ OutStreamer.EmitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
else
@@ -1131,7 +1113,7 @@ void AsmPrinter::EmitConstantPool() {
/// by the current function to the current output stream.
///
void AsmPrinter::EmitJumpTableInfo() {
- const DataLayout *DL = MF->getTarget().getDataLayout();
+ const DataLayout *DL = MF->getSubtarget().getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (!MJTI) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
@@ -1156,12 +1138,14 @@ void AsmPrinter::EmitJumpTableInfo() {
} else {
// Otherwise, drop it in the readonly section.
const MCSection *ReadOnlySection =
- getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+ getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
+ /*C=*/nullptr);
OutStreamer.SwitchSection(ReadOnlySection);
JTInDiffSection = true;
}
- EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
+ EmitAlignment(Log2_32(
+ MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout())));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
@@ -1174,17 +1158,17 @@ void AsmPrinter::EmitJumpTableInfo() {
// If this jump table was deleted, ignore it.
if (JTBBs.empty()) continue;
- // For the EK_LabelDifference32 entry, if the target supports .set, emit a
- // .set directive for each unique entry. This reduces the number of
- // relocations the assembler will generate for the jump table.
+ // For the EK_LabelDifference32 entry, if using .set avoids a relocation,
+ // emit a .set directive for each unique entry.
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
- MAI->hasSetDirective()) {
+ MAI->doesSetDirectiveSuppressesReloc()) {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
const MachineBasicBlock *MBB = JTBBs[ii];
- if (!EmittedSets.insert(MBB)) continue;
+ if (!EmittedSets.insert(MBB).second)
+ continue;
// .set LJTSet, LBB32-base
const MCExpr *LHS =
@@ -1223,8 +1207,9 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
case MachineJumpTableInfo::EK_Inline:
llvm_unreachable("Cannot emit EK_Inline jump table entry");
case MachineJumpTableInfo::EK_Custom32:
- Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
- OutContext);
+ Value =
+ TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry(
+ MJTI, MBB, UID, OutContext);
break;
case MachineJumpTableInfo::EK_BlockAddress:
// EK_BlockAddress - Each entry is a plain address of block, e.g.:
@@ -1250,34 +1235,30 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
}
case MachineJumpTableInfo::EK_LabelDifference32: {
- // EK_LabelDifference32 - Each entry is the address of the block minus
- // the address of the jump table. This is used for PIC jump tables where
- // gprel32 is not supported. e.g.:
+ // Each entry is the address of the block minus the address of the jump
+ // table. This is used for PIC jump tables where gprel32 is not supported.
+ // e.g.:
// .word LBB123 - LJTI1_2
- // If the .set directive is supported, this is emitted as:
+ // If the .set directive avoids relocations, this is emitted as:
// .set L4_5_set_123, LBB123 - LJTI1_2
// .word L4_5_set_123
-
- // If we have emitted set directives for the jump table entries, print
- // them rather than the entries themselves. If we're emitting PIC, then
- // emit the table entries as differences between two text section labels.
- if (MAI->hasSetDirective()) {
- // If we used .set, reference the .set's symbol.
+ if (MAI->doesSetDirectiveSuppressesReloc()) {
Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
OutContext);
break;
}
- // Otherwise, use the difference as the jump table entry.
Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
- const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
- Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext);
+ Value = MCBinaryExpr::CreateSub(Value, Base, OutContext);
break;
}
}
assert(Value && "Unknown entry kind!");
- unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
+ unsigned EntrySize =
+ MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout());
OutStreamer.EmitValue(Value, EntrySize);
}
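The hasSetDirective() checks become doesSetDirectiveSuppressesReloc(), so the .set form is used only where it actually removes a relocation. The two emission shapes for an EK_LabelDifference32 entry, using the hypothetical labels from the comment above:

  // When .set suppresses the relocation:
  //   .set  L4_5_set_123, LBB123 - LJTI1_2
  //   .word L4_5_set_123
  // Otherwise, emit the difference against the PIC jump-table base expression:
  //   .word LBB123 - <result of getPICJumpTableRelocBaseExpr>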
@@ -1387,7 +1368,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
unsigned Align = Log2_32(DL->getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(),
[](const Structor &L,
@@ -1450,9 +1431,9 @@ void AsmPrinter::EmitInt32(int Value) const {
OutStreamer.EmitIntValue(Value, 4);
}
-/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
-/// in bytes of the directive is specified by Size and Hi/Lo specify the
-/// labels. This implicitly uses .set if it is available.
+/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
+/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
+/// .set if it avoids relocations.
void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
unsigned Size) const {
// Get the Hi-Lo expression.
@@ -1461,7 +1442,7 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
MCSymbolRefExpr::Create(Lo, OutContext),
OutContext);
- if (!MAI->hasSetDirective()) {
+ if (!MAI->doesSetDirectiveSuppressesReloc()) {
OutStreamer.EmitValue(Diff, Size);
return;
}
@@ -1472,36 +1453,6 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
OutStreamer.EmitSymbolValue(SetLabel, Size);
}
-/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
-/// where the size in bytes of the directive is specified by Size and Hi/Lo
-/// specify the labels. This implicitly uses .set if it is available.
-void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
- const MCSymbol *Lo,
- unsigned Size) const {
-
- // Emit Hi+Offset - Lo
- // Get the Hi+Offset expression.
- const MCExpr *Plus =
- MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
- MCConstantExpr::Create(Offset, OutContext),
- OutContext);
-
- // Get the Hi+Offset-Lo expression.
- const MCExpr *Diff =
- MCBinaryExpr::CreateSub(Plus,
- MCSymbolRefExpr::Create(Lo, OutContext),
- OutContext);
-
- if (!MAI->hasSetDirective())
- OutStreamer.EmitValue(Diff, Size);
- else {
- // Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
- OutStreamer.EmitAssignment(SetLabel, Diff);
- OutStreamer.EmitSymbolValue(SetLabel, Size);
- }
-}
-
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
/// where the size in bytes of the directive is specified by Size and Label
/// specifies the label. This implicitly uses .set if it is available.
@@ -1531,7 +1482,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// if required for correctness.
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
- if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
+ if (GV)
+ NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(),
+ NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
@@ -1577,8 +1530,8 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C =
- ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(
+ CE, AP.TM.getSubtargetImpl()->getDataLayout()))
if (C != CE)
return lowerConstant(C, AP);
@@ -1592,7 +1545,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
// Generate a symbolic expression for the byte address
APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
@@ -1616,7 +1569,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
return lowerConstant(CE->getOperand(0), AP);
case Instruction::IntToPtr: {
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
@@ -1626,7 +1579,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
@@ -1699,7 +1652,8 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64) return -1;
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
+ uint64_t Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType());
uint64_t Value = CI->getZExtValue();
// Make sure the constant is at least 8 bits long and has a power
@@ -1743,7 +1697,9 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
- uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
+ uint64_t Bytes =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
return AP.OutStreamer.EmitFill(Bytes, Value);
@@ -1793,7 +1749,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
}
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
unsigned Size = DL.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
@@ -1808,7 +1764,9 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
int Value = isRepeatedByteSequence(CA, AP.TM);
if (Value != -1) {
- uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
+ uint64_t Bytes =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ CA->getType());
AP.OutStreamer.EmitFill(Bytes, Value);
}
else {
@@ -1821,7 +1779,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
emitGlobalConstantImpl(CV->getOperand(i), AP);
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
unsigned Size = DL.getTypeAllocSize(CV->getType());
unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
@@ -1831,7 +1789,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
// Print the fields in successive locations. Pad to align if needed!
- const DataLayout *DL = AP.TM.getDataLayout();
+ const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
unsigned Size = DL->getTypeAllocSize(CS->getType());
const StructLayout *Layout = DL->getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
@@ -1881,7 +1839,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.TM.getDataLayout()->isBigEndian() &&
+ if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() &&
!CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
@@ -1900,13 +1858,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
}
// Emit the tail padding for the long double.
- const DataLayout &DL = *AP.TM.getDataLayout();
+ const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
DL.getTypeStoreSize(CFP->getType()));
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getDataLayout();
+ const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
unsigned BitWidth = CI->getBitWidth();
// Copy the value as we may massage the layout for constants whose bit width
@@ -1952,7 +1910,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// Emit the extra bits after the 64-bits chunks.
// Emit a directive that fills the expected size.
- uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(CI->getType());
+ uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
(ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
@@ -1962,7 +1921,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
}
static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
- const DataLayout *DL = AP.TM.getDataLayout();
+ const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
uint64_t Size = DL->getTypeAllocSize(CV->getType());
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size);
@@ -2027,7 +1986,8 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+ uint64_t Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
if (Size)
emitGlobalConstantImpl(CV, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
@@ -2056,7 +2016,7 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
/// temporary label with the specified stem and unique ID.
MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
Name + Twine(ID));
}
@@ -2064,7 +2024,7 @@ MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
/// GetTempSymbol - Return an assembler temporary label with the specified
/// stem.
MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
Name);
}
@@ -2080,7 +2040,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
return OutContext.GetOrCreateSymbol
(Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ "_" + Twine(CPID));
@@ -2094,7 +2054,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
/// GetJTSetSymbol - Return the symbol for the specified jump table .set
/// FIXME: privatize to AsmPrinter.
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
return OutContext.GetOrCreateSymbol
(Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
Twine(UID) + "_set_" + Twine(MBBID));
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 02cd12b..05f6a68 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -130,7 +131,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
default:
llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr:
- return TM.getDataLayout()->getPointerSize();
+ return TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
case dwarf::DW_EH_PE_udata2:
return 2;
case dwarf::DW_EH_PE_udata4:
@@ -214,13 +215,10 @@ static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset,
Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
}
-/// Emit a dwarf register operation for describing
-/// - a small value occupying only part of a register or
-/// - a small register representing only part of a value.
-static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits,
- unsigned OffsetInBits) {
- assert(SizeInBits > 0 && "zero-sized piece");
- unsigned SizeOfByte = 8;
+void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits,
+ unsigned OffsetInBits) const {
+ assert(SizeInBits > 0 && "piece has size zero");
+ const unsigned SizeOfByte = 8;
if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece");
Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits));
@@ -249,13 +247,13 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
unsigned PieceSizeInBits,
unsigned PieceOffsetInBits) const {
assert(MLoc.isReg() && "MLoc must be a register");
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
// If this is a valid register number, emit it.
if (Reg >= 0) {
emitDwarfRegOp(Streamer, Reg);
- emitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits);
+ EmitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits);
return;
}
@@ -266,19 +264,19 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
if (Reg >= 0) {
unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg());
unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned Offset = TRI->getSubRegIdxOffset(Idx);
+ unsigned RegOffset = TRI->getSubRegIdxOffset(Idx);
OutStreamer.AddComment("super-register");
emitDwarfRegOp(Streamer, Reg);
- if (PieceOffsetInBits == Offset) {
- emitDwarfOpPiece(Streamer, Size, Offset);
+ if (PieceOffsetInBits == RegOffset) {
+ EmitDwarfOpPiece(Streamer, Size, RegOffset);
} else {
// If this is part of a variable in a sub-register at a
// non-zero offset, we need to manually shift the value into
// place, since the DW_OP_piece describes the part of the
// variable, not the position of the subregister.
- emitDwarfOpPiece(Streamer, Size, PieceOffsetInBits);
- if (Offset)
- emitDwarfOpShr(Streamer, Offset);
+ if (RegOffset)
+ emitDwarfOpShr(Streamer, RegOffset);
+ EmitDwarfOpPiece(Streamer, Size, PieceOffsetInBits);
}
return;
}
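EmitDwarfOpPiece is now a public AsmPrinter method, and for a super-register piece at a non-zero offset the shift is emitted before the piece operator rather than after it. The bit_piece branch shown above handles unaligned or offset pieces; the byte-aligned case (outside this hunk) falls back to a plain DW_OP_piece. The resulting expression shapes, with a hypothetical register number:

  // Byte-aligned piece starting at bit 0 (e.g. a 32-bit value in a 64-bit reg):
  //   DW_OP_reg5, DW_OP_piece 4
  // Piece that is not byte-aligned or starts at a non-zero bit offset:
  //   DW_OP_reg5, DW_OP_bit_piece <size-in-bits> <offset-in-bits>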
@@ -312,7 +310,7 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
if (Reg >= 0 && Intersection.any()) {
OutStreamer.AddComment("sub-register");
emitDwarfRegOp(Streamer, Reg);
- emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset);
+ EmitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset);
CurPos = Offset + Size;
// Mark it as emitted.
@@ -331,7 +329,7 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
const MachineLocation &MLoc,
bool Indirect) const {
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
if (Reg < 0) {
// We assume that pointers are always in an addressable register.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index 2825367..31867dd 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__
-#define CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
#include "llvm/Support/DataTypes.h"
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 46ee0c8..cca5f22 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -110,14 +111,14 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
HasDiagHandler = true;
}
- MemoryBuffer *Buffer;
+ std::unique_ptr<MemoryBuffer> Buffer;
if (isNullTerminated)
Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
else
Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
- SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+ SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI));
@@ -146,6 +147,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
+ if (MF) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
+ }
// Don't implicitly switch to the text section before the asm.
int Res = Parser->Run(/*NoInitialTextSection*/ true,
@@ -500,7 +505,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
const char *Code) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
if (!strcmp(Code, "private")) {
OS << DL->getPrivateGlobalPrefix();
} else if (!strcmp(Code, "comment")) {
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 6c01d65..0cc8353 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_BYTESTREAMER_H
-#define LLVM_CODEGEN_BYTESTREAMER_H
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index f555f21..e6b7d64 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter
DIEHash.cpp
DwarfAccelTable.cpp
DwarfCFIException.cpp
+ DwarfCompileUnit.cpp
DwarfDebug.cpp
DwarfFile.cpp
DwarfStringPool.cpp
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index c3dcd9c..50ea369 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -12,12 +12,15 @@
//===----------------------------------------------------------------------===//
#include "DIE.h"
+
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
@@ -370,6 +373,29 @@ void DIEString::print(raw_ostream &O) const {
// DIEEntry Implementation
//===----------------------------------------------------------------------===//
+/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the
+/// directive is specified by Size and Hi/Lo specify the labels.
+static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi,
+ uint64_t Offset, const MCSymbol *Lo,
+ unsigned Size) {
+ MCContext &Context = Streamer.getContext();
+
+ // Emit Hi+Offset - Lo
+ // Get the Hi+Offset expression.
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context),
+ MCConstantExpr::Create(Offset, Context), Context);
+
+ // Get the Hi+Offset-Lo expression.
+ const MCExpr *Diff = MCBinaryExpr::CreateSub(
+ Plus, MCSymbolRefExpr::Create(Lo, Context), Context);
+
+ // Otherwise, emit with .set (aka assignment).
+ MCSymbol *SetLabel = Context.CreateTempSymbol();
+ Streamer.EmitAssignment(SetLabel, Diff);
+ Streamer.EmitSymbolValue(SetLabel, Size);
+}
+
/// EmitValue - Emit debug information entry offset.
///
void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
@@ -388,9 +414,9 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
DIEEntry::getRefAddrSize(AP));
else
- AP->EmitLabelOffsetDifference(CU->getSectionSym(), Addr,
- CU->getSectionSym(),
- DIEEntry::getRefAddrSize(AP));
+ emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr,
+ CU->getSectionSym(),
+ DIEEntry::getRefAddrSize(AP));
} else
AP->EmitInt32(Entry.getOffset());
}
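The Hi+Offset-Lo emission that AsmPrinter::EmitLabelOffsetDifference used to provide (removed in the AsmPrinter.cpp hunks above) survives here as a file-local helper that always routes through a temporary assignment. The directives it produces, with a hypothetical temporary name and Size == 4:

  //   Ltmp0 = Hi+Offset-Lo         // or ".set Ltmp0, Hi+Offset-Lo"; Ltmp0 is a fresh
  //                                // temporary from MCContext::CreateTempSymbol()
  //   .long Ltmp0                  // width chosen by the Size argument (4 here)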
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index ef05f17..e310aef 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DIE_H__
-#define CODEGEN_ASMPRINTER_DIE_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -381,10 +381,10 @@ public:
///
class DIEString : public DIEValue {
const DIEValue *Access;
- const StringRef Str;
+ StringRef Str;
public:
- DIEString(const DIEValue *Acc, const StringRef S)
+ DIEString(const DIEValue *Acc, StringRef S)
: DIEValue(isString), Access(Acc), Str(S) {}
/// getString - Grab the string out of the object.
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index c2fad59..b2a3ba8 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -261,7 +261,7 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
return;
}
- // otherwise, b) use the letter 'T' as a the marker, ...
+ // otherwise, b) use the letter 'T' as the marker, ...
addULEB128('T');
addULEB128(Attribute);
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index 175d660..872aa0e 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DIEHASH_H__
-#define CODEGEN_ASMPRINTER_DIEHASH_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
#include "DIE.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index a66d08e..0c2a5e5 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -8,25 +8,25 @@
//===----------------------------------------------------------------------===//
#include "DbgValueHistoryCalculator.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>
-#include <set>
+using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
-namespace llvm {
-
// \brief If @MI is a DBG_VALUE with debug value described by a
// defined register, returns the number of this register.
// In the other case, returns 0.
static unsigned isDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue());
- assert(MI.getNumOperands() == 3);
+ assert(MI.getNumOperands() == 4);
// If location of variable is described using a register (directly or
// indirectly), this register is always the first operand.
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
@@ -36,7 +36,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var,
const MachineInstr &MI) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
- assert(MI.isDebugValue() && MI.getDebugVariable() == Var);
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
auto &Ranges = VarInstrRanges[Var];
if (!Ranges.empty() && Ranges.back().second == nullptr &&
Ranges.back().first->isIdenticalTo(&MI)) {
@@ -96,6 +96,19 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars,
VarSet.push_back(Var);
}
+// \brief Terminate the location range for variables described by register at
+// @I by inserting @ClobberingInstr to their history.
+static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
+ RegDescribedVarsMap::iterator I,
+ DbgValueHistoryMap &HistMap,
+ const MachineInstr &ClobberingInstr) {
+ // Iterate over all variables described by this register and add this
+ // instruction to their history, clobbering it.
+ for (const auto &Var : I->second)
+ HistMap.endInstrRange(Var, ClobberingInstr);
+ RegVars.erase(I);
+}
+
// \brief Terminate the location range for variables described by register
// @RegNo by inserting @ClobberingInstr to their history.
static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
@@ -104,22 +117,26 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
const auto &I = RegVars.find(RegNo);
if (I == RegVars.end())
return;
- // Iterate over all variables described by this register and add this
- // instruction to their history, clobbering it.
- for (const auto &Var : I->second)
- HistMap.endInstrRange(Var, ClobberingInstr);
- RegVars.erase(I);
+ clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);
}
-// \brief Collect all registers clobbered by @MI and insert them to @Regs.
-static void collectClobberedRegisters(const MachineInstr &MI,
+// \brief Collect all registers clobbered by @MI and apply the functor
+// @Func to their RegNo.
+// @Func should be a functor with a void(unsigned) signature. We're
+// not using std::function here for performance reasons. It has a
+// small but measurable impact. By using a functor instead of a
+// std::set& here, we can avoid the overhead of constructing
+// temporaries in calculateDbgValueHistory, which has a significant
+// performance impact.
+template<typename Callable>
+static void applyToClobberedRegisters(const MachineInstr &MI,
const TargetRegisterInfo *TRI,
- std::set<unsigned> &Regs) {
+ Callable Func) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.getReg())
continue;
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Regs.insert(*AI);
+ Func(*AI);
}
}
@@ -133,11 +150,12 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
// as the return instruction.
DebugLoc LastLoc = LastMI->getDebugLoc();
auto Res = LastMI;
- for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend();
- ++I) {
+ for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)),
+ E = MBB.rend();
+ I != E; ++I) {
if (I->getDebugLoc() != LastLoc)
return Res;
- Res = std::prev(I.base());
+ Res = &*I;
}
// If all instructions have the same debug location, assume whole MBB is
// an epilogue.
@@ -145,25 +163,26 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
}
// \brief Collect registers that are modified in the function body (their
-// contents is changed only in the prologue and epilogue).
+// contents are changed outside of the prologue and epilogue).
static void collectChangingRegs(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
- std::set<unsigned> &Regs) {
+ BitVector &Regs) {
for (const auto &MBB : *MF) {
auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
- bool IsInEpilogue = false;
+
for (const auto &MI : MBB) {
- IsInEpilogue |= &MI == FirstEpilogueInst;
- if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue)
- collectClobberedRegisters(MI, TRI, Regs);
+ if (&MI == FirstEpilogueInst)
+ break;
+ if (!MI.getFlag(MachineInstr::FrameSetup))
+ applyToClobberedRegisters(MI, TRI, [&](unsigned r) { Regs.set(r); });
}
}
}
-void calculateDbgValueHistory(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &Result) {
- std::set<unsigned> ChangingRegs;
+void llvm::calculateDbgValueHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &Result) {
+ BitVector ChangingRegs(TRI->getNumRegs());
collectChangingRegs(MF, TRI, ChangingRegs);
RegDescribedVarsMap RegVars;
@@ -172,17 +191,18 @@ void calculateDbgValueHistory(const MachineFunction *MF,
if (!MI.isDebugValue()) {
// Not a DBG_VALUE instruction. It may clobber registers which describe
// some variables.
- std::set<unsigned> MIClobberedRegs;
- collectClobberedRegisters(MI, TRI, MIClobberedRegs);
- for (unsigned RegNo : MIClobberedRegs) {
- if (ChangingRegs.count(RegNo))
+ applyToClobberedRegisters(MI, TRI, [&](unsigned RegNo) {
+ if (ChangingRegs.test(RegNo))
clobberRegisterUses(RegVars, RegNo, Result, MI);
- }
+ });
continue;
}
assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
- const MDNode *Var = MI.getDebugVariable();
+  // Use the base variable (without any DW_OP_piece expressions)
+  // as the index into History. The full variable, including any
+  // piece expressions, is attached to the MI.
+ DIVariable Var = MI.getDebugVariable();
if (unsigned PrevReg = Result.getRegisterForVar(Var))
dropRegDescribedVar(RegVars, PrevReg, Var);
@@ -196,11 +216,12 @@ void calculateDbgValueHistory(const MachineFunction *MF,
// Make sure locations for register-described variables are valid only
// until the end of the basic block (unless it's the last basic block, in
// which case let their liveness run off to the end of the function).
- if (!MBB.empty() && &MBB != &MF->back()) {
- for (unsigned RegNo : ChangingRegs)
- clobberRegisterUses(RegVars, RegNo, Result, MBB.back());
+ if (!MBB.empty() && &MBB != &MF->back()) {
+ for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
+ auto CurElem = I++; // CurElem can be erased below.
+ if (ChangingRegs.test(CurElem->first))
+ clobberRegisterUses(RegVars, CurElem, Result, MBB.back());
+ }
}
}
}
-
-}
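The end-of-block flush in calculateDbgValueHistory above erases RegVars entries while walking the map, which is why the iterator is advanced before the saved element is handed to clobberRegisterUses. A small stand-alone sketch of that erase-while-iterating idiom; std::map stands in for RegDescribedVarsMap, and the even/odd test is only a placeholder for ChangingRegs.test(RegNo).

    #include <cstdio>
    #include <map>
    #include <vector>

    int main() {
      std::map<unsigned, std::vector<int>> RegVars = {
          {1, {10}}, {2, {20}}, {4, {40}}};

      // Advance the iterator first, then erase the saved element, so the
      // loop iterator is never invalidated by the erase.
      for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
        auto CurElem = I++;           // CurElem can be erased below.
        if (CurElem->first % 2 == 0)  // stand-in for ChangingRegs.test(...)
          RegVars.erase(CurElem);
      }

      for (const auto &KV : RegVars)
        std::printf("reg %u still described\n", KV.first);
    }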
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index b9177f0..4b62007 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_
-#define CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -28,9 +28,11 @@ class DbgValueHistoryMap {
// range. If end is not specified, location is valid until the start
// instruction of the next instruction range, or until the end of the
// function.
+public:
typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
typedef SmallVector<InstrRange, 4> InstrRanges;
typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap;
+private:
InstrRangesMap VarInstrRanges;
public:
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 3beb799..6cca985 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -7,14 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__
-#define CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCSymbol.h"
namespace llvm {
-class DwarfCompileUnit;
class MDNode;
/// \brief This struct describes location entries emitted in the .debug_loc
/// section.
@@ -26,25 +26,30 @@ class DebugLocEntry {
public:
/// A single location or constant.
struct Value {
- Value(const MDNode *Var, int64_t i)
- : Variable(Var), EntryKind(E_Integer) {
+ Value(const MDNode *Var, const MDNode *Expr, int64_t i)
+ : Variable(Var), Expression(Expr), EntryKind(E_Integer) {
Constant.Int = i;
}
- Value(const MDNode *Var, const ConstantFP *CFP)
- : Variable(Var), EntryKind(E_ConstantFP) {
+ Value(const MDNode *Var, const MDNode *Expr, const ConstantFP *CFP)
+ : Variable(Var), Expression(Expr), EntryKind(E_ConstantFP) {
Constant.CFP = CFP;
}
- Value(const MDNode *Var, const ConstantInt *CIP)
- : Variable(Var), EntryKind(E_ConstantInt) {
+ Value(const MDNode *Var, const MDNode *Expr, const ConstantInt *CIP)
+ : Variable(Var), Expression(Expr), EntryKind(E_ConstantInt) {
Constant.CIP = CIP;
}
- Value(const MDNode *Var, MachineLocation Loc)
- : Variable(Var), EntryKind(E_Location), Loc(Loc) {
+ Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc)
+ : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) {
+ assert(DIVariable(Var).Verify());
+ assert(DIExpression(Expr).Verify());
}
// The variable to which this location entry corresponds.
const MDNode *Variable;
+ // Any complex address location expression for this Value.
+ const MDNode *Expression;
+
// Type of entry that this represents.
enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
enum EntryType EntryKind;
@@ -59,23 +64,6 @@ public:
// Or a location in the machine frame.
MachineLocation Loc;
- bool operator==(const Value &other) const {
- if (EntryKind != other.EntryKind)
- return false;
-
- switch (EntryKind) {
- case E_Location:
- return Loc == other.Loc;
- case E_Integer:
- return Constant.Int == other.Constant.Int;
- case E_ConstantFP:
- return Constant.CFP == other.Constant.CFP;
- case E_ConstantInt:
- return Constant.CIP == other.Constant.CIP;
- }
- llvm_unreachable("unhandled EntryKind");
- }
-
bool isLocation() const { return EntryKind == E_Location; }
bool isInt() const { return EntryKind == E_Integer; }
bool isConstantFP() const { return EntryKind == E_ConstantFP; }
@@ -84,40 +72,114 @@ public:
const ConstantFP *getConstantFP() const { return Constant.CFP; }
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
- const MDNode *getVariable() const { return Variable; }
+ const MDNode *getVariableNode() const { return Variable; }
+ DIVariable getVariable() const { return DIVariable(Variable); }
+ bool isVariablePiece() const { return getExpression().isVariablePiece(); }
+ DIExpression getExpression() const { return DIExpression(Expression); }
+ friend bool operator==(const Value &, const Value &);
+ friend bool operator<(const Value &, const Value &);
};
+
private:
- /// A list of locations/constants belonging to this entry.
+ /// A nonempty list of locations/constants belonging to this entry,
+ /// sorted by offset.
SmallVector<Value, 1> Values;
- /// The compile unit that this location entry is referenced by.
- const DwarfCompileUnit *Unit;
-
public:
- DebugLocEntry() : Begin(nullptr), End(nullptr), Unit(nullptr) {}
- DebugLocEntry(const MCSymbol *B, const MCSymbol *E,
- Value Val, const DwarfCompileUnit *U)
- : Begin(B), End(E), Unit(U) {
+ DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
+ : Begin(B), End(E) {
Values.push_back(std::move(Val));
}
+ /// \brief If this and Next are describing different pieces of the same
+  /// variable, merge them by appending Next's values to the current
+  /// list of values.
+  /// Return true if the merge was successful.
+ bool MergeValues(const DebugLocEntry &Next) {
+ if (Begin == Next.Begin) {
+ DIExpression Expr(Values[0].Expression);
+ DIVariable Var(Values[0].Variable);
+ DIExpression NextExpr(Next.Values[0].Expression);
+ DIVariable NextVar(Next.Values[0].Variable);
+ if (Var == NextVar && Expr.isVariablePiece() &&
+ NextExpr.isVariablePiece()) {
+ addValues(Next.Values);
+ End = Next.End;
+ return true;
+ }
+ }
+ return false;
+ }
+
/// \brief Attempt to merge this DebugLocEntry with Next and return
/// true if the merge was successful. Entries can be merged if they
/// share the same Loc/Constant and if Next immediately follows this
/// Entry.
- bool Merge(const DebugLocEntry &Next) {
+ bool MergeRanges(const DebugLocEntry &Next) {
+ // If this and Next are describing the same variable, merge them.
if ((End == Next.Begin && Values == Next.Values)) {
End = Next.End;
return true;
}
return false;
}
+
const MCSymbol *getBeginSym() const { return Begin; }
const MCSymbol *getEndSym() const { return End; }
- const DwarfCompileUnit *getCU() const { return Unit; }
- const ArrayRef<Value> getValues() const { return Values; }
- void addValue(Value Val) { Values.push_back(Val); }
+ ArrayRef<Value> getValues() const { return Values; }
+ void addValues(ArrayRef<DebugLocEntry::Value> Vals) {
+ Values.append(Vals.begin(), Vals.end());
+ sortUniqueValues();
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){
+ return V.isVariablePiece();
+ }) && "value must be a piece");
+ }
+
+ // Sort the pieces by offset.
+ // Remove any duplicate entries by dropping all but the first.
+ void sortUniqueValues() {
+ std::sort(Values.begin(), Values.end());
+ Values.erase(std::unique(Values.begin(), Values.end(),
+ [](const Value &A, const Value &B) {
+ return A.getVariable() == B.getVariable() &&
+ A.getExpression() == B.getExpression();
+ }),
+ Values.end());
+ }
};
+/// Compare two Values for equality.
+inline bool operator==(const DebugLocEntry::Value &A,
+ const DebugLocEntry::Value &B) {
+ if (A.EntryKind != B.EntryKind)
+ return false;
+
+ if (A.Expression != B.Expression)
+ return false;
+
+ if (A.Variable != B.Variable)
+ return false;
+
+ switch (A.EntryKind) {
+ case DebugLocEntry::Value::E_Location:
+ return A.Loc == B.Loc;
+ case DebugLocEntry::Value::E_Integer:
+ return A.Constant.Int == B.Constant.Int;
+ case DebugLocEntry::Value::E_ConstantFP:
+ return A.Constant.CFP == B.Constant.CFP;
+ case DebugLocEntry::Value::E_ConstantInt:
+ return A.Constant.CIP == B.Constant.CIP;
+ }
+ llvm_unreachable("unhandled EntryKind");
+}
+
+/// Compare two pieces based on their offset.
+inline bool operator<(const DebugLocEntry::Value &A,
+ const DebugLocEntry::Value &B) {
+ return A.getExpression().getPieceOffset() <
+ B.getExpression().getPieceOffset();
}
+
+}
+
#endif
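A stand-alone sketch of the sort-then-unique step performed by addValues/sortUniqueValues above. PieceValue is only a stand-in for DebugLocEntry::Value, reduced to the identity used for de-duplication (one field in place of the Variable/Expression pair) and the piece offset used for ordering.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct PieceValue {
      unsigned VarId;   // stands in for the (Variable, Expression) identity
      unsigned Offset;  // piece offset within the variable, in bytes
    };

    int main() {
      std::vector<PieceValue> Values = {{7, 8}, {7, 0}, {7, 8}};

      // Sort the pieces by offset, then drop all but the first of any
      // duplicates describing the same piece of the same variable.
      std::sort(Values.begin(), Values.end(),
                [](const PieceValue &A, const PieceValue &B) {
                  return A.Offset < B.Offset;
                });
      Values.erase(std::unique(Values.begin(), Values.end(),
                               [](const PieceValue &A, const PieceValue &B) {
                                 return A.VarId == B.VarId && A.Offset == B.Offset;
                               }),
                   Values.end());

      for (const auto &V : Values)
        std::printf("var %u piece at byte offset %u\n", V.VarId, V.Offset);
    }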
diff --git a/lib/CodeGen/AsmPrinter/DebugLocList.h b/lib/CodeGen/AsmPrinter/DebugLocList.h
index 7a51c7b..2a4f58f 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocList.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocList.h
@@ -7,16 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__
-#define CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H
-#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/SmallVector.h"
#include "DebugLocEntry.h"
namespace llvm {
+class DwarfCompileUnit;
+class MCSymbol;
struct DebugLocList {
MCSymbol *Label;
+ DwarfCompileUnit *CU;
SmallVector<DebugLocEntry, 4> List;
};
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index e9527c4..7e87566 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -13,6 +13,7 @@
#include "DwarfAccelTable.h"
#include "DIE.h"
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
@@ -174,7 +175,8 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
// Walk through the buckets and emit the full data for each element in
// the bucket. For the string case emit the dies and the various offsets.
// Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
+ MCSymbol *StrSym) {
uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
@@ -183,13 +185,14 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
// Remember to emit the label for our offset.
Asm->OutStreamer.EmitLabel((*HI)->Sym);
Asm->OutStreamer.AddComment((*HI)->Str);
- Asm->EmitSectionOffset((*HI)->Data.StrSym,
- D->getStringPool().getSectionSymbol());
+ Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym);
Asm->OutStreamer.AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.Values.size());
for (HashDataContents *HD : (*HI)->Data.Values) {
// Emit the DIE offset
- Asm->EmitInt32(HD->Die->getOffset());
+ DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit());
+ assert(CU && "Accelerated DIE should belong to a CU.");
+ Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset());
// If we have multiple Atoms emit that info too.
// FIXME: A bit of a hack, we either emit only one atom or all info.
if (HeaderData.Atoms.size() > 1) {
@@ -206,7 +209,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
}
// Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) {
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
+ MCSymbol *StrSym) {
// Emit the header.
EmitHeader(Asm);
@@ -220,7 +224,7 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) {
EmitOffsets(Asm, SecBegin);
// Emit the hash data.
- EmitData(Asm, D);
+ EmitData(Asm, D, StrSym);
}
#ifndef NDEBUG
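The value emitted per accelerated DIE above is the DIE's offset within its unit plus that unit's own starting offset in the debug_info section, i.e. a section-relative rather than unit-relative offset. A tiny worked example of the arithmetic, with invented numbers:

    #include <cstdio>

    int main() {
      unsigned CUDebugInfoOffset = 0x120; // hypothetical: CU start in .debug_info
      unsigned DieOffsetInCU = 0x3c;      // hypothetical: DIE offset within the CU
      // The accelerator table wants the offset from the start of the section.
      std::printf("emitted DIE offset = 0x%x\n", CUDebugInfoOffset + DieOffsetInCU); // 0x15c
    }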
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index a3cc95f..3cdf678 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
-#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
#include "DIE.h"
#include "llvm/ADT/ArrayRef.h"
@@ -62,7 +62,7 @@
namespace llvm {
class AsmPrinter;
-class DwarfFile;
+class DwarfDebug;
class DwarfAccelTable {
@@ -140,7 +140,7 @@ public:
private:
struct TableHeaderData {
uint32_t die_offset_base;
- SmallVector<Atom, 1> Atoms;
+ SmallVector<Atom, 3> Atoms;
TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
: die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
@@ -223,7 +223,7 @@ private:
void EmitBuckets(AsmPrinter *);
void EmitHashes(AsmPrinter *);
void EmitOffsets(AsmPrinter *, MCSymbol *);
- void EmitData(AsmPrinter *, DwarfFile *D);
+ void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym);
// Allocator for HashData and HashDataContents.
BumpPtrAllocator Allocator;
@@ -248,7 +248,7 @@ public:
void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
char Flags = 0);
void FinalizeTable(AsmPrinter *, StringRef);
- void Emit(AsmPrinter *, MCSymbol *, DwarfFile *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym);
#ifndef NDEBUG
void print(raw_ostream &O);
void dump() { print(dbgs()); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 74215aa..0dc52da 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -51,7 +51,7 @@ void DwarfCFIException::endModule() {
if (moveTypeModule == AsmPrinter::CFI_M_Debug)
Asm->OutStreamer.EmitCFISections(false, true);
- if (!Asm->MAI->isExceptionHandlingDwarf())
+ if (!Asm->MAI->usesItaniumLSDAForExceptions())
return;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 0000000..2f1b0e5
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,862 @@
+#include "DwarfCompileUnit.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
+ AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU)
+ : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU),
+ Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) {
+ insertDIE(Node, &getUnitDie());
+}
+
+/// addLabelAddress - Add a dwarf label attribute data and value using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label) {
+
+ // Don't use the address pool in non-fission or in the skeleton unit itself.
+ // FIXME: Once GDB supports this, it's probably worthwhile using the address
+ // pool from the skeleton - maybe even in non-fission (possibly fewer
+ // relocations by sharing them in the pool, but we have other ideas about how
+ // to reduce the number of relocations as well/instead).
+ if (!DD->useSplitDwarf() || !Skeleton)
+ return addLocalLabelAddress(Die, Attribute, Label);
+
+ if (Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
+ unsigned idx = DD->getAddressPool().getIndex(Label);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+}
+
+void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
+ dwarf::Attribute Attribute,
+ const MCSymbol *Label) {
+ if (Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
+ Die.addValue(Attribute, dwarf::DW_FORM_addr,
+ Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label)
+ : new (DIEValueAllocator) DIEInteger(0));
+}
+
+unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If we print assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+
+ // FIXME: add a better feature test than hasRawTextSupport. Even better,
+ // extend .file to support this.
+ return Asm->OutStreamer.EmitDwarfFileDirective(
+ 0, DirName, FileName,
+ Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID());
+}
+
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return nullptr;
+
+ // First operand points to a global struct.
+ Value *Ptr = CE->getOperand(0);
+ if (!isa<GlobalValue>(Ptr) ||
+ !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
+ return nullptr;
+
+ // Second operand is zero.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return nullptr;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return nullptr;
+
+ return CE;
+}
+
+/// getOrCreateGlobalVariableDIE - get or create global variable DIE.
+DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
+ // Check for pre-existence.
+ if (DIE *Die = getDIE(GV))
+ return Die;
+
+ assert(GV.isGlobalVariable());
+
+ DIScope GVContext = DD->resolve(GV.getContext());
+ DIType GTy = DD->resolve(GV.getType());
+
+ // Construct the context before querying for the existence of the DIE in
+ // case such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(GVContext);
+
+ // Add to map.
+ DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV);
+ DIScope DeclContext;
+
+ if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) {
+ DeclContext = resolve(SDMDecl.getContext());
+ assert(SDMDecl.isStaticMember() && "Expected static member decl");
+ assert(GV.isDefinition());
+ // We need the declaration DIE that is in the static member's class.
+ DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
+ addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
+ } else {
+ DeclContext = resolve(GV.getContext());
+ // Add name and type.
+ addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addType(*VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit())
+ addFlag(*VariableDIE, dwarf::DW_AT_external);
+
+ // Add line number info.
+ addSourceLine(*VariableDIE, GV);
+ }
+
+ if (!GV.isDefinition())
+ addFlag(*VariableDIE, dwarf::DW_AT_declaration);
+
+ // Add location.
+ bool addToAccelTable = false;
+ bool isGlobalVariable = GV.getGlobal() != nullptr;
+ if (isGlobalVariable) {
+ addToAccelTable = true;
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
+ if (GV.getGlobal()->isThreadLocal()) {
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by a custom OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
+ }
+
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ // Add the linkage name.
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty())
+ // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
+ // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
+ // TAG_variable.
+ addString(*VariableDIE,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
+ : dwarf::DW_AT_MIPS_linkage_name,
+ GlobalValue::getRealLinkageName(LinkageName));
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ addConstantValue(*VariableDIE, CI, GTy);
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) {
+ addToAccelTable = true;
+ // GV is a merged global.
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ Value *Ptr = CE->getOperand(0);
+ MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ }
+
+ if (addToAccelTable) {
+ DD->addAccelName(GV.getName(), *VariableDIE);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+ DD->addAccelName(GV.getLinkageName(), *VariableDIE);
+ }
+
+ addGlobalName(GV.getName(), *VariableDIE, DeclContext);
+ return VariableDIE;
+}
+
+void DwarfCompileUnit::addRange(RangeSpan Range) {
+ bool SameAsPrevCU = this == DD->getPrevCU();
+ DD->setPrevCU(this);
+ // If we have no current ranges just add the range and return, otherwise,
+ // check the current section and CU against the previous section and CU we
+ // emitted into and the subprogram was contained within. If these are the
+ // same then extend our current range, otherwise add this as a new range.
+ if (CURanges.empty() || !SameAsPrevCU ||
+ (&CURanges.back().getEnd()->getSection() !=
+ &Range.getEnd()->getSection())) {
+ CURanges.push_back(Range);
+ return;
+ }
+
+ CURanges.back().setEnd(Range.getEnd());
+}
+
+void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label,
+ const MCSymbol *Sec) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ addLabel(Die, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Label);
+ else
+ addSectionDelta(Die, Attribute, Label, Sec);
+}
+
+void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
+ // Define start line table label for each Compile Unit.
+ MCSymbol *LineTableStartSym =
+ Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
+
+ stmtListIndex = UnitDie.getValues().size();
+
+  // DW_AT_stmt_list is an offset of line number information for this
+  // compile unit in the debug_line section. For split dwarf this is
+ // left in the skeleton CU and so not included.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
+ addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
+ DwarfLineSectionSym);
+}
+
+void DwarfCompileUnit::applyStmtList(DIE &D) {
+ D.addValue(dwarf::DW_AT_stmt_list,
+ UnitDie.getAbbrev().getData()[stmtListIndex].getForm(),
+ UnitDie.getValues()[stmtListIndex]);
+}
+
+void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
+ const MCSymbol *End) {
+ assert(Begin && "Begin label should not be null!");
+ assert(End && "End label should not be null!");
+ assert(Begin->isDefined() && "Invalid starting label");
+ assert(End->isDefined() && "Invalid end label");
+
+ addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
+ if (DD->getDwarfVersion() < 4)
+ addLabelAddress(D, dwarf::DW_AT_high_pc, End);
+ else
+ addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+}
+
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
+DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) {
+ DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
+
+ attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym());
+ if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
+ *DD->getCurrentFunction()))
+ addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
+
+ // Only include DW_AT_frame_base in full debug info
+ if (!includeMinimalInlineScopes()) {
+ const TargetRegisterInfo *RI =
+ Asm->TM.getSubtargetImpl()->getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ }
+
+ // Add name to the name table, we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_subprogram nodes.
+ DD->addSubprogramNames(SP, *SPDie);
+
+ return *SPDie;
+}
+
+// Construct a DIE for this scope.
+void DwarfCompileUnit::constructScopeDIE(
+ LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren) {
+ if (!Scope || !Scope->getScopeNode())
+ return;
+
+ DIScope DS(Scope->getScopeNode());
+
+ assert((Scope->getInlinedAt() || !DS.isSubprogram()) &&
+ "Only handle inlined subprograms here, use "
+ "constructSubprogramScopeDIE for non-inlined "
+ "subprograms");
+
+ SmallVector<std::unique_ptr<DIE>, 8> Children;
+
+ // We try to create the scope DIE first, then the children DIEs. This will
+ // avoid creating un-used children then removing them later when we find out
+ // the scope DIE is null.
+ std::unique_ptr<DIE> ScopeDIE;
+ if (Scope->getParent() && DS.isSubprogram()) {
+ ScopeDIE = constructInlinedScopeDIE(Scope);
+ if (!ScopeDIE)
+ return;
+ // We create children when the scope DIE is not null.
+ createScopeChildrenDIE(Scope, Children);
+ } else {
+ // Early exit when we know the scope DIE is going to be null.
+ if (DD->isLexicalScopeDIENull(Scope))
+ return;
+
+ unsigned ChildScopeCount;
+
+ // We create children here when we know the scope DIE is not going to be
+ // null and the children will be added to the scope DIE.
+ createScopeChildrenDIE(Scope, Children, &ChildScopeCount);
+
+ // Skip imported directives in gmlt-like data.
+ if (!includeMinimalInlineScopes()) {
+ // There is no need to emit empty lexical block DIE.
+ for (const auto &E : DD->findImportedEntitiesForScope(DS))
+ Children.push_back(
+ constructImportedEntityDIE(DIImportedEntity(E.second)));
+ }
+
+ // If there are only other scopes as children, put them directly in the
+ // parent instead, as this scope would serve no purpose.
+ if (Children.size() == ChildScopeCount) {
+ FinalChildren.insert(FinalChildren.end(),
+ std::make_move_iterator(Children.begin()),
+ std::make_move_iterator(Children.end()));
+ return;
+ }
+ ScopeDIE = constructLexicalScopeDIE(Scope);
+ assert(ScopeDIE && "Scope DIE should not be null.");
+ }
+
+ // Add children
+ for (auto &I : Children)
+ ScopeDIE->addChild(std::move(I));
+
+ FinalChildren.push_back(std::move(ScopeDIE));
+}
+
+void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+ Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Value);
+}
+
+void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
+ SmallVector<RangeSpan, 2> Range) {
+ // Emit offset in .debug_range as a relocatable label. emitDIE will handle
+ // emitting it appropriately.
+ auto *RangeSectionSym = DD->getRangeSectionSym();
+
+ RangeSpanList List(
+ Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()),
+ std::move(Range));
+
+ // Under fission, ranges are specified by constant offsets relative to the
+ // CU's DW_AT_GNU_ranges_base.
+ if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ else
+ addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+
+ // Add the range list to the set of ranges to be emitted.
+ (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List));
+}
+
+void DwarfCompileUnit::attachRangesOrLowHighPC(
+ DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
+ if (Ranges.size() == 1) {
+ const auto &single = Ranges.front();
+ attachLowHighPC(Die, single.getStart(), single.getEnd());
+ } else
+ addScopeRangeList(Die, std::move(Ranges));
+}
+
+void DwarfCompileUnit::attachRangesOrLowHighPC(
+ DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) {
+ SmallVector<RangeSpan, 2> List;
+ List.reserve(Ranges.size());
+ for (const InsnRange &R : Ranges)
+ List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first),
+ DD->getLabelAfterInsn(R.second)));
+ attachRangesOrLowHighPC(Die, std::move(List));
+}
+
+// This scope represents inlined body of a function. Construct DIE to
+// represent this concrete inlined copy of the function.
+std::unique_ptr<DIE>
+DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
+ assert(Scope->getScopeNode());
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+ // was inlined from another compile unit.
+ DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP];
+ assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
+
+ auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine);
+ addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
+
+ attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
+
+ // Add the call site information to the DIE.
+ DILocation DL(Scope->getInlinedAt());
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
+
+ // Add name to the name table, we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
+ DD->addSubprogramNames(InlinedSP, *ScopeDIE);
+
+ return ScopeDIE;
+}
+
+// Construct new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
+std::unique_ptr<DIE>
+DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
+ if (DD->isLexicalScopeDIENull(Scope))
+ return nullptr;
+
+ auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
+
+ return ScopeDIE;
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+std::unique_ptr<DIE> DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
+ bool Abstract) {
+ auto D = constructVariableDIEImpl(DV, Abstract);
+ DV.setDIE(*D);
+ return D;
+}
+
+std::unique_ptr<DIE>
+DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
+ bool Abstract) {
+ // Define variable debug information entry.
+ auto VariableDie = make_unique<DIE>(DV.getTag());
+
+ if (Abstract) {
+ applyVariableAttributes(DV, *VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
+ unsigned Offset = DV.getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ if (const MachineInstr *DVInsn = DV.getMInsn()) {
+ assert(DVInsn->getNumOperands() == 4);
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ // If the second operand is an immediate, this is an indirect value.
+ if (DVInsn->getOperand(1).isImm()) {
+ MachineLocation Location(RegOp.getReg(),
+ DVInsn->getOperand(1).getImm());
+ addVariableAddress(DV, *VariableDie, Location);
+ } else if (RegOp.getReg())
+ addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
+ } else if (DVInsn->getOperand(0).isImm())
+ addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
+ else if (DVInsn->getOperand(0).isFPImm())
+ addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
+ DV.getType());
+
+ return VariableDie;
+ }
+
+ // .. else use frame index.
+ int FI = DV.getFrameIndex();
+ if (FI != ~0) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI =
+ Asm->TM.getSubtargetImpl()->getFrameLowering();
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, *VariableDie, Location);
+ }
+
+ return VariableDie;
+}
+
+std::unique_ptr<DIE> DwarfCompileUnit::constructVariableDIE(
+ DbgVariable &DV, const LexicalScope &Scope, DIE *&ObjectPointer) {
+ auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
+ if (DV.isObjectPointer())
+ ObjectPointer = Var.get();
+ return Var;
+}
+
+DIE *DwarfCompileUnit::createScopeChildrenDIE(
+ LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &Children,
+ unsigned *ChildScopeCount) {
+ DIE *ObjectPointer = nullptr;
+
+ for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
+ Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
+
+ unsigned ChildCountWithoutScopes = Children.size();
+
+ for (LexicalScope *LS : Scope->getChildren())
+ constructScopeDIE(LS, Children);
+
+ if (ChildScopeCount)
+ *ChildScopeCount = Children.size() - ChildCountWithoutScopes;
+
+ return ObjectPointer;
+}
+
+void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
+ assert(Scope && Scope->getScopeNode());
+ assert(!Scope->getInlinedAt());
+ assert(!Scope->isAbstractScope());
+ DISubprogram Sub(Scope->getScopeNode());
+
+ assert(Sub.isSubprogram());
+
+ DD->getProcessedSPNodes().insert(Sub);
+
+ DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
+
+ // If this is a variadic function, add an unspecified parameter.
+ DITypeArray FnArgs = Sub.getType().getTypeArray();
+
+ // Collect lexical scope children first.
+  // ObjectPointer might be a local (non-argument) variable if it's a
+ // block's synthetic this pointer.
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
+ addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+
+ // If we have a single element of null, it is a function that returns void.
+  // If we have more than one element and the last one is null, it is a
+ // variadic function.
+ if (FnArgs.getNumElements() > 1 &&
+ !FnArgs.getElement(FnArgs.getNumElements() - 1) &&
+ !includeMinimalInlineScopes())
+ ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
+}
+
+DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
+ DIE &ScopeDIE) {
+ // We create children when the scope DIE is not null.
+ SmallVector<std::unique_ptr<DIE>, 8> Children;
+ DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
+
+ // Add children
+ for (auto &I : Children)
+ ScopeDIE.addChild(std::move(I));
+
+ return ObjectPointer;
+}
+
+void
+DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+ DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
+ if (AbsDef)
+ return;
+
+ DISubprogram SP(Scope->getScopeNode());
+
+ DIE *ContextDIE;
+
+ if (includeMinimalInlineScopes())
+ ContextDIE = &getUnitDie();
+ // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
+ // the important distinction that the DIDescriptor is not associated with the
+ // DIE (since the DIDescriptor will be associated with the concrete DIE, if
+ // any). It could be refactored to some common utility function.
+ else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ ContextDIE = &getUnitDie();
+ getOrCreateSubprogramDIE(SPDecl);
+ } else
+ ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+
+ // Passing null as the associated DIDescriptor because the abstract definition
+ // shouldn't be found by lookup.
+ AbsDef =
+ &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, DIDescriptor());
+ applySubprogramAttributesToDefinition(SP, *AbsDef);
+
+ if (!includeMinimalInlineScopes())
+ addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, *AbsDef))
+ addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
+}
+
+std::unique_ptr<DIE>
+DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) {
+ assert(Module.Verify() &&
+ "Use one of the MDNode * overloads to handle invalid metadata");
+ std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag());
+ insertDIE(Module, IMDie.get());
+ DIE *EntityDie;
+ DIDescriptor Entity = resolve(Module.getEntity());
+ if (Entity.isNameSpace())
+ EntityDie = getOrCreateNameSpace(DINameSpace(Entity));
+ else if (Entity.isSubprogram())
+ EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity));
+ else if (Entity.isType())
+ EntityDie = getOrCreateTypeDIE(DIType(Entity));
+ else if (Entity.isGlobalVariable())
+ EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity));
+ else
+ EntityDie = getDIE(Entity);
+ assert(EntityDie);
+ addSourceLine(*IMDie, Module.getLineNumber(),
+ Module.getContext().getFilename(),
+ Module.getContext().getDirectory());
+ addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
+ StringRef Name = Module.getName();
+ if (!Name.empty())
+ addString(*IMDie, dwarf::DW_AT_name, Name);
+
+ return IMDie;
+}
+
+void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) {
+ DIE *D = getDIE(SP);
+ if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) {
+ if (D)
+ // If this subprogram has an abstract definition, reference that
+ addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
+ } else {
+ if (!D && !includeMinimalInlineScopes())
+ // Lazily construct the subprogram if we didn't see either concrete or
+      // inlined versions during codegen. (except in -gmlt, where we want
+ // to omit these entirely)
+ D = getOrCreateSubprogramDIE(SP);
+ if (D)
+ // And attach the attributes
+ applySubprogramAttributesToDefinition(SP, *D);
+ }
+}
+void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) {
+ assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram");
+ assert(SP.isDefinition() &&
+ "CU's subprogram list contains a subprogram declaration");
+ DIArray Variables = SP.getVariables();
+ if (Variables.getNumElements() == 0)
+ return;
+
+ DIE *SPDIE = DU->getAbstractSPDies().lookup(SP);
+ if (!SPDIE)
+ SPDIE = getDIE(SP);
+ assert(SPDIE);
+ for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+ DIVariable DV(Variables.getElement(vi));
+ assert(DV.isVariable());
+ DbgVariable NewVar(DV, DIExpression(nullptr), DD);
+ auto VariableDie = constructVariableDIE(NewVar);
+ applyVariableAttributes(NewVar, *VariableDie);
+ SPDIE->addChild(std::move(VariableDie));
+ }
+}
+
+void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const {
+ // Don't bother labeling the .dwo unit, as its offset isn't used.
+ if (!Skeleton)
+ Asm->OutStreamer.EmitLabel(LabelBegin);
+
+ DwarfUnit::emitHeader(ASectionSym);
+}
+
+/// addGlobalName - Add a new global name to the compile unit.
+void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
+ DIScope Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Name.str();
+ GlobalNames[FullName] = &Die;
+}
+
+/// Add a new global type to the unit.
+void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die,
+ DIScope Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Ty.getName().str();
+ GlobalTypes[FullName] = &Die;
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on provided MachineLocation.
+void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
+ MachineLocation Location) {
+ if (DV.variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV.isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location,
+ DV.getVariable().isIndirect());
+}
+
+/// Add an address attribute to a die based on the location provided.
+void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MachineLocation &Location,
+ bool Indirect) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+
+ if (Location.isReg() && !Indirect)
+ addRegisterOpPiece(*Loc, Location.getReg());
+ else {
+ addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+ if (Indirect && !Location.isReg()) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ }
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, Loc);
+}
+
+/// Start with the address based on the location provided, and generate the
+/// DWARF information necessary to find the actual variable given the extra
+/// address information encoded in the DbgVariable, starting from the starting
+/// location. Add the DWARF information to the die.
+void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (Location.isReg()) {
+ if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_plus) {
+ assert(!DV.getVariable().isIndirect() &&
+ "double indirection not handled");
+ // If first address element is OpPlus then emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1));
+ i = 2;
+ } else if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_deref) {
+ assert(!DV.getVariable().isIndirect() &&
+ "double indirection not handled");
+ addRegisterOpPiece(*Loc, Location.getReg(),
+ DV.getExpression().getPieceSize(),
+ DV.getExpression().getPieceOffset());
+ i = 3;
+ } else
+ addRegisterOpPiece(*Loc, Location.getReg());
+ } else
+ addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == dwarf::DW_OP_plus) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
+
+ } else if (Element == dwarf::DW_OP_deref) {
+ if (!Location.isReg())
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ } else if (Element == dwarf::DW_OP_piece) {
+ const unsigned SizeOfByte = 8;
+ unsigned PieceOffsetInBits = DV.getAddrElement(++i) * SizeOfByte;
+ unsigned PieceSizeInBits = DV.getAddrElement(++i) * SizeOfByte;
+ // Emit DW_OP_bit_piece Size Offset.
+ assert(PieceSizeInBits > 0 && "piece has zero size");
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece);
+ addUInt(*Loc, dwarf::DW_FORM_udata, PieceSizeInBits);
+ addUInt(*Loc, dwarf::DW_FORM_udata, PieceOffsetInBits);
+ } else
+ llvm_unreachable("unknown DIBuilder Opcode");
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, Loc);
+}
+
+/// Add a Dwarf loclistptr attribute data and value.
+void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
+ unsigned Index) {
+ DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
+ dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4;
+ Die.addValue(Attribute, Form, Value);
+}
+
+void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
+ DIE &VariableDie) {
+ StringRef Name = Var.getName();
+ if (!Name.empty())
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(VariableDie, Var.getVariable());
+ addType(VariableDie, Var.getType());
+ if (Var.isArtificial())
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
+/// Add a Dwarf expression attribute data and value.
+void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
+ const MCExpr *Expr) {
+ DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
+ Die.addValue((dwarf::Attribute)0, Form, Value);
+}
+
+void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP,
+ DIE &SPDie) {
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+ applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
+ addGlobalName(SP.getName(), SPDie, Context);
+}
+
+bool DwarfCompileUnit::isDwoUnit() const {
+ return DD->useSplitDwarf() && Skeleton;
+}
+
+bool DwarfCompileUnit::includeMinimalInlineScopes() const {
+ return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly ||
+ (DD->useSplitDwarf() && !Skeleton);
+}
+} // end llvm namespace
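In addComplexAddress above, a DW_OP_piece element carries an offset and a size expressed in bytes, and both are scaled to bits before being emitted as DW_OP_bit_piece operands (size first, then offset). A short stand-alone illustration of just that conversion, with invented operand values:

    #include <cstdio>

    int main() {
      const unsigned SizeOfByte = 8;
      unsigned PieceOffsetBytes = 4, PieceSizeBytes = 8; // hypothetical operands
      unsigned PieceOffsetInBits = PieceOffsetBytes * SizeOfByte; // 32
      unsigned PieceSizeInBits = PieceSizeBytes * SizeOfByte;     // 64
      // DW_OP_bit_piece takes the size first, then the offset, both in bits.
      std::printf("DW_OP_bit_piece %u %u\n", PieceSizeInBits, PieceOffsetInBits);
    }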
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..e521f39
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,250 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DwarfUnit.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/DebugInfo.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DwarfDebug;
+class DwarfFile;
+class MCSymbol;
+class LexicalScope;
+
+class DwarfCompileUnit : public DwarfUnit {
+ /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
+ /// the need to search for it in applyStmtList.
+ unsigned stmtListIndex;
+
+ /// Skeleton unit associated with this unit.
+ DwarfCompileUnit *Skeleton;
+
+ /// A label at the start of the non-dwo section related to this unit.
+ MCSymbol *SectionSym;
+
+ /// The start of the unit within its section.
+ MCSymbol *LabelBegin;
+
+ /// GlobalNames - A map of globally visible named entities for this unit.
+ StringMap<const DIE *> GlobalNames;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ StringMap<const DIE *> GlobalTypes;
+
+ // List of range lists for a given compile unit, separate from the ranges for
+ // the CU itself.
+ SmallVector<RangeSpanList, 1> CURangeLists;
+
+ // List of ranges for a given compile unit.
+ SmallVector<RangeSpan, 2> CURanges;
+
+ // The base address of this unit, if any. Used for relative references in
+ // ranges/locs.
+ const MCSymbol *BaseAddress;
+
+ /// \brief Construct a DIE for the given DbgVariable without initializing the
+ /// DbgVariable's DIE reference.
+ std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV,
+ bool Abstract);
+
+ bool isDwoUnit() const override;
+
+ bool includeMinimalInlineScopes() const;
+
+public:
+ DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU);
+
+ DwarfCompileUnit *getSkeleton() const {
+ return Skeleton;
+ }
+
+ void initStmtList(MCSymbol *DwarfLineSectionSym);
+
+ /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
+ void applyStmtList(DIE &D);
+
+ /// getOrCreateGlobalVariableDIE - get or create global variable DIE.
+ DIE *getOrCreateGlobalVariableDIE(DIGlobalVariable GV);
+
+ /// addLabelAddress - Add a dwarf label attribute data and value using
+ /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label);
+
+ /// addLocalLabelAddress - Add a dwarf label attribute data and value using
+ /// DW_FORM_addr only.
+ void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label);
+
+ /// addSectionDelta - Add a label delta attribute data and value.
+ void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+ const MCSymbol *Lo);
+
+ DwarfCompileUnit &getCU() override { return *this; }
+
+ unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+
+ /// addRange - Add an address range to the list of ranges for this unit.
+ void addRange(RangeSpan Range);
+
+ void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
+
+ /// addSectionLabel - Add a Dwarf section label attribute data and value.
+ ///
+ void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec);
+
+ /// \brief Find DIE for the given subprogram and attach appropriate
+ /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+ /// variables in this scope then create and insert DIEs for these
+ /// variables.
+ DIE &updateSubprogramScopeDIE(DISubprogram SP);
+
+ void constructScopeDIE(LexicalScope *Scope,
+ SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren);
+
+ /// \brief A helper function to construct a RangeSpanList for a given
+ /// lexical scope.
+ void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range);
+
+ void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges);
+
+ void attachRangesOrLowHighPC(DIE &D,
+ const SmallVectorImpl<InsnRange> &Ranges);
+ /// \brief This scope represents inlined body of a function. Construct
+ /// DIE to represent this concrete inlined copy of the function.
+ std::unique_ptr<DIE> constructInlinedScopeDIE(LexicalScope *Scope);
+
+ /// \brief Construct new DW_TAG_lexical_block for this scope and
+ /// attach DW_AT_low_pc/DW_AT_high_pc labels.
+ std::unique_ptr<DIE> constructLexicalScopeDIE(LexicalScope *Scope);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
+ bool Abstract = false);
+
+ std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
+ const LexicalScope &Scope,
+ DIE *&ObjectPointer);
+
+ /// A helper function to create children of a Scope DIE.
+ DIE *createScopeChildrenDIE(LexicalScope *Scope,
+ SmallVectorImpl<std::unique_ptr<DIE>> &Children,
+ unsigned *ChildScopeCount = nullptr);
+
+ /// \brief Construct a DIE for this subprogram scope.
+ void constructSubprogramScopeDIE(LexicalScope *Scope);
+
+ DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
+
+ void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+
+ /// \brief Construct import_module DIE.
+ std::unique_ptr<DIE>
+ constructImportedEntityDIE(const DIImportedEntity &Module);
+
+ void finishSubprogramDefinition(DISubprogram SP);
+
+ void collectDeadVariables(DISubprogram SP);
+
+ /// Set the skeleton unit associated with this unit.
+ void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
+
+ MCSymbol *getSectionSym() const {
+ assert(Section);
+ return SectionSym;
+ }
+
+ /// Pass in the SectionSym even though we could recreate it in every compile
+ /// unit (type units will have actually distinct symbols once they're in
+ /// comdat sections).
+ void initSection(const MCSection *Section, MCSymbol *SectionSym) {
+ DwarfUnit::initSection(Section);
+ this->SectionSym = SectionSym;
+
+ // Don't bother labeling the .dwo unit, as its offset isn't used.
+ if (!Skeleton)
+ LabelBegin =
+ Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+ }
+
+ unsigned getLength() {
+ return sizeof(uint32_t) + // Length field
+ getHeaderSize() + UnitDie.getSize();
+ }
+
+ void emitHeader(const MCSymbol *ASectionSym) const override;
+
+ MCSymbol *getLabelBegin() const {
+ assert(Section);
+ return LabelBegin;
+ }
+
+ /// Add a new global name to the compile unit.
+ void addGlobalName(StringRef Name, DIE &Die, DIScope Context) override;
+
+ /// Add a new global type to the compile unit.
+ void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) override;
+
+ const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
+ const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// Add DW_AT_location attribute for a DbgVariable based on provided
+ /// MachineLocation.
+ void addVariableAddress(const DbgVariable &DV, DIE &Die,
+ MachineLocation Location);
+ /// Add an address attribute to a die based on the location provided.
+ void addAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MachineLocation &Location, bool Indirect = false);
+
+ /// Start with the address based on the location provided, and generate the
+ /// DWARF information necessary to find the actual variable (navigating the
+ /// extra location information encoded in the type) based on the starting
+ /// location. Add the DWARF information to the die.
+ void addComplexAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location);
+
+ /// Add a Dwarf loclistptr attribute data and value.
+ void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
+ void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
+
+ /// Add a Dwarf expression attribute data and value.
+ void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+
+ void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
+
+ /// getRangeLists - Get the vector of range lists.
+ const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+ return (Skeleton ? Skeleton : this)->CURangeLists;
+ }
+
+ /// getRanges - Get the list of ranges for this unit.
+ const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
+ SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); }
+
+ void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
+ const MCSymbol *getBaseAddress() const { return BaseAddress; }
+};
+
+} // end llvm namespace
+
+#endif
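Given the definitions in the .cpp above, isDwoUnit() holds only for the unit that has a skeleton attached (the split-dwarf payload), while includeMinimalInlineScopes() holds for line-tables-only emission and for the skeleton side of split dwarf. The sketch below just enumerates the combinations; Split, HasSkeleton and Gmlt are simplified stand-ins for useSplitDwarf(), a non-null Skeleton pointer, and the LineTablesOnly emission kind.

    #include <cstdio>

    int main() {
      for (int Split = 0; Split < 2; ++Split)
        for (int HasSkeleton = 0; HasSkeleton < 2; ++HasSkeleton)
          for (int Gmlt = 0; Gmlt < 2; ++Gmlt) {
            int IsDwo = Split && HasSkeleton;              // isDwoUnit()
            int Minimal = Gmlt || (Split && !HasSkeleton); // includeMinimalInlineScopes()
            std::printf("split=%d skeleton=%d gmlt=%d -> dwo=%d minimal=%d\n",
                        Split, HasSkeleton, Gmlt, IsDwo, Minimal);
          }
    }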
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 77860c0..230ea46 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "ByteStreamer.h"
#include "DwarfDebug.h"
+
+#include "ByteStreamer.h"
+#include "DwarfCompileUnit.h"
#include "DIE.h"
#include "DIEHash.h"
#include "DwarfUnit.h"
@@ -36,6 +38,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
@@ -47,6 +50,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
@@ -149,7 +153,7 @@ DIType DbgVariable::getType() const {
if (tag == dwarf::DW_TAG_pointer_type)
subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom());
- DIArray Elements = DICompositeType(subType).getTypeArray();
+ DIArray Elements = DICompositeType(subType).getElements();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDerivedType DT(Elements.getElement(i));
if (getName() == DT.getName())
@@ -165,10 +169,11 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
- : Asm(A), MMI(Asm->MMI), FirstCU(nullptr), PrevLabel(nullptr),
- GlobalRangeCount(0), InfoHolder(A, "info_string", DIEValueAllocator),
+ : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0),
+ InfoHolder(A, *this, "info_string", DIEValueAllocator),
UsedNonDefaultText(false),
- SkeletonHolder(A, "skel_string", DIEValueAllocator),
+ SkeletonHolder(A, *this, "skel_string", DIEValueAllocator),
+ IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
@@ -188,8 +193,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Turn on accelerator tables for Darwin by default, pubnames by
// default for non-Darwin, and handle split dwarf.
- bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin();
-
if (DwarfAccelTables == Default)
HasDwarfAccelTables = IsDarwin;
else
@@ -308,26 +311,6 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
return false;
}
-// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
-// and DW_AT_high_pc attributes. If there are global variables in this
-// scope then create and insert DIEs for these variables.
-DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU,
- DISubprogram SP) {
- DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP);
-
- attachLowHighPC(SPCU, *SPDie, FunctionBeginSym, FunctionEndSym);
-
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- MachineLocation Location(RI->getFrameRegister(*Asm->MF));
- SPCU.addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
-
- // Add name to the name table, we do this here because we're guaranteed
- // to have concrete versions of our DW_TAG_subprogram nodes.
- addSubprogramNames(SP, *SPDie);
-
- return *SPDie;
-}
-
/// Check whether we should create a DIE for the given Scope, return true
/// if we don't create a DIE (the corresponding DIE is null).
bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
@@ -344,271 +327,30 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
// We don't create a DIE if we have a single Range and the end label
// is null.
- SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin();
- MCSymbol *End = getLabelAfterInsn(RI->second);
- return !End;
-}
-
-static void addSectionLabel(AsmPrinter &Asm, DwarfUnit &U, DIE &D,
- dwarf::Attribute A, const MCSymbol *L,
- const MCSymbol *Sec) {
- if (Asm.MAI->doesDwarfUseRelocationsAcrossSections())
- U.addSectionLabel(D, A, L);
- else
- U.addSectionDelta(D, A, L, Sec);
-}
-
-void DwarfDebug::addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE,
- const SmallVectorImpl<InsnRange> &Range) {
- // Emit offset in .debug_range as a relocatable label. emitDIE will handle
- // emitting it appropriately.
- MCSymbol *RangeSym = Asm->GetTempSymbol("debug_ranges", GlobalRangeCount++);
-
- // Under fission, ranges are specified by constant offsets relative to the
- // CU's DW_AT_GNU_ranges_base.
- if (useSplitDwarf())
- TheCU.addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
- DwarfDebugRangeSectionSym);
- else
- addSectionLabel(*Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
- DwarfDebugRangeSectionSym);
-
- RangeSpanList List(RangeSym);
- for (const InsnRange &R : Range) {
- RangeSpan Span(getLabelBeforeInsn(R.first), getLabelAfterInsn(R.second));
- List.addRange(std::move(Span));
- }
-
- // Add the range list to the set of ranges to be emitted.
- TheCU.addRangeList(std::move(List));
-}
-
-void DwarfDebug::attachRangesOrLowHighPC(DwarfCompileUnit &TheCU, DIE &Die,
- const SmallVectorImpl<InsnRange> &Ranges) {
- assert(!Ranges.empty());
- if (Ranges.size() == 1)
- attachLowHighPC(TheCU, Die, getLabelBeforeInsn(Ranges.front().first),
- getLabelAfterInsn(Ranges.front().second));
- else
- addScopeRangeList(TheCU, Die, Ranges);
-}
-
-// Construct new DW_TAG_lexical_block for this scope and attach
-// DW_AT_low_pc/DW_AT_high_pc labels.
-std::unique_ptr<DIE>
-DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
- if (isLexicalScopeDIENull(Scope))
- return nullptr;
-
- auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block);
- if (Scope->isAbstractScope())
- return ScopeDIE;
-
- attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges());
-
- return ScopeDIE;
+ return !getLabelAfterInsn(Ranges.front().second);
}
-// This scope represents inlined body of a function. Construct DIE to
-// represent this concrete inlined copy of the function.
-std::unique_ptr<DIE>
-DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
- assert(Scope->getScopeNode());
- DIScope DS(Scope->getScopeNode());
- DISubprogram InlinedSP = getDISubprogram(DS);
- // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
- // was inlined from another compile unit.
- DIE *OriginDIE = AbstractSPDies[InlinedSP];
- assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
-
- auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine);
- TheCU.addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
-
- attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges());
-
- InlinedSubprogramDIEs.insert(OriginDIE);
-
- // Add the call site information to the DIE.
- DILocation DL(Scope->getInlinedAt());
- TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
- TheCU.getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
- TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
-
- // Add name to the name table, we do this here because we're guaranteed
- // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
- addSubprogramNames(InlinedSP, *ScopeDIE);
-
- return ScopeDIE;
-}
-
-static std::unique_ptr<DIE> constructVariableDIE(DwarfCompileUnit &TheCU,
- DbgVariable &DV,
- const LexicalScope &Scope,
- DIE *&ObjectPointer) {
- auto Var = TheCU.constructVariableDIE(DV, Scope.isAbstractScope());
- if (DV.isObjectPointer())
- ObjectPointer = Var.get();
- return Var;
-}
-
-DIE *DwarfDebug::createScopeChildrenDIE(
- DwarfCompileUnit &TheCU, LexicalScope *Scope,
- SmallVectorImpl<std::unique_ptr<DIE>> &Children) {
- DIE *ObjectPointer = nullptr;
-
- // Collect arguments for current function.
- if (LScopes.isCurrentFunctionScope(Scope)) {
- for (DbgVariable *ArgDV : CurrentFnArguments)
- if (ArgDV)
- Children.push_back(
- constructVariableDIE(TheCU, *ArgDV, *Scope, ObjectPointer));
-
- // If this is a variadic function, add an unspecified parameter.
- DISubprogram SP(Scope->getScopeNode());
- DIArray FnArgs = SP.getType().getTypeArray();
- if (FnArgs.getElement(FnArgs.getNumElements() - 1)
- .isUnspecifiedParameter()) {
- Children.push_back(
- make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
- }
- }
-
- // Collect lexical scope children first.
- for (DbgVariable *DV : ScopeVariables.lookup(Scope))
- Children.push_back(constructVariableDIE(TheCU, *DV, *Scope, ObjectPointer));
-
- for (LexicalScope *LS : Scope->getChildren())
- if (std::unique_ptr<DIE> Nested = constructScopeDIE(TheCU, LS))
- Children.push_back(std::move(Nested));
- return ObjectPointer;
+template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) {
+ F(CU);
+ if (auto *SkelCU = CU.getSkeleton())
+ F(*SkelCU);
}
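
forBothCUs above applies a callable to a compile unit and, when split DWARF created one, to its skeleton as well; later hunks use it from finishSubprogramDefinitions and constructAbstractSubprogramScopeDIE. A self-contained usage sketch, with a hypothetical CUSketch type standing in for DwarfCompileUnit:

#include <iostream>
#include <string>

struct CUSketch {               // hypothetical stand-in for DwarfCompileUnit
  std::string Name;
  CUSketch *Skel;
  CUSketch *getSkeleton() { return Skel; }
};

template <typename Func> void forBothCUs(CUSketch &CU, Func F) {
  F(CU);
  if (auto *SkelCU = CU.getSkeleton())
    F(*SkelCU);
}

int main() {
  CUSketch Skel{"skeleton CU", nullptr};
  CUSketch Full{"full CU", &Skel};
  // The same finishing step runs once on the full (.dwo) unit and once on the
  // skeleton that stays in the object file; with no skeleton it runs once.
  forBothCUs(Full, [](CUSketch &U) { std::cout << U.Name << "\n"; });
  forBothCUs(Skel, [](CUSketch &U) { std::cout << U.Name << "\n"; });
  return 0;
}
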
-void DwarfDebug::createAndAddScopeChildren(DwarfCompileUnit &TheCU,
- LexicalScope *Scope, DIE &ScopeDIE) {
- // We create children when the scope DIE is not null.
- SmallVector<std::unique_ptr<DIE>, 8> Children;
- if (DIE *ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children))
- TheCU.addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
-
- // Add children
- for (auto &I : Children)
- ScopeDIE.addChild(std::move(I));
-}
-
-void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
+void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
- DISubprogram SP(Scope->getScopeNode());
+ const MDNode *SP = Scope->getScopeNode();
ProcessedSPNodes.insert(SP);
- DIE *&AbsDef = AbstractSPDies[SP];
- if (AbsDef)
- return;
-
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- DwarfCompileUnit &SPCU = *SPMap[SP];
- DIE *ContextDIE;
-
- // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
- // the important distinction that the DIDescriptor is not associated with the
- // DIE (since the DIDescriptor will be associated with the concrete DIE, if
- // any). It could be refactored to some common utility function.
- if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
- ContextDIE = &SPCU.getUnitDie();
- SPCU.getOrCreateSubprogramDIE(SPDecl);
- } else
- ContextDIE = SPCU.getOrCreateContextDIE(resolve(SP.getContext()));
-
- // Passing null as the associated DIDescriptor because the abstract definition
- // shouldn't be found by lookup.
- AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE,
- DIDescriptor());
- SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef);
-
- SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
- createAndAddScopeChildren(SPCU, Scope, *AbsDef);
-}
-
-DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
- assert(Scope && Scope->getScopeNode());
- assert(!Scope->getInlinedAt());
- assert(!Scope->isAbstractScope());
- DISubprogram Sub(Scope->getScopeNode());
-
- assert(Sub.isSubprogram());
-
- ProcessedSPNodes.insert(Sub);
-
- DIE &ScopeDIE = updateSubprogramScopeDIE(TheCU, Sub);
-
- createAndAddScopeChildren(TheCU, Scope, ScopeDIE);
-
- return ScopeDIE;
-}
-
-// Construct a DIE for this scope.
-std::unique_ptr<DIE> DwarfDebug::constructScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope) {
- if (!Scope || !Scope->getScopeNode())
- return nullptr;
-
- DIScope DS(Scope->getScopeNode());
-
- assert((Scope->getInlinedAt() || !DS.isSubprogram()) &&
- "Only handle inlined subprograms here, use "
- "constructSubprogramScopeDIE for non-inlined "
- "subprograms");
-
- SmallVector<std::unique_ptr<DIE>, 8> Children;
-
- // We try to create the scope DIE first, then the children DIEs. This will
- // avoid creating un-used children then removing them later when we find out
- // the scope DIE is null.
- std::unique_ptr<DIE> ScopeDIE;
- if (Scope->getParent() && DS.isSubprogram()) {
- ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
- if (!ScopeDIE)
- return nullptr;
- // We create children when the scope DIE is not null.
- createScopeChildrenDIE(TheCU, Scope, Children);
- } else {
- // Early exit when we know the scope DIE is going to be null.
- if (isLexicalScopeDIENull(Scope))
- return nullptr;
-
- // We create children here when we know the scope DIE is not going to be
- // null and the children will be added to the scope DIE.
- createScopeChildrenDIE(TheCU, Scope, Children);
-
- // There is no need to emit empty lexical block DIE.
- std::pair<ImportedEntityMap::const_iterator,
- ImportedEntityMap::const_iterator> Range =
- std::equal_range(ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(),
- std::pair<const MDNode *, const MDNode *>(DS, nullptr),
- less_first());
- if (Children.empty() && Range.first == Range.second)
- return nullptr;
- ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
- assert(ScopeDIE && "Scope DIE should not be null.");
- for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second;
- ++i)
- constructImportedEntityDIE(TheCU, i->second, *ScopeDIE);
- }
-
- // Add children
- for (auto &I : Children)
- ScopeDIE->addChild(std::move(I));
-
- return ScopeDIE;
+ auto &CU = SPMap[SP];
+ forBothCUs(*CU, [&](DwarfCompileUnit &CU) {
+ CU.constructAbstractSubprogramScopeDIE(Scope);
+ });
}
void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
@@ -629,6 +371,8 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
DwarfCompileUnit &NewCU = *OwnedUnit;
DIE &Die = NewCU.getUnitDie();
InfoHolder.addUnit(std::move(OwnedUnit));
+ if (useSplitDwarf())
+ NewCU.setSkeleton(constructSkeletonCU(NewCU));
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
@@ -665,14 +409,10 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
- if (!FirstCU)
- FirstCU = &NewCU;
-
- if (useSplitDwarf()) {
+ if (useSplitDwarf())
NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
DwarfInfoDWOSectionSym);
- NewCU.setSkeleton(constructSkeletonCU(NewCU));
- } else
+ else
NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
DwarfInfoSectionSym);
@@ -681,44 +421,12 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
return NewCU;
}
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
- const MDNode *N) {
+void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
+ const MDNode *N) {
DIImportedEntity Module(N);
assert(Module.Verify());
if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext()))
- constructImportedEntityDIE(TheCU, Module, *D);
-}
-
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
- const MDNode *N, DIE &Context) {
- DIImportedEntity Module(N);
- assert(Module.Verify());
- return constructImportedEntityDIE(TheCU, Module, Context);
-}
-
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
- const DIImportedEntity &Module,
- DIE &Context) {
- assert(Module.Verify() &&
- "Use one of the MDNode * overloads to handle invalid metadata");
- DIE &IMDie = TheCU.createAndAddDIE(Module.getTag(), Context, Module);
- DIE *EntityDie;
- DIDescriptor Entity = resolve(Module.getEntity());
- if (Entity.isNameSpace())
- EntityDie = TheCU.getOrCreateNameSpace(DINameSpace(Entity));
- else if (Entity.isSubprogram())
- EntityDie = TheCU.getOrCreateSubprogramDIE(DISubprogram(Entity));
- else if (Entity.isType())
- EntityDie = TheCU.getOrCreateTypeDIE(DIType(Entity));
- else
- EntityDie = TheCU.getDIE(Entity);
- TheCU.addSourceLine(IMDie, Module.getLineNumber(),
- Module.getContext().getFilename(),
- Module.getContext().getDirectory());
- TheCU.addDIEEntry(IMDie, dwarf::DW_AT_import, *EntityDie);
- StringRef Name = Module.getName();
- if (!Name.empty())
- TheCU.addString(IMDie, dwarf::DW_AT_name, Name);
+ D->addChild(TheCU.constructImportedEntityDIE(Module));
}
// Emit all Dwarf sections that should come prior to the content. Create
@@ -732,8 +440,6 @@ void DwarfDebug::beginModule() {
FunctionDIs = makeSubprogramMap(*M);
- // If module has named metadata anchors then use them, otherwise scan the
- // module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes)
return;
@@ -756,13 +462,18 @@ void DwarfDebug::beginModule() {
ScopesWithImportedEntities.end(), less_first());
DIArray GVs = CUNode.getGlobalVariables();
for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU.createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
+ CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
DIArray SPs = CUNode.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
SPMap.insert(std::make_pair(SPs.getElement(i), &CU));
DIArray EnumTypes = CUNode.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- CU.getOrCreateTypeDIE(EnumTypes.getElement(i));
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) {
+ DIType Ty(EnumTypes.getElement(i));
+ // The enum types array by design contains pointers to
+ // MDNodes rather than DIRefs. Unique them here.
+ DIType UniqueTy(resolve(Ty.getRef()));
+ CU.getOrCreateTypeDIE(UniqueTy);
+ }
DIArray RetainedTypes = CUNode.getRetainedTypes();
for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) {
DIType Ty(RetainedTypes.getElement(i));
@@ -774,7 +485,7 @@ void DwarfDebug::beginModule() {
// Emit imported_modules last so that the relevant context is already
// available.
for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
- constructImportedEntityDIE(CU, ImportedEntities.getElement(i));
+ constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i));
}
// Tell MMI that we have debug info.
@@ -787,9 +498,7 @@ void DwarfDebug::beginModule() {
void DwarfDebug::finishVariableDefinitions() {
for (const auto &Var : ConcreteVariables) {
DIE *VariableDie = Var->getDIE();
- // FIXME: There shouldn't be any variables without DIEs.
- if (!VariableDie)
- continue;
+ assert(VariableDie);
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
// in the ConcreteVariables list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
@@ -805,36 +514,10 @@ void DwarfDebug::finishVariableDefinitions() {
}
void DwarfDebug::finishSubprogramDefinitions() {
- const Module *M = MMI->getModule();
-
- NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- for (MDNode *N : CU_Nodes->operands()) {
- DICompileUnit TheCU(N);
- // Construct subprogram DIE and add variables DIEs.
- DwarfCompileUnit *SPCU =
- static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
- DIArray Subprograms = TheCU.getSubprograms();
- for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
- DISubprogram SP(Subprograms.getElement(i));
- // Perhaps the subprogram is in another CU (such as due to comdat
- // folding, etc), in which case ignore it here.
- if (SPMap[SP] != SPCU)
- continue;
- DIE *D = SPCU->getDIE(SP);
- if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) {
- if (D)
- // If this subprogram has an abstract definition, reference that
- SPCU->addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
- } else {
- if (!D)
- // Lazily construct the subprogram if we didn't see either concrete or
- // inlined versions during codegen.
- D = SPCU->getOrCreateSubprogramDIE(SP);
- // And attach the attributes
- SPCU->applySubprogramAttributesToDefinition(SP, *D);
- }
- }
- }
+ for (const auto &P : SPMap)
+ forBothCUs(*P.second, [&](DwarfCompileUnit &CU) {
+ CU.finishSubprogramDefinition(DISubprogram(P.first));
+ });
}
@@ -854,26 +537,7 @@ void DwarfDebug::collectDeadVariables() {
DISubprogram SP(Subprograms.getElement(i));
if (ProcessedSPNodes.count(SP) != 0)
continue;
- assert(SP.isSubprogram() &&
- "CU's subprogram list contains a non-subprogram");
- assert(SP.isDefinition() &&
- "CU's subprogram list contains a subprogram declaration");
- DIArray Variables = SP.getVariables();
- if (Variables.getNumElements() == 0)
- continue;
-
- DIE *SPDIE = AbstractSPDies.lookup(SP);
- if (!SPDIE)
- SPDIE = SPCU->getDIE(SP);
- assert(SPDIE);
- for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
- DIVariable DV(Variables.getElement(vi));
- assert(DV.isVariable());
- DbgVariable NewVar(DV, this);
- auto VariableDie = SPCU->constructVariableDIE(NewVar);
- SPCU->applyVariableAttributes(NewVar, *VariableDie);
- SPDIE->addChild(std::move(VariableDie));
- }
+ SPCU->collectDeadVariables(SP);
}
}
}
@@ -889,66 +553,52 @@ void DwarfDebug::finalizeModuleInfo() {
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
- for (const auto &TheU : getUnits()) {
+ for (const auto &P : CUMap) {
+ auto &TheCU = *P.second;
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
- TheU->constructContainingTypeDIEs();
+ TheCU.constructContainingTypeDIEs();
// Add CU specific attributes if we need to add any.
- if (TheU->getUnitDie().getTag() == dwarf::DW_TAG_compile_unit) {
- // If we're splitting the dwarf out now that we've got the entire
- // CU then add the dwo id to it.
- DwarfCompileUnit *SkCU =
- static_cast<DwarfCompileUnit *>(TheU->getSkeleton());
- if (useSplitDwarf()) {
- // Emit a unique identifier for this CU.
- uint64_t ID = DIEHash(Asm).computeCUSignature(TheU->getUnitDie());
- TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
- dwarf::DW_FORM_data8, ID);
- SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
- dwarf::DW_FORM_data8, ID);
-
- // We don't keep track of which addresses are used in which CU so this
- // is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty())
- addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
- dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym,
- DwarfAddrSectionSym);
- if (!TheU->getRangeLists().empty())
- addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
- dwarf::DW_AT_GNU_ranges_base,
- DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym);
- }
+ // If we're splitting the dwarf out now that we've got the entire
+ // CU then add the dwo id to it.
+ auto *SkCU = TheCU.getSkeleton();
+ if (useSplitDwarf()) {
+ // Emit a unique identifier for this CU.
+ uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie());
+ TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if (!AddrPool.isEmpty())
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
+ DwarfAddrSectionSym, DwarfAddrSectionSym);
+ if (!SkCU->getRangeLists().empty())
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
+ DwarfDebugRangeSectionSym,
+ DwarfDebugRangeSectionSym);
+ }
- // If we have code split among multiple sections or non-contiguous
- // ranges of code then emit a DW_AT_ranges attribute on the unit that will
- // remain in the .o file, otherwise add a DW_AT_low_pc.
- // FIXME: We should use ranges allow reordering of code ala
- // .subsections_via_symbols in mach-o. This would mean turning on
- // ranges for all subprogram DIEs for mach-o.
- DwarfCompileUnit &U =
- SkCU ? *SkCU : static_cast<DwarfCompileUnit &>(*TheU);
- unsigned NumRanges = TheU->getRanges().size();
- if (NumRanges) {
- if (NumRanges > 1) {
- addSectionLabel(*Asm, U, U.getUnitDie(), dwarf::DW_AT_ranges,
- Asm->GetTempSymbol("cu_ranges", U.getUniqueID()),
- DwarfDebugRangeSectionSym);
-
- // A DW_AT_low_pc attribute may also be specified in combination with
- // DW_AT_ranges to specify the default base address for use in
- // location lists (see Section 2.6.2) and range lists (see Section
- // 2.17.3).
- U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- 0);
- } else {
- RangeSpan &Range = TheU->getRanges().back();
- U.addLocalLabelAddress(U.getUnitDie(), dwarf::DW_AT_low_pc,
- Range.getStart());
- U.addLabelDelta(U.getUnitDie(), dwarf::DW_AT_high_pc, Range.getEnd(),
- Range.getStart());
- }
- }
+ // If we have code split among multiple sections or non-contiguous
+ // ranges of code then emit a DW_AT_ranges attribute on the unit that will
+ // remain in the .o file, otherwise add a DW_AT_low_pc.
+ // FIXME: We should use ranges to allow reordering of code a la
+ // .subsections_via_symbols in mach-o. This would mean turning on
+ // ranges for all subprogram DIEs for mach-o.
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+ if (unsigned NumRanges = TheCU.getRanges().size()) {
+ if (NumRanges > 1)
+ // A DW_AT_low_pc attribute may also be specified in combination with
+ // DW_AT_ranges to specify the default base address for use in
+ // location lists (see Section 2.6.2) and range lists (see Section
+ // 2.17.3).
+ U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+ else
+ TheCU.setBaseAddress(TheCU.getRanges().front().getStart());
+ U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
}
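
The rewritten loop above picks, per CU, between a single DW_AT_low_pc/DW_AT_high_pc pair (one contiguous range, whose start also becomes the unit's base address) and DW_AT_ranges with a zero DW_AT_low_pc (several ranges). A toy sketch of just that decision, with symbols modeled as strings and emission replaced by printing; chooseRangeAttributes is a made-up name, not the real attachRangesOrLowHighPC implementation:

#include <iostream>
#include <string>
#include <vector>

struct RangeSketch { std::string Start, End; };

static void chooseRangeAttributes(const std::vector<RangeSketch> &Ranges) {
  if (Ranges.size() > 1)
    // Multiple ranges: a range list plus low_pc 0 as the default base address.
    std::cout << "DW_AT_low_pc = 0, DW_AT_ranges = <" << Ranges.size()
              << " ranges>\n";
  else
    // Single contiguous range: its start doubles as the unit base address.
    std::cout << "base = " << Ranges.front().Start
              << ", DW_AT_low_pc = " << Ranges.front().Start
              << ", DW_AT_high_pc = " << Ranges.front().End << "\n";
}

int main() {
  chooseRangeAttributes({{"func_begin0", "func_end0"}});
  chooseRangeAttributes({{"func_begin0", "func_end0"},
                         {"func_begin1", "func_end1"}});
  return 0;
}
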
@@ -1010,7 +660,10 @@ void DwarfDebug::endModule() {
assert(CurFn == nullptr);
assert(CurMI == nullptr);
- if (!FirstCU)
+ // If we aren't actually generating debug info (see beginModule, which is
+ // conditionalized on !DisableDebugInfoPrinting and the presence of the
+ // llvm.dbg.cu metadata node), there is nothing to emit.
+ if (!DwarfInfoSectionSym)
return;
// End any existing sections.
@@ -1064,9 +717,6 @@ void DwarfDebug::endModule() {
// clean up.
SPMap.clear();
AbstractVariables.clear();
-
- // Reset these for the next Module if we have one.
- FirstCU = nullptr;
}
// Find abstract variable, if any, associated with Var.
@@ -1092,8 +742,8 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
void DwarfDebug::createAbstractVariable(const DIVariable &Var,
LexicalScope *Scope) {
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, this);
- addScopeVariable(Scope, AbsDbgVariable.get());
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this);
+ InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
AbstractVariables[Var] = std::move(AbsDbgVariable);
}
@@ -1117,55 +767,35 @@ DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
createAbstractVariable(Cleansed, Scope);
}
-// If Var is a current function argument then add it to CurrentFnArguments list.
-bool DwarfDebug::addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope) {
- if (!LScopes.isCurrentFunctionScope(Scope))
- return false;
- DIVariable DV = Var->getVariable();
- if (DV.getTag() != dwarf::DW_TAG_arg_variable)
- return false;
- unsigned ArgNo = DV.getArgNumber();
- if (ArgNo == 0)
- return false;
-
- size_t Size = CurrentFnArguments.size();
- if (Size == 0)
- CurrentFnArguments.resize(CurFn->getFunction()->arg_size());
- // llvm::Function argument size is not good indicator of how many
- // arguments does the function have at source level.
- if (ArgNo > Size)
- CurrentFnArguments.resize(ArgNo * 2);
- CurrentFnArguments[ArgNo - 1] = Var;
- return true;
-}
-
// Collect variable information from side table maintained by MMI.
void DwarfDebug::collectVariableInfoFromMMITable(
- SmallPtrSet<const MDNode *, 16> &Processed) {
+ SmallPtrSetImpl<const MDNode *> &Processed) {
for (const auto &VI : MMI->getVariableDbgInfo()) {
if (!VI.Var)
continue;
Processed.insert(VI.Var);
- DIVariable DV(VI.Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
+ DIVariable DV(VI.Var);
+ DIExpression Expr(VI.Expr);
ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
- ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+ ConcreteVariables.push_back(make_unique<DbgVariable>(DV, Expr, this));
DbgVariable *RegVar = ConcreteVariables.back().get();
RegVar->setFrameIndex(VI.Slot);
- addScopeVariable(Scope, RegVar);
+ InfoHolder.addScopeVariable(Scope, RegVar);
}
}
// Get .debug_loc entry for the instruction range starting at MI.
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
+ const MDNode *Expr = MI->getDebugExpression();
const MDNode *Var = MI->getDebugVariable();
- assert(MI->getNumOperands() == 3);
+ assert(MI->getNumOperands() == 4);
if (MI->getOperand(0).isReg()) {
MachineLocation MLoc;
// If the second operand is an immediate, this is a
@@ -1174,24 +804,138 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
MLoc.set(MI->getOperand(0).getReg());
else
MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
- return DebugLocEntry::Value(Var, MLoc);
+ return DebugLocEntry::Value(Var, Expr, MLoc);
}
if (MI->getOperand(0).isImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getImm());
if (MI->getOperand(0).isFPImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getFPImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getFPImm());
if (MI->getOperand(0).isCImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getCImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getCImm());
- llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
+ llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
-// Find variables for each lexical scope.
+/// Determine whether two variable pieces overlap.
+static bool piecesOverlap(DIExpression P1, DIExpression P2) {
+ if (!P1.isVariablePiece() || !P2.isVariablePiece())
+ return true;
+ unsigned l1 = P1.getPieceOffset();
+ unsigned l2 = P2.getPieceOffset();
+ unsigned r1 = l1 + P1.getPieceSize();
+ unsigned r2 = l2 + P2.getPieceSize();
+ // True where [l1,r1[ and [l2,r2[ overlap.
+ return (l1 < r2) && (l2 < r1);
+}
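
For reference, piecesOverlap() above is the standard half-open interval test on piece byte ranges; a tiny self-contained check of that logic (plain integers rather than DIExpressions):

#include <cassert>

// [l1,r1) and [l2,r2) overlap iff l1 < r2 && l2 < r1.
static bool overlap(unsigned l1, unsigned r1, unsigned l2, unsigned r2) {
  return l1 < r2 && l2 < r1;
}

int main() {
  assert(overlap(0, 32, 16, 48));    // partial overlap
  assert(!overlap(0, 32, 32, 64));   // adjacent pieces do not overlap
  assert(overlap(0, 64, 16, 32));    // containment counts as overlap
  return 0;
}
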
+
+/// Build the location list for all DBG_VALUEs in the function that
+/// describe the same variable. If the ranges of several independent
+/// pieces of the same variable overlap partially, split them up and
+/// combine the ranges. The resulting DebugLocEntries will have
+/// strictly monotonically increasing begin addresses and will never
+/// overlap.
+//
+// Input:
+//
+// Ranges History [var, loc, piece ofs size]
+// 0 | [x, (reg0, piece 0, 32)]
+// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
+// 2 | | ...
+// 3 | [clobber reg0]
+// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x.
+//
+// Output:
+//
+// [0-1] [x, (reg0, piece 0, 32)]
+// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)]
+// [3-4] [x, (reg1, piece 32, 32)]
+// [4- ] [x, (mem, piece 0, 64)]
void
-DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::InstrRanges &Ranges) {
+ SmallVector<DebugLocEntry::Value, 4> OpenRanges;
+
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const MachineInstr *Begin = I->first;
+ const MachineInstr *End = I->second;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if a variable is inaccessible in this range.
+ if (Begin->getNumOperands() > 1 &&
+ Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
+ OpenRanges.clear();
+ continue;
+ }
+
+ // If this piece overlaps with any open ranges, truncate them.
+ DIExpression DIExpr = Begin->getDebugExpression();
+ auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
+ [&](DebugLocEntry::Value R) {
+ return piecesOverlap(DIExpr, R.getExpression());
+ });
+ OpenRanges.erase(Last, OpenRanges.end());
+
+ const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
+ assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+
+ const MCSymbol *EndLabel;
+ if (End != nullptr)
+ EndLabel = getLabelAfterInsn(End);
+ else if (std::next(I) == Ranges.end())
+ EndLabel = FunctionEndSym;
+ else
+ EndLabel = getLabelBeforeInsn(std::next(I)->first);
+ assert(EndLabel && "Forgot label after instruction ending a range!");
+ DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+
+ auto Value = getDebugLocValue(Begin);
+ DebugLocEntry Loc(StartLabel, EndLabel, Value);
+ bool couldMerge = false;
+
+ // If this is a piece, it may belong to the current DebugLocEntry.
+ if (DIExpr.isVariablePiece()) {
+ // Add this value to the list of open ranges.
+ OpenRanges.push_back(Value);
+
+ // Attempt to add the piece to the last entry.
+ if (!DebugLoc.empty())
+ if (DebugLoc.back().MergeValues(Loc))
+ couldMerge = true;
+ }
+
+ if (!couldMerge) {
+ // Need to add a new DebugLocEntry. Add all values from still
+ // valid non-overlapping pieces.
+ if (OpenRanges.size())
+ Loc.addValues(OpenRanges);
+
+ DebugLoc.push_back(std::move(Loc));
+ }
+
+ // Attempt to coalesce the ranges of two otherwise identical
+ // DebugLocEntries.
+ auto CurEntry = DebugLoc.rbegin();
+ auto PrevEntry = std::next(CurEntry);
+ if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
+ DebugLoc.pop_back();
+
+ DEBUG({
+ dbgs() << CurEntry->getValues().size() << " Values:\n";
+ for (auto Value : CurEntry->getValues()) {
+ Value.getVariable()->dump();
+ Value.getExpression()->dump();
+ }
+ dbgs() << "-----\n";
+ });
+ }
+}
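
buildLocationList() above keeps the still-live pieces in OpenRanges, drops any that overlap a newly started piece with the remove_if/erase idiom, and then merges or appends location-list entries. A minimal sketch of just the truncation step, with pieces reduced to plain offset/size pairs:

#include <algorithm>
#include <cassert>
#include <vector>

struct Piece { unsigned Offset, Size; };   // byte offset/size within a variable

static bool overlaps(const Piece &A, const Piece &B) {
  return A.Offset < B.Offset + B.Size && B.Offset < A.Offset + A.Size;
}

int main() {
  std::vector<Piece> OpenRanges = {{0, 32}, {32, 32}};
  Piece Incoming{0, 64};   // a new piece covering both open pieces

  // Truncate every open piece the incoming one overlaps, then open it.
  auto Last = std::remove_if(
      OpenRanges.begin(), OpenRanges.end(),
      [&](const Piece &P) { return overlaps(Incoming, P); });
  OpenRanges.erase(Last, OpenRanges.end());
  OpenRanges.push_back(Incoming);

  assert(OpenRanges.size() == 1 && OpenRanges.front().Size == 64);
  return 0;
}
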
+
+
+// Find variables for each lexical scope.
+void
+DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
+ SmallPtrSetImpl<const MDNode *> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMMITable(Processed);
@@ -1206,10 +950,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
continue;
LexicalScope *Scope = nullptr;
- if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
- DISubprogram(DV.getContext()).describes(CurFn->getFunction()))
- Scope = LScopes.getCurrentFunctionScope();
- else if (MDNode *IA = DV.getInlinedAt()) {
+ if (MDNode *IA = DV.getInlinedAt()) {
DebugLoc DL = DebugLoc::getFromDILocation(IA);
Scope = LScopes.findInlinedScope(DebugLoc::get(
DL.getLine(), DL.getCol(), DV.getContext(), IA));
@@ -1225,7 +966,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
DbgVariable *RegVar = ConcreteVariables.back().get();
- addScopeVariable(Scope, RegVar);
+ InfoHolder.addScopeVariable(Scope, RegVar);
// Check if the first DBG_VALUE is valid for the rest of the function.
if (Ranges.size() == 1 && Ranges.front().second == nullptr)
@@ -1236,53 +977,26 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1);
DebugLocList &LocList = DotDebugLocEntries.back();
+ LocList.CU = &TheCU;
LocList.Label =
Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
- SmallVector<DebugLocEntry, 4> &DebugLoc = LocList.List;
- for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
- const MachineInstr *Begin = I->first;
- const MachineInstr *End = I->second;
- assert(Begin->isDebugValue() && "Invalid History entry");
-
- // Check if a variable is unaccessible in this range.
- if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
- !Begin->getOperand(0).getReg())
- continue;
- DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin);
- if (End != nullptr)
- DEBUG(dbgs() << "\t" << *End);
- else
- DEBUG(dbgs() << "\tNULL\n");
-
- const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
- assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
-
- const MCSymbol *EndLabel;
- if (End != nullptr)
- EndLabel = getLabelAfterInsn(End);
- else if (std::next(I) == Ranges.end())
- EndLabel = FunctionEndSym;
- else
- EndLabel = getLabelBeforeInsn(std::next(I)->first);
- assert(EndLabel && "Forgot label after instruction ending a range!");
- DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU);
- if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
- DebugLoc.push_back(std::move(Loc));
- }
+ // Build the location list for this variable.
+ buildLocationList(LocList.List, Ranges);
}
// Collect info for variables that were optimized out.
- DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables();
+ DIArray Variables = SP.getVariables();
for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
DIVariable DV(Variables.getElement(i));
assert(DV.isVariable());
- if (!Processed.insert(DV))
+ if (!Processed.insert(DV).second)
continue;
if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) {
ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
- ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
- addScopeVariable(Scope, ConcreteVariables.back().get());
+ DIExpression NoExpr;
+ ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this));
+ InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get());
}
}
}
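
This hunk (and endFunction below) also adapts to SmallPtrSet::insert() returning a pair rather than a plain bool, so a first-time insertion is detected via .second. A minimal illustration of the idiom with a standard container:

#include <cassert>
#include <set>

int main() {
  std::set<int> Processed;
  assert(Processed.insert(1).second);    // first insertion: .second is true
  assert(!Processed.insert(1).second);   // duplicate: .second is false, skip it
  return 0;
}
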
@@ -1458,7 +1172,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
Asm->OutStreamer.EmitLabel(FunctionBeginSym);
// Calculate history for local variables.
- calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues);
+ calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(),
+ DbgValues);
// Request labels for the full history.
for (const auto &I : DbgValues) {
@@ -1468,10 +1183,24 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// The first mention of a function argument gets the FunctionBeginSym
// label, so arguments are visible when breaking at function entry.
- DIVariable DV(I.first);
- if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
- getDISubprogram(DV.getContext()).describes(MF->getFunction()))
+ DIVariable DIVar(Ranges.front().first->getDebugVariable());
+ if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable &&
+ getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym;
+ if (Ranges.front().first->getDebugExpression().isVariablePiece()) {
+ // Mark all non-overlapping initial pieces.
+ for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
+ DIExpression Piece = I->first->getDebugExpression();
+ if (std::all_of(Ranges.begin(), I,
+ [&](DbgValueHistoryMap::InstrRange Pred) {
+ return !piecesOverlap(Piece, Pred.first->getDebugExpression());
+ }))
+ LabelsBeforeInsn[I->first] = FunctionBeginSym;
+ else
+ break;
+ }
+ }
+ }
for (const auto &Range : Ranges) {
requestLabelBeforeInsn(Range.first);
@@ -1497,56 +1226,16 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
}
-void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
- if (addCurrentFnArgument(Var, LS))
- return;
- SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
- DIVariable DV = Var->getVariable();
- // Variables with positive arg numbers are parameters.
- if (unsigned ArgNum = DV.getArgNumber()) {
- // Keep all parameters in order at the start of the variable list to ensure
- // function types are correct (no out-of-order parameters)
- //
- // This could be improved by only doing it for optimized builds (unoptimized
- // builds have the right order to begin with), searching from the back (this
- // would catch the unoptimized case quickly), or doing a binary search
- // rather than linear search.
- SmallVectorImpl<DbgVariable *>::iterator I = Vars.begin();
- while (I != Vars.end()) {
- unsigned CurNum = (*I)->getVariable().getArgNumber();
- // A local (non-parameter) variable has been found, insert immediately
- // before it.
- if (CurNum == 0)
- break;
- // A later indexed parameter has been found, insert immediately before it.
- if (CurNum > ArgNum)
- break;
- ++I;
- }
- Vars.insert(I, Var);
- return;
- }
-
- Vars.push_back(Var);
-}
-
// Gather and emit post-function debug information.
void DwarfDebug::endFunction(const MachineFunction *MF) {
- // Every beginFunction(MF) call should be followed by an endFunction(MF) call,
- // though the beginFunction may not be called at all.
- // We should handle both cases.
- if (!CurFn)
- CurFn = MF;
- else
- assert(CurFn == MF);
- assert(CurFn != nullptr);
+ assert(CurFn == MF &&
+ "endFunction should be called with the same function as beginFunction");
if (!MMI->hasDebugInfo() || LScopes.empty() ||
!FunctionDIs.count(MF->getFunction())) {
// If we don't have a lexical scope for this function then there will
// be a hole in the range information. Keep note of this by setting the
// previously used section to nullptr.
- PrevSection = nullptr;
PrevCU = nullptr;
CurFn = nullptr;
return;
@@ -1560,45 +1249,63 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Set DwarfDwarfCompileUnitID in MCContext to default value.
Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ DISubprogram SP(FnScope->getScopeNode());
+ DwarfCompileUnit &TheCU = *SPMap.lookup(SP);
+
SmallPtrSet<const MDNode *, 16> ProcessedVars;
- collectVariableInfo(ProcessedVars);
+ collectVariableInfo(TheCU, SP, ProcessedVars);
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DwarfCompileUnit &TheCU = *SPMap.lookup(FnScope->getScopeNode());
+ // Add the range of this function to the list of ranges for the CU.
+ TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym));
+
+ // Under -gmlt, skip building the subprogram if there are no inlined
+ // subroutines inside it.
+ if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly &&
+ LScopes.getAbstractScopesList().empty() && !IsDarwin) {
+ assert(InfoHolder.getScopeVariables().empty());
+ assert(DbgValues.empty());
+ // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed
+ // by a -gmlt CU. Add a test and remove this assertion.
+ assert(AbstractVariables.empty());
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ return;
+ }
+#ifndef NDEBUG
+ size_t NumAbstractScopes = LScopes.getAbstractScopesList().size();
+#endif
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
DISubprogram SP(AScope->getScopeNode());
- if (!SP.isSubprogram())
- continue;
+ assert(SP.isSubprogram());
// Collect info for variables that were optimized out.
DIArray Variables = SP.getVariables();
for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
DIVariable DV(Variables.getElement(i));
assert(DV && DV.isVariable());
- if (!ProcessedVars.insert(DV))
+ if (!ProcessedVars.insert(DV).second)
continue;
ensureAbstractVariableIsCreated(DV, DV.getContext());
+ assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
+ && "ensureAbstractVariableIsCreated inserted abstract scopes");
}
- constructAbstractSubprogramScopeDIE(TheCU, AScope);
+ constructAbstractSubprogramScopeDIE(AScope);
}
- DIE &CurFnDIE = constructSubprogramScopeDIE(TheCU, FnScope);
- if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn))
- TheCU.addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
-
- // Add the range of this function to the list of ranges for the CU.
- RangeSpan Span(FunctionBeginSym, FunctionEndSym);
- TheCU.addRange(std::move(Span));
- PrevSection = Asm->getCurrentSection();
- PrevCU = &TheCU;
+ TheCU.constructSubprogramScopeDIE(FnScope);
+ if (auto *SkelCU = TheCU.getSkeleton())
+ if (!LScopes.getAbstractScopesList().empty())
+ SkelCU->constructSubprogramScopeDIE(FnScope);
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
- ScopeVariables.clear();
- CurrentFnArguments.clear();
+ InfoHolder.getScopeVariables().clear();
DbgValues.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
@@ -1618,8 +1325,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
assert(Scope.isScope());
Fn = Scope.getFilename();
Dir = Scope.getDirectory();
- if (Scope.isLexicalBlock())
- Discriminator = DILexicalBlock(S).getDiscriminator();
+ if (Scope.isLexicalBlockFile())
+ Discriminator = DILexicalBlockFile(S).getDiscriminator();
unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID();
Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
@@ -1640,9 +1347,12 @@ void DwarfDebug::emitSectionLabels() {
// Dwarf sections base addresses.
DwarfInfoSectionSym =
emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
- if (useSplitDwarf())
+ if (useSplitDwarf()) {
DwarfInfoDWOSectionSym =
emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo");
+ DwarfTypesDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo");
+ }
DwarfAbbrevSectionSym =
emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
if (useSplitDwarf())
@@ -1726,7 +1436,7 @@ void DwarfDebug::emitDIE(DIE &Die) {
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- Holder.emitUnits(this, DwarfAbbrevSectionSym);
+ Holder.emitUnits(DwarfAbbrevSectionSym);
}
// Emit the abbreviation section.
@@ -1760,54 +1470,41 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-// Emit visible names into a hashed accelerator table section.
-void DwarfDebug::emitAccelNames() {
- AccelNames.FinalizeTable(Asm, "Names");
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAccelNamesSection());
- MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
+void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
+ StringRef TableName, StringRef SymName) {
+ Accel.FinalizeTable(Asm, TableName);
+ Asm->OutStreamer.SwitchSection(Section);
+ auto *SectionBegin = Asm->GetTempSymbol(SymName);
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AccelNames.Emit(Asm, SectionBegin, &InfoHolder);
+ Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym);
+}
+
+// Emit visible names into a hashed accelerator table section.
+void DwarfDebug::emitAccelNames() {
+ emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
+ "Names", "names_begin");
}
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
- AccelObjC.FinalizeTable(Asm, "ObjC");
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAccelObjCSection());
- MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
- Asm->OutStreamer.EmitLabel(SectionBegin);
-
- // Emit the full data.
- AccelObjC.Emit(Asm, SectionBegin, &InfoHolder);
+ emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
+ "ObjC", "objc_begin");
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
- AccelNamespace.FinalizeTable(Asm, "namespac");
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAccelNamespaceSection());
- MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
- Asm->OutStreamer.EmitLabel(SectionBegin);
-
- // Emit the full data.
- AccelNamespace.Emit(Asm, SectionBegin, &InfoHolder);
+ emitAccel(AccelNamespace,
+ Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
+ "namespac", "namespac_begin");
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
-
- AccelTypes.FinalizeTable(Asm, "types");
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAccelTypesSection());
- MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
- Asm->OutStreamer.EmitLabel(SectionBegin);
-
- // Emit the full data.
- AccelTypes.Emit(Asm, SectionBegin, &InfoHolder);
+ emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
+ "types", "types_begin");
}
// Public name handling.
@@ -1874,12 +1571,13 @@ void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
: Asm->getObjFileLowering().getDwarfPubNamesSection();
- emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfUnit::getGlobalNames);
+ emitDebugPubSection(GnuStyle, PSec, "Names",
+ &DwarfCompileUnit::getGlobalNames);
}
void DwarfDebug::emitDebugPubSection(
bool GnuStyle, const MCSection *PSec, StringRef Name,
- const StringMap<const DIE *> &(DwarfUnit::*Accessor)() const) {
+ const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) {
for (const auto &NU : CUMap) {
DwarfCompileUnit *TheU = NU.second;
@@ -1888,7 +1586,7 @@ void DwarfDebug::emitDebugPubSection(
if (Globals.empty())
continue;
- if (auto Skeleton = static_cast<DwarfCompileUnit *>(TheU->getSkeleton()))
+ if (auto *Skeleton = TheU->getSkeleton())
TheU = Skeleton;
unsigned ID = TheU->getUniqueID();
@@ -1910,7 +1608,7 @@ void DwarfDebug::emitDebugPubSection(
Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym());
Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4);
+ Asm->EmitInt32(TheU->getLength());
// Emit the pubnames for this compilation unit.
for (const auto &GI : Globals) {
@@ -1943,7 +1641,8 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
: Asm->getObjFileLowering().getDwarfPubTypesSection();
- emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes);
+ emitDebugPubSection(GnuStyle, PSec, "Types",
+ &DwarfCompileUnit::getGlobalTypes);
}
// Emit visible names into a debug str section.
@@ -1952,12 +1651,67 @@ void DwarfDebug::emitDebugStr() {
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
}
+/// Emits an optimal (=sorted) sequence of DW_OP_pieces.
+void DwarfDebug::emitLocPieces(ByteStreamer &Streamer,
+ const DITypeIdentifierMap &Map,
+ ArrayRef<DebugLocEntry::Value> Values) {
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
+ return P.isVariablePiece();
+ }) && "all values are expected to be pieces");
+ assert(std::is_sorted(Values.begin(), Values.end()) &&
+ "pieces are expected to be sorted");
+
+ unsigned Offset = 0;
+ for (auto Piece : Values) {
+ DIExpression Expr = Piece.getExpression();
+ unsigned PieceOffset = Expr.getPieceOffset();
+ unsigned PieceSize = Expr.getPieceSize();
+ assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
+ if (Offset < PieceOffset) {
+ // The DWARF spec seriously mandates pieces with no locations for gaps.
+ Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*8);
+ Offset += PieceOffset-Offset;
+ }
+
+ Offset += PieceSize;
+
+ const unsigned SizeOfByte = 8;
+#ifndef NDEBUG
+ DIVariable Var = Piece.getVariable();
+ assert(!Var.isIndirect() && "indirect address for piece");
+ unsigned VarSize = Var.getSizeInBits(Map);
+ assert(PieceSize+PieceOffset <= VarSize/SizeOfByte
+ && "piece is larger than or outside of variable");
+ assert(PieceSize*SizeOfByte != VarSize
+ && "piece covers entire variable");
+#endif
+ if (Piece.isLocation() && Piece.getLoc().isReg())
+ Asm->EmitDwarfRegOpPiece(Streamer,
+ Piece.getLoc(),
+ PieceSize*SizeOfByte);
+ else {
+ emitDebugLocValue(Streamer, Piece);
+ Asm->EmitDwarfOpPiece(Streamer, PieceSize*SizeOfByte);
+ }
+ }
+}
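
emitLocPieces() above walks the sorted pieces and pads any hole between them with an empty DW_OP_piece before emitting each piece. A standalone sketch of the offset bookkeeping, with emission simulated by recording (size-in-bits, is-gap) pairs:

#include <cassert>
#include <utility>
#include <vector>

struct Piece { unsigned Offset, Size; };   // byte offset/size within the variable

int main() {
  std::vector<Piece> Pieces = {{0, 4}, {8, 4}};     // 4-byte hole at offset 4
  std::vector<std::pair<unsigned, bool>> Emitted;   // {size in bits, IsGap}

  unsigned Offset = 0;
  for (const Piece &P : Pieces) {
    assert(Offset <= P.Offset && "overlapping or duplicate pieces");
    if (Offset < P.Offset)
      Emitted.push_back({(P.Offset - Offset) * 8, true});   // empty piece for the gap
    Emitted.push_back({P.Size * 8, false});                 // the piece itself
    Offset = P.Offset + P.Size;
  }

  assert(Emitted.size() == 3);
  assert(Emitted[1].first == 32 && Emitted[1].second);   // the 32-bit gap
  return 0;
}
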
+
+
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocEntry &Entry) {
- assert(Entry.getValues().size() == 1 &&
- "multi-value entries are not supported yet.");
const DebugLocEntry::Value Value = Entry.getValues()[0];
- DIVariable DV(Value.getVariable());
+ if (Value.isVariablePiece())
+ // Emit all pieces that belong to the same variable and range.
+ return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues());
+
+ assert(Entry.getValues().size() == 1 && "only pieces may have >1 value");
+ emitDebugLocValue(Streamer, Value);
+}
+
+void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
+ const DebugLocEntry::Value &Value) {
+ DIVariable DV = Value.getVariable();
+ // Regular entry.
if (Value.isInt()) {
DIBasicType BTy(resolve(DV.getType()));
if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
@@ -1970,24 +1724,25 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
}
} else if (Value.isLocation()) {
MachineLocation Loc = Value.getLoc();
- if (!DV.hasComplexAddress())
+ DIExpression Expr = Value.getExpression();
+ if (!Expr)
// Regular entry.
Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
else {
// Complex address entry.
- unsigned N = DV.getNumAddrElements();
+ unsigned N = Expr.getNumElements();
unsigned i = 0;
- if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (N >= 2 && Expr.getElement(0) == dwarf::DW_OP_plus) {
if (Loc.getOffset()) {
i = 2;
Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
- Streamer.EmitSLEB128(DV.getAddrElement(1));
+ Streamer.EmitSLEB128(Expr.getElement(1));
} else {
// If first address element is OpPlus then emit
// DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1));
+ MachineLocation TLoc(Loc.getReg(), Expr.getElement(1));
Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect());
i = 2;
}
@@ -1997,13 +1752,16 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
// Emit remaining complex address elements.
for (; i < N; ++i) {
- uint64_t Element = DV.getAddrElement(i);
- if (Element == DIBuilder::OpPlus) {
+ uint64_t Element = Expr.getElement(i);
+ if (Element == dwarf::DW_OP_plus) {
Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
- Streamer.EmitULEB128(DV.getAddrElement(++i));
- } else if (Element == DIBuilder::OpDeref) {
+ Streamer.EmitULEB128(Expr.getElement(++i));
+ } else if (Element == dwarf::DW_OP_deref) {
if (!Loc.isReg())
Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
+ } else if (Element == dwarf::DW_OP_piece) {
+ i += 3;
+ // handled in emitDebugLocEntry.
} else
llvm_unreachable("unknown Opcode found in complex address");
}
@@ -2035,14 +1793,12 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getDataLayout().getPointerSize();
for (const auto &DebugLoc : DotDebugLocEntries) {
Asm->OutStreamer.EmitLabel(DebugLoc.Label);
+ const DwarfCompileUnit *CU = DebugLoc.CU;
for (const auto &Entry : DebugLoc.List) {
// Set up the range. This range is relative to the entry point of the
// compile unit. This is a hard coded 0 for low_pc when we're emitting
// ranges, or the DW_AT_low_pc on the compile unit otherwise.
- const DwarfCompileUnit *CU = Entry.getCU();
- if (CU->getRanges().size() == 1) {
- // Grab the begin symbol from the first range as our base.
- const MCSymbol *Base = CU->getRanges()[0].getStart();
+ if (auto *Base = CU->getBaseAddress()) {
Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size);
Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size);
} else {
@@ -2172,6 +1928,10 @@ void DwarfDebug::emitDebugARanges() {
for (DwarfCompileUnit *CU : CUs) {
std::vector<ArangeSpan> &List = Spans[CU];
+ // Describe the skeleton CU's offset and length, not the dwo file's.
+ if (auto *Skel = CU->getSkeleton())
+ CU = Skel;
+
// Emit size of content not including length itself.
unsigned ContentSize =
sizeof(int16_t) + // DWARF ARange version number
@@ -2194,7 +1954,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer.AddComment("DWARF Arange version number");
Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
- Asm->EmitSectionOffset(CU->getLocalLabelBegin(), CU->getLocalSectionSym());
+ Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym());
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(PtrSize);
Asm->OutStreamer.AddComment("Segment Size (in bytes)");
@@ -2238,6 +1998,9 @@ void DwarfDebug::emitDebugRanges() {
for (const auto &I : CUMap) {
DwarfCompileUnit *TheCU = I.second;
+ if (auto *Skel = TheCU->getSkeleton())
+ TheCU = Skel;
+
// Iterate over the misc ranges for the compile units in the module.
for (const RangeSpanList &List : TheCU->getRangeLists()) {
// Emit our symbol so we can find the beginning of the range.
@@ -2248,9 +2011,7 @@ void DwarfDebug::emitDebugRanges() {
const MCSymbol *End = Range.getEnd();
assert(Begin && "Range without a begin symbol?");
assert(End && "Range without an end symbol?");
- if (TheCU->getRanges().size() == 1) {
- // Grab the begin symbol from the first range as our base.
- const MCSymbol *Base = TheCU->getRanges()[0].getStart();
+ if (auto *Base = TheCU->getBaseAddress()) {
Asm->EmitLabelDifference(Begin, Base, Size);
Asm->EmitLabelDifference(End, Base, Size);
} else {
@@ -2263,23 +2024,6 @@ void DwarfDebug::emitDebugRanges() {
Asm->OutStreamer.EmitIntValue(0, Size);
Asm->OutStreamer.EmitIntValue(0, Size);
}
-
- // Now emit a range for the CU itself.
- if (TheCU->getRanges().size() > 1) {
- Asm->OutStreamer.EmitLabel(
- Asm->GetTempSymbol("cu_ranges", TheCU->getUniqueID()));
- for (const RangeSpan &Range : TheCU->getRanges()) {
- const MCSymbol *Begin = Range.getStart();
- const MCSymbol *End = Range.getEnd();
- assert(Begin && "Range without a begin symbol?");
- assert(End && "Range without an end symbol?");
- Asm->OutStreamer.EmitSymbolValue(Begin, Size);
- Asm->OutStreamer.EmitSymbolValue(End, Size);
- }
- // And terminate the list with two 0 values.
- Asm->OutStreamer.EmitIntValue(0, Size);
- Asm->OutStreamer.EmitIntValue(0, Size);
- }
}
}
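As an illustrative aside (not part of this patch), this standalone sketch shows the encoding choice made above for .debug_ranges entries: when the compile unit has a base address, each range is written as a pair of offsets from that base, otherwise absolute addresses are written, and either way the list is terminated by a 0/0 pair. Plain integers stand in for MC labels and label differences; emitRanges and the Range struct are hypothetical names for this example.

#include <cstdint>
#include <cstdio>
#include <vector>

struct Range { uint64_t Begin, End; };

// HasBase/Base model getBaseAddress() returning a symbol or null.
static void emitRanges(const std::vector<Range> &Ranges, bool HasBase,
                       uint64_t Base) {
  for (const Range &R : Ranges) {
    if (HasBase)
      // Offsets from the CU base keep the entries free of relocations.
      std::printf("offset 0x%llx 0x%llx\n",
                  (unsigned long long)(R.Begin - Base),
                  (unsigned long long)(R.End - Base));
    else
      std::printf("addr   0x%llx 0x%llx\n", (unsigned long long)R.Begin,
                  (unsigned long long)R.End);
  }
  std::printf("end    0 0\n"); // Terminate the list with two zero values.
}

int main() {
  emitRanges({{0x400010, 0x400040}, {0x400100, 0x400180}},
             /*HasBase=*/true, /*Base=*/0x400000);
}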
@@ -2287,11 +2031,11 @@ void DwarfDebug::emitDebugRanges() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfUnit> NewU) {
- NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
- U.getCUNode().getSplitDebugFilename());
+ NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
+ U.getCUNode().getSplitDebugFilename());
if (!CompilationDir.empty())
- NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+ NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(*NewU, Die);
@@ -2316,31 +2060,13 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
return NewCU;
}
-// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name,
-// DW_AT_addr_base.
-DwarfTypeUnit &DwarfDebug::constructSkeletonTU(DwarfTypeUnit &TU) {
- DwarfCompileUnit &CU = static_cast<DwarfCompileUnit &>(
- *SkeletonHolder.getUnits()[TU.getCU().getUniqueID()]);
-
- auto OwnedUnit = make_unique<DwarfTypeUnit>(TU.getUniqueID(), CU, Asm, this,
- &SkeletonHolder);
- DwarfTypeUnit &NewTU = *OwnedUnit;
- NewTU.setTypeSignature(TU.getTypeSignature());
- NewTU.setType(nullptr);
- NewTU.initSection(
- Asm->getObjFileLowering().getDwarfTypesSection(TU.getTypeSignature()));
-
- initSkeletonUnit(TU, NewTU.getUnitDie(), std::move(OwnedUnit));
- return NewTU;
-}
-
// Emit the .debug_info.dwo section for separated dwarf. This contains the
// compile units that would normally be in debug_info.
void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
// Don't pass an abbrev symbol, using a constant zero instead so as not to
// emit relocations into the dwo file.
- InfoHolder.emitUnits(this, /* AbbrevSymbol */ nullptr);
+ InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
@@ -2364,9 +2090,8 @@ void DwarfDebug::emitDebugStrDWO() {
assert(useSplitDwarf() && "No split dwarf?");
const MCSection *OffSec =
Asm->getObjFileLowering().getDwarfStrOffDWOSection();
- const MCSymbol *StrSym = DwarfStrSectionSym;
InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
- OffSec, StrSym);
+ OffSec);
}
MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
@@ -2406,9 +2131,9 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- auto OwnedUnit =
- make_unique<DwarfTypeUnit>(InfoHolder.getUnits().size(), CU, Asm, this,
- &InfoHolder, getDwoLineTable(CU));
+ auto OwnedUnit = make_unique<DwarfTypeUnit>(
+ InfoHolder.getUnits().size() + TypeUnitsUnderConstruction.size(), CU, Asm,
+ this, &InfoHolder, getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
TU = &NewTU;
@@ -2421,15 +2146,13 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
uint64_t Signature = makeTypeSignature(Identifier);
NewTU.setTypeSignature(Signature);
- if (!useSplitDwarf())
+ if (useSplitDwarf())
+ NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
+ else {
CU.applyStmtList(UnitDie);
-
- // FIXME: Skip using COMDAT groups for type units in the .dwo file once tools
- // such as DWP ( http://gcc.gnu.org/wiki/DebugFissionDWP ) can cope with it.
- NewTU.initSection(
- useSplitDwarf()
- ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature)
- : Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ NewTU.initSection(
+ Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ }
NewTU.setType(NewTU.createTypeDIE(CTy));
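A small worked example of the unit-numbering arithmetic above: type units that are still under construction have not yet been handed to InfoHolder, so the next unique ID has to count both the finished units and the in-flight ones. The container names below are stand-ins for this example, not the real DwarfFile API.

#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Finished = {"cu0", "tu1"}; // already in InfoHolder
  std::vector<std::string> InFlight = {"tu2"};        // still under construction
  // Counting only Finished.size() would hand a nested type unit the same ID
  // as an in-flight one; summing both keeps IDs unique until the batch is
  // added to InfoHolder.
  const unsigned NextID =
      static_cast<unsigned>(Finished.size() + InFlight.size());
  std::printf("next unit id = %u\n", NextID); // prints 3
}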
@@ -2457,29 +2180,12 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// If the type wasn't dependent on fission addresses, finish adding the type
// and all its dependent types.
- for (auto &TU : TypeUnitsToAdd) {
- if (useSplitDwarf())
- TU.first->setSkeleton(constructSkeletonTU(*TU.first));
+ for (auto &TU : TypeUnitsToAdd)
InfoHolder.addUnit(std::move(TU.first));
- }
}
CU.addDIETypeSignature(RefDie, NewTU);
}
-void DwarfDebug::attachLowHighPC(DwarfCompileUnit &Unit, DIE &D,
- MCSymbol *Begin, MCSymbol *End) {
- assert(Begin && "Begin label should not be null!");
- assert(End && "End label should not be null!");
- assert(Begin->isDefined() && "Invalid starting label");
- assert(End->isDefined() && "Invalid end label");
-
- Unit.addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
- if (DwarfVersion < 4)
- Unit.addLabelAddress(D, dwarf::DW_AT_high_pc, End);
- else
- Unit.addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
-}
-
// Accelerator table mutators - add each name along with its companion
// DIE to the proper table while ensuring that the name that we're going
// to reference is in the string table. We do this since the names we
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index ffe4843..48c2809 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
-#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "DwarfFile.h"
#include "AsmPrinterHandler.h"
@@ -70,6 +70,7 @@ public:
/// \brief This class is used to track local variable information.
class DbgVariable {
DIVariable Var; // Variable Descriptor.
+ DIExpression Expr; // Complex address location expression.
DIE *TheDIE; // Variable DIE.
unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
@@ -78,18 +79,22 @@ class DbgVariable {
public:
/// Construct a DbgVariable from a DIVariable.
- DbgVariable(DIVariable V, DwarfDebug *DD)
- : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr),
- FrameIndex(~0), DD(DD) {}
+ DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD)
+ : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U),
+ MInsn(nullptr), FrameIndex(~0), DD(DD) {
+ assert(Var.Verify() && Expr.Verify());
+ }
/// Construct a DbgVariable from a DEBUG_VALUE.
/// AbstractVar may be NULL.
DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
- : Var(DbgValue->getDebugVariable()), TheDIE(nullptr),
- DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {}
+ : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()),
+ TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue),
+ FrameIndex(~0), DD(DD) {}
// Accessors.
DIVariable getVariable() const { return Var; }
+ DIExpression getExpression() const { return Expr; }
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
@@ -124,14 +129,14 @@ public:
bool variableHasComplexAddress() const {
assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Var.hasComplexAddress();
+ return Expr.getNumElements() > 0;
}
bool isBlockByrefVariable() const;
unsigned getNumAddrElements() const {
assert(Var.isVariable() && "Invalid complex DbgVariable!");
- return Var.getNumAddrElements();
+ return Expr.getNumElements();
}
- uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); }
+ uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); }
DIType getType() const;
private:
@@ -159,26 +164,15 @@ class DwarfDebug : public AsmPrinterHandler {
// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
- // Handle to the compile unit used for the inline extension handling,
- // this is just so that the DIEValue allocator has a place to store
- // the particular elements.
- // FIXME: Store these off of DwarfDebug instead?
- DwarfCompileUnit *FirstCU;
-
// Maps MDNode with its corresponding DwarfCompileUnit.
MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
// Maps subprogram MDNode with its corresponding DwarfCompileUnit.
- DenseMap<const MDNode *, DwarfCompileUnit *> SPMap;
+ MapVector<const MDNode *, DwarfCompileUnit *> SPMap;
// Maps a CU DIE with its corresponding DwarfCompileUnit.
DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
- /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
- /// be shared across CUs, that is why we keep the map here instead
- /// of in DwarfCompileUnit.
- DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
-
// List of all labels used in aranges generation.
std::vector<SymbolCU> ArangeLabels;
@@ -189,19 +183,8 @@ class DwarfDebug : public AsmPrinterHandler {
typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType;
SectionMapType SectionMap;
- // List of arguments for current function.
- SmallVector<DbgVariable *, 8> CurrentFnArguments;
-
LexicalScopes LScopes;
- // Collection of abstract subprogram DIEs.
- DenseMap<const MDNode *, DIE *> AbstractSPDies;
-
- // Collection of dbg variables of a scope.
- typedef DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >
- ScopeVariablesMap;
- ScopeVariablesMap ScopeVariables;
-
// Collection of abstract variables.
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
@@ -210,10 +193,6 @@ class DwarfDebug : public AsmPrinterHandler {
// can refer to them in spite of insertions into this list.
SmallVector<DebugLocList, 4> DotDebugLocEntries;
- // Collection of subprogram DIEs that are marked (at the end of the module)
- // as DW_AT_inline.
- SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
-
// This is a collection of subprogram MDNodes that are processed to
// create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
@@ -243,10 +222,6 @@ class DwarfDebug : public AsmPrinterHandler {
// If nonnull, stores the current machine instruction we're processing.
const MachineInstr *CurMI;
- // If nonnull, stores the section that the previous function was allocated to
- // emitting.
- const MCSection *PrevSection;
-
// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU;
@@ -258,6 +233,7 @@ class DwarfDebug : public AsmPrinterHandler {
MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym;
+ MCSymbol *DwarfTypesDWOSectionSym;
MCSymbol *DwarfStrDWOSectionSym;
MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym;
@@ -322,6 +298,7 @@ class DwarfDebug : public AsmPrinterHandler {
// True iff there are multiple CUs in this module.
bool SingleCU;
+ bool IsDarwin;
AddressPool AddrPool;
@@ -334,8 +311,6 @@ class DwarfDebug : public AsmPrinterHandler {
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
- void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
-
const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() {
return InfoHolder.getUnits();
}
@@ -350,45 +325,8 @@ class DwarfDebug : public AsmPrinterHandler {
void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var,
const MDNode *Scope);
- /// \brief Find DIE for the given subprogram and attach appropriate
- /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
- /// variables in this scope then create and insert DIEs for these
- /// variables.
- DIE &updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP);
-
- /// \brief A helper function to check whether the DIE for a given Scope is
- /// going to be null.
- bool isLexicalScopeDIENull(LexicalScope *Scope);
-
- /// \brief A helper function to construct a RangeSpanList for a given
- /// lexical scope.
- void addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE,
- const SmallVectorImpl<InsnRange> &Range);
-
- /// \brief Construct new DW_TAG_lexical_block for this scope and
- /// attach DW_AT_low_pc/DW_AT_high_pc labels.
- std::unique_ptr<DIE> constructLexicalScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
-
- /// \brief This scope represents inlined body of a function. Construct
- /// DIE to represent this concrete inlined copy of the function.
- std::unique_ptr<DIE> constructInlinedScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
-
- /// \brief Construct a DIE for this scope.
- std::unique_ptr<DIE> constructScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
- void createAndAddScopeChildren(DwarfCompileUnit &TheCU, LexicalScope *Scope,
- DIE &ScopeDIE);
/// \brief Construct a DIE for this abstract scope.
- void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
- /// \brief Construct a DIE for this subprogram scope.
- DIE &constructSubprogramScopeDIE(DwarfCompileUnit &TheCU,
- LexicalScope *Scope);
- /// A helper function to create children of a Scope DIE.
- DIE *createScopeChildrenDIE(DwarfCompileUnit &TheCU, LexicalScope *Scope,
- SmallVectorImpl<std::unique_ptr<DIE>> &Children);
+ void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
/// \brief Emit initial Dwarf sections with a label at the start of each one.
void emitSectionLabels();
@@ -424,6 +362,10 @@ class DwarfDebug : public AsmPrinterHandler {
/// the line matrix.
void emitEndOfLineMatrix(unsigned SectionEnd);
+ /// \brief Emit a specified accelerator table.
+ void emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
+ StringRef TableName, StringRef SymName);
+
/// \brief Emit visible names into a hashed accelerator table section.
void emitAccelNames();
@@ -449,10 +391,9 @@ class DwarfDebug : public AsmPrinterHandler {
/// index.
void emitDebugPubTypes(bool GnuStyle = false);
- void
- emitDebugPubSection(bool GnuStyle, const MCSection *PSec, StringRef Name,
- const StringMap<const DIE *> &(DwarfUnit::*Accessor)()
- const);
+ void emitDebugPubSection(
+ bool GnuStyle, const MCSection *PSec, StringRef Name,
+ const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
/// \brief Emit visible names into a debug str section.
void emitDebugStr();
@@ -507,15 +448,8 @@ class DwarfDebug : public AsmPrinterHandler {
DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit);
/// \brief Construct imported_module or imported_declaration DIE.
- void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N);
-
- /// \brief Construct import_module DIE.
- void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N,
- DIE &Context);
-
- /// \brief Construct import_module DIE.
- void constructImportedEntityDIE(DwarfCompileUnit &TheCU,
- const DIImportedEntity &Module, DIE &Context);
+ void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
+ const MDNode *N);
/// \brief Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
@@ -527,38 +461,29 @@ class DwarfDebug : public AsmPrinterHandler {
/// ending of a scope.
void identifyScopeMarkers();
- /// \brief If Var is an current function argument that add it in
- /// CurrentFnArguments list.
- bool addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope);
-
/// \brief Populate LexicalScope entries with variables' info.
- void collectVariableInfo(SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+ void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
+ SmallPtrSetImpl<const MDNode *> &ProcessedVars);
+
+ /// \brief Build the location list for all DBG_VALUEs in the
+ /// function that describe the same variable.
+ void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::InstrRanges &Ranges);
/// \brief Collect variable information from the side table maintained
/// by MMI.
- void collectVariableInfoFromMMITable(SmallPtrSet<const MDNode *, 16> &P);
+ void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P);
/// \brief Ensure that a label will be emitted before MI.
void requestLabelBeforeInsn(const MachineInstr *MI) {
LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
}
- /// \brief Return Label preceding the instruction.
- MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
-
/// \brief Ensure that a label will be emitted after MI.
void requestLabelAfterInsn(const MachineInstr *MI) {
LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
}
- /// \brief Return Label immediately following the instruction.
- MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
-
- void attachRangesOrLowHighPC(DwarfCompileUnit &Unit, DIE &D,
- const SmallVectorImpl<InsnRange> &Ranges);
- void attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, MCSymbol *Begin,
- MCSymbol *End);
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -567,13 +492,6 @@ public:
~DwarfDebug() override;
- void insertDIE(const MDNode *TypeMD, DIE *Die) {
- MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
- }
- DIE *getDIE(const MDNode *TypeMD) {
- return MDTypeNodeToDieMap.lookup(TypeMD);
- }
-
/// \brief Emit all Dwarf sections that should come prior to the
/// content.
void beginModule();
@@ -626,11 +544,15 @@ public:
/// Returns the section symbol for the .debug_loc section.
MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; }
- /// Returns the previous section that was emitted into.
- const MCSection *getPrevSection() const { return PrevSection; }
+ /// Returns the section symbol for the .debug_str section.
+ MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; }
+
+ /// Returns the section symbol for the .debug_ranges section.
+ MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; }
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
+ void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
/// Returns the entries for the .debug_loc section.
const SmallVectorImpl<DebugLocList> &
@@ -641,6 +563,13 @@ public:
/// \brief Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry);
+ /// \brief Emit a single value for the debug loc section.
+ void emitDebugLocValue(ByteStreamer &Streamer,
+ const DebugLocEntry::Value &Value);
+ /// Emits an optimal (=sorted) sequence of DW_OP_pieces.
+ void emitLocPieces(ByteStreamer &Streamer,
+ const DITypeIdentifierMap &Map,
+ ArrayRef<DebugLocEntry::Value> Values);
/// Emit the location for a debug loc entry, including the size header.
void emitDebugLocEntryLocation(const DebugLocEntry &Entry);
@@ -674,6 +603,40 @@ public:
void addAccelNamespace(StringRef Name, const DIE &Die);
void addAccelType(StringRef Name, const DIE &Die, char Flags);
+
+ const MachineFunction *getCurrentFunction() const { return CurFn; }
+ const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; }
+ const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; }
+
+ iterator_range<ImportedEntityMap::const_iterator>
+ findImportedEntitiesForScope(const MDNode *Scope) const {
+ return make_range(std::equal_range(
+ ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
+ std::pair<const MDNode *, const MDNode *>(Scope, nullptr),
+ less_first()));
+ }
+
+ /// \brief A helper function to check whether the DIE for a given Scope is
+ /// going to be null.
+ bool isLexicalScopeDIENull(LexicalScope *Scope);
+
+ /// \brief Return Label preceding the instruction.
+ MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// \brief Return Label immediately following the instruction.
+ MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+ // FIXME: Consider rolling ranges up into DwarfDebug since we use a single
+ // range_base anyway, so there's no need to keep them as separate per-CU range
+ // lists. (though one day we might end up with a range.dwo section, in which
+ // case it'd go to DwarfFile)
+ unsigned getNextRangeNumber() { return GlobalRangeCount++; }
+
+ // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
+
+ SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
+ return ProcessedSPNodes;
+ }
};
} // End of namespace llvm
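As an illustrative aside (not part of this patch), findImportedEntitiesForScope relies on a vector of (scope, entity) pairs kept sorted by scope and queried with std::equal_range using a first-component comparator. The standalone sketch below shows that pattern with ints standing in for the MDNode pointers and a plain lambda standing in for less_first().

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

using ScopeEntity = std::pair<int, int>; // (scope, imported entity)

int main() {
  // The vector must be sorted by scope before equal_range is valid.
  std::vector<ScopeEntity> ScopesWithImportedEntities = {
      {1, 10}, {1, 11}, {2, 20}, {3, 30}};
  auto LessFirst = [](const ScopeEntity &A, const ScopeEntity &B) {
    return A.first < B.first; // Compare only the scope, like less_first().
  };
  std::sort(ScopesWithImportedEntities.begin(),
            ScopesWithImportedEntities.end(), LessFirst);

  const int Scope = 1;
  auto Range = std::equal_range(ScopesWithImportedEntities.begin(),
                                ScopesWithImportedEntities.end(),
                                ScopeEntity(Scope, 0), LessFirst);
  for (auto I = Range.first; I != Range.second; ++I)
    std::printf("imported entity %d\n", I->second); // 10, then 11
}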
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 0440fce..e8867c0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
-#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
#include "EHStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -81,39 +81,6 @@ public:
/// endFunction - Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
-
-class Win64Exception : public EHStreamer {
- /// shouldEmitPersonality - Per-function flag to indicate if personality
- /// info should be emitted.
- bool shouldEmitPersonality;
-
- /// shouldEmitLSDA - Per-function flag to indicate if the LSDA
- /// should be emitted.
- bool shouldEmitLSDA;
-
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
- bool shouldEmitMoves;
-
-public:
- //===--------------------------------------------------------------------===//
- // Main entry points.
- //
- Win64Exception(AsmPrinter *A);
- virtual ~Win64Exception();
-
- /// endModule - Emit all exception information that should come after the
- /// content.
- void endModule() override;
-
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
- void beginFunction(const MachineFunction *MF) override;
-
- /// endFunction - Gather and emit post-function exception information.
- void endFunction(const MachineFunction *) override;
-};
-
} // End of namespace llvm
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 737ee54..50180ea 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -18,8 +18,9 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
-DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
- : Asm(AP), StrPool(DA, *Asm, Pref) {}
+DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
+ BumpPtrAllocator &DA)
+ : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {}
DwarfFile::~DwarfFile() {}
@@ -48,25 +49,18 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
// Emit the various dwarf units to the unit section USection with
// the abbreviations going into ASection.
-void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) {
+void DwarfFile::emitUnits(const MCSymbol *ASectionSym) {
for (const auto &TheU : CUs) {
DIE &Die = TheU->getUnitDie();
const MCSection *USection = TheU->getSection();
Asm->OutStreamer.SwitchSection(USection);
- // Emit the compile units header.
- Asm->OutStreamer.EmitLabel(TheU->getLabelBegin());
-
- // Emit size of content not including length itself
- Asm->OutStreamer.AddComment("Length of Unit");
- Asm->EmitInt32(TheU->getHeaderSize() + Die.getSize());
-
TheU->emitHeader(ASectionSym);
- DD->emitDIE(Die);
- Asm->OutStreamer.EmitLabel(TheU->getLabelEnd());
+ DD.emitDIE(Die);
}
}
+
// Compute the size and offset for each DIE.
void DwarfFile::computeSizeAndOffsets() {
// Offset from the first CU in the debug info section is 0 initially.
@@ -149,8 +143,44 @@ void DwarfFile::emitAbbrevs(const MCSection *Section) {
// Emit strings into a string section.
void DwarfFile::emitStrings(const MCSection *StrSection,
- const MCSection *OffsetSection,
- const MCSymbol *StrSecSym) {
- StrPool.emit(*Asm, StrSection, OffsetSection, StrSecSym);
+ const MCSection *OffsetSection) {
+ StrPool.emit(*Asm, StrSection, OffsetSection);
+}
+
+void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+ SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
+ DIVariable DV = Var->getVariable();
+ // Variables with positive arg numbers are parameters.
+ if (unsigned ArgNum = DV.getArgNumber()) {
+ // Keep all parameters in order at the start of the variable list to ensure
+ // function types are correct (no out-of-order parameters)
+ //
+ // This could be improved by only doing it for optimized builds (unoptimized
+ // builds have the right order to begin with), searching from the back (this
+ // would catch the unoptimized case quickly), or doing a binary search
+ // rather than linear search.
+ auto I = Vars.begin();
+ while (I != Vars.end()) {
+ unsigned CurNum = (*I)->getVariable().getArgNumber();
+ // A local (non-parameter) variable has been found; insert immediately
+ // before it.
+ if (CurNum == 0)
+ break;
+ // A later-indexed parameter has been found; insert immediately before it.
+ if (CurNum > ArgNum)
+ break;
+ // FIXME: There are still some cases where two inlined functions are
+ // conflated together (two calls to the same function at the same
+ // location (eg: via a macro, or without column info, etc)) and then
+ // their arguments are conflated as well.
+ assert((LS->getParent() || CurNum != ArgNum) &&
+ "Duplicate argument for top level (non-inlined) function");
+ ++I;
+ }
+ Vars.insert(I, Var);
+ return;
+ }
+
+ Vars.push_back(Var);
}
}
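As an illustrative aside (not part of this patch), the standalone sketch below reproduces the insertion policy addScopeVariable implements, with a plain struct standing in for DbgVariable: parameters (ArgNo > 0) stay in ascending argument order at the front of the list, and locals (ArgNo == 0) are appended at the end, so a formal parameter never lands after a local.

#include <cstdio>
#include <string>
#include <vector>

struct Var { std::string Name; unsigned ArgNo; };

static void addScopeVar(std::vector<Var> &Vars, Var V) {
  if (V.ArgNo) {
    auto I = Vars.begin();
    // Stop at the first local or at the first parameter with a larger index.
    while (I != Vars.end() && I->ArgNo != 0 && I->ArgNo <= V.ArgNo)
      ++I;
    Vars.insert(I, V);
    return;
  }
  Vars.push_back(V);
}

int main() {
  std::vector<Var> Vars;
  addScopeVar(Vars, {"local_a", 0});
  addScopeVar(Vars, {"arg2", 2});
  addScopeVar(Vars, {"arg1", 1});
  for (const Var &V : Vars)
    std::printf("%s\n", V.Name.c_str()); // arg1, arg2, local_a
}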
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 3985eb2..9d64bfc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFFILE_H__
-#define CODEGEN_ASMPRINTER_DWARFFILE_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
@@ -24,10 +24,13 @@
namespace llvm {
class AsmPrinter;
+class DbgVariable;
class DwarfUnit;
class DIEAbbrev;
class MCSymbol;
class DIE;
+class DISubprogram;
+class LexicalScope;
class StringRef;
class DwarfDebug;
class MCSection;
@@ -35,6 +38,8 @@ class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
+ DwarfDebug &DD;
+
// Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -46,8 +51,20 @@ class DwarfFile {
DwarfStringPool StrPool;
+ // Collection of dbg variables of a scope.
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables;
+
+ // Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+ /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
+ /// be shared across CUs, that is why we keep the map here instead
+ /// of in DwarfCompileUnit.
+ DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
+
public:
- DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
+ DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
+ BumpPtrAllocator &DA);
~DwarfFile();
@@ -67,18 +84,34 @@ public:
/// \brief Emit all of the units to the section listed with the given
/// abbreviation section.
- void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym);
+ void emitUnits(const MCSymbol *ASectionSym);
/// \brief Emit a set of abbreviations to the specific section.
void emitAbbrevs(const MCSection *);
/// \brief Emit all of the strings to the section given.
void emitStrings(const MCSection *StrSection,
- const MCSection *OffsetSection = nullptr,
- const MCSymbol *StrSecSym = nullptr);
+ const MCSection *OffsetSection = nullptr);
/// \brief Returns the string pool.
DwarfStringPool &getStringPool() { return StrPool; }
+
+ void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() {
+ return ScopeVariables;
+ }
+
+ DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
+ return AbstractSPDies;
+ }
+
+ void insertDIE(const MDNode *TypeMD, DIE *Die) {
+ MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
+ }
+ DIE *getDIE(const MDNode *TypeMD) {
+ return MDTypeNodeToDieMap.lookup(TypeMD);
+ }
};
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 72cab60..d76b66c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -12,14 +12,11 @@
using namespace llvm;
-MCSymbol *DwarfStringPool::getSectionSymbol() { return SectionSymbol; }
-
static std::pair<MCSymbol *, unsigned> &
getEntry(AsmPrinter &Asm,
StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool,
StringRef Prefix, StringRef Str) {
- std::pair<MCSymbol *, unsigned> &Entry =
- Pool.GetOrCreateValue(Str).getValue();
+ std::pair<MCSymbol *, unsigned> &Entry = Pool[Str];
if (!Entry.first) {
Entry.second = Pool.size() - 1;
Entry.first = Asm.GetTempSymbol(Prefix, Entry.second);
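As an illustrative aside (not part of this patch), the standalone sketch below shows the lazy get-or-create pattern getEntry now uses via operator[]: the map default-constructs the entry on first lookup, and the pool assigns it a label and an insertion-order index only if it has not been initialized yet. std::map and a string label stand in for llvm::StringMap and MCSymbol here.

#include <cstdio>
#include <map>
#include <string>
#include <utility>

using Entry = std::pair<std::string, unsigned>; // (label, index)

static Entry &getEntry(std::map<std::string, Entry> &Pool,
                       const std::string &Prefix, const std::string &Str) {
  Entry &E = Pool[Str];          // Default-constructed on first lookup.
  if (E.first.empty()) {         // Not initialized yet: assign label and index.
    E.second = Pool.size() - 1;  // size() already counts the new entry.
    E.first = Prefix + std::to_string(E.second);
  }
  return E;
}

int main() {
  std::map<std::string, Entry> Pool;
  Entry &A = getEntry(Pool, ".Linfo_string", "hello");
  Entry &B = getEntry(Pool, ".Linfo_string", "hello"); // Reuses the entry.
  std::printf("%s index %u same %d\n", B.first.c_str(), B.second,
              int(&A == &B)); // .Linfo_string0 index 0 same 1
}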
@@ -36,8 +33,7 @@ unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) {
}
void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection,
- const MCSection *OffsetSection,
- const MCSymbol *StrSecSym) {
+ const MCSection *OffsetSection) {
if (Pool.empty())
return;
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index c1615fb..ab32c1b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_STRINGPOOL_H__
-#define CODEGEN_ASMPRINTER_STRINGPOOL_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -28,18 +28,13 @@ class StringRef;
class DwarfStringPool {
StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool;
StringRef Prefix;
- MCSymbol *SectionSymbol;
public:
DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix)
- : Pool(A), Prefix(Prefix), SectionSymbol(Asm.GetTempSymbol(Prefix)) {}
+ : Pool(A), Prefix(Prefix) {}
void emit(AsmPrinter &Asm, const MCSection *StrSection,
- const MCSection *OffsetSection = nullptr,
- const MCSymbol *StrSecSym = nullptr);
-
- /// \brief Returns the entry into the start of the pool.
- MCSymbol *getSectionSymbol();
+ const MCSection *OffsetSection = nullptr);
/// \brief Returns an entry into the string pool with the given
/// string text.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 9538bee..919d9d2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "DwarfUnit.h"
+
#include "DwarfAccelTable.h"
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/Constants.h"
@@ -30,6 +32,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -44,20 +47,12 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
: UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A),
- DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr),
- Skeleton(nullptr) {
+ DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
assert(UnitTag == dwarf::DW_TAG_compile_unit ||
UnitTag == dwarf::DW_TAG_type_unit);
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
-DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
- AsmPrinter *A, DwarfDebug *DW,
- DwarfFile *DWU)
- : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU) {
- insertDIE(Node, &getUnitDie());
-}
-
DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
MCDwarfDwoLineTable *SplitLineTable)
@@ -146,7 +141,7 @@ static bool isShareableAcrossCUs(DIDescriptor D) {
/// will be kept in DwarfDebug for shareable DIEs.
DIE *DwarfUnit::getDIE(DIDescriptor D) const {
if (isShareableAcrossCUs(D))
- return DD->getDIE(D);
+ return DU->getDIE(D);
return MDNodeToDieMap.lookup(D);
}
@@ -155,7 +150,7 @@ DIE *DwarfUnit::getDIE(DIDescriptor D) const {
/// will be kept in DwarfDebug for shareable DIEs.
void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) {
if (isShareableAcrossCUs(Desc)) {
- DD->insertDIE(Desc, D);
+ DU->insertDIE(Desc, D);
return;
}
MDNodeToDieMap.insert(std::make_pair(Desc, D));
@@ -206,10 +201,14 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
/// table.
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
-
- if (!DD->useSplitDwarf())
+ if (!isDwoUnit())
return addLocalString(Die, Attribute, String);
+ addIndexedString(Die, Attribute, String);
+}
+
+void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute,
+ StringRef String) {
unsigned idx = DU->getStringPool().getIndex(*Asm, String);
DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
@@ -224,31 +223,12 @@ void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
DIEValue *Value;
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
Value = new (DIEValueAllocator) DIELabel(Symb);
- else {
- MCSymbol *StringPool = DU->getStringPool().getSectionSymbol();
- Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
- }
+ else
+ Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym());
DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
}
-/// addExpr - Add a Dwarf expression attribute data and value.
-///
-void DwarfUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) {
- DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
- Die.addValue((dwarf::Attribute)0, Form, Value);
-}
-
-/// addLocationList - Add a Dwarf loclistptr attribute data and value.
-///
-void DwarfUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
- unsigned Index) {
- DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
- dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4;
- Die.addValue(Attribute, Form, Value);
-}
-
/// addLabel - Add a Dwarf label attribute data and value.
///
void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
@@ -261,16 +241,6 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
addLabel(Die, (dwarf::Attribute)0, Form, Label);
}
-/// addSectionLabel - Add a Dwarf section label attribute data and value.
-///
-void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label) {
- if (DD->getDwarfVersion() >= 4)
- addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label);
- else
- addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label);
-}
-
/// addSectionOffset - Add an offset into a section attribute data and value.
///
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
@@ -281,45 +251,6 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
}
-/// addLabelAddress - Add a dwarf label attribute data and value using
-/// DW_FORM_addr or DW_FORM_GNU_addr_index.
-///
-void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label) {
-
- if (!DD->useSplitDwarf())
- return addLocalLabelAddress(Die, Attribute, Label);
-
- if (Label)
- DD->addArangeLabel(SymbolCU(this, Label));
-
- unsigned idx = DD->getAddressPool().getIndex(Label);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
- Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
-}
-
-void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
- dwarf::Attribute Attribute,
- const MCSymbol *Label) {
- if (Label)
- DD->addArangeLabel(SymbolCU(this, Label));
-
- Die.addValue(Attribute, dwarf::DW_FORM_addr,
- Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label)
- : new (DIEValueAllocator) DIEInteger(0));
-}
-
-unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) {
- // If we print assembly, we can't separate .file entries according to
- // compile units. Thus all files will belong to the default compile unit.
-
- // FIXME: add a better feature test than hasRawTextSupport. Even better,
- // extend .file to support this.
- return Asm->OutStreamer.EmitDwarfFileDirective(
- 0, DirName, FileName,
- Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID());
-}
-
unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) {
return SplitLineTable ? SplitLineTable->getFile(DirName, FileName)
: getCU().getOrCreateSourceID(FileName, DirName);
@@ -339,16 +270,6 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
}
}
-/// addSectionDelta - Add a section label delta attribute data and value.
-///
-void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo) {
- DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
- Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- Value);
-}
-
void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
@@ -477,22 +398,12 @@ void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) {
addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory());
}
-/// addVariableAddress - Add DW_AT_location attribute for a
-/// DbgVariable based on provided MachineLocation.
-void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
- MachineLocation Location) {
- if (DV.variableHasComplexAddress())
- addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
- else if (DV.isBlockByrefVariable())
- addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
- else
- addAddress(Die, dwarf::DW_AT_location, Location,
- DV.getVariable().isIndirect());
-}
-
/// addRegisterOp - Add register operand.
-void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+// FIXME: Ideally, this would share the implementation with
+// AsmPrinter::EmitDwarfRegOpPiece.
+void DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
+ unsigned SizeInBits, unsigned OffsetInBits) {
+ const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
int DWReg = RI->getDwarfRegNum(Reg, false);
bool isSubRegister = DWReg < 0;
@@ -511,7 +422,7 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
return;
}
- // Emit register
+ // Emit register.
if (DWReg < 32)
addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
else {
@@ -519,18 +430,34 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
addUInt(TheDie, dwarf::DW_FORM_udata, DWReg);
}
- // Emit Mask
- if (isSubRegister) {
- unsigned Size = RI->getSubRegIdxSize(Idx);
- unsigned Offset = RI->getSubRegIdxOffset(Idx);
- if (Offset > 0) {
+ // Emit mask.
+ bool isPiece = SizeInBits > 0;
+ if (isSubRegister || isPiece) {
+ const unsigned SizeOfByte = 8;
+ unsigned RegSizeInBits = RI->getSubRegIdxSize(Idx);
+ unsigned RegOffsetInBits = RI->getSubRegIdxOffset(Idx);
+ unsigned PieceSizeInBits = std::max(SizeInBits, RegSizeInBits);
+ unsigned PieceOffsetInBits = OffsetInBits ? OffsetInBits : RegOffsetInBits;
+ assert(RegSizeInBits >= SizeInBits && "register smaller than value");
+
+ if (RegOffsetInBits != PieceOffsetInBits) {
+ // Manually shift the value into place, since the DW_OP_piece
+ // describes the part of the variable, not the position of the
+ // subregister.
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(TheDie, dwarf::DW_FORM_data1, RegOffsetInBits);
+ addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_shr);
+ }
+
+ if (PieceOffsetInBits > 0 || PieceSizeInBits % SizeOfByte) {
+ assert(PieceSizeInBits > 0 && "piece has zero size");
addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece);
- addUInt(TheDie, dwarf::DW_FORM_data1, Size);
- addUInt(TheDie, dwarf::DW_FORM_data1, Offset);
- } else {
- unsigned ByteSize = Size / 8; // Assuming 8 bits per byte.
+ addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits);
+ addUInt(TheDie, dwarf::DW_FORM_data1, PieceOffsetInBits);
+ } else {
+ assert(PieceSizeInBits > 0 && "piece has zero size");
addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece);
- addUInt(TheDie, dwarf::DW_FORM_data1, ByteSize);
+ addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits/SizeOfByte);
}
}
}
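As an illustrative aside (not part of this patch), the standalone sketch below shows the piece-form choice made in addRegisterOpPiece: the register is named with DW_OP_reg0..31 or DW_OP_regx, and a partial value is then described with DW_OP_bit_piece when the piece offset is nonzero or the size is not byte-aligned, or with a byte-sized DW_OP_piece otherwise. Opcodes are printed rather than encoded, and the subregister shift (DW_OP_constu/DW_OP_shr) is omitted for brevity.

#include <cstdio>

static void describePiece(unsigned DWReg, unsigned SizeInBits,
                          unsigned OffsetInBits) {
  const unsigned SizeOfByte = 8;
  if (DWReg < 32)
    std::printf("DW_OP_reg%u\n", DWReg);
  else
    std::printf("DW_OP_regx %u\n", DWReg);

  if (SizeInBits == 0)
    return; // Whole register: no piece operation needed.

  if (OffsetInBits > 0 || SizeInBits % SizeOfByte)
    // Sub-byte or shifted pieces need the bit-granular form.
    std::printf("DW_OP_bit_piece %u %u\n", SizeInBits, OffsetInBits);
  else
    std::printf("DW_OP_piece %u\n", SizeInBits / SizeOfByte);
}

int main() {
  describePiece(/*DWReg=*/0, /*SizeInBits=*/32, /*OffsetInBits=*/0);
  // DW_OP_reg0, DW_OP_piece 4
  describePiece(/*DWReg=*/40, /*SizeInBits=*/12, /*OffsetInBits=*/4);
  // DW_OP_regx 40, DW_OP_bit_piece 12 4
}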
@@ -538,9 +465,9 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
/// addRegisterOffset - Add register offset.
void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
int64_t Offset) {
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
unsigned DWReg = RI->getDwarfRegNum(Reg, false);
- const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = Asm->TM.getSubtargetImpl()->getRegisterInfo();
if (Reg == TRI->getFrameRegister(*Asm->MF))
// If variable offset is based in frame register then use fbreg.
addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
@@ -553,63 +480,6 @@ void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
addSInt(TheDie, dwarf::DW_FORM_sdata, Offset);
}
-/// addAddress - Add an address attribute to a die based on the location
-/// provided.
-void DwarfUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
- const MachineLocation &Location, bool Indirect) {
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
-
- if (Location.isReg() && !Indirect)
- addRegisterOp(*Loc, Location.getReg());
- else {
- addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
- if (Indirect && !Location.isReg()) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- }
- }
-
- // Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
-}
-
-/// addComplexAddress - Start with the address based on the location provided,
-/// and generate the DWARF information necessary to find the actual variable
-/// given the extra address information encoded in the DbgVariable, starting
-/// from the starting location. Add the DWARF information to the die.
-///
-void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location) {
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- unsigned N = DV.getNumAddrElements();
- unsigned i = 0;
- if (Location.isReg()) {
- if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
- // If first address element is OpPlus then emit
- // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1));
- i = 2;
- } else
- addRegisterOp(*Loc, Location.getReg());
- } else
- addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
-
- for (; i < N; ++i) {
- uint64_t Element = DV.getAddrElement(i);
- if (Element == DIBuilder::OpPlus) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
- } else if (Element == DIBuilder::OpDeref) {
- if (!Location.isReg())
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- } else
- llvm_unreachable("unknown DIBuilder Opcode");
- }
-
- // Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
-}
-
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
gives the variable VarName either the struct, or a pointer to the struct, as
@@ -690,7 +560,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Find the __forwarding field and the variable field in the __Block_byref
// struct.
- DIArray Fields = blockStruct.getTypeArray();
+ DIArray Fields = blockStruct.getElements();
DIDerivedType varField;
DIDerivedType forwardingField;
@@ -712,7 +582,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
if (Location.isReg())
- addRegisterOp(*Loc, Location.getReg());
+ addRegisterOpPiece(*Loc, Location.getReg());
else
addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
@@ -1002,11 +872,9 @@ void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
DD->addAccelType(Ty.getName(), TyDIE, Flags);
- if ((!Context || Context.isCompileUnit() || Context.isFile() ||
- Context.isNameSpace()) &&
- getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly)
- GlobalTypes[getParentContextString(Context) + Ty.getName().str()] =
- &TyDIE;
+ if (!Context || Context.isCompileUnit() || Context.isFile() ||
+ Context.isNameSpace())
+ addGlobalType(Ty, TyDIE, Context);
}
}
@@ -1031,14 +899,6 @@ void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) {
addDIEEntry(Entity, Attribute, Entry);
}
-/// addGlobalName - Add a new global name to the compile unit.
-void DwarfUnit::addGlobalName(StringRef Name, DIE &Die, DIScope Context) {
- if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly)
- return;
- std::string FullName = getParentContextString(Context) + Name.str();
- GlobalNames[FullName] = &Die;
-}
-
/// getParentContextString - Walks the metadata parent chain in a language
/// specific manner (using the compile unit language) and returns
/// it as a string. This is done at the metadata level because DIEs may
@@ -1129,16 +989,16 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
}
/// constructSubprogramArguments - Construct function argument DIEs.
-void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) {
+void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) {
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIDescriptor Ty = Args.getElement(i);
- if (Ty.isUnspecifiedParameter()) {
+ DIType Ty = resolve(Args.getElement(i));
+ if (!Ty) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
} else {
DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
- addType(Arg, DIType(Ty));
- if (DIType(Ty).isArtificial())
+ addType(Arg, Ty);
+ if (Ty.isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
}
}
@@ -1161,14 +1021,14 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
break;
case dwarf::DW_TAG_subroutine_type: {
// Add return type. A void return won't have a type.
- DIArray Elements = CTy.getTypeArray();
- DIType RTy(Elements.getElement(0));
+ DITypeArray Elements = DISubroutineType(CTy).getTypeArray();
+ DIType RTy(resolve(Elements.getElement(0)));
if (RTy)
addType(Buffer, RTy);
bool isPrototyped = true;
if (Elements.getNumElements() == 2 &&
- Elements.getElement(1).isUnspecifiedParameter())
+ !Elements.getElement(1))
isPrototyped = false;
constructSubprogramArguments(Buffer, Elements);
@@ -1191,7 +1051,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type: {
// Add elements to structure type.
- DIArray Elements = CTy.getTypeArray();
+ DIArray Elements = CTy.getElements();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
if (Element.isSubprogram())
@@ -1373,20 +1233,23 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
}
/// getOrCreateSubprogramDIE - Create new DIE using SP.
-DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE (as is the case for member function
// declarations).
- DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+ DIE *ContextDIE =
+ Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext()));
if (DIE *SPDie = getDIE(SP))
return SPDie;
if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
- // Add subprogram definitions to the CU die directly.
- ContextDIE = &getUnitDie();
- // Build the decl now to ensure it precedes the definition.
- getOrCreateSubprogramDIE(SPDecl);
+ if (!Minimal) {
+ // Add subprogram definitions to the CU die directly.
+ ContextDIE = &getUnitDie();
+ // Build the decl now to ensure it precedes the definition.
+ getOrCreateSubprogramDIE(SPDecl);
+ }
}
// DW_TAG_inlined_subroutine may refer to this DIE.
@@ -1401,14 +1264,8 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
return &SPDie;
}
-void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) {
- DISubprogram SPDecl = SP.getFunctionDeclaration();
- DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
- applySubprogramAttributes(SP, SPDie);
- addGlobalName(SP.getName(), SPDie, Context);
-}
-
-void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
+bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP,
+ DIE &SPDie) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
@@ -1431,17 +1288,29 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
GlobalValue::getRealLinkageName(LinkageName));
- if (DeclDie) {
- // Refer to the function declaration where all the other attributes will be
- // found.
- addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
- return;
- }
+ if (!DeclDie)
+ return false;
+
+ // Refer to the function declaration where all the other attributes will be
+ // found.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
+ return true;
+}
+
+void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
+ bool Minimal) {
+ if (!Minimal)
+ if (applySubprogramDefinitionAttributes(SP, SPDie))
+ return;
// Constructors and operators for anonymous aggregates do not have names.
if (!SP.getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP.getName());
+ // Skip the rest of the attributes under -gmlt to save space.
+ if (Minimal)
+ return;
+
addSourceLine(SPDie, SP);
// Add the prototype if we have a prototype and we have a C like
@@ -1452,15 +1321,15 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
- DICompositeType SPTy = SP.getType();
+ DISubroutineType SPTy = SP.getType();
assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type &&
"the type of a subprogram should be a subroutine");
- DIArray Args = SPTy.getTypeArray();
+ DITypeArray Args = SPTy.getTypeArray();
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
- if (Args.getElement(0))
- addType(SPDie, DIType(Args.getElement(0)));
+ if (resolve(Args.getElement(0)))
+ addType(SPDie, DIType(resolve(Args.getElement(0))));
unsigned VK = SP.getVirtuality();
if (VK) {
@@ -1506,7 +1375,7 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
else if (SP.isPrivate())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else
+ else if (SP.isPublic())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
@@ -1514,184 +1383,6 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
addFlag(SPDie, dwarf::DW_AT_explicit);
}
-void DwarfUnit::applyVariableAttributes(const DbgVariable &Var,
- DIE &VariableDie) {
- StringRef Name = Var.getName();
- if (!Name.empty())
- addString(VariableDie, dwarf::DW_AT_name, Name);
- addSourceLine(VariableDie, Var.getVariable());
- addType(VariableDie, Var.getType());
- if (Var.isArtificial())
- addFlag(VariableDie, dwarf::DW_AT_artificial);
-}
-
-// Return const expression if value is a GEP to access merged global
-// constant. e.g.
-// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
-static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
- const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
- if (!CE || CE->getNumOperands() != 3 ||
- CE->getOpcode() != Instruction::GetElementPtr)
- return nullptr;
-
- // First operand points to a global struct.
- Value *Ptr = CE->getOperand(0);
- if (!isa<GlobalValue>(Ptr) ||
- !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
- return nullptr;
-
- // Second operand is zero.
- const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
- if (!CI || !CI->isZero())
- return nullptr;
-
- // Third operand is offset.
- if (!isa<ConstantInt>(CE->getOperand(2)))
- return nullptr;
-
- return CE;
-}
-
-/// createGlobalVariableDIE - create global variable DIE.
-void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
- // Check for pre-existence.
- if (getDIE(GV))
- return;
-
- assert(GV.isGlobalVariable());
-
- DIScope GVContext = GV.getContext();
- DIType GTy = DD->resolve(GV.getType());
-
- // If this is a static data member definition, some attributes belong
- // to the declaration DIE.
- DIE *VariableDIE = nullptr;
- bool IsStaticMember = false;
- DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
- if (SDMDecl.Verify()) {
- assert(SDMDecl.isStaticMember() && "Expected static member decl");
- // We need the declaration DIE that is in the static member's class.
- VariableDIE = getOrCreateStaticMemberDIE(SDMDecl);
- IsStaticMember = true;
- }
-
- // If this is not a static data member definition, create the variable
- // DIE and add the initial set of attributes to it.
- if (!VariableDIE) {
- // Construct the context before querying for the existence of the DIE in
- // case such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(GVContext);
-
- // Add to map.
- VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV);
-
- // Add name and type.
- addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
- addType(*VariableDIE, GTy);
-
- // Add scoping info.
- if (!GV.isLocalToUnit())
- addFlag(*VariableDIE, dwarf::DW_AT_external);
-
- // Add line number info.
- addSourceLine(*VariableDIE, GV);
- }
-
- // Add location.
- bool addToAccelTable = false;
- DIE *VariableSpecDIE = nullptr;
- bool isGlobalVariable = GV.getGlobal() != nullptr;
- if (isGlobalVariable) {
- addToAccelTable = true;
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
- if (GV.getGlobal()->isThreadLocal()) {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
- } else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
- }
- // 3) followed by a custom OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
- } else {
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- }
- // Do not create specification DIE if context is either compile unit
- // or a subprogram.
- if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
- !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) {
- // Create specification DIE.
- VariableSpecDIE = &createAndAddDIE(dwarf::DW_TAG_variable, UnitDie);
- addDIEEntry(*VariableSpecDIE, dwarf::DW_AT_specification, *VariableDIE);
- addBlock(*VariableSpecDIE, dwarf::DW_AT_location, Loc);
- // A static member's declaration is already flagged as such.
- if (!SDMDecl.Verify())
- addFlag(*VariableDIE, dwarf::DW_AT_declaration);
- } else {
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- }
- // Add the linkage name.
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
- // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
- // TAG_variable.
- addString(IsStaticMember && VariableSpecDIE ? *VariableSpecDIE
- : *VariableDIE,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
- : dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
- } else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
- // AT_const_value was added when the static member was created. To avoid
- // emitting AT_const_value multiple times, we only add AT_const_value when
- // it is not a static member.
- if (!IsStaticMember)
- addConstantValue(*VariableDIE, CI, GTy);
- } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) {
- addToAccelTable = true;
- // GV is a merged global.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- Value *Ptr = CE->getOperand(0);
- MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
- addUInt(*Loc, dwarf::DW_FORM_udata,
- Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- }
-
- if (addToAccelTable) {
- DIE &AddrDIE = VariableSpecDIE ? *VariableSpecDIE : *VariableDIE;
- DD->addAccelName(GV.getName(), AddrDIE);
-
- // If the linkage name is different than the name, go ahead and output
- // that as well into the name table.
- if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
- DD->addAccelName(GV.getLinkageName(), AddrDIE);
- }
-
- addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
- GV.getContext());
-}
-
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
@@ -1700,9 +1391,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
// The LowerBound value defines the lower bounds which is typically zero for
// C/C++. The Count value is the number of elements. Values are 64 bit. If
// Count == -1 then the array is unbounded and we do not emit
- // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
- // Count == 0, then the array has zero elements in which case we do not emit
- // an upper bound.
+ // DW_AT_lower_bound and DW_AT_count attributes.
int64_t LowerBound = SR.getLo();
int64_t DefaultLowerBound = getDefaultLowerBound();
int64_t Count = SR.getCount();
@@ -1710,11 +1399,22 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
- if (Count != -1 && Count != 0)
+ if (Count != -1)
// FIXME: An unbounded array should reference the expression that defines
// the array.
- addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None,
- LowerBound + Count - 1);
+ addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
+}
+
+DIE *DwarfUnit::getIndexTyDie() {
+ if (IndexTyDie)
+ return IndexTyDie;
+ // Construct an integer type to use for indexes.
+ IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
+ addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype");
+ addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
+ addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_unsigned);
+ return IndexTyDie;
}
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
@@ -1729,18 +1429,9 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
// FIXME: This type should be passed down from the front end
// as different languages may have different sizes for indexes.
DIE *IdxTy = getIndexTyDie();
- if (!IdxTy) {
- // Construct an integer type to use for indexes.
- IdxTy = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
- addString(*IdxTy, dwarf::DW_AT_name, "sizetype");
- addUInt(*IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
- addUInt(*IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- dwarf::DW_ATE_unsigned);
- setIndexTyDie(IdxTy);
- }
// Add subranges to array type.
- DIArray Elements = CTy.getTypeArray();
+ DIArray Elements = CTy.getElements();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
if (Element.getTag() == dwarf::DW_TAG_subrange_type)
@@ -1750,7 +1441,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType.
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
- DIArray Elements = CTy.getTypeArray();
+ DIArray Elements = CTy.getElements();
// Add enumerators to enumeration type.
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
@@ -1788,68 +1479,6 @@ void DwarfUnit::constructContainingTypeDIEs() {
}
}
-/// constructVariableDIE - Construct a DIE for the given DbgVariable.
-std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV,
- bool Abstract) {
- auto D = constructVariableDIEImpl(DV, Abstract);
- DV.setDIE(*D);
- return D;
-}
-
-std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV,
- bool Abstract) {
- // Define variable debug information entry.
- auto VariableDie = make_unique<DIE>(DV.getTag());
-
- if (Abstract) {
- applyVariableAttributes(DV, *VariableDie);
- return VariableDie;
- }
-
- // Add variable address.
-
- unsigned Offset = DV.getDotDebugLocOffset();
- if (Offset != ~0U) {
- addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
- return VariableDie;
- }
-
- // Check if variable is described by a DBG_VALUE instruction.
- if (const MachineInstr *DVInsn = DV.getMInsn()) {
- assert(DVInsn->getNumOperands() == 3);
- if (DVInsn->getOperand(0).isReg()) {
- const MachineOperand RegOp = DVInsn->getOperand(0);
- // If the second operand is an immediate, this is an indirect value.
- if (DVInsn->getOperand(1).isImm()) {
- MachineLocation Location(RegOp.getReg(),
- DVInsn->getOperand(1).getImm());
- addVariableAddress(DV, *VariableDie, Location);
- } else if (RegOp.getReg())
- addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
- } else if (DVInsn->getOperand(0).isImm())
- addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
- else if (DVInsn->getOperand(0).isFPImm())
- addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isCImm())
- addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
- DV.getType());
-
- return VariableDie;
- }
-
- // .. else use frame index.
- int FI = DV.getFrameIndex();
- if (FI != ~0) {
- unsigned FrameReg = 0;
- const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- MachineLocation Location(FrameReg, Offset);
- addVariableAddress(DV, *VariableDie, Location);
- }
-
- return VariableDie;
-}
-
/// constructMemberDIE - Construct member DIE from DIDerivedType.
void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer);
@@ -1922,7 +1551,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
- else
+ else if (DT.isPublic())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
@@ -1971,7 +1600,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
else if (DT.isPrivate())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else
+ else if (DT.isPublic())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
@@ -1984,6 +1613,10 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
}
void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
+ // Emit size of content not including length itself
+ Asm->OutStreamer.AddComment("Length of Unit");
+ Asm->EmitInt32(getHeaderSize() + UnitDie.getSize());
+
Asm->OutStreamer.AddComment("DWARF version number");
Asm->EmitInt16(DD->getDwarfVersion());
Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
@@ -1999,50 +1632,9 @@ void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
}
-void DwarfUnit::addRange(RangeSpan Range) {
- // Only add a range for this unit if we're emitting full debug.
- if (getCUNode().getEmissionKind() == DIBuilder::FullDebug) {
- // If we have no current ranges just add the range and return, otherwise,
- // check the current section and CU against the previous section and CU we
- // emitted into and the subprogram was contained within. If these are the
- // same then extend our current range, otherwise add this as a new range.
- if (CURanges.size() == 0 ||
- this != DD->getPrevCU() ||
- Asm->getCurrentSection() != DD->getPrevSection()) {
- CURanges.push_back(Range);
- return;
- }
-
- assert(&(CURanges.back().getEnd()->getSection()) ==
- &(Range.getEnd()->getSection()) &&
- "We can only append to a range in the same section!");
- CURanges.back().setEnd(Range.getEnd());
- }
-}
-
-void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
- // Define start line table label for each Compile Unit.
- MCSymbol *LineTableStartSym =
- Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
-
- stmtListIndex = UnitDie.getValues().size();
-
- // DW_AT_stmt_list is a offset of line number information for this
- // compile unit in debug_line section. For split dwarf this is
- // left in the skeleton CU and so not included.
- // The line table entries are not always emitted in assembly, so it
- // is not okay to use line_table_start here.
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym);
- else
- addSectionDelta(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
- DwarfLineSectionSym);
-}
-
-void DwarfCompileUnit::applyStmtList(DIE &D) {
- D.addValue(dwarf::DW_AT_stmt_list,
- UnitDie.getAbbrev().getData()[stmtListIndex].getForm(),
- UnitDie.getValues()[stmtListIndex]);
+void DwarfUnit::initSection(const MCSection *Section) {
+ assert(!this->Section);
+ this->Section = Section;
}
void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
@@ -2055,16 +1647,8 @@ void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
sizeof(Ty->getOffset()));
}
-void DwarfTypeUnit::initSection(const MCSection *Section) {
- assert(!this->Section);
- this->Section = Section;
- // Since each type unit is contained in its own COMDAT section, the begin
- // label and the section label are the same. Using the begin label emission in
- // DwarfDebug to emit the section label as well is slightly subtle/sneaky, but
- // the only other alternative of lazily constructing start-of-section labels
- // and storing a mapping in DwarfDebug (or AsmPrinter).
- this->SectionSym = this->LabelBegin =
- Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
- this->LabelEnd =
- Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
+bool DwarfTypeUnit::isDwoUnit() const {
+ // Since there are no skeleton type units, all type units are dwo type units
+ // when split DWARF is being used.
+ return DD->useSplitDwarf();
}
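Aside on the subrange hunk above: the old constructSubrangeDIE derived DW_AT_upper_bound as LowerBound + Count - 1, while the new code emits DW_AT_count directly and treats Count == -1 as "unbounded". A minimal standalone sketch (plain C++, illustrative only, not part of this patch) of how the two encodings line up for a bounded array:

// Standalone sketch: DW_AT_count (new) vs. DW_AT_upper_bound (old) for one subrange.
#include <cassert>
#include <cstdint>
#include <iostream>

int main() {
  // Example: "int a[10]" with the default C/C++ lower bound of 0.
  int64_t LowerBound = 0;
  int64_t Count = 10;                           // new encoding: DW_AT_count
  int64_t UpperBound = LowerBound + Count - 1;  // old encoding: DW_AT_upper_bound

  // Both attributes describe the same index range [LowerBound, UpperBound].
  assert(UpperBound - LowerBound + 1 == Count);
  std::cout << "count=" << Count << ", upper_bound=" << UpperBound << "\n";

  // Count == -1 marks an unbounded array; the patched code then emits no
  // DW_AT_count at all.
  return 0;
}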
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index b7b83b2..f40c937 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
-#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
#include "DIE.h"
#include "DwarfDebug.h"
@@ -55,7 +55,8 @@ private:
SmallVector<RangeSpan, 2> Ranges;
public:
- RangeSpanList(MCSymbol *Sym) : RangeSym(Sym) {}
+ RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges)
+ : RangeSym(Sym), Ranges(std::move(Ranges)) {}
MCSymbol *getSym() const { return RangeSym; }
const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
void addRange(RangeSpan Range) { Ranges.push_back(Range); }
@@ -96,12 +97,6 @@ protected:
/// descriptors to debug information entries using a DIEEntry proxy.
DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
- /// GlobalNames - A map of globally visible named entities for this unit.
- StringMap<const DIE *> GlobalNames;
-
- /// GlobalTypes - A map of globally visible types for this unit.
- StringMap<const DIE *> GlobalTypes;
-
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -113,13 +108,6 @@ protected:
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
- // List of ranges for a given compile unit.
- SmallVector<RangeSpan, 1> CURanges;
-
- // List of range lists for a given compile unit, separate from the ranges for
- // the CU itself.
- SmallVector<RangeSpanList, 1> CURangeLists;
-
// DIEValueAllocator - All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
@@ -129,86 +117,31 @@ protected:
/// The section this unit will be emitted in.
const MCSection *Section;
- /// A label at the start of the non-dwo section related to this unit.
- MCSymbol *SectionSym;
+ DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU);
- /// The start of the unit within its section.
- MCSymbol *LabelBegin;
+ void initSection(const MCSection *Section);
- /// The end of the unit within its section.
- MCSymbol *LabelEnd;
+ /// Add a string attribute data and value.
+ void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
- /// Skeleton unit associated with this unit.
- DwarfUnit *Skeleton;
+ void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
- DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU);
+ bool applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie);
public:
virtual ~DwarfUnit();
- /// Set the skeleton unit associated with this unit.
- void setSkeleton(DwarfUnit &Skel) { Skeleton = &Skel; }
-
- /// Get the skeleton unit associated with this unit.
- DwarfUnit *getSkeleton() const { return Skeleton; }
-
- /// Pass in the SectionSym even though we could recreate it in every compile
- /// unit (type units will have actually distinct symbols once they're in
- /// comdat sections).
- void initSection(const MCSection *Section, MCSymbol *SectionSym) {
- assert(!this->Section);
- this->Section = Section;
- this->SectionSym = SectionSym;
- this->LabelBegin =
- Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
- this->LabelEnd =
- Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
- }
-
const MCSection *getSection() const {
assert(Section);
return Section;
}
- /// If there's a skeleton then return the section symbol for the skeleton
- /// unit, otherwise return the section symbol for this unit.
- MCSymbol *getLocalSectionSym() const {
- if (Skeleton)
- return Skeleton->getSectionSym();
- return getSectionSym();
- }
-
- MCSymbol *getSectionSym() const {
- assert(Section);
- return SectionSym;
- }
-
- /// If there's a skeleton then return the begin label for the skeleton unit,
- /// otherwise return the local label for this unit.
- MCSymbol *getLocalLabelBegin() const {
- if (Skeleton)
- return Skeleton->getLabelBegin();
- return getLabelBegin();
- }
-
- MCSymbol *getLabelBegin() const {
- assert(Section);
- return LabelBegin;
- }
-
- MCSymbol *getLabelEnd() const {
- assert(Section);
- return LabelEnd;
- }
-
// Accessors.
unsigned getUniqueID() const { return UniqueID; }
uint16_t getLanguage() const { return CUNode.getLanguage(); }
DICompileUnit getCUNode() const { return CUNode; }
DIE &getUnitDie() { return UnitDie; }
- const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
- const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
@@ -216,29 +149,15 @@ public:
/// hasContent - Return true if this compile unit has something to write out.
bool hasContent() const { return !UnitDie.getChildren().empty(); }
- /// addRange - Add an address range to the list of ranges for this unit.
- void addRange(RangeSpan Range);
-
- /// getRanges - Get the list of ranges for this unit.
- const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
- SmallVectorImpl<RangeSpan> &getRanges() { return CURanges; }
-
- /// addRangeList - Add an address range list to the list of range lists.
- void addRangeList(RangeSpanList Ranges) { CURangeLists.push_back(Ranges); }
-
- /// getRangeLists - Get the vector of range lists.
- const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
- return CURangeLists;
- }
- SmallVectorImpl<RangeSpanList> &getRangeLists() { return CURangeLists; }
-
/// getParentContextString - Get a string containing the language specific
/// context for a global name.
std::string getParentContextString(DIScope Context) const;
- /// addGlobalName - Add a new global entity to the compile unit.
- ///
- void addGlobalName(StringRef Name, DIE &Die, DIScope Context);
+ /// Add a new global name to the compile unit.
+ virtual void addGlobalName(StringRef Name, DIE &Die, DIScope Context) {}
+
+ /// Add a new global type to the compile unit.
+ virtual void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) {}
/// addAccelNamespace - Add a new name to the namespace accelerator table.
void addAccelNamespace(StringRef Name, const DIE &Die);
@@ -275,14 +194,7 @@ public:
void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
/// addString - Add a string attribute data and value.
- void addString(DIE &Die, dwarf::Attribute Attribute, const StringRef Str);
-
- /// addLocalString - Add a string attribute data and value.
- void addLocalString(DIE &Die, dwarf::Attribute Attribute,
- const StringRef Str);
-
- /// addExpr - Add a Dwarf expression attribute data and value.
- void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+ void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
/// addLabel - Add a Dwarf label attribute data and value.
void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
@@ -290,14 +202,6 @@ public:
void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
- /// addLocationList - Add a Dwarf loclistptr attribute data and value.
- void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
-
- /// addSectionLabel - Add a Dwarf section label attribute data and value.
- ///
- void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label);
-
/// addSectionOffset - Add an offset into a section attribute data and value.
///
void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer);
@@ -306,10 +210,6 @@ public:
/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addOpAddress(DIELoc &Die, const MCSymbol *Label);
- /// addSectionDelta - Add a label delta attribute data and value.
- void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
- const MCSymbol *Lo);
-
/// addLabelDelta - Add a label delta attribute data and value.
void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
const MCSymbol *Lo);
@@ -339,11 +239,6 @@ public:
void addSourceLine(DIE &Die, DINameSpace NS);
void addSourceLine(DIE &Die, DIObjCProperty Ty);
- /// addAddress - Add an address attribute to a die based on the location
- /// provided.
- void addAddress(DIE &Die, dwarf::Attribute Attribute,
- const MachineLocation &Location, bool Indirect = false);
-
/// addConstantValue - Add constant value entry in variable DIE.
void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty);
void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty);
@@ -359,19 +254,12 @@ public:
void addTemplateParams(DIE &Buffer, DIArray TParams);
/// addRegisterOp - Add register operand.
- void addRegisterOp(DIELoc &TheDie, unsigned Reg);
+ void addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
+ unsigned SizeInBits = 0, unsigned OffsetInBits = 0);
/// addRegisterOffset - Add register offset.
void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
- /// addComplexAddress - Start with the address based on the location provided,
- /// and generate the DWARF information necessary to find the actual variable
- /// (navigating the extra location information encoded in the type) based on
- /// the starting location. Add the DWARF information to the die.
- void addComplexAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location);
-
// FIXME: Should be reformulated in terms of addComplexAddress.
/// addBlockByrefAddress - Start with the address based on the location
/// provided, and generate the DWARF information necessary to find the
@@ -382,11 +270,6 @@ public:
dwarf::Attribute Attribute,
const MachineLocation &Location);
- /// addVariableAddress - Add DW_AT_location attribute for a
- /// DbgVariable based on provided MachineLocation.
- void addVariableAddress(const DbgVariable &DV, DIE &Die,
- MachineLocation Location);
-
/// addType - Add a new type attribute to the specified entity. This takes
/// and attribute parameter because DW_AT_friend attributes are also
/// type references.
@@ -397,11 +280,10 @@ public:
DIE *getOrCreateNameSpace(DINameSpace NS);
/// getOrCreateSubprogramDIE - Create new DIE using SP.
- DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+ DIE *getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal = false);
- void applySubprogramAttributes(DISubprogram SP, DIE &SPDie);
- void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
- void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
+ void applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
+ bool Minimal = false);
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
@@ -417,12 +299,8 @@ public:
/// vtables.
void constructContainingTypeDIEs();
- /// constructVariableDIE - Construct a DIE for the given DbgVariable.
- std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
- bool Abstract = false);
-
/// constructSubprogramArguments - Construct function argument DIEs.
- void constructSubprogramArguments(DIE &Buffer, DIArray Args);
+ void constructSubprogramArguments(DIE &Buffer, DITypeArray Args);
/// Create a DIE with the given Tag, add the DIE to its parent, and
/// call insertDIE if MD is not null.
@@ -453,12 +331,13 @@ protected:
/// none currently exists, create a new ID and insert it in the line table.
virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0;
-private:
- /// \brief Construct a DIE for the given DbgVariable without initializing the
- /// DbgVariable's DIE reference.
- std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV,
- bool Abstract);
+ /// resolve - Look in the DwarfDebug map for the MDNode that
+ /// corresponds to the reference.
+ template <typename T> T resolve(DIRef<T> Ref) const {
+ return DD->resolve(Ref);
+ }
+private:
/// constructTypeDIE - Construct basic type die from DIBasicType.
void constructTypeDIE(DIE &Buffer, DIBasicType BTy);
@@ -503,7 +382,7 @@ private:
}
// getIndexTyDie - Get an anonymous type for index type.
- DIE *getIndexTyDie() { return IndexTyDie; }
+ DIE *getIndexTyDie();
// setIndexTyDie - Set D as anonymous type for index which can be reused
// later.
@@ -513,56 +392,22 @@ private:
/// information entry.
DIEEntry *createDIEEntry(DIE &Entry);
- /// resolve - Look in the DwarfDebug map for the MDNode that
- /// corresponds to the reference.
- template <typename T> T resolve(DIRef<T> Ref) const {
- return DD->resolve(Ref);
- }
-
/// If this is a named finished type then include it in the list of types for
/// the accelerator tables.
void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE);
-};
-
-class DwarfCompileUnit : public DwarfUnit {
- /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
- /// the need to search for it in applyStmtList.
- unsigned stmtListIndex;
-
-public:
- DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU);
-
- void initStmtList(MCSymbol *DwarfLineSectionSym);
-
- /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
- void applyStmtList(DIE &D);
-
- /// createGlobalVariableDIE - create global variable DIE.
- void createGlobalVariableDIE(DIGlobalVariable GV);
-
- /// addLabelAddress - Add a dwarf label attribute data and value using
- /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
- void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label);
-
- /// addLocalLabelAddress - Add a dwarf label attribute data and value using
- /// DW_FORM_addr only.
- void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label);
- DwarfCompileUnit &getCU() override { return *this; }
-
- unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+ virtual bool isDwoUnit() const = 0;
};
class DwarfTypeUnit : public DwarfUnit {
-private:
uint64_t TypeSignature;
const DIE *Ty;
DwarfCompileUnit &CU;
MCDwarfDwoLineTable *SplitLineTable;
+ unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override;
+ bool isDwoUnit() const override;
+
public:
DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -578,11 +423,8 @@ public:
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
sizeof(uint32_t); // Type DIE Offset
}
- void initSection(const MCSection *Section);
+ using DwarfUnit::initSection;
DwarfCompileUnit &getCU() override { return CU; }
-
-protected:
- unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override;
};
} // end llvm namespace
#endif
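Aside on the RangeSpanList change above: the constructor now takes the SmallVector of ranges by value and moves it into the member, the usual sink idiom. A minimal standalone sketch (plain C++ with std::vector standing in for SmallVector; illustrative only, not part of this patch):

// Standalone sketch of the pass-by-value + std::move "sink" idiom.
#include <string>
#include <utility>
#include <vector>

struct RangeList {
  std::string Name;
  std::vector<int> Ranges;
  // Taking the vector by value lets callers either copy or move into the
  // parameter; the member is then move-constructed from it.
  RangeList(std::string Name, std::vector<int> Ranges)
      : Name(std::move(Name)), Ranges(std::move(Ranges)) {}
};

int main() {
  std::vector<int> Spans{1, 2, 3};
  RangeList RL("ranges", std::move(Spans)); // the vector is moved, not copied again
  return RL.Ranges.size() == 3 ? 0 : 1;
}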
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 73f62bf..2bbffb3 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -237,7 +237,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// instruction between the previous try-range and this one may throw,
// create a call-site entry with no landing pad for the region between the
// try-ranges.
- if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) {
CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
CallSites.push_back(Site);
PreviousIsInvoke = false;
@@ -259,7 +259,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
};
// Try to merge with the previous call-site. SJLJ doesn't do this
- if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (PreviousIsInvoke && Asm->MAI->usesItaniumLSDAForExceptions()) {
CallSiteEntry &Prev = CallSites.back();
if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
// Extend the range of the previous entry.
@@ -269,7 +269,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
}
// Otherwise, create a new call-site.
- if (Asm->MAI->isExceptionHandlingDwarf())
+ if (Asm->MAI->usesItaniumLSDAForExceptions())
CallSites.push_back(Site);
else {
// SjLj EH must maintain the call sites in the order assigned
@@ -287,7 +287,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) {
CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
CallSites.push_back(Site);
}
@@ -314,7 +314,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed base 1.
void EHStreamer::emitExceptionTable() {
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
@@ -520,7 +520,7 @@ void EHStreamer::emitExceptionTable() {
}
} else {
// DWARF Exception handling
- assert(Asm->MAI->isExceptionHandlingDwarf());
+ assert(Asm->MAI->usesItaniumLSDAForExceptions());
// The call-site table is a list of all call sites that may throw an
// exception (including C++ 'throw' statements) in the procedure
@@ -649,7 +649,7 @@ void EHStreamer::emitExceptionTable() {
}
void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
@@ -662,9 +662,9 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
Entry = TypeInfos.size();
}
- for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ for (std::vector<const GlobalValue *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
- const GlobalVariable *GV = *I;
+ const GlobalValue *GV = *I;
if (VerboseAsm)
Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 2b6ba78..7e9549d 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
-#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
#include "AsmPrinterHandler.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index bfcbe6b..5bda5a9 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -50,7 +51,8 @@ void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
MCStreamer &OS = AP.OutStreamer;
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
// Put this in a custom .note section.
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 5a9ecd7..6480d048 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;
@@ -91,7 +92,8 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
/// either condition is detected in a function which uses the GC.
///
void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(getModule(), AP, "code_end");
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 81285d5..0f0ad75 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DwarfException.h"
+#include "Win64Exception.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h
new file mode 100644
index 0000000..538e132
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.h
@@ -0,0 +1,52 @@
+//===-- Win64Exception.h - Windows Exception Handling ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing windows exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
+
+#include "EHStreamer.h"
+
+namespace llvm {
+class MachineFunction;
+
+class Win64Exception : public EHStreamer {
+ /// Per-function flag to indicate if personality info should be emitted.
+ bool shouldEmitPersonality;
+
+ /// Per-function flag to indicate if the LSDA should be emitted.
+ bool shouldEmitLSDA;
+
+ /// Per-function flag to indicate if frame moves info should be emitted.
+ bool shouldEmitMoves;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ Win64Exception(AsmPrinter *A);
+ virtual ~Win64Exception();
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+};
+}
+
+#endif
+
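Aside on the new Win64Exception.h above: the class plugs into the AsmPrinter handler flow through beginFunction/endFunction/endModule overrides inherited via EHStreamer. A much-simplified standalone sketch of that shape (plain C++; the type and class names here are stand-ins, not the real LLVM interfaces):

// Simplified sketch of a begin/end handler; illustrative only.
#include <iostream>
#include <memory>
#include <vector>

struct MachineFunction {}; // stand-in, not the LLVM class

class Handler {
public:
  virtual ~Handler() = default;
  virtual void beginFunction(const MachineFunction *MF) = 0;
  virtual void endFunction(const MachineFunction *MF) = 0;
  virtual void endModule() = 0;
};

class WinEHLikeHandler : public Handler {
  bool ShouldEmitPersonality = false; // per-function flag, recomputed each function
public:
  void beginFunction(const MachineFunction *) override {
    ShouldEmitPersonality = true; // gather pre-function exception info here
  }
  void endFunction(const MachineFunction *) override {
    if (ShouldEmitPersonality)
      std::cout << "emit per-function exception info\n";
  }
  void endModule() override {
    std::cout << "emit exception info that follows all function content\n";
  }
};

int main() {
  std::vector<std::unique_ptr<Handler>> Handlers;
  Handlers.push_back(std::make_unique<WinEHLikeHandler>());
  MachineFunction MF;
  for (auto &H : Handlers) {
    H->beginFunction(&MF);
    H->endFunction(&MF);
  }
  for (auto &H : Handlers)
    H->endModule();
  return 0;
}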
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 6a5c431..b5e0929 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -116,15 +116,67 @@ WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
Asm = AP;
}
+void WinCodeViewLineTables::endModule() {
+ if (FnDebugInfo.empty())
+ return;
+
+ assert(Asm != nullptr);
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
+ Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
+
+ // The COFF .debug$S section consists of several subsections, each starting
+ // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
+ // of the payload followed by the payload itself. The subsections are 4-byte
+ // aligned.
+
+ // Emit per-function debug information. This code is extracted into a
+ // separate function for readability.
+ for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
+ emitDebugInfoForFunction(VisitedFunctions[I]);
+
+ // This subsection holds a file index to offset in string table table.
+ Asm->OutStreamer.AddComment("File index to string table offset subsection");
+ Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
+ size_t NumFilenames = FileNameRegistry.Infos.size();
+ Asm->EmitInt32(8 * NumFilenames);
+ for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
+ StringRef Filename = FileNameRegistry.Filenames[I];
+ // For each unique filename, just write its offset in the string table.
+ Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
+ // The function name offset is not followed by any additional data.
+ Asm->EmitInt32(0);
+ }
+
+ // This subsection holds the string table.
+ Asm->OutStreamer.AddComment("String table");
+ Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
+ Asm->EmitInt32(FileNameRegistry.LastOffset);
+ // The payload starts with a null character.
+ Asm->EmitInt8(0);
+
+ for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
+ // Just emit unique filenames one by one, separated by a null character.
+ Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
+ Asm->EmitInt8(0);
+ }
+
+ // No more subsections. Fill with zeros to align the end of the section by 4.
+ Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
+
+ clear();
+}
+
static void EmitLabelDiff(MCStreamer &Streamer,
- const MCSymbol *From, const MCSymbol *To) {
+ const MCSymbol *From, const MCSymbol *To,
+ unsigned int Size = 4) {
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
MCContext &Context = Streamer.getContext();
const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context),
*ToRef = MCSymbolRefExpr::Create(To, Variant, Context);
const MCExpr *AddrDelta =
MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
- Streamer.EmitValue(AddrDelta, 4);
+ Streamer.EmitValue(AddrDelta, Size);
}
void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
@@ -138,6 +190,51 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
return;
assert(FI.End && "Don't know where the function ends?");
+ StringRef FuncName = getDISubprogram(GV).getDisplayName(),
+ GVName = GV->getName();
+ // FIXME Clang currently sets DisplayName to "bar" for a C++
+ // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
+ // to demangle display names anyways, so let's just put a mangled name into
+ // the symbols subsection until Clang gives us what we need.
+ if (GVName.startswith("\01?"))
+ FuncName = GVName.substr(1);
+ // Emit a symbol subsection, required by VS2012+ to find function boundaries.
+ MCSymbol *SymbolsBegin = Asm->MMI->getContext().CreateTempSymbol(),
+ *SymbolsEnd = Asm->MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.AddComment("Symbol subsection for " + Twine(FuncName));
+ Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION);
+ EmitLabelDiff(Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
+ Asm->OutStreamer.EmitLabel(SymbolsBegin);
+ {
+ MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(),
+ *ProcSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
+ EmitLabelDiff(Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
+ Asm->OutStreamer.EmitLabel(ProcSegmentBegin);
+
+ Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START);
+ // Some bytes of this segment don't seem to be required for basic debugging,
+ // so just fill them with zeroes.
+ Asm->OutStreamer.EmitFill(12, 0);
+ // This is the important bit that tells the debugger where the function
+ // code is located and what's its size:
+ EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
+ Asm->OutStreamer.EmitFill(12, 0);
+ Asm->OutStreamer.EmitCOFFSecRel32(Fn);
+ Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+ Asm->EmitInt8(0);
+ // Emit the function display name as a null-terminated string.
+ Asm->OutStreamer.EmitBytes(FuncName);
+ Asm->EmitInt8(0);
+ Asm->OutStreamer.EmitLabel(ProcSegmentEnd);
+
+ // We're done with this function.
+ Asm->EmitInt16(0x0002);
+ Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
+ }
+ Asm->OutStreamer.EmitLabel(SymbolsEnd);
+ // Every subsection must be aligned to a 4-byte boundary.
+ Asm->OutStreamer.EmitFill((-FuncName.size()) % 4, 0);
+
// PCs/Instructions are grouped into segments sharing the same filename.
// Pre-calculate the lengths (in instructions) of these segments and store
// them in a map for convenience. Each index in the map is the sequential
@@ -154,18 +251,19 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
}
FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
- // Emit the control code of the subsection followed by the payload size.
- Asm->OutStreamer.AddComment(
- "Linetable subsection for " + Twine(Fn->getName()));
+ // Emit a line table subsection, required to do PC-to-file:line lookup.
+ Asm->OutStreamer.AddComment("Line table subsection for " + Twine(FuncName));
Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
- MCSymbol *SubsectionBegin = Asm->MMI->getContext().CreateTempSymbol(),
- *SubsectionEnd = Asm->MMI->getContext().CreateTempSymbol();
- EmitLabelDiff(Asm->OutStreamer, SubsectionBegin, SubsectionEnd);
- Asm->OutStreamer.EmitLabel(SubsectionBegin);
+ MCSymbol *LineTableBegin = Asm->MMI->getContext().CreateTempSymbol(),
+ *LineTableEnd = Asm->MMI->getContext().CreateTempSymbol();
+ EmitLabelDiff(Asm->OutStreamer, LineTableBegin, LineTableEnd);
+ Asm->OutStreamer.EmitLabel(LineTableBegin);
// Identify the function this subsection is for.
Asm->OutStreamer.EmitCOFFSecRel32(Fn);
Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+ // Insert padding after a 16-bit section index.
+ Asm->EmitInt16(0);
// Length of the function's code, in bytes.
EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
@@ -209,56 +307,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
if (FileSegmentEnd)
Asm->OutStreamer.EmitLabel(FileSegmentEnd);
- Asm->OutStreamer.EmitLabel(SubsectionEnd);
-}
-
-void WinCodeViewLineTables::endModule() {
- if (FnDebugInfo.empty())
- return;
-
- assert(Asm != nullptr);
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
- Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
-
- // The COFF .debug$S section consists of several subsections, each starting
- // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
- // of the payload followed by the payload itself. The subsections are 4-byte
- // aligned.
-
- for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
- emitDebugInfoForFunction(VisitedFunctions[I]);
-
- // This subsection holds a file index to offset in string table table.
- Asm->OutStreamer.AddComment("File index to string table offset subsection");
- Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
- size_t NumFilenames = FileNameRegistry.Infos.size();
- Asm->EmitInt32(8 * NumFilenames);
- for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
- StringRef Filename = FileNameRegistry.Filenames[I];
- // For each unique filename, just write it's offset in the string table.
- Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
- // The function name offset is not followed by any additional data.
- Asm->EmitInt32(0);
- }
-
- // This subsection holds the string table.
- Asm->OutStreamer.AddComment("String table");
- Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
- Asm->EmitInt32(FileNameRegistry.LastOffset);
- // The payload starts with a null character.
- Asm->EmitInt8(0);
-
- for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
- // Just emit unique filenames one by one, separated by a null character.
- Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
- Asm->EmitInt8(0);
- }
-
- // No more subsections. Fill with zeros to align the end of the section by 4.
- Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
-
- clear();
+ Asm->OutStreamer.EmitLabel(LineTableEnd);
}
void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
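Aside on the .debug$S emission above: each subsection is a 4-byte control code, a 4-byte payload length, the payload, then zero padding to a 4-byte boundary, which the code computes as (-Length) % 4 in unsigned arithmetic. A tiny standalone sketch (plain C++, illustrative only) of that padding calculation:

// Standalone sketch of the "(-Length) % 4" alignment padding used above.
#include <cstddef>
#include <initializer_list>
#include <iostream>

static std::size_t paddingTo4(std::size_t Length) {
  // With unsigned wraparound, (0 - Length) % 4 == (4 - Length % 4) % 4:
  // the number of zero bytes needed to reach the next 4-byte boundary.
  return (0 - Length) % 4;
}

int main() {
  for (std::size_t Len : {0u, 1u, 2u, 3u, 4u, 5u, 7u, 8u})
    std::cout << "payload of " << Len << " bytes -> pad with "
              << paddingTo4(Len) << " zero bytes\n";
  return 0;
}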
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index 0734d97..8492eac 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__
-#define CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H
#include "AsmPrinterHandler.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
deleted file mode 100644
index 421946d..0000000
--- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass (at IR level) to replace atomic instructions with
-// appropriate (intrinsic-based) ldrex/strex loops.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "arm-atomic-expand"
-
-namespace {
- class AtomicExpandLoadLinked : public FunctionPass {
- const TargetMachine *TM;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {
- initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
- bool expandAtomicInsts(Function &F);
-
- bool expandAtomicLoad(LoadInst *LI);
- bool expandAtomicStore(StoreInst *LI);
- bool expandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
-
- AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
- void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
- };
-}
-
-char AtomicExpandLoadLinked::ID = 0;
-char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
-INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc",
- "Expand Atomic calls in terms of load-linked & store-conditional",
- false, false)
-
-FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
- return new AtomicExpandLoadLinked(TM);
-}
-
-bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
- if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked())
- return false;
-
- SmallVector<Instruction *, 1> AtomicInsts;
-
- // Changing control-flow while iterating through it is a bad idea, so gather a
- // list of all atomic instructions before we start.
- for (BasicBlock &BB : F)
- for (Instruction &Inst : BB) {
- if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
- (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
- (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
- AtomicInsts.push_back(&Inst);
- }
-
- bool MadeChange = false;
- for (Instruction *Inst : AtomicInsts) {
- if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst))
- continue;
-
- if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
- MadeChange |= expandAtomicRMW(AI);
- else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
- MadeChange |= expandAtomicCmpXchg(CI);
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- MadeChange |= expandAtomicLoad(LI);
- else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- MadeChange |= expandAtomicStore(SI);
- else
- llvm_unreachable("Unknown atomic instruction");
- }
-
- return MadeChange;
-}
-
-bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
- // Load instructions don't actually need a leading fence, even in the
- // SequentiallyConsistent case.
- AtomicOrdering MemOpOrder =
- TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic
- : LI->getOrdering();
-
- // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
- // an ldrexd (A3.5.3).
- IRBuilder<> Builder(LI);
- Value *Val = TM->getTargetLowering()->emitLoadLinked(
- Builder, LI->getPointerOperand(), MemOpOrder);
-
- insertTrailingFence(Builder, LI->getOrdering());
-
- LI->replaceAllUsesWith(Val);
- LI->eraseFromParent();
-
- return true;
-}
-
-bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
- // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
- // we need a loop and the entire instruction is essentially an "atomicrmw
- // xchg" that ignores the value loaded.
- IRBuilder<> Builder(SI);
- AtomicRMWInst *AI =
- Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
- SI->getValueOperand(), SI->getOrdering());
- SI->eraseFromParent();
-
- // Now we have an appropriate swap instruction, lower it as usual.
- return expandAtomicRMW(AI);
-}
-
-bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
- AtomicOrdering Order = AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: atomicrmw some_op iN* %addr, iN %incr ordering
- //
- // The standard expansion we produce is:
- // [...]
- // fence?
- // atomicrmw.start:
- // %loaded = @load.linked(%addr)
- // %new = some_op iN %loaded, %incr
- // %stored = @store_conditional(%new, %addr)
- // %try_again = icmp i32 ne %stored, 0
- // br i1 %try_again, label %loop, label %atomicrmw.end
- // atomicrmw.end:
- // fence?
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we might want a fence too. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- Value *Loaded =
- TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
-
- Value *NewVal;
- switch (AI->getOperation()) {
- case AtomicRMWInst::Xchg:
- NewVal = AI->getValOperand();
- break;
- case AtomicRMWInst::Add:
- NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Sub:
- NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::And:
- NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Nand:
- NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
- "new");
- break;
- case AtomicRMWInst::Or:
- NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Xor:
- NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Max:
- NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Min:
- NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::UMax:
- NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::UMin:
- NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- default:
- llvm_unreachable("Unknown atomic op");
- }
-
- Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
- Builder, NewVal, Addr, MemOpOrder);
- Value *TryAgain = Builder.CreateICmpNE(
- StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
- Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
-
- Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- insertTrailingFence(Builder, Order);
-
- AI->replaceAllUsesWith(Loaded);
- AI->eraseFromParent();
-
- return true;
-}
-
-bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
- AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
- AtomicOrdering FailureOrder = CI->getFailureOrdering();
- Value *Addr = CI->getPointerOperand();
- BasicBlock *BB = CI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
- //
- // The full expansion we produce is:
- // [...]
- // fence?
- // cmpxchg.start:
- // %loaded = @load.linked(%addr)
- // %should_store = icmp eq %loaded, %desired
- // br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.failure
- // cmpxchg.trystore:
- // %stored = @store_conditional(%new, %addr)
- // %success = icmp eq i32 %stored, 0
- // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
- // cmpxchg.success:
- // fence?
- // br label %cmpxchg.end
- // cmpxchg.failure:
- // fence?
- // br label %cmpxchg.end
- // cmpxchg.end:
- // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
- // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
- // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
- auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
- auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
- auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
-
- // This grabs the DebugLoc from CI
- IRBuilder<> Builder(CI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we might want a fence too. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- Value *Loaded =
- TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
- Value *ShouldStore =
- Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
-
- // If the the cmpxchg doesn't actually need any ordering when it fails, we can
- // jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
-
- Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
- Builder, CI->getNewValOperand(), Addr, MemOpOrder);
- StoreSuccess = Builder.CreateICmpEQ(
- StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
- Builder.CreateCondBr(StoreSuccess, SuccessBB,
- CI->isWeak() ? FailureBB : LoopBB);
-
- // Make sure later instructions don't get reordered with a fence if necessary.
- Builder.SetInsertPoint(SuccessBB);
- insertTrailingFence(Builder, SuccessOrder);
- Builder.CreateBr(ExitBB);
-
- Builder.SetInsertPoint(FailureBB);
- insertTrailingFence(Builder, FailureOrder);
- Builder.CreateBr(ExitBB);
-
- // Finally, we have control-flow based knowledge of whether the cmpxchg
- // succeeded or not. We expose this to later passes by converting any
- // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
-
- // Setup the builder so we can create any PHIs we need.
- Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
- Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
- Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
-
- // Look for any users of the cmpxchg that are just comparing the loaded value
- // against the desired one, and replace them with the CFG-derived version.
- SmallVector<ExtractValueInst *, 2> PrunedInsts;
- for (auto User : CI->users()) {
- ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
- if (!EV)
- continue;
-
- assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
- "weird extraction from { iN, i1 }");
-
- if (EV->getIndices()[0] == 0)
- EV->replaceAllUsesWith(Loaded);
- else
- EV->replaceAllUsesWith(Success);
-
- PrunedInsts.push_back(EV);
- }
-
- // We can remove the instructions now we're no longer iterating through them.
- for (auto EV : PrunedInsts)
- EV->eraseFromParent();
-
- if (!CI->use_empty()) {
- // Some use of the full struct return that we don't understand has happened,
- // so we've got to reconstruct it properly.
- Value *Res;
- Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
- Res = Builder.CreateInsertValue(Res, Success, 1);
-
- CI->replaceAllUsesWith(Res);
- }
-
- CI->eraseFromParent();
- return true;
-}
-
-AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord) {
- if (!TM->getTargetLowering()->getInsertFencesForAtomic())
- return Ord;
-
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- Builder.CreateFence(Release);
-
- // The exclusive operations don't need any barrier if we're adding separate
- // fences.
- return Monotonic;
-}
-
-void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord) {
- if (!TM->getTargetLowering()->getInsertFencesForAtomic())
- return;
-
- if (Ord == Acquire || Ord == AcquireRelease)
- Builder.CreateFence(Acquire);
- else if (Ord == SequentiallyConsistent)
- Builder.CreateFence(SequentiallyConsistent);
-}
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
new file mode 100644
index 0000000..12f6bd7
--- /dev/null
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -0,0 +1,563 @@
+//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions with
+// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "atomic-expand"
+
+namespace {
+ class AtomicExpand: public FunctionPass {
+ const TargetMachine *TM;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit AtomicExpand(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {
+ initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ private:
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+ bool IsStore, bool IsLoad);
+ bool expandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+ bool expandAtomicStore(StoreInst *SI);
+ bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
+ bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
+ bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *AI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *AI);
+ };
+}
+
+char AtomicExpand::ID = 0;
+char &llvm::AtomicExpandID = AtomicExpand::ID;
+INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand",
+ "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg",
+ false, false)
+
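+// Factory used by targets to create this pass. A target that wants the
+// lowering typically adds it near the start of its IR pass pipeline, e.g.
+// addPass(createAtomicExpandPass(TM)) from its TargetPassConfig::addIRPasses()
+// override (illustrative; the exact hook varies by target).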
+FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
+ return new AtomicExpand(TM);
+}
+
+bool AtomicExpand::runOnFunction(Function &F) {
+ if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
+ return false;
+ auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
+
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (I->isAtomic())
+ AtomicInsts.push_back(&*I);
+ }
+
+ bool MadeChange = false;
+ for (auto I : AtomicInsts) {
+ auto LI = dyn_cast<LoadInst>(I);
+ auto SI = dyn_cast<StoreInst>(I);
+ auto RMWI = dyn_cast<AtomicRMWInst>(I);
+ auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
+ assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
+ "Unknown atomic instruction");
+
+ auto FenceOrdering = Monotonic;
+ bool IsStore, IsLoad;
+ if (TargetLowering->getInsertFencesForAtomic()) {
+ if (LI && isAtLeastAcquire(LI->getOrdering())) {
+ FenceOrdering = LI->getOrdering();
+ LI->setOrdering(Monotonic);
+ IsStore = false;
+ IsLoad = true;
+ } else if (SI && isAtLeastRelease(SI->getOrdering())) {
+ FenceOrdering = SI->getOrdering();
+ SI->setOrdering(Monotonic);
+ IsStore = true;
+ IsLoad = false;
+ } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) ||
+ isAtLeastAcquire(RMWI->getOrdering()))) {
+ FenceOrdering = RMWI->getOrdering();
+ RMWI->setOrdering(Monotonic);
+ IsStore = IsLoad = true;
+ } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
+ (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+ isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+ // If a compare and swap is lowered to LL/SC, we can do smarter fence
+ // insertion, with a stronger one on the success path than on the
+ // failure path. As a result, fence insertion is directly done by
+ // expandAtomicCmpXchg in that case.
+ FenceOrdering = CASI->getSuccessOrdering();
+ CASI->setSuccessOrdering(Monotonic);
+ CASI->setFailureOrdering(Monotonic);
+ IsStore = IsLoad = true;
+ }
+
+ if (FenceOrdering != Monotonic) {
+ MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+ }
+ }
+
+ if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
+ MadeChange |= expandAtomicLoad(LI);
+ } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
+ MadeChange |= expandAtomicStore(SI);
+ } else if (RMWI) {
+ // There are two different ways of expanding RMW instructions:
+ // - into a load if it is idempotent
+ // - into a Cmpxchg/LL-SC loop otherwise
+ // we try them in that order.
+ MadeChange |= (isIdempotentRMW(RMWI) &&
+ simplifyIdempotentRMW(RMWI)) ||
+ (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) &&
+ expandAtomicRMW(RMWI));
+ } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
+ MadeChange |= expandAtomicCmpXchg(CASI);
+ }
+ }
+ return MadeChange;
+}
+
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+ bool IsStore, bool IsLoad) {
+ IRBuilder<> Builder(I);
+
+ auto LeadingFence =
+ TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
+ Builder, Order, IsStore, IsLoad);
+
+ auto TrailingFence =
+ TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
+ Builder, Order, IsStore, IsLoad);
+ // The trailing fence is emitted before the instruction instead of after
+ // because there is no easy way of setting Builder insertion point after
+ // an instruction. So we must erase it from the BB, and insert it back
+ // in the right place.
+ // We have a guard here because not every atomic operation generates a
+ // trailing fence.
+ if (TrailingFence) {
+ TrailingFence->removeFromParent();
+ TrailingFence->insertAfter(I);
+ }
+
+ return (LeadingFence || TrailingFence);
+}
+
+bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
+ if (TM->getSubtargetImpl()
+ ->getTargetLowering()
+ ->hasLoadLinkedStoreConditional())
+ return expandAtomicLoadToLL(LI);
+ else
+ return expandAtomicLoadToCmpXchg(LI);
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+ IRBuilder<> Builder(LI);
+
+ // On some architectures, load-linked instructions are atomic for larger
+ // sizes than normal loads. For example, the only 64-bit load guaranteed
+ // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
+ Value *Val =
+ TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+
+ LI->replaceAllUsesWith(Val);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
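+  // The load is emulated with a cmpxchg of a dummy (zero) value against
+  // itself: whether the compare succeeds or fails, memory is left unchanged
+  // and the first element of the returned { iN, i1 } pair is the value that
+  // was in memory at the time.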
+ IRBuilder<> Builder(LI);
+ AtomicOrdering Order = LI->getOrdering();
+ Value *Addr = LI->getPointerOperand();
+ Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ Constant *DummyVal = Constant::getNullValue(Ty);
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, DummyVal, DummyVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+ LI->replaceAllUsesWith(Loaded);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
+ // This function is only called on atomic stores that are too large to be
+  // atomic if implemented as a native store. So we replace them with an
+  // atomic swap, which can be implemented for example as a ldrex/strex on ARM
+  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
+  // It is the responsibility of the target to only return true in
+  // shouldExpandAtomicStoreInIR in cases where this is required and possible.
+ IRBuilder<> Builder(SI);
+ AtomicRMWInst *AI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+ SI->getValueOperand(), SI->getOrdering());
+ SI->eraseFromParent();
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ return expandAtomicRMW(AI);
+}
+
+bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
+ if (TM->getSubtargetImpl()
+ ->getTargetLowering()
+ ->hasLoadLinkedStoreConditional())
+ return expandAtomicRMWToLLSC(AI);
+ else
+ return expandAtomicRMWToCmpXchg(AI);
+}
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
+ Value *Loaded, Value *Inc) {
+ Value *NewVal;
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return Inc;
+ case AtomicRMWInst::Add:
+ return Builder.CreateAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::Sub:
+ return Builder.CreateSub(Loaded, Inc, "new");
+ case AtomicRMWInst::And:
+ return Builder.CreateAnd(Loaded, Inc, "new");
+ case AtomicRMWInst::Nand:
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
+ case AtomicRMWInst::Or:
+ return Builder.CreateOr(Loaded, Inc, "new");
+ case AtomicRMWInst::Xor:
+ return Builder.CreateXor(Loaded, Inc, "new");
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+ AtomicOrdering MemOpOrder = AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // fence?
+ // atomicrmw.start:
+ // %loaded = @load.linked(%addr)
+ // %new = some_op iN %loaded, %incr
+ // %stored = @store_conditional(%new, %addr)
+  //     %try_again = icmp ne i32 %stored, 0
+ // br i1 %try_again, label %loop, label %atomicrmw.end
+ // atomicrmw.end:
+ // fence?
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *StoreSuccess =
+ TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
+ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+ AI->replaceAllUsesWith(Loaded);
+ AI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
+ AtomicOrdering MemOpOrder =
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+ // Atomics require at least natural alignment.
+  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+ AI->replaceAllUsesWith(NewLoaded);
+ AI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+ AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
+ AtomicOrdering FailureOrder = CI->getFailureOrdering();
+ Value *Addr = CI->getPointerOperand();
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+ // If getInsertFencesForAtomic() returns true, then the target does not want
+ // to deal with memory orders, and emitLeading/TrailingFence should take care
+  // of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
+ // should preserve the ordering.
+ AtomicOrdering MemOpOrder =
+ TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
+
+ // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
+ //
+ // The full expansion we produce is:
+ // [...]
+ // fence?
+ // cmpxchg.start:
+ // %loaded = @load.linked(%addr)
+ // %should_store = icmp eq %loaded, %desired
+ // br i1 %should_store, label %cmpxchg.trystore,
+ // label %cmpxchg.failure
+ // cmpxchg.trystore:
+ // %stored = @store_conditional(%new, %addr)
+ // %success = icmp eq i32 %stored, 0
+ // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+ // cmpxchg.success:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.failure:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.end:
+ // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+ // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+ auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
+ auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+
+ // This grabs the DebugLoc from CI
+ IRBuilder<> Builder(CI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *ShouldStore =
+ Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+
+  // If the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+
+ Builder.SetInsertPoint(TryStoreBB);
+ Value *StoreSuccess = TLI->emitStoreConditional(
+ Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ StoreSuccess = Builder.CreateICmpEQ(
+ StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
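+  // A weak cmpxchg is permitted to fail spuriously, so when the
+  // store-conditional fails we may branch straight to the failure block
+  // instead of retrying.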
+ Builder.CreateCondBr(StoreSuccess, SuccessBB,
+ CI->isWeak() ? FailureBB : LoopBB);
+
+ // Make sure later instructions don't get reordered with a fence if necessary.
+ Builder.SetInsertPoint(SuccessBB);
+ TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(ExitBB);
+
+ Builder.SetInsertPoint(FailureBB);
+ TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(ExitBB);
+
+ // Finally, we have control-flow based knowledge of whether the cmpxchg
+ // succeeded or not. We expose this to later passes by converting any
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
+
+ // Setup the builder so we can create any PHIs we need.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+ Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+ Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+
+ // Look for any users of the cmpxchg that are just comparing the loaded value
+ // against the desired one, and replace them with the CFG-derived version.
+ SmallVector<ExtractValueInst *, 2> PrunedInsts;
+ for (auto User : CI->users()) {
+ ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
+ if (!EV)
+ continue;
+
+ assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
+ "weird extraction from { iN, i1 }");
+
+ if (EV->getIndices()[0] == 0)
+ EV->replaceAllUsesWith(Loaded);
+ else
+ EV->replaceAllUsesWith(Success);
+
+ PrunedInsts.push_back(EV);
+ }
+
+ // We can remove the instructions now we're no longer iterating through them.
+ for (auto EV : PrunedInsts)
+ EV->eraseFromParent();
+
+ if (!CI->use_empty()) {
+ // Some use of the full struct return that we don't understand has happened,
+ // so we've got to reconstruct it properly.
+ Value *Res;
+ Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ }
+
+ CI->eraseFromParent();
+ return true;
+}
+
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
+ if(!C)
+ return false;
+
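+  // An RMW operation is idempotent when it leaves memory unchanged, e.g.
+  // "atomicrmw add ..., 0", "atomicrmw or ..., 0" or "atomicrmw and ..., -1".
+  // Such operations only need the ordering guarantees of a fenced load.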
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ switch(Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
+ }
+}
+
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+
+ if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
+ if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
+ expandAtomicLoad(ResultingLoad);
+ return true;
+ }
+
+ return false;
+}
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index b2737bf..b9b1fd8 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -42,7 +42,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
/// Estimate the cost overhead of SK_Alternate shuffle.
unsigned getAltShuffleOverhead(Type *Ty) const;
- const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }
+ const TargetLoweringBase *getTLI() const {
+ return TM->getSubtargetImpl()->getTargetLowering();
+ }
public:
BasicTTI() : ImmutablePass(ID), TM(nullptr) {
@@ -90,7 +92,7 @@ public:
unsigned getJumpBufSize() const override;
bool shouldBuildLookupTables() const override;
bool haveFastSqrt(Type *Ty) const override;
- void getUnrollingPreferences(Loop *L,
+ void getUnrollingPreferences(const Function *F, Loop *L,
UnrollingPreferences &UP) const override;
/// @}
@@ -99,10 +101,11 @@ public:
/// @{
unsigned getNumberOfRegisters(bool Vector) const override;
- unsigned getMaximumUnrollFactor() const override;
+ unsigned getMaxInterleaveFactor() const override;
unsigned getRegisterBitWidth(bool Vector) const override;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind) const override;
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override;
unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const override;
unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -186,9 +189,8 @@ unsigned BasicTTI::getJumpBufSize() const {
bool BasicTTI::shouldBuildLookupTables() const {
const TargetLoweringBase *TLI = getTLI();
- return TLI->supportJumpTables() &&
- (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+ return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}
bool BasicTTI::haveFastSqrt(Type *Ty) const {
@@ -197,7 +199,7 @@ bool BasicTTI::haveFastSqrt(Type *Ty) const {
return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}
-void BasicTTI::getUnrollingPreferences(Loop *L,
+void BasicTTI::getUnrollingPreferences(const Function *F, Loop *L,
UnrollingPreferences &UP) const {
// This unrolling functionality is target independent, but to provide some
// motivation for its intended use, for x86:
@@ -223,11 +225,11 @@ void BasicTTI::getUnrollingPreferences(Loop *L,
// until someone finds a case where it matters in practice.
unsigned MaxOps;
- const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F);
if (PartialUnrollingThreshold.getNumOccurrences() > 0)
MaxOps = PartialUnrollingThreshold;
- else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0)
- MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize;
+ else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
+ MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
else
return;
@@ -282,13 +284,14 @@ unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
return 32;
}
-unsigned BasicTTI::getMaximumUnrollFactor() const {
+unsigned BasicTTI::getMaxInterleaveFactor() const {
return 1;
}
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind,
- OperandValueKind) const {
+ OperandValueKind, OperandValueKind,
+ OperandValueProperties,
+ OperandValueProperties) const {
// Check if any of the operands are vector operands.
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -465,7 +468,8 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
- if (!TLI->isOperationExpand(ISD, LT.second)) {
+ if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
+ !TLI->isOperationExpand(ISD, LT.second)) {
// The operation is legal. Assume it costs 1. Multiply
// by the type-legalization overhead.
return LT.first * 1;
@@ -561,6 +565,8 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::log10: ISD = ISD::FLOG10; break;
case Intrinsic::log2: ISD = ISD::FLOG2; break;
case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::minnum: ISD = ISD::FMINNUM; break;
+ case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break;
case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break;
case Intrinsic::floor: ISD = ISD::FFLOOR; break;
case Intrinsic::ceil: ISD = ISD::FCEIL; break;
@@ -572,6 +578,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::pow: ISD = ISD::FPOW; break;
case Intrinsic::fma: ISD = ISD::FMA; break;
case Intrinsic::fmuladd: ISD = ISD::FMA; break;
+ // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return 0;
@@ -582,7 +589,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
// The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that thre is some
+ // If the type is split to multiple registers, assume that there is some
// overhead to this.
// TODO: Once we have extract/insert subvector cost we need to use them.
if (LT.first > 1)
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 7503e57..2128da1 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -20,6 +20,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -32,8 +34,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -70,6 +72,8 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -91,22 +95,24 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
// HW that requires structurized CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
PassConfig->getEnableTailMerge();
- BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true);
- return Folder.OptimizeFunction(MF,
- MF.getTarget().getInstrInfo(),
- MF.getTarget().getRegisterInfo(),
+ BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true,
+ getAnalysis<MachineBlockFrequencyInfo>(),
+ getAnalysis<MachineBranchProbabilityInfo>());
+ return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
+ MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
-
-BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) {
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+ const MachineBlockFrequencyInfo &FreqInfo,
+ const MachineBranchProbabilityInfo &ProbInfo)
+ : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo),
+ MBPI(ProbInfo) {
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
case cl::BOU_FALSE: EnableTailMerge = false; break;
}
-
- EnableHoistCommonCode = CommonHoist;
}
/// RemoveDeadBlock - Remove the specified dead machine basic block from the
@@ -388,10 +394,8 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
RS->enterBasicBlock(CurMBB);
if (!CurMBB->empty())
RS->forward(std::prev(CurMBB->end()));
- BitVector RegsLiveAtExit(TRI->getNumRegs());
- RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
- if (RegsLiveAtExit[i])
+ for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++)
+ if (RS->isRegUsed(i, false))
NewMBB->addLiveIn(i);
}
}
@@ -435,6 +439,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+ // NewMBB inherits CurMBB's block frequency.
+ MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
+
// For targets that use the register scavenger, we must maintain LiveIns.
MaintainLiveIns(&CurMBB, NewMBB);
@@ -504,6 +511,21 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
#endif
}
+BlockFrequency
+BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ if (I != MergedBBFreq.end())
+ return I->second;
+
+ return MBFI.getBlockFreq(MBB);
+}
+
+void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
+ BlockFrequency F) {
+ MergedBBFreq[MBB] = F;
+}
+
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -806,6 +828,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
}
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+  // Recompute the common tail MBB's edge weights and block frequency.
+ setCommonTailEdgeWeights(*MBB);
+
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
@@ -890,7 +916,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
continue;
// Visit each predecessor only once.
- if (!UniquePreds.insert(PBB))
+ if (!UniquePreds.insert(PBB).second)
continue;
// Skip blocks which may jump to a landing pad. Can't tail merge these.
@@ -968,6 +994,44 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
return MadeChange;
}
+void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
+ SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());
+ BlockFrequency AccumulatedMBBFreq;
+
+ // Aggregate edge frequency of successor edge j:
+ // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),
+ // where bb is a basic block that is in SameTails.
+ for (const auto &Src : SameTails) {
+ const MachineBasicBlock *SrcMBB = Src.getBlock();
+ BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB);
+ AccumulatedMBBFreq += BlockFreq;
+
+    // It is not necessary to recompute edge weights if TailMBB has fewer than
+    // two successors.
+ if (TailMBB.succ_size() <= 1)
+ continue;
+
+ auto EdgeFreq = EdgeFreqLs.begin();
+
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq)
+ *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI);
+ }
+
+ MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq);
+
+ if (TailMBB.succ_size() <= 1)
+ return;
+
+ auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end());
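+  // Successor weights are 32-bit values, so scale the 64-bit frequencies
+  // down until the largest one fits into uint32_t.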
+ uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1;
+ auto EdgeFreq = EdgeFreqLs.begin();
+
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq)
+ TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale);
+}
+
//===----------------------------------------------------------------------===//
// Branch Optimization
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 0d15ed7..3653a2c 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -7,14 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
-#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/BlockFrequency.h"
#include <vector>
namespace llvm {
+ class MachineBlockFrequencyInfo;
+ class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineModuleInfo;
class RegScavenger;
@@ -23,7 +26,9 @@ namespace llvm {
class BranchFolder {
public:
- explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist);
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+ const MachineBlockFrequencyInfo &MBFI,
+ const MachineBranchProbabilityInfo &MBPI);
bool OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
@@ -92,9 +97,26 @@ namespace llvm {
MachineModuleInfo *MMI;
RegScavenger *RS;
+    /// \brief This class keeps track of block frequencies of newly created
+    /// blocks and tail-merged blocks.
+ class MBFIWrapper {
+ public:
+ MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
+ BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
+ void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
+
+ private:
+ const MachineBlockFrequencyInfo &MBFI;
+ DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq;
+ };
+
+ MBFIWrapper MBBFreqInfo;
+ const MachineBranchProbabilityInfo &MBPI;
+
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
+ void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
void MaintainLiveIns(MachineBasicBlock *CurMBB,
MachineBasicBlock *NewMBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 57c24e8..092346b 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -2,7 +2,7 @@ add_llvm_library(LLVMCodeGen
AggressiveAntiDepBreaker.cpp
AllocationOrder.cpp
Analysis.cpp
- AtomicExpandLoadLinkedPass.cpp
+ AtomicExpandPass.cpp
BasicTargetTransformInfo.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
@@ -19,6 +19,7 @@ add_llvm_library(LLVMCodeGen
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
+ ForwardControlFlowIntegrity.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
@@ -27,7 +28,6 @@ add_llvm_library(LLVMCodeGen
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
- JITCodeEmitter.cpp
JumpInstrTables.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
@@ -48,9 +48,10 @@ add_llvm_library(LLVMCodeGen
MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
MachineCSE.cpp
- MachineCodeEmitter.cpp
+ MachineCombiner.cpp
MachineCopyPropagation.cpp
MachineDominators.cpp
+ MachineDominanceFrontier.cpp
MachineFunction.cpp
MachineFunctionAnalysis.cpp
MachineFunctionPass.cpp
@@ -64,6 +65,7 @@ add_llvm_library(LLVMCodeGen
MachinePassRegistry.cpp
MachinePostDominators.cpp
MachineRegisterInfo.cpp
+ MachineRegionInfo.cpp
MachineSSAUpdater.cpp
MachineScheduler.cpp
MachineSink.cpp
@@ -96,7 +98,6 @@ add_llvm_library(LLVMCodeGen
SjLjEHPrepare.cpp
SlotIndexes.cpp
SpillPlacement.cpp
- Spiller.cpp
SplitKit.cpp
StackColoring.cpp
StackProtector.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index bc033f9..d08fae0 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -16,8 +16,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "calcspillweights"
@@ -95,11 +95,12 @@ static bool isRematerializable(const LiveInterval &LI,
void
VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
MachineRegisterInfo &mri = MF.getRegInfo();
- const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
MachineBasicBlock *mbb = nullptr;
MachineLoop *loop = nullptr;
bool isExiting = false;
float totalWeight = 0;
+ unsigned numInstr = 0; // Number of instructions using li
SmallPtrSet<MachineInstr*, 8> visited;
// Find the best physreg hint and the best virtreg hint.
@@ -116,9 +117,10 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
I != E; ) {
MachineInstr *mi = &*(I++);
+ numInstr++;
if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
continue;
- if (!visited.insert(mi))
+ if (!visited.insert(mi).second)
continue;
float weight = 1.0f;
@@ -186,8 +188,8 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
// it is a preferred candidate for spilling.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
- if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo()))
+ if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo()))
totalWeight *= 0.5F;
- li.weight = normalize(totalWeight, li.getSize());
+ li.weight = normalize(totalWeight, li.getSize(), numInstr);
}
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index add861a..56ecde0 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -19,16 +19,15 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
- const TargetMachine &tm, SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &C)
- : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm),
- TRI(*TM.getRegisterInfo()), Locs(locs), Context(C),
- CallOrPrologue(Unknown) {
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), MF(mf),
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C),
+ CallOrPrologue(Unknown) {
// No stack is used.
StackOffset = 0;
@@ -50,7 +49,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
if (MinAlign > (int)Align)
Align = MinAlign;
MF.getFrameInfo()->ensureMaxAlignment(Align);
- TM.getTargetLowering()->HandleByVal(this, Size, Align);
+ MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align);
+ Size = unsigned(RoundUpToAlignment(Size, MinAlign));
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index b3beac3..307dec5 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
- initializeAtomicExpandLoadLinkedPass(Registry);
+ initializeAtomicExpandPass(Registry);
initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
@@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCopyPropagationPass(Registry);
+ initializeMachineCombinerPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachinePostDominatorTreePass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index ccac40c..8d20848 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -63,6 +64,7 @@ STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
+STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -80,15 +82,29 @@ static cl::opt<bool> EnableAndCmpSinking(
"enable-andcmp-sinking", cl::Hidden, cl::init(true),
cl::desc("Enable sinkinig and/cmp into branches."));
+static cl::opt<bool> DisableStoreExtract(
+ "disable-cgp-store-extract", cl::Hidden, cl::init(false),
+ cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> StressStoreExtract(
+ "stress-cgp-store-extract", cl::Hidden, cl::init(false),
+ cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
-typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
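+// For each promoted instruction, remember its type before promotion and
+// whether the promotion was done for a sign extension or a zero extension.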
+struct TypeIsSExt {
+ Type *Ty;
+ bool IsSExt;
+ TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
+};
+typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetMachine *TM;
const TargetLowering *TLI;
+ const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
@@ -118,7 +134,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr) {
+ : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -128,6 +144,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetTransformInfo>();
}
private:
@@ -144,6 +161,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
+ bool OptimizeExtractElementInst(Instruction *Inst);
bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
bool sinkAndCmp(Function &F);
@@ -168,8 +186,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
PromotedInsts.clear();
ModifiedDT = false;
- if (TM) TLI = TM->getTargetLowering();
+ if (TM)
+ TLI = TM->getSubtargetImpl()->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfo>();
+ TTI = &getAnalysis<TargetTransformInfo>();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -662,10 +682,13 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
if (!ISDOpcode)
continue;
- // If the use is actually a legal node, there will not be an implicit
- // truncate.
- if (TLI.isOperationLegalOrCustom(ISDOpcode,
- EVT::getEVT(TruncUser->getType())))
+ // If the use is actually a legal node, there will not be an
+ // implicit truncate.
+ // FIXME: always querying the result type is just an
+ // approximation; some nodes' legality is determined by the
+ // operand or other means. There's no good way to find out though.
+ if (TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
continue;
// Don't bother for PHI nodes.
@@ -978,7 +1001,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
} else {
SmallPtrSet<BasicBlock*, 4> VisitedBBs;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (!VisitedBBs.insert(*PI))
+ if (!VisitedBBs.insert(*PI).second)
continue;
BasicBlock::InstListType &InstList = (*PI)->getInstList();
@@ -1250,46 +1273,75 @@ class TypePromotionTransaction {
/// \brief Build a truncate instruction.
class TruncBuilder : public TypePromotionAction {
+ Value *Val;
public:
/// \brief Build a truncate instruction of \p Opnd producing a \p Ty
/// result.
/// trunc Opnd to Ty.
TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
IRBuilder<> Builder(Opnd);
- Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted"));
- DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n");
+ Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
}
- /// \brief Get the built instruction.
- Instruction *getBuiltInstruction() { return Inst; }
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
/// \brief Remove the built instruction.
void undo() override {
- DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n");
- Inst->eraseFromParent();
+ DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
}
};
/// \brief Build a sign extension instruction.
class SExtBuilder : public TypePromotionAction {
+ Value *Val;
public:
/// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
/// result.
/// sext Opnd to Ty.
SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
- : TypePromotionAction(Inst) {
+ : TypePromotionAction(InsertPt) {
+ IRBuilder<> Builder(InsertPt);
+ Val = Builder.CreateSExt(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
+ }
+
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
+
+ /// \brief Remove the built instruction.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
+ }
+ };
+
+ /// \brief Build a zero extension instruction.
+ class ZExtBuilder : public TypePromotionAction {
+ Value *Val;
+ public:
+ /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
+ /// result.
+ /// zext Opnd to Ty.
+ ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
+ : TypePromotionAction(InsertPt) {
IRBuilder<> Builder(InsertPt);
- Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted"));
- DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n");
+ Val = Builder.CreateZExt(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
}
- /// \brief Get the built instruction.
- Instruction *getBuiltInstruction() { return Inst; }
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
/// \brief Remove the built instruction.
void undo() override {
- DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n");
- Inst->eraseFromParent();
+ DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
}
};
@@ -1418,9 +1470,11 @@ public:
/// Same as Value::mutateType.
void mutateType(Instruction *Inst, Type *NewTy);
/// Same as IRBuilder::createTrunc.
- Instruction *createTrunc(Instruction *Opnd, Type *Ty);
+ Value *createTrunc(Instruction *Opnd, Type *Ty);
/// Same as IRBuilder::createSExt.
- Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+ Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+ /// Same as IRBuilder::createZExt.
+ Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
/// Same as Instruction::moveBefore.
void moveBefore(Instruction *Inst, Instruction *Before);
/// @}
@@ -1452,20 +1506,28 @@ void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}
-Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd,
- Type *Ty) {
+Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
+ Type *Ty) {
std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
- Instruction *I = Ptr->getBuiltInstruction();
+ Value *Val = Ptr->getBuiltValue();
Actions.push_back(std::move(Ptr));
- return I;
+ return Val;
}
-Instruction *TypePromotionTransaction::createSExt(Instruction *Inst,
- Value *Opnd, Type *Ty) {
+Value *TypePromotionTransaction::createSExt(Instruction *Inst,
+ Value *Opnd, Type *Ty) {
std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
- Instruction *I = Ptr->getBuiltInstruction();
+ Value *Val = Ptr->getBuiltValue();
+ Actions.push_back(std::move(Ptr));
+ return Val;
+}
+
+Value *TypePromotionTransaction::createZExt(Instruction *Inst,
+ Value *Opnd, Type *Ty) {
+ std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
+ Value *Val = Ptr->getBuiltValue();
Actions.push_back(std::move(Ptr));
- return I;
+ return Val;
}
void TypePromotionTransaction::moveBefore(Instruction *Inst,
@@ -1658,58 +1720,75 @@ static bool MightBeFoldableInst(Instruction *I) {
/// \brief Helper class to perform type promotion.
class TypePromotionHelper {
- /// \brief Utility function to check whether or not a sign extension of
- /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either
- /// using the operands of \p Inst or promoting \p Inst.
+ /// \brief Utility function to check whether or not a sign or zero extension
+ /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
+ /// either using the operands of \p Inst or promoting \p Inst.
+ /// The type of the extension is defined by \p IsSExt.
/// In other words, check if:
- /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType.
+ /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
/// #1 Promotion applies:
- /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...).
+ /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
/// #2 Operand reuses:
- /// sext opnd1 to ConsideredSExtType.
+ /// ext opnd1 to ConsideredExtType.
/// \p PromotedInsts maps the instructions to their type before promotion.
- static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType,
- const InstrToOrigTy &PromotedInsts);
+ static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
+ const InstrToOrigTy &PromotedInsts, bool IsSExt);
/// \brief Utility function to determine if \p OpIdx should be promoted when
/// promoting \p Inst.
- static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
+ static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
if (isa<SelectInst>(Inst) && OpIdx == 0)
return false;
return true;
}
- /// \brief Utility function to promote the operand of \p SExt when this
- /// operand is a promotable trunc or sext.
+ /// \brief Utility function to promote the operand of \p Ext when this
+ /// operand is a promotable trunc or sext or zext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
- /// created to promote the operand of SExt.
+ /// created to promote the operand of Ext.
/// Should never be called directly.
- /// \return The promoted value which is used instead of SExt.
- static Value *promoteOperandForTruncAndSExt(Instruction *SExt,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ /// \return The promoted value which is used instead of Ext.
+ static Value *promoteOperandForTruncAndAnyExt(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts);
- /// \brief Utility function to promote the operand of \p SExt when this
+ /// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
/// \p CreatedInsts[out] contains how many non-free instructions have been
- /// created to promote the operand of SExt.
+ /// created to promote the operand of Ext.
/// Should never be called directly.
- /// \return The promoted value which is used instead of SExt.
- static Value *promoteOperandForOther(Instruction *SExt,
+ /// \return The promoted value which is used instead of Ext.
+ static Value *promoteOperandForOther(Instruction *Ext,
TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts);
+ unsigned &CreatedInsts, bool IsSExt);
+
+ /// \see promoteOperandForOther.
+ static Value *signExtendOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, true);
+ }
+
+ /// \see promoteOperandForOther.
+ static Value *zeroExtendOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInsts) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, false);
+ }
public:
- /// Type for the utility function that promotes the operand of SExt.
- typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT,
+ /// Type for the utility function that promotes the operand of Ext.
+ typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
unsigned &CreatedInsts);
- /// \brief Given a sign extend instruction \p SExt, return the approriate
- /// action to promote the operand of \p SExt instead of using SExt.
+  /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
+ /// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
/// sign extension.
/// \p InsertedTruncs keeps track of all the truncate instructions inserted by
@@ -1717,36 +1796,42 @@ public:
/// because we do not want to promote these instructions as CodeGenPrepare
/// will reinsert them later. Thus creating an infinite loop: create/remove.
/// \p PromotedInsts maps the instructions to their type before promotion.
- static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+ static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
const TargetLowering &TLI,
const InstrToOrigTy &PromotedInsts);
};
bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
- Type *ConsideredSExtType,
- const InstrToOrigTy &PromotedInsts) {
- // We can always get through sext.
- if (isa<SExtInst>(Inst))
+ Type *ConsideredExtType,
+ const InstrToOrigTy &PromotedInsts,
+ bool IsSExt) {
+ // We can always get through zext.
+ if (isa<ZExtInst>(Inst))
+ return true;
+
+ // sext(sext) is ok too.
+ if (IsSExt && isa<SExtInst>(Inst))
return true;
// We can get through binary operator, if it is legal. In other words, the
// binary operator must have a nuw or nsw flag.
const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
- (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
+ ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
+ (IsSExt && BinOp->hasNoSignedWrap())))
return true;
// Check if we can do the following simplification.
- // sext(trunc(sext)) --> sext
+ // ext(trunc(opnd)) --> ext(opnd)
if (!isa<TruncInst>(Inst))
return false;
Value *OpndVal = Inst->getOperand(0);
- // Check if we can use this operand in the sext.
- // If the type is larger than the result type of the sign extension,
+ // Check if we can use this operand in the extension.
+ // If the type is larger than the result type of the extension,
// we cannot.
if (OpndVal->getType()->getIntegerBitWidth() >
- ConsideredSExtType->getIntegerBitWidth())
+ ConsideredExtType->getIntegerBitWidth())
return false;
// If the operand of the truncate is not an instruction, we will not have
@@ -1757,18 +1842,19 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
return false;
// Check if the source of the type is narrow enough.
- // I.e., check that trunc just drops sign extended bits.
- // #1 get the type of the operand.
+ // I.e., check that trunc just drops extended bits of the same kind as
+ // the extension.
+ // #1 get the type of the operand and check the kind of the extended bits.
const Type *OpndType;
InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
- if (It != PromotedInsts.end())
- OpndType = It->second;
- else if (isa<SExtInst>(Opnd))
- OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType();
+ if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
+ OpndType = It->second.Ty;
+ else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
+ OpndType = Opnd->getOperand(0)->getType();
else
return false;
- // #2 check that the truncate just drop sign extended bits.
+ // #2 check that the truncate just drops extended bits.
if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
return true;
@@ -1776,149 +1862,167 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
}
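As a sanity check on the rule above (the trunc in ext(trunc(opnd)) is transparent only when it drops bits of the same kind as the outer extension), here is a minimal standalone sketch, not part of this patch, exercising the identity on concrete llvm::APInt values; the bit widths and the 0x90 test value are arbitrary assumptions.

// Standalone sketch (assumed harness, not part of this patch): the trunc in
// ext(trunc(opnd)) is droppable only when it removes bits of the same kind
// as the outer extension; APInt lets us check that on concrete values.
#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

int main() {
  APInt Opnd(8, 0x90); // i8 value with the sign bit set
  // sext i8 -> i16, trunc to i12, sext to i32: the trunc only drops
  // sign-extended bits, so the result equals a direct sext to i32.
  assert(Opnd.sext(16).trunc(12).sext(32) == Opnd.sext(32));
  // The same trunc is NOT transparent for a zero extension of a
  // sign-extended value: zext(trunc(sext)) differs from a direct sext.
  assert(Opnd.sext(16).trunc(12).zext(32) != Opnd.sext(32));
  return 0;
}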
TypePromotionHelper::Action TypePromotionHelper::getAction(
- Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+ Instruction *Ext, const SetOfInstrs &InsertedTruncs,
const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
- Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0));
- Type *SExtTy = SExt->getType();
- // If the operand of the sign extension is not an instruction, we cannot
+ assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
+ "Unexpected instruction type");
+ Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
+ Type *ExtTy = Ext->getType();
+ bool IsSExt = isa<SExtInst>(Ext);
+ // If the operand of the extension is not an instruction, we cannot
// get through.
// If it is, check whether we can get through.
- if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
+ if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
return nullptr;
// Do not promote if the operand has been added by codegenprepare.
// Otherwise, it means we are undoing an optimization that is likely to be
// redone, thus causing potential infinite loop.
- if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
+ if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
return nullptr;
// SExt or Trunc instructions.
// Return the related handler.
- if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd))
- return promoteOperandForTruncAndSExt;
+ if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
+ isa<ZExtInst>(ExtOpnd))
+ return promoteOperandForTruncAndAnyExt;
// Regular instruction.
// Abort early if we will have to insert non-free instructions.
- if (!SExtOpnd->hasOneUse() &&
- !TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
+ if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
return nullptr;
- return promoteOperandForOther;
+ return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
}
-Value *TypePromotionHelper::promoteOperandForTruncAndSExt(
+Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
- // Replace sext(trunc(opnd)) or sext(sext(opnd))
- // => sext(opnd).
- TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+ Value *ExtVal = SExt;
+ if (isa<ZExtInst>(SExtOpnd)) {
+ // Replace s|zext(zext(opnd))
+ // => zext(opnd).
+ Value *ZExt =
+ TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
+ TPT.replaceAllUsesWith(SExt, ZExt);
+ TPT.eraseInstruction(SExt);
+ ExtVal = ZExt;
+ } else {
+ // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
+ // => z|sext(opnd).
+ TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+ }
CreatedInsts = 0;
// Remove dead code.
if (SExtOpnd->use_empty())
TPT.eraseInstruction(SExtOpnd);
- // Check if the sext is still needed.
- if (SExt->getType() != SExt->getOperand(0)->getType())
- return SExt;
+ // Check if the extension is still needed.
+ Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
+ if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType())
+ return ExtVal;
- // At this point we have: sext ty opnd to ty.
- // Reassign the uses of SExt to the opnd and remove SExt.
- Value *NextVal = SExt->getOperand(0);
- TPT.eraseInstruction(SExt, NextVal);
+ // At this point we have: ext ty opnd to ty.
+ // Reassign the uses of ExtInst to the opnd and remove ExtInst.
+ Value *NextVal = ExtInst->getOperand(0);
+ TPT.eraseInstruction(ExtInst, NextVal);
return NextVal;
}
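The handler above now folds s|zext(zext(opnd)) down to a single zext. A minimal standalone sketch, not part of this patch, of the underlying value identity using llvm::APInt (the widths and the 0xA5 test value are arbitrary assumptions):

// Standalone sketch (assumed harness, not part of this patch): both
// sext(zext(opnd)) and zext(zext(opnd)) collapse to one zext of opnd,
// which is why the handler emits a single zext when the operand is a zext.
#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

int main() {
  APInt Opnd(8, 0xA5);
  assert(Opnd.zext(16).zext(32) == Opnd.zext(32)); // zext(zext) == zext
  assert(Opnd.zext(16).sext(32) == Opnd.zext(32)); // sext(zext) == zext
  return 0;
}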
-Value *
-TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
- TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts) {
- // By construction, the operand of SExt is an instruction. Otherwise we cannot
+Value *TypePromotionHelper::promoteOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, bool IsSExt) {
+ // By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
- Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+ Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
CreatedInsts = 0;
- if (!SExtOpnd->hasOneUse()) {
- // SExtOpnd will be promoted.
- // All its uses, but SExt, will need to use a truncated value of the
+ if (!ExtOpnd->hasOneUse()) {
+ // ExtOpnd will be promoted.
+ // All its uses, but Ext, will need to use a truncated value of the
// promoted version.
// Create the truncate now.
- Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType());
- Trunc->removeFromParent();
- // Insert it just after the definition.
- Trunc->insertAfter(SExtOpnd);
+ Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
+ if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
+ ITrunc->removeFromParent();
+ // Insert it just after the definition.
+ ITrunc->insertAfter(ExtOpnd);
+ }
- TPT.replaceAllUsesWith(SExtOpnd, Trunc);
- // Restore the operand of SExt (which has been replace by the previous call
+ TPT.replaceAllUsesWith(ExtOpnd, Trunc);
+ // Restore the operand of Ext (which has been replaced by the previous call
// to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
- TPT.setOperand(SExt, 0, SExtOpnd);
+ TPT.setOperand(Ext, 0, ExtOpnd);
}
// Get through the Instruction:
// 1. Update its type.
- // 2. Replace the uses of SExt by Inst.
- // 3. Sign extend each operand that needs to be sign extended.
+ // 2. Replace the uses of Ext by Inst.
+ // 3. Extend each operand that needs to be extended.
// Remember the original type of the instruction before promotion.
// This is useful to know that the high bits are sign extended bits.
- PromotedInsts.insert(
- std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType()));
+ PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
+ ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
// Step #1.
- TPT.mutateType(SExtOpnd, SExt->getType());
+ TPT.mutateType(ExtOpnd, Ext->getType());
// Step #2.
- TPT.replaceAllUsesWith(SExt, SExtOpnd);
+ TPT.replaceAllUsesWith(Ext, ExtOpnd);
// Step #3.
- Instruction *SExtForOpnd = SExt;
+ Instruction *ExtForOpnd = Ext;
- DEBUG(dbgs() << "Propagate SExt to operands\n");
- for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
+ DEBUG(dbgs() << "Propagate Ext to operands\n");
+ for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
++OpIdx) {
- DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n');
- if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() ||
- !shouldSExtOperand(SExtOpnd, OpIdx)) {
+ DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
+ if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
+ !shouldExtOperand(ExtOpnd, OpIdx)) {
DEBUG(dbgs() << "No need to propagate\n");
continue;
}
- // Check if we can statically sign extend the operand.
- Value *Opnd = SExtOpnd->getOperand(OpIdx);
+ // Check if we can statically extend the operand.
+ Value *Opnd = ExtOpnd->getOperand(OpIdx);
if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
- DEBUG(dbgs() << "Statically sign extend\n");
- TPT.setOperand(
- SExtOpnd, OpIdx,
- ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue()));
+ DEBUG(dbgs() << "Statically extend\n");
+ unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
+ APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
+ : Cst->getValue().zext(BitWidth);
+ TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
continue;
}
// UndefValue are typed, so we have to statically sign extend them.
if (isa<UndefValue>(Opnd)) {
- DEBUG(dbgs() << "Statically sign extend\n");
- TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType()));
+ DEBUG(dbgs() << "Statically extend\n");
+ TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
continue;
}
// Otherwise we have to explicitly sign extend the operand.
- // Check if SExt was reused to sign extend an operand.
- if (!SExtForOpnd) {
+ // Check if Ext was reused to extend an operand.
+ if (!ExtForOpnd) {
// If yes, create a new one.
- DEBUG(dbgs() << "More operands to sext\n");
- SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType());
+ DEBUG(dbgs() << "More operands to ext\n");
+ ExtForOpnd =
+ cast<Instruction>(IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
+ : TPT.createZExt(Ext, Opnd, Ext->getType()));
++CreatedInsts;
}
- TPT.setOperand(SExtForOpnd, 0, Opnd);
+ TPT.setOperand(ExtForOpnd, 0, Opnd);
// Move the sign extension before the insertion point.
- TPT.moveBefore(SExtForOpnd, SExtOpnd);
- TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd);
+ TPT.moveBefore(ExtForOpnd, ExtOpnd);
+ TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
// If more sext are required, new instructions will have to be created.
- SExtForOpnd = nullptr;
+ ExtForOpnd = nullptr;
}
- if (SExtForOpnd == SExt) {
- DEBUG(dbgs() << "Sign extension is useless now\n");
- TPT.eraseInstruction(SExt);
+ if (ExtForOpnd == Ext) {
+ DEBUG(dbgs() << "Extension is useless now\n");
+ TPT.eraseInstruction(Ext);
}
- return SExtOpnd;
+ return ExtOpnd;
}
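The "Statically extend" step above widens constant operands in place, taking a sign or zero extension of their APInt value according to the extension kind. A minimal standalone sketch, not part of this patch, of that widening; the i8/i32 types and the 0xFF constant are assumptions chosen for illustration.

// Standalone sketch (assumed harness, not part of this patch): mirrors the
// "Statically extend" step, widening an i8 constant to the extension type
// with either a sign or a zero extension of its APInt value.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);
  ConstantInt *Cst = ConstantInt::get(Type::getInt8Ty(Ctx), 0xFF); // i8 -1
  unsigned BitWidth = I32->getIntegerBitWidth();
  Constant *SExtCst = ConstantInt::get(I32, Cst->getValue().sext(BitWidth));
  Constant *ZExtCst = ConstantInt::get(I32, Cst->getValue().zext(BitWidth));
  assert(cast<ConstantInt>(SExtCst)->getSExtValue() == -1);  // i32 -1
  assert(cast<ConstantInt>(ZExtCst)->getZExtValue() == 255); // i32 255
  return 0;
}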
/// IsPromotionProfitable - Check whether or not promoting an instruction
@@ -1951,8 +2055,8 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
if (!ISDOpcode)
return true;
// Otherwise, check if the promoted instruction is legal or not.
- return TLI.isOperationLegalOrCustom(ISDOpcode,
- EVT::getEVT(PromotedInst->getType()));
+ return TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(PromotedInst->getType()));
}
/// MatchOperationAddr - Given an instruction or constant expr, see if we can
@@ -2036,7 +2140,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::Shl: {
// Can only handle X*C and X << C.
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS) return false;
+ if (!RHS)
+ return false;
int64_t Scale = RHS->getSExtValue();
if (Opcode == Instruction::Shl)
Scale = 1LL << Scale;
@@ -2129,28 +2234,32 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
return true;
}
- case Instruction::SExt: {
- // Try to move this sext out of the way of the addressing mode.
- Instruction *SExt = cast<Instruction>(AddrInst);
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ Instruction *Ext = dyn_cast<Instruction>(AddrInst);
+ if (!Ext)
+ return false;
+
+ // Try to move this ext out of the way of the addressing mode.
// Ask for a method for doing so.
- TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
- SExt, InsertedTruncs, TLI, PromotedInsts);
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
if (!TPH)
return false;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
unsigned CreatedInsts = 0;
- Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts);
+ Value *PromotedOperand = TPH(Ext, TPT, PromotedInsts, CreatedInsts);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
// E.g.,
// op = add opnd, 1
- // idx = sext op
+ // idx = ext op
// addr = gep base, idx
// is now:
- // promotedOpnd = sext opnd <- no match here
+ // promotedOpnd = ext opnd <- no match here
// op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
// addr = gep base, op <- match
if (MovedAway)
@@ -2289,10 +2398,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ SmallPtrSetImpl<Instruction*> &ConsideredInsts,
const TargetLowering &TLI) {
// If we already considered this instruction, we're done.
- if (!ConsideredInsts.insert(I))
+ if (!ConsideredInsts.insert(I).second)
return false;
// If this is an obviously unfoldable instruction, bail out.
@@ -2506,7 +2615,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
worklist.pop_back();
// Break use-def graph loops.
- if (!Visited.insert(V)) {
+ if (!Visited.insert(V).second) {
Consensus = nullptr;
break;
}
@@ -2696,8 +2805,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (ResultIndex) {
- // We need to add this separately from the scale above to help with
- // SDAG consecutive load/store merging.
+ // We need to add this separately from the scale above to help with
+ // SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
@@ -3105,6 +3214,367 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
return MadeChange;
}
+namespace {
+/// \brief Helper class to promote a scalar operation to a vector one.
+/// This class is used to move downward extractelement transition.
+/// E.g.,
+/// a = vector_op <2 x i32>
+/// b = extractelement <2 x i32> a, i32 0
+/// c = scalar_op b
+/// store c
+///
+/// =>
+/// a = vector_op <2 x i32>
+/// c = vector_op a (equivalent to scalar_op on the related lane)
+/// * d = extractelement <2 x i32> c, i32 0
+/// * store d
+/// Assuming both extractelement and store can be combined, we get rid of the
+/// transition.
+class VectorPromoteHelper {
+ /// Used to perform some checks on the legality of vector operations.
+ const TargetLowering &TLI;
+
+ /// Used to estimate the cost of the promoted chain.
+ const TargetTransformInfo &TTI;
+
+ /// The transition being moved downwards.
+ Instruction *Transition;
+ /// The sequence of instructions to be promoted.
+ SmallVector<Instruction *, 4> InstsToBePromoted;
+ /// Cost of combining a store and an extract.
+ unsigned StoreExtractCombineCost;
+ /// Instruction that will be combined with the transition.
+ Instruction *CombineInst;
+
+ /// \brief The instruction that represents the current end of the transition.
+ /// Since we are faking the promotion until we reach the end of the chain
+ /// of computation, we need a way to get the current end of the transition.
+ Instruction *getEndOfTransition() const {
+ if (InstsToBePromoted.empty())
+ return Transition;
+ return InstsToBePromoted.back();
+ }
+
+ /// \brief Return the index of the original value in the transition.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
+ /// c, is at index 0.
+ unsigned getTransitionOriginalValueIdx() const {
+ assert(isa<ExtractElementInst>(Transition) &&
+ "Other kind of transitions are not supported yet");
+ return 0;
+ }
+
+ /// \brief Return the index of the index in the transition.
+ /// E.g., for "extractelement <2 x i32> c, i32 0" the index
+ /// is at index 1.
+ unsigned getTransitionIdx() const {
+ assert(isa<ExtractElementInst>(Transition) &&
+ "Other kind of transitions are not supported yet");
+ return 1;
+ }
+
+ /// \brief Get the type of the transition.
+ /// This is the type of the original value.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
+ /// transition is <2 x i32>.
+ Type *getTransitionType() const {
+ return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
+ }
+
+ /// \brief Promote \p ToBePromoted by moving \p Def downward through it.
+ /// I.e., we have the following sequence:
+ /// Def = Transition <ty1> a to <ty2>
+ /// b = ToBePromoted <ty2> Def, ...
+ /// =>
+ /// b = ToBePromoted <ty1> a, ...
+ /// Def = Transition <ty1> ToBePromoted to <ty2>
+ void promoteImpl(Instruction *ToBePromoted);
+
+ /// \brief Check whether or not it is profitable to promote all the
+ /// instructions enqueued to be promoted.
+ bool isProfitableToPromote() {
+ Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
+ unsigned Index = isa<ConstantInt>(ValIdx)
+ ? cast<ConstantInt>(ValIdx)->getZExtValue()
+ : -1;
+ Type *PromotedType = getTransitionType();
+
+ StoreInst *ST = cast<StoreInst>(CombineInst);
+ unsigned AS = ST->getPointerAddressSpace();
+ unsigned Align = ST->getAlignment();
+ // Check if this store is supported.
+ if (!TLI.allowsMisalignedMemoryAccesses(
+ TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
+ // If this is not supported, there is no way we can combine
+ // the extract with the store.
+ return false;
+ }
+
+ // The scalar chain of computation has to pay for the transition
+ // scalar to vector.
+ // The vector chain has to account for the combining cost.
+ uint64_t ScalarCost =
+ TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
+ uint64_t VectorCost = StoreExtractCombineCost;
+ for (const auto &Inst : InstsToBePromoted) {
+ // Compute the cost.
+ // By construction, all instructions being promoted are arithmetic ones.
+ // Moreover, one argument is a constant that can be viewed as a splat
+ // constant.
+ Value *Arg0 = Inst->getOperand(0);
+ bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
+ isa<ConstantFP>(Arg0);
+ TargetTransformInfo::OperandValueKind Arg0OVK =
+ IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
+ : TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Arg1OVK =
+ !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
+ : TargetTransformInfo::OK_AnyValue;
+ ScalarCost += TTI.getArithmeticInstrCost(
+ Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
+ VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+ Arg0OVK, Arg1OVK);
+ }
+ DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
+ << ScalarCost << "\nVector: " << VectorCost << '\n');
+ return ScalarCost > VectorCost;
+ }
+
+ /// \brief Generate a constant vector with \p Val with the same
+ /// number of elements as the transition.
+ /// \p UseSplat defines whether or not \p Val should be replicated
+ /// across the whole vector.
+ /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
+ /// otherwise we generate a vector with as many undef as possible:
+ /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
+ /// used at the index of the extract.
+ Value *getConstantVector(Constant *Val, bool UseSplat) const {
+ unsigned ExtractIdx = UINT_MAX;
+ if (!UseSplat) {
+ // If we cannot determine where the constant must be, we have to
+ // use a splat constant.
+ Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
+ if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
+ ExtractIdx = CstVal->getSExtValue();
+ else
+ UseSplat = true;
+ }
+
+ unsigned End = getTransitionType()->getVectorNumElements();
+ if (UseSplat)
+ return ConstantVector::getSplat(End, Val);
+
+ SmallVector<Constant *, 4> ConstVec;
+ UndefValue *UndefVal = UndefValue::get(Val->getType());
+ for (unsigned Idx = 0; Idx != End; ++Idx) {
+ if (Idx == ExtractIdx)
+ ConstVec.push_back(Val);
+ else
+ ConstVec.push_back(UndefVal);
+ }
+ return ConstantVector::get(ConstVec);
+ }
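For illustration, a minimal standalone sketch, not part of this patch, of the two vector shapes getConstantVector can produce for a <2 x i32> transition with Val = 7 and ExtractIdx = 1; all concrete values are assumptions.

// Standalone sketch (assumed harness, not part of this patch): the splat
// form versus the undef-filled form with Val only at the extracted lane.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *Val = ConstantInt::get(I32, 7);
  // UseSplat == true: <i32 7, i32 7>
  Constant *Splat = ConstantVector::getSplat(2, Val);
  // UseSplat == false with ExtractIdx == 1: <i32 undef, i32 7>
  Constant *Lanes[] = {UndefValue::get(I32), Val};
  Constant *Sparse = ConstantVector::get(Lanes);
  (void)Splat;
  (void)Sparse;
  return 0;
}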
+
+ /// \brief Check if promoting the operand at \p OperandIdx in \p Use to a
+ /// vector type can trigger undefined behavior.
+ static bool canCauseUndefinedBehavior(const Instruction *Use,
+ unsigned OperandIdx) {
+ // It is not safe to introduce undef when the operand is on
+ // the right hand side of a division-like instruction.
+ if (OperandIdx != 1)
+ return false;
+ switch (Use->getOpcode()) {
+ default:
+ return false;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ return true;
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return !Use->hasNoNaNs();
+ }
+ llvm_unreachable(nullptr);
+ }
+
+public:
+ VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
+ Instruction *Transition, unsigned CombineCost)
+ : TLI(TLI), TTI(TTI), Transition(Transition),
+ StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
+ assert(Transition && "Do not know how to promote null");
+ }
+
+ /// \brief Check if we can promote \p ToBePromoted to \p Type.
+ bool canPromote(const Instruction *ToBePromoted) const {
+ // We could support CastInst too.
+ return isa<BinaryOperator>(ToBePromoted);
+ }
+
+ /// \brief Check if it is profitable to promote \p ToBePromoted
+ /// by moving the transition downward through it.
+ bool shouldPromote(const Instruction *ToBePromoted) const {
+ // Promote only if all the operands can be statically expanded.
+ // Indeed, we do not want to introduce any new kind of transitions.
+ for (const Use &U : ToBePromoted->operands()) {
+ const Value *Val = U.get();
+ if (Val == getEndOfTransition()) {
+ // If the use is a division and the transition is on the rhs,
+ // we cannot promote the operation, otherwise we may create a
+ // division by zero.
+ if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
+ return false;
+ continue;
+ }
+ if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
+ !isa<ConstantFP>(Val))
+ return false;
+ }
+ // Check that the resulting operation is legal.
+ int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
+ if (!ISDOpcode)
+ return false;
+ return StressStoreExtract ||
+ TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(getTransitionType(), true));
+ }
+
+ /// \brief Check whether or not \p Use can be combined
+ /// with the transition.
+ /// I.e., is it possible to do Use(Transition) => AnotherUse?
+ bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
+
+ /// \brief Record \p ToBePromoted as part of the chain to be promoted.
+ void enqueueForPromotion(Instruction *ToBePromoted) {
+ InstsToBePromoted.push_back(ToBePromoted);
+ }
+
+ /// \brief Set the instruction that will be combined with the transition.
+ void recordCombineInstruction(Instruction *ToBeCombined) {
+ assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
+ CombineInst = ToBeCombined;
+ }
+
+ /// \brief Promote all the instructions enqueued for promotion if it
+ /// is profitable.
+ /// \return True if the promotion happened, false otherwise.
+ bool promote() {
+ // Check if there is something to promote.
+ // Right now, if we do not have anything to combine with,
+ // we assume the promotion is not profitable.
+ if (InstsToBePromoted.empty() || !CombineInst)
+ return false;
+
+ // Check cost.
+ if (!StressStoreExtract && !isProfitableToPromote())
+ return false;
+
+ // Promote.
+ for (auto &ToBePromoted : InstsToBePromoted)
+ promoteImpl(ToBePromoted);
+ InstsToBePromoted.clear();
+ return true;
+ }
+};
+} // End of anonymous namespace.
+
+void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
+ // At this point, we know that all the operands of ToBePromoted but Def
+ // can be statically promoted.
+ // For Def, we need to use its parameter in ToBePromoted:
+ // b = ToBePromoted ty1 a
+ // Def = Transition ty1 b to ty2
+ // Move the transition down.
+ // 1. Replace all uses of the promoted operation by the transition.
+ // = ... b => = ... Def.
+ assert(ToBePromoted->getType() == Transition->getType() &&
+ "The type of the result of the transition does not match "
+ "the final type");
+ ToBePromoted->replaceAllUsesWith(Transition);
+ // 2. Update the type of the uses.
+ // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
+ Type *TransitionTy = getTransitionType();
+ ToBePromoted->mutateType(TransitionTy);
+ // 3. Update all the operands of the promoted operation with promoted
+ // operands.
+ // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
+ for (Use &U : ToBePromoted->operands()) {
+ Value *Val = U.get();
+ Value *NewVal = nullptr;
+ if (Val == Transition)
+ NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
+ else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
+ isa<ConstantFP>(Val)) {
+ // Use a splat constant if it is not safe to use undef.
+ NewVal = getConstantVector(
+ cast<Constant>(Val),
+ isa<UndefValue>(Val) ||
+ canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
+ } else
+ assert(0 && "Did you modified shouldPromote and forgot to update this?");
+ ToBePromoted->setOperand(U.getOperandNo(), NewVal);
+ }
+ Transition->removeFromParent();
+ Transition->insertAfter(ToBePromoted);
+ Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
+}
+
+/// Some targets can do store(extractelement) with one instruction.
+/// Try to push the extractelement towards the stores when the target
+/// has this feature and this is profitable.
+bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
+ unsigned CombineCost = UINT_MAX;
+ if (DisableStoreExtract || !TLI ||
+ (!StressStoreExtract &&
+ !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
+ Inst->getOperand(1), CombineCost)))
+ return false;
+
+ // At this point we know that Inst is a vector to scalar transition.
+ // Try to move it down the def-use chain, until:
+ // - We can combine the transition with its single use
+ // => we got rid of the transition.
+ // - We escape the current basic block
+ // => we would need to check that we are moving it at a cheaper place and
+ // we do not do that for now.
+ BasicBlock *Parent = Inst->getParent();
+ DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
+ VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
+ // If the transition has more than one use, assume this is not going to be
+ // beneficial.
+ while (Inst->hasOneUse()) {
+ Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
+ DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
+
+ if (ToBePromoted->getParent() != Parent) {
+ DEBUG(dbgs() << "Instruction to promote is in a different block ("
+ << ToBePromoted->getParent()->getName()
+ << ") than the transition (" << Parent->getName() << ").\n");
+ return false;
+ }
+
+ if (VPH.canCombine(ToBePromoted)) {
+ DEBUG(dbgs() << "Assume " << *Inst << '\n'
+ << "will be combined with: " << *ToBePromoted << '\n');
+ VPH.recordCombineInstruction(ToBePromoted);
+ bool Changed = VPH.promote();
+ NumStoreExtractExposed += Changed;
+ return Changed;
+ }
+
+ DEBUG(dbgs() << "Try promoting.\n");
+ if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
+ return false;
+
+ DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
+
+ VPH.enqueueForPromotion(ToBePromoted);
+ Inst = ToBePromoted;
+ }
+ return false;
+}
+
bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
@@ -3199,6 +3669,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
return OptimizeShuffleVectorInst(SVI);
+ if (isa<ExtractElementInst>(I))
+ return OptimizeExtractElementInst(I);
+
return false;
}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index d2231ec..3d62d48 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -20,24 +20,20 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "post-RA-sched"
-CriticalAntiDepBreaker::
-CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
- AntiDepBreaker(), MF(MFi),
- MRI(MF.getRegInfo()),
- TII(MF.getTarget().getInstrInfo()),
- TRI(MF.getTarget().getRegisterInfo()),
- RegClassInfo(RCI),
- Classes(TRI->getNumRegs(), nullptr),
- KillIndices(TRI->getNumRegs(), 0),
- DefIndices(TRI->getNumRegs(), 0),
- KeepRegs(TRI->getNumRegs(), false) {}
+CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
+ const RegisterClassInfo &RCI)
+ : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
+ TII(MF.getSubtarget().getInstrInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
+ Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
@@ -273,19 +269,10 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
- // FIXME: we should use a SubRegIterator that includes self (as above), so
- // we don't have to repeat all this code for the reg itself.
- DefIndices[Reg] = Count;
- KillIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
- KeepRegs.reset(Reg);
- Classes[Reg] = nullptr;
- RegRefs.erase(Reg);
- // Repeat, for all subregs.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubregReg = *SubRegs;
+ // For the reg itself and all subregs: update the def to current;
+ // reset the kill state, any restrictions, and references.
+ for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) {
+ unsigned SubregReg = *SRI;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
KeepRegs.reset(SubregReg);
@@ -317,19 +304,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
RegRefs.insert(std::make_pair(Reg, &MO));
- // FIXME: we should use an MCRegAliasIterator that includes self so we don't
- // have to repeat all this code for the reg itself.
-
// It wasn't previously live but now it is, this is a kill.
- if (KillIndices[Reg] == ~0u) {
- KillIndices[Reg] = Count;
- DefIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
- }
- // Repeat, for all aliases.
- for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ // Repeat for all aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
if (KillIndices[AliasReg] == ~0u) {
KillIndices[AliasReg] = Count;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 45e4ff5..ceef74d 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
-#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
+#ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
@@ -38,32 +38,32 @@ class TargetRegisterInfo;
const TargetRegisterInfo *TRI;
const RegisterClassInfo &RegClassInfo;
- /// AllocatableSet - The set of allocatable registers.
+ /// The set of allocatable registers.
/// We'll be ignoring anti-dependencies on non-allocatable registers,
/// because they may not be safe to break.
const BitVector AllocatableSet;
- /// Classes - For live regs that are only used in one register class in a
+ /// For live regs that are only used in one register class in a
/// live range, the register class. If the register is not live, the
/// corresponding value is null. If the register is live but used in
/// multiple register classes, the corresponding value is -1 casted to a
/// pointer.
std::vector<const TargetRegisterClass*> Classes;
- /// RegRefs - Map registers to all their references within a live range.
+ /// Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
typedef std::multimap<unsigned, MachineOperand *>::const_iterator
RegRefIter;
- /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+ /// The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
std::vector<unsigned> KillIndices;
- /// DefIndices - The index of the most recent complete def (proceeding
+ /// The index of the most recent complete def (proceeding
/// bottom up), or ~0u if the register is live.
std::vector<unsigned> DefIndices;
- /// KeepRegs - A set of registers which are live and cannot be changed to
+ /// A set of registers which are live and cannot be changed to
/// break anti-dependencies.
BitVector KeepRegs;
@@ -71,26 +71,23 @@ class TargetRegisterInfo;
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
~CriticalAntiDepBreaker();
- /// Start - Initialize anti-dep breaking for a new basic block.
+ /// Initialize anti-dep breaking for a new basic block.
void StartBlock(MachineBasicBlock *BB) override;
- /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
- /// path
+  /// Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
- ///
unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex,
DbgValueVector &DbgValues) override;
- /// Observe - Update liveness information to account for the current
+ /// Update liveness information to account for the current
/// instruction, which will not be scheduled.
- ///
void Observe(MachineInstr *MI, unsigned Count,
unsigned InsertPosIndex) override;
- /// Finish - Finish anti-dep breaking for a basic block.
+ /// Finish anti-dep breaking for a basic block.
void FinishBlock() override;
private:
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index bc6e9dc..0a188c0 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -106,16 +106,15 @@ namespace llvm {
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT, bool IsPostRA);
+ bool IsPostRA);
// Schedule - Actual scheduling work.
void schedule() override;
};
}
-DefaultVLIWScheduler::DefaultVLIWScheduler(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- bool IsPostRA) :
- ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
+ MachineLoopInfo &MLI, bool IsPostRA)
+ : ScheduleDAGInstrs(MF, &MLI, IsPostRA) {
CanHandleTerminators = true;
}
@@ -125,12 +124,12 @@ void DefaultVLIWScheduler::schedule() {
}
// VLIWPacketizerList Ctor
-VLIWPacketizerList::VLIWPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
- TII = TM.getInstrInfo();
- ResourceTracker = TII->CreateTargetScheduleState(&TM, nullptr);
- VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF,
+ MachineLoopInfo &MLI, bool IsPostRA)
+ : MF(MF) {
+ TII = MF.getSubtarget().getInstrInfo();
+ ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA);
}
// VLIWPacketizerList Dtor
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 2b144d8..48213c1 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -19,7 +19,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "codegen-dce"
@@ -90,8 +91,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
bool AnyChanges = false;
MRI = &MF.getRegInfo();
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -122,19 +123,10 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
if (isDead(MI)) {
DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
// It is possible that some DBG_VALUE instructions refer to this
- // instruction. Examine each def operand for such references;
- // if found, mark the DBG_VALUE as undef (but don't delete it).
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- MRI->markUsesInDebugValueAsUndef(Reg);
- }
+ // instruction. They get marked as undef and will be deleted
+ // in the live debug variable analysis.
+ MI->eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
- MI->eraseFromParent();
++NumDeletes;
MIE = MBB->rend();
// MII is now pointing to the next instruction to process,
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index a195586..75b74d9 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -50,6 +51,11 @@ namespace {
bool runOnFunction(Function &Fn) override;
+ bool doFinalization(Module &M) override {
+ RewindFunction = nullptr;
+ return false;
+ }
+
void getAnalysisUsage(AnalysisUsage &AU) const override { }
const char *getPassName() const override {
@@ -118,7 +124,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
return false;
// Find the rewind function if we didn't already.
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
if (!RewindFunction) {
LLVMContext &Ctx = Resumes[0]->getContext();
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index c470632..995606f 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -153,8 +153,8 @@ private:
public:
/// runOnMachineFunction - Initialize per-function data structures.
void runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
LiveRegUnits.clear();
LiveRegUnits.setUniverse(TRI->getNumRegUnits());
@@ -245,7 +245,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI || DefMI->getParent() != Head)
continue;
- if (InsertAfter.insert(DefMI))
+ if (InsertAfter.insert(DefMI).second)
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
if (DefMI->isTerminator()) {
DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
@@ -580,7 +580,7 @@ namespace {
class EarlyIfConverter : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const MCSchedModel *SchedModel;
+ MCSchedModel SchedModel;
MachineRegisterInfo *MRI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
@@ -688,7 +688,7 @@ bool EarlyIfConverter::shouldConvertIf() {
FBBTrace.getCriticalPath());
// Set a somewhat arbitrary limit on the critical path extension we accept.
- unsigned CritLimit = SchedModel->MispredictPenalty/2;
+ unsigned CritLimit = SchedModel.MispredictPenalty/2;
// If-conversion only makes sense when there is unexploited ILP. Compute the
// maximum-ILP resource length of the trace after if-conversion. Compare it
@@ -782,8 +782,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
.enableEarlyIfConversion())
return false;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
SchedModel =
MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
MRI = &MF.getRegInfo();
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
index e976d7f..85b0893 100644
--- a/lib/CodeGen/ErlangGC.cpp
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -53,7 +54,7 @@ ErlangGC::ErlangGC() {
MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
DebugLoc DL) const {
- const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
return Label;
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index cf55b68..3680498 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -29,7 +29,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "execution-fix"
@@ -713,13 +714,13 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
- TII = MF->getTarget().getInstrInfo();
- TRI = MF->getTarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
LiveRegs = nullptr;
assert(NumRegs == RC->getNumRegs() && "Bad regclass");
DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
- << RC->getName() << " **********\n");
+ << TRI->getRegClassName(RC) << " **********\n");
// If no relevant registers are used in the function, we can skip it
// completely.
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 90b62b5..55e809e 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -19,7 +19,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "expand-isel-pseudos"
@@ -46,7 +46,7 @@ INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
// Iterate through each instruction in the function, looking for pseudos.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 8969bcc..e7bf143 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -20,8 +20,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "postrapseudos"
@@ -182,8 +183,8 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Machine Function\n"
<< "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
<< "********** Function: " << MF.getName() << '\n');
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
bool MadeChange = false;
diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp
new file mode 100644
index 0000000..5e7e853
--- /dev/null
+++ b/lib/CodeGen/ForwardControlFlowIntegrity.cpp
@@ -0,0 +1,374 @@
+//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief A pass that instruments code with fast checks for indirect calls and
+/// hooks for a function to check violations.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cfi"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
+#include "llvm/CodeGen/JumpInstrTables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumCFIIndirectCalls,
+ "Number of indirect call sites rewritten by the CFI pass");
+
+char ForwardControlFlowIntegrity::ID = 0;
+INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi",
+ "Control-Flow Integrity", true, true)
+INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
+INITIALIZE_PASS_DEPENDENCY(JumpInstrTables);
+INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi",
+ "Control-Flow Integrity", true, true)
+
+ModulePass *llvm::createForwardControlFlowIntegrityPass() {
+ return new ForwardControlFlowIntegrity();
+}
+
+ModulePass *llvm::createForwardControlFlowIntegrityPass(
+ JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
+ StringRef CFIFuncName) {
+ return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing,
+ CFIFuncName);
+}
+
+// Checks to see if a given CallSite is making an indirect call, including
+// cases where the indirect call is made through a bitcast.
+static bool isIndirectCall(CallSite &CS) {
+ if (CS.getCalledFunction())
+ return false;
+
+ // Check the value to see if it is merely a bitcast of a function. In
+ // this case, it will translate to a direct function call in the resulting
+ // assembly, so we won't treat it as an indirect call here.
+ const Value *V = CS.getCalledValue();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
+ }
+
+ // Otherwise, since we know it's a call, it must be an indirect call
+ return true;
+}
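A minimal standalone sketch, not part of this patch, showing why a call through a plain bitcast of a known function is left alone: the callee is still statically visible behind the cast, so it lowers to a direct call. The module and function names below are assumptions chosen for illustration.

// Standalone sketch (assumed harness, not part of this patch): a bitcast of
// a known function still exposes the callee as operand 0 of the cast, which
// is exactly the condition isIndirectCall uses to keep treating it as direct.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("cfi_example", Ctx);
  FunctionType *VoidFnTy = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *Callee =
      Function::Create(VoidFnTy, GlobalValue::ExternalLinkage, "callee", &M);
  // Bitcast the function to a different function-pointer type, as front ends
  // sometimes do; the callee remains statically known behind the cast.
  Type *OtherFnPtrTy =
      PointerType::getUnqual(FunctionType::get(Type::getInt32Ty(Ctx), false));
  Constant *Casted = ConstantExpr::getBitCast(Callee, OtherFnPtrTy);
  ConstantExpr *CE = cast<ConstantExpr>(Casted);
  bool TreatedAsDirect = CE->isCast() && isa<Function>(CE->getOperand(0));
  assert(TreatedAsDirect);
  return 0;
}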
+
+static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning";
+
+ForwardControlFlowIntegrity::ForwardControlFlowIntegrity()
+ : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single),
+ CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") {
+ initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
+}
+
+ForwardControlFlowIntegrity::ForwardControlFlowIntegrity(
+ JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
+ std::string CFIFuncName)
+ : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType),
+ CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) {
+ initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
+}
+
+ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {}
+
+void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<JumpInstrTableInfo>();
+ AU.addRequired<JumpInstrTables>();
+}
+
+void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) {
+ // To get the indirect calls, we iterate over all functions and iterate over
+ // the list of basic blocks in each. We extract a total list of indirect calls
+ // before modifying any of them, since our modifications will modify the list
+ // of basic blocks.
+ for (Function &F : M) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ CallSite CS(&I);
+ if (!(CS && isIndirectCall(CS)))
+ continue;
+
+ Value *CalledValue = CS.getCalledValue();
+
+ // Don't rewrite this instruction if the indirect call is actually just
+ // inline assembly, since our transformation will generate an invalid
+ // module in that case.
+ if (isa<InlineAsm>(CalledValue))
+ continue;
+
+ IndirectCalls.push_back(&I);
+ }
+ }
+ }
+}
+
+void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M,
+ CFITables &CFIT) {
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
+ for (Instruction *I : IndirectCalls) {
+ CallSite CS(I);
+ Value *CalledValue = CS.getCalledValue();
+
+ // Get the function type for this call and look it up in the tables.
+ Type *VTy = CalledValue->getType();
+ PointerType *PTy = dyn_cast<PointerType>(VTy);
+ Type *EltTy = PTy->getElementType();
+ FunctionType *FunTy = dyn_cast<FunctionType>(EltTy);
+ FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy);
+ ++NumCFIIndirectCalls;
+ Constant *JumpTableStart = nullptr;
+ Constant *JumpTableMask = nullptr;
+ Constant *JumpTableSize = nullptr;
+
+ // Some call sites have function types that don't correspond to any
+ // address-taken function in the module. This happens when function pointers
+ // are passed in from external code.
+ auto it = CFIT.find(TransformedTy);
+ if (it == CFIT.end()) {
+ // In this case, make sure that the function pointer will change by
+ // setting the mask and the start to be 0 so that the transformed
+ // function is 0.
+ JumpTableStart = ConstantInt::get(Int64Ty, 0);
+ JumpTableMask = ConstantInt::get(Int64Ty, 0);
+ JumpTableSize = ConstantInt::get(Int64Ty, 0);
+ } else {
+ JumpTableStart = it->second.StartValue;
+ JumpTableMask = it->second.MaskValue;
+ JumpTableSize = it->second.Size;
+ }
+
+ rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask,
+ JumpTableSize);
+ }
+
+ return;
+}
+
+bool ForwardControlFlowIntegrity::runOnModule(Module &M) {
+ JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>();
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+
+ // JumpInstrTableInfo stores information about the alignment of each entry.
+ // The alignment returned by JumpInstrTableInfo is alignment in bytes, not
+ // in the exponent.
+ ByteAlignment = JITI->entryByteAlignment();
+ LogByteAlignment = llvm::Log2_64(ByteAlignment);
+
+ // Set up tables for control-flow integrity based on information about the
+ // jump-instruction tables.
+ CFITables CFIT;
+ for (const auto &KV : JITI->getTables()) {
+ uint64_t Size = static_cast<uint64_t>(KV.second.size());
+ uint64_t TableSize = NextPowerOf2(Size);
+
+ int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment;
+ Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue);
+ Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size);
+
+ // The base of the table is defined to be the first jumptable function in
+ // the table.
+ Function *First = KV.second.begin()->second;
+ Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy);
+ CFIT[KV.first].StartValue = JumpTableStartValue;
+ CFIT[KV.first].MaskValue = JumpTableMaskValue;
+ CFIT[KV.first].Size = JumpTableSize;
+ }
+
+ if (CFIT.empty())
+ return false;
+
+ getIndirectCalls(M);
+
+ if (!CFIEnforcing) {
+ addWarningFunction(M);
+ }
+
+ // Update the instructions with the check and the indirect jump through our
+ // table.
+ updateIndirectCalls(M, CFIT);
+
+ return true;
+}
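For illustration, a minimal standalone sketch, not part of this patch, of how the per-table mask above is derived from the entry count and the per-entry byte alignment. The counts and the explicit int64_t cast on the alignment are assumptions; the pass computes essentially the same expression on its members.

// Standalone sketch (assumed harness, not part of this patch): deriving the
// jump-table mask for a table of 5 entries with 8-byte entry alignment.
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Size = 5;                                         // functions in one table
  uint64_t ByteAlignment = 8;                                // bytes per table entry
  unsigned LogByteAlignment = llvm::Log2_64(ByteAlignment);  // 3
  uint64_t TableSize = llvm::NextPowerOf2(Size);             // 8
  int64_t MaskValue =
      ((TableSize << LogByteAlignment) - 1) & -(int64_t)ByteAlignment;
  // Only the aligned offsets 0, 8, ..., 56 survive the mask; any other value
  // is clamped back into the table, keeping the indirect call on a valid entry.
  assert(MaskValue == 0x38);
  return 0;
}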
+
+void ForwardControlFlowIntegrity::addWarningFunction(Module &M) {
+ PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext());
+
+ // Get the type of the Warning Function: void (i8*, i8*),
+ // where the first argument is the name of the function in which the violation
+ // occurs, and the second is the function pointer that violates CFI.
+ SmallVector<Type *, 2> WarningFunArgs;
+ WarningFunArgs.push_back(CharPtrTy);
+ WarningFunArgs.push_back(CharPtrTy);
+ FunctionType *WarningFunTy =
+ FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false);
+
+ if (!CFIFuncName.empty()) {
+ Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy);
+ if (!FailureFun)
+ report_fatal_error("Could not get or insert the function specified by"
+ " -cfi-func-name");
+ } else {
+ // The default warning function swallows the warning and lets the call
+ // continue, since there's no generic way for it to print out this
+ // information.
+ Function *WarningFun = M.getFunction(cfi_failure_func_name);
+ if (!WarningFun) {
+ WarningFun =
+ Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage,
+ cfi_failure_func_name, &M);
+ }
+
+ BasicBlock *Entry =
+ BasicBlock::Create(M.getContext(), "entry", WarningFun, 0);
+ ReturnInst::Create(M.getContext(), Entry);
+ }
+}
+
+void ForwardControlFlowIntegrity::rewriteFunctionPointer(
+ Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart,
+ Constant *JumpTableMask, Constant *JumpTableSize) {
+ IRBuilder<> TempBuilder(I);
+
+ Type *OrigFunType = FunPtr->getType();
+
+ BasicBlock *CurBB = cast<BasicBlock>(I->getParent());
+ Function *CurF = cast<Function>(CurBB->getParent());
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
+
+ Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty);
+ Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty);
+
+ Value *NewFunPtr = nullptr;
+ Value *Check = nullptr;
+ switch (CFIType) {
+ case CFIntegrity::Sub: {
+ // This is the subtract, mask, and add version.
+ // Subtract from the base.
+ Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
+
+ // Mask the difference to force this to be a table offset.
+ Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask);
+
+ // Add it back to the base.
+ Value *Result = TempBuilder.CreateAdd(And, TStartInt);
+
+ // Convert it back into a function pointer that we can call.
+ NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
+ break;
+ }
+ case CFIntegrity::Ror: {
+ // This is the subtract and rotate version.
+ // Rotate right by the alignment value. The optimizer should recognize
+ // this sequence as a rotation.
+
+ // This cast is safe, since unsigned is always a subset of uint64_t.
+ uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment);
+ Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64);
+ Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64);
+
+ // Subtract from the base.
+ Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
+
+ // Create the equivalent of a rotate-right instruction.
+ Value *Shr = TempBuilder.CreateLShr(Sub, RightShift);
+ Value *Shl = TempBuilder.CreateShl(Sub, LeftShift);
+ Value *Or = TempBuilder.CreateOr(Shr, Shl);
+
+ // Perform unsigned comparison to check for inclusion in the table.
+ Check = TempBuilder.CreateICmpULT(Or, JumpTableSize);
+ NewFunPtr = FunPtr;
+ break;
+ }
+ case CFIntegrity::Add: {
+ // This is the mask and add version.
+ // Mask the function pointer to turn it into an offset into the table.
+ Value *And = TempBuilder.CreateAnd(TI, JumpTableMask);
+
+ // Then add this offset to the base to get the pointer value.
+ Value *Result = TempBuilder.CreateAdd(And, TStartInt);
+
+ // Convert it back into a function pointer that we can call.
+ NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
+ break;
+ }
+ }
+
+ if (!CFIEnforcing) {
+ // If a check hasn't been added (in the rotation version), then check to see
+ // if it's the same as the original function. This check determines whether
+ // or not we call the CFI failure function.
+ if (!Check)
+ Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr);
+ BasicBlock *InvalidPtrBlock =
+ BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0);
+ BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I);
+
+ // Remove the unconditional branch that connects the two blocks.
+ TerminatorInst *TermInst = CurBB->getTerminator();
+ TermInst->eraseFromParent();
+
+ // Add a conditional branch that depends on the Check above.
+ BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB);
+
+ // Call the warning function for this pointer, then continue.
+ Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock);
+ insertWarning(M, InvalidPtrBlock, BI, FunPtr);
+ } else {
+ // Modify the instruction to call this value.
+ CallSite CS(I);
+ CS.setCalledFunction(NewFunPtr);
+ }
+}
+
+void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block,
+ Instruction *I, Value *FunPtr) {
+ Function *ParentFun = cast<Function>(Block->getParent());
+
+ // Get the function to call right before the instruction.
+ Function *WarningFun = nullptr;
+ if (CFIFuncName.empty()) {
+ WarningFun = M.getFunction(cfi_failure_func_name);
+ } else {
+ WarningFun = M.getFunction(CFIFuncName);
+ }
+
+ assert(WarningFun && "Could not find the CFI failure function");
+
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+
+ IRBuilder<> WarningInserter(I);
+ // Create a mergeable GlobalVariable containing the name of the function.
+ Value *ParentNameGV =
+ WarningInserter.CreateGlobalString(ParentFun->getName());
+ Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy);
+ Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy);
+ WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr);
+}
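
Illustrative sketch (not part of the patch): the three CFIntegrity variants above reduce to simple integer arithmetic on the incoming pointer, the table base, and the mask computed in runOnModule. A self-contained model with made-up base, size, and alignment values:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t ByteAlignment = 16, LogByteAlignment = 4;
  const uint64_t Size = 5;                       // entries in the jump table
  const uint64_t TableSize = 8;                  // NextPowerOf2(Size)
  const uint64_t Mask =
      ((TableSize << LogByteAlignment) - 1) & -ByteAlignment;  // 0x70
  const uint64_t Start = 0x1000;                 // hypothetical table base

  const uint64_t Valid = Start + 3 * ByteAlignment;  // a real table entry
  const uint64_t Bogus = 0x2468;                      // forged pointer

  // Sub: subtract the base, mask, add the base back. Valid entries pass
  // through unchanged; anything else is forced onto some table slot.
  assert((((Valid - Start) & Mask) + Start) == Valid);
  assert((((Bogus - Start) & Mask) + Start) != Bogus);

  // Ror: rotate the difference right by log2(alignment) and compare against
  // the entry count. Misaligned or out-of-range pointers rotate nonzero bits
  // into the high part and fail the unsigned "< Size" check.
  auto ror = [&](uint64_t Diff) {
    return (Diff >> LogByteAlignment) | (Diff << (64 - LogByteAlignment));
  };
  assert(ror(Valid - Start) < Size);
  assert(ror(Bogus - Start) >= Size);

  // Add: mask the pointer itself, then add the base (this relies on the base
  // being suitably aligned, as it is for the generated tables).
  assert(((Valid & Mask) + Start) == Valid);
  return 0;
}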
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index c3e4f3e..ed40982 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -73,7 +73,7 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
std::unique_ptr<GCStrategy> S = I->instantiate();
S->M = M;
S->Name = Name;
- StrategyMap.GetOrCreateValue(Name).setValue(S.get());
+ StrategyMap[Name] = S.get();
StrategyList.push_back(std::move(S));
return StrategyList.back().get();
}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 1fdff6b..b346657 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -377,7 +378,7 @@ void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
}
void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
- const TargetFrameLowering *TFI = TM->getFrameLowering();
+ const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering();
assert(TFI && "TargetRegisterInfo not available!");
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
@@ -403,7 +404,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
TM = &MF.getTarget();
MMI = &getAnalysis<MachineModuleInfo>();
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
// Find the size of the stack frame.
FI->setFrameSize(MF.getFrameInfo()->getStackSize());
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 027ee38..457d7d6 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -68,6 +68,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "global-merge"
@@ -142,7 +143,7 @@ INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
const DataLayout *DL = TLI->getDataLayout();
// FIXME: Infer the maximum possible offset depending on the actual users
@@ -281,7 +282,7 @@ bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
BSSGlobals;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
const DataLayout *DL = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
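
The GC, GlobalMerge, and most other CodeGen changes in this commit are one mechanical migration: target hooks that used to be fetched straight off TargetMachine are now reached through the subtarget. A minimal sketch of the new pattern (the helper function is hypothetical; the accessors are exactly the ones used in the hunks above):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

static void queryTargetHooks(const MachineFunction &MF) {
  // Old: MF.getTarget().getInstrInfo() / getRegisterInfo() / getFrameLowering()
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  (void)TII; (void)TRI; (void)TFI;
}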
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1502d5f..e84d25d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -30,7 +31,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -161,6 +161,7 @@ namespace {
const TargetLoweringBase *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
MachineRegisterInfo *MRI;
@@ -177,6 +178,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -269,15 +271,16 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
- TLI = MF.getTarget().getTargetLowering();
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TLI = MF.getSubtarget().getTargetLowering();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MRI = &MF.getRegInfo();
const TargetSubtargetInfo &ST =
MF.getTarget().getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
if (!TII) return false;
@@ -286,9 +289,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool BFChange = false;
if (!PreRegAlloc) {
// Tail merge tend to expose more if-conversion opportunities.
- BranchFolder BF(true, false);
- BFChange = BF.OptimizeFunction(MF, TII,
- MF.getTarget().getRegisterInfo(),
+ BranchFolder BF(true, false, *MBFI, *MBPI);
+ BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -420,9 +422,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
- BranchFolder BF(false, false);
- BF.OptimizeFunction(MF, TII,
- MF.getTarget().getRegisterInfo(),
+ BranchFolder BF(false, false, *MBFI, *MBPI);
+ BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -940,9 +941,8 @@ static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
/// to determine if it can be if-converted. If predecessor is already enqueued,
/// dequeue it!
void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
- E = BB->pred_end(); PI != E; ++PI) {
- BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+ for (const auto &Predecessor : BB->predecessors()) {
+ BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()];
if (PBBI.IsDone || PBBI.BB == BB)
continue;
PBBI.IsAnalyzed = false;
@@ -1184,6 +1184,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0;
uint32_t WeightScale = 0;
+
if (HasEarlyExit) {
// Get weights before modifying CvtBBI->BB and BBI.BB.
CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB);
@@ -1192,6 +1193,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB);
SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale);
}
+
if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
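
BranchFolder now takes the block-frequency and branch-probability analyses by reference, so the if-converter both requires and fetches them. A sketch of that wiring in a hypothetical machine-function pass (MyPass is a placeholder, and BranchFolding.h is an internal lib/CodeGen header, so this only builds in-tree):

void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addRequired<MachineBranchProbabilityInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool MyPass::runOnMachineFunction(MachineFunction &MF) {
  const MachineBlockFrequencyInfo *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  const MachineBranchProbabilityInfo *MBPI =
      &getAnalysis<MachineBranchProbabilityInfo>();

  // Same constructor and call shape as the if-converter uses above.
  BranchFolder BF(true, false, *MBFI, *MBPI);
  return BF.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
                             MF.getSubtarget().getRegisterInfo(),
                             getAnalysisIfAvailable<MachineModuleInfo>());
}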
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index f3c8d3d..6a6e15d 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -139,21 +138,16 @@ private:
~InlineSpiller() {}
public:
- InlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm)
- : MF(mf),
- LIS(pass.getAnalysis<LiveIntervals>()),
- LSS(pass.getAnalysis<LiveStacks>()),
- AA(&pass.getAnalysis<AliasAnalysis>()),
- MDT(pass.getAnalysis<MachineDominatorTree>()),
- Loops(pass.getAnalysis<MachineLoopInfo>()),
- VRM(vrm),
- MFI(*mf.getFrameInfo()),
- MRI(mf.getRegInfo()),
- TII(*mf.getTarget().getInstrInfo()),
- TRI(*mf.getTarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
+ InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AliasAnalysis>()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ TII(*mf.getSubtarget().getInstrInfo()),
+ TRI(*mf.getSubtarget().getRegisterInfo()),
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
void spill(LiveRangeEdit &) override;
@@ -190,11 +184,16 @@ private:
}
namespace llvm {
+
+Spiller::~Spiller() { }
+void Spiller::anchor() { }
+
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
return new InlineSpiller(pass, mf, vrm);
}
+
}
//===----------------------------------------------------------------------===//
@@ -824,7 +823,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
WorkList.push_back(std::make_pair(LI, VNI));
do {
std::tie(LI, VNI) = WorkList.pop_back_val();
- if (!UsedValues.insert(VNI))
+ if (!UsedValues.insert(VNI).second)
continue;
if (VNI->isPHIDef()) {
@@ -853,6 +852,15 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
MachineBasicBlock::iterator MI) {
+
+ // Analyze instruction
+ SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+
+ if (!RI.Reads)
+ return false;
+
SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
@@ -883,9 +891,6 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// If the instruction also writes VirtReg.reg, it had better not require the
// same register for uses and defs.
- SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- MIBundleOperands::VirtRegInfo RI =
- MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
if (RI.Tied) {
markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
@@ -939,10 +944,15 @@ void InlineSpiller::reMaterializeAll() {
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
unsigned Reg = RegsToSpill[i];
LiveInterval &LI = LIS.getInterval(Reg);
- for (MachineRegisterInfo::use_bundle_nodbg_iterator
- RI = MRI.use_bundle_nodbg_begin(Reg), E = MRI.use_bundle_nodbg_end();
- RI != E; ) {
- MachineInstr *MI = &*(RI++);
+ for (MachineRegisterInfo::reg_bundle_iterator
+ RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
+ RegI != E; ) {
+ MachineInstr *MI = &*(RegI++);
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue())
+ continue;
+
anyRemat |= reMaterializeFor(LI, MI);
}
}
@@ -1218,12 +1228,16 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Modify DBG_VALUE now that the value is in a spill slot.
bool IsIndirect = MI->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ const MDNode *Var = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr);
+ .addFrameIndex(StackSlot)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
continue;
}
@@ -1363,7 +1377,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
StackInt = nullptr;
DEBUG(dbgs() << "Inline spilling "
- << MRI.getRegClass(edit.getReg())->getName()
+ << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
<< ':' << edit.getParent()
<< "\nFrom original " << PrintReg(Original) << '\n');
assert(edit.getParent().isSpillable() &&
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 91a1da9..1791afb 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_INTERFERENCECACHE
-#define LLVM_CODEGEN_INTERFERENCECACHE
+#ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
+#define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
#include "llvm/CodeGen/LiveIntervalUnion.h"
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index a8b8600..2c95e9e 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -459,9 +459,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(CI->getOperand(0));
break;
+ case Intrinsic::assume:
case Intrinsic::var_annotation:
- break; // Strip out annotate intrinsic
-
+ break; // Strip out these intrinsics
+
case Intrinsic::memcpy: {
Type *IntPtr = DL.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
@@ -527,6 +528,34 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
break;
}
+ case Intrinsic::sin: {
+ ReplaceFPIntrinsicWithCall(CI, "sinf", "sin", "sinl");
+ break;
+ }
+ case Intrinsic::cos: {
+ ReplaceFPIntrinsicWithCall(CI, "cosf", "cos", "cosl");
+ break;
+ }
+ case Intrinsic::floor: {
+ ReplaceFPIntrinsicWithCall(CI, "floorf", "floor", "floorl");
+ break;
+ }
+ case Intrinsic::ceil: {
+ ReplaceFPIntrinsicWithCall(CI, "ceilf", "ceil", "ceill");
+ break;
+ }
+ case Intrinsic::trunc: {
+ ReplaceFPIntrinsicWithCall(CI, "truncf", "trunc", "truncl");
+ break;
+ }
+ case Intrinsic::round: {
+ ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl");
+ break;
+ }
+ case Intrinsic::copysign: {
+ ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl");
+ break;
+ }
case Intrinsic::flt_rounds:
// Lower to "round to the nearest"
if (!CI->getType()->isVoidTy())
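
The new cases above extend the set of FP intrinsics that fall back to libm calls when a target provides no lowering. A standalone model of the naming rule ReplaceFPIntrinsicWithCall applies (this mirrors, not reuses, the helper): float operands get the "f" suffix, double the bare name, and anything wider the "l" variant.

#include <iostream>
#include <string>

static std::string libcallFor(const std::string &Base, const std::string &Ty) {
  if (Ty == "float")  return Base + "f";  // e.g. llvm.sin.f32  -> sinf
  if (Ty == "double") return Base;        // e.g. llvm.sin.f64  -> sin
  return Base + "l";                      // e.g. llvm.sin.f128 -> sinl
}

int main() {
  std::cout << libcallFor("floor", "float") << '\n';     // floorf
  std::cout << libcallFor("round", "double") << '\n';    // round
  std::cout << libcallFor("copysign", "fp128") << '\n';  // copysignl
}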
diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp
deleted file mode 100644
index 96a5389..0000000
--- a/lib/CodeGen/JITCodeEmitter.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/JITCodeEmitter.h"
-
-using namespace llvm;
-
-void JITCodeEmitter::anchor() { }
diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp
index 61ef722..20f775c 100644
--- a/lib/CodeGen/JumpInstrTables.cpp
+++ b/lib/CodeGen/JumpInstrTables.cpp
@@ -163,7 +163,7 @@ void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
FunctionType *OrigFunTy = Target->getFunctionType();
- FunctionType *FunTy = transformType(OrigFunTy);
+ FunctionType *FunTy = transformType(JTType, OrigFunTy);
JumpMap::iterator it = Metadata.find(FunTy);
if (Metadata.end() == it) {
@@ -191,11 +191,12 @@ Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
}
bool JumpInstrTables::hasTable(FunctionType *FunTy) {
- FunctionType *TransTy = transformType(FunTy);
+ FunctionType *TransTy = transformType(JTType, FunTy);
return Metadata.end() != Metadata.find(TransTy);
}
-FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
+FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT,
+ FunctionType *FunTy) {
// Returning nullptr forces all types into the same table, since all types map
// to the same type
Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
@@ -211,7 +212,7 @@ FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
FunctionType *VoidFnTy = FunctionType::get(
Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
- switch (JTType) {
+ switch (JTT) {
case JumpTable::Single:
return FunctionType::get(RetTy, EmptyParams, false);
@@ -251,16 +252,12 @@ FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
}
bool JumpInstrTables::runOnModule(Module &M) {
- // Make sure the module is well-formed, especially with respect to jumptable.
- if (verifyModule(M))
- return false;
-
JITI = &getAnalysis<JumpInstrTableInfo>();
- // Get the set of jumptable-annotated functions.
+ // Get the set of jumptable-annotated functions that have their address taken.
DenseMap<Function *, Function *> Functions;
for (Function &F : M) {
- if (F.hasFnAttribute(Attribute::JumpTable)) {
+ if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) {
assert(F.hasUnnamedAddr() &&
"Attribute 'jumptable' requires 'unnamed_addr'");
Functions[&F] = nullptr;
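
Only functions carrying the jumptable attribute whose address is actually taken receive a table slot now; direct calls keep their original targets. A hedged sketch of producing such a function through the IR API (FnTy and M are placeholders; setUnnamedAddr is shown in its bool form from this revision):

Function *F =
    Function::Create(FnTy, GlobalValue::ExternalLinkage, "target_fn", &M);
F->addFnAttr(Attribute::JumpTable);  // opt in to jump-instruction tables
F->setUnnamedAddr(true);             // required by the assert in runOnModule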
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index df96b94..61face2 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -13,8 +13,10 @@
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
#include "llvm/CodeGen/JumpInstrTables.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -48,8 +50,8 @@ EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
void LLVMTargetMachine::initAsmInfo() {
- MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(),
- TargetTriple);
+ MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
+ *getSubtargetImpl()->getRegisterInfo(), getTargetTriple());
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
// we'll crash later.
@@ -110,9 +112,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI =
- new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(),
- &TM->getTargetLowering()->getObjFileLowering());
+ MachineModuleInfo *MMI = new MachineModuleInfo(
+ *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(),
+ &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering());
PM.add(MMI);
// Set up a MachineFunction for the rest of CodeGen to work on.
@@ -143,8 +145,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
AnalysisID StopAfter) {
// Passes to handle jumptable function annotations. These can't be handled at
// JIT time, so we don't add them directly to addPassesToGenerateCode.
- PM.add(createJumpInstrTableInfoPass());
+ PM.add(createJumpInstrTableInfoPass(
+ getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound()));
PM.add(createJumpInstrTablesPass(Options.JTType));
+ if (Options.FCFI)
+ PM.add(createForwardControlFlowIntegrityPass(
+ Options.JTType, Options.CFIType, Options.CFIEnforcing,
+ Options.getCFIFuncName()));
// Add common CodeGen passes.
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
@@ -165,10 +172,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (Options.MCOptions.MCSaveTempLabels)
Context->setAllowTemporaryLabels(false);
- const MCAsmInfo &MAI = *getMCAsmInfo();
- const MCRegisterInfo &MRI = *getRegisterInfo();
- const MCInstrInfo &MII = *getInstrInfo();
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
+ const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo();
std::unique_ptr<MCStreamer> AsmStreamer;
switch (FileType) {
@@ -201,9 +208,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (!MCE || !MAB)
return true;
- AsmStreamer.reset(getTarget().createMCObjectStreamer(
- getTargetTriple(), *Context, *MAB, Out, MCE, STI,
- Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack));
+ AsmStreamer.reset(
+ getTarget()
+ .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE,
+ STI, Options.MCOptions.MCRelaxAll));
break;
}
case CGFT_Null:
@@ -226,26 +234,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
return false;
}
-/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
-/// get machine code emitted. This uses a JITCodeEmitter object to handle
-/// actually outputting the machine code and resolving things like the address
-/// of functions. This method should return true if machine code emission is
-/// not supported.
-///
-bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
- JITCodeEmitter &JCE,
- bool DisableVerify) {
- // Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr,
- nullptr);
- if (!Context)
- return true;
-
- addCodeEmitter(PM, JCE);
-
- return false; // success!
-}
-
/// addPassesToEmitMC - Add passes to the specified pass manager to get
/// machine code emitted with the MCJIT. This method returns true if machine
/// code is not supported. It fills the MCContext Ctx pointer which can be
@@ -265,19 +253,20 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- const MCRegisterInfo &MRI = *getRegisterInfo();
+ const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
- STI, *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(
+ *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (!MCE || !MAB)
return true;
std::unique_ptr<MCStreamer> AsmStreamer;
- AsmStreamer.reset(getTarget().createMCObjectStreamer(
- getTargetTriple(), *Ctx, *MAB, Out, MCE, STI,
- Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack));
+ AsmStreamer.reset(getTarget()
+ .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB,
+ Out, MCE, STI,
+ Options.MCOptions.MCRelaxAll));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
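
addPassesToEmitFile now reads several TargetOptions fields to decide whether and how to schedule the forward-CFI pass. A sketch of setting them on an existing LLVMTargetMachine before emission (TM is a placeholder and the values are illustrative; the field and enum names are the ones read above):

TargetOptions &Opts = TM->Options;
Opts.JTType = JumpTable::Single;   // one jump table for every signature
Opts.FCFI = true;                  // run ForwardControlFlowIntegrity
Opts.CFIType = CFIntegrity::Sub;   // subtract/mask/add pointer check
Opts.CFIEnforcing = false;         // compare and warn instead of rewriting the pointer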
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index d12c234..b621e3b 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -137,6 +137,8 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
/// not available then create new lexical scope.
LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
+ if (DL.isUnknown())
+ return nullptr;
MDNode *Scope = nullptr;
MDNode *InlinedAt = nullptr;
DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
@@ -172,9 +174,12 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
std::make_tuple(Parent, DIDescriptor(Scope),
nullptr, false)).first;
- if (!Parent && DIDescriptor(Scope).isSubprogram() &&
- DISubprogram(Scope).describes(MF->getFunction()))
+ if (!Parent) {
+ assert(DIDescriptor(Scope).isSubprogram());
+ assert(DISubprogram(Scope).describes(MF->getFunction()));
+ assert(!CurrentFnLexicalScope);
CurrentFnLexicalScope = &I->second;
+ }
return &I->second;
}
@@ -285,7 +290,7 @@ void LexicalScopes::assignInstructionRanges(
/// have machine instructions that belong to lexical scope identified by
/// DebugLoc.
void LexicalScopes::getMachineBasicBlocks(
- DebugLoc DL, SmallPtrSet<const MachineBasicBlock *, 4> &MBBs) {
+ DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
MBBs.clear();
LexicalScope *Scope = getOrCreateLexicalScope(DL);
if (!Scope)
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 7d5646b..1624851 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -39,6 +39,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <memory>
@@ -109,7 +110,8 @@ public:
namespace {
class LDVImpl;
class UserValue {
- const MDNode *variable; ///< The debug info variable we are part of.
+ const MDNode *Variable; ///< The debug info variable we are part of.
+ const MDNode *Expression; ///< Any complex address expression.
unsigned offset; ///< Byte offset into variable.
bool IsIndirect; ///< true if this is a register-indirect+offset value.
DebugLoc dl; ///< The debug location for the variable. This is
@@ -139,11 +141,10 @@ class UserValue {
public:
/// UserValue - Create a new UserValue.
- UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L,
- LocMap::Allocator &alloc)
- : variable(var), offset(o), IsIndirect(i), dl(L), leader(this),
- next(nullptr), locInts(alloc)
- {}
+ UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i,
+ DebugLoc L, LocMap::Allocator &alloc)
+ : Variable(var), Expression(expr), offset(o), IsIndirect(i), dl(L),
+ leader(this), next(nullptr), locInts(alloc) {}
/// getLeader - Get the leader of this value's equivalence class.
UserValue *getLeader() {
@@ -157,8 +158,10 @@ public:
UserValue *getNext() const { return next; }
/// match - Does this UserValue match the parameters?
- bool match(const MDNode *Var, unsigned Offset, bool indirect) const {
- return Var == variable && Offset == offset && indirect == IsIndirect;
+ bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset,
+ bool indirect) const {
+ return Var == Variable && Expr == Expression && Offset == offset &&
+ indirect == IsIndirect;
}
/// merge - Merge equivalence classes.
@@ -267,7 +270,7 @@ public:
LiveIntervals &LIS, const TargetInstrInfo &TRI);
/// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A
- /// variable may have more than one corresponding DBG_VALUE instructions.
+ /// variable may have more than one corresponding DBG_VALUE instruction.
/// Only first one needs DebugLoc to identify variable's lexical scope
/// in source file.
DebugLoc findDebugLoc();
@@ -306,8 +309,8 @@ class LDVImpl {
UVMap userVarMap;
/// getUserValue - Find or create a UserValue.
- UserValue *getUserValue(const MDNode *Var, unsigned Offset,
- bool IsIndirect, DebugLoc DL);
+ UserValue *getUserValue(const MDNode *Var, const MDNode *Expr,
+ unsigned Offset, bool IsIndirect, DebugLoc DL);
/// lookupVirtReg - Find the EC leader for VirtReg or null.
UserValue *lookupVirtReg(unsigned VirtReg);
@@ -344,6 +347,7 @@ public:
"Dbg values are not emitted in LDV");
EmitDone = false;
ModifiedMF = false;
+ LS.reset();
}
/// mapVirtReg - Map virtual register to an equivalence class.
@@ -360,8 +364,8 @@ public:
} // namespace
void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
- DIVariable DV(variable);
- OS << "!\"";
+ DIVariable DV(Variable);
+ OS << "!\"";
DV.printExtendedName(OS);
OS << "\"\t";
if (offset)
@@ -421,19 +425,20 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
LDV->mapVirtReg(locations[i].getReg(), this);
}
-UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
- bool IsIndirect, DebugLoc DL) {
+UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
+ unsigned Offset, bool IsIndirect,
+ DebugLoc DL) {
UserValue *&Leader = userVarMap[Var];
if (Leader) {
UserValue *UV = Leader->getLeader();
Leader = UV;
for (; UV; UV = UV->getNext())
- if (UV->match(Var, Offset, IsIndirect))
+ if (UV->match(Var, Expr, Offset, IsIndirect))
return UV;
}
userValues.push_back(
- make_unique<UserValue>(Var, Offset, IsIndirect, DL, allocator));
+ make_unique<UserValue>(Var, Expr, Offset, IsIndirect, DL, allocator));
UserValue *UV = userValues.back().get();
Leader = UserValue::merge(Leader, UV);
return UV;
@@ -453,7 +458,7 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
// DBG_VALUE loc, offset, variable
- if (MI->getNumOperands() != 3 ||
+ if (MI->getNumOperands() != 4 ||
!(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) ||
!MI->getOperand(2).isMetadata()) {
DEBUG(dbgs() << "Can't handle " << *MI);
@@ -463,9 +468,11 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
// Get or create the UserValue for (variable,offset).
bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *Var = MI->getOperand(2).getMetadata();
+ const MDNode *Var = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
//here.
- UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc());
+ UserValue *UV =
+ getUserValue(Var, Expr, Offset, IsIndirect, MI->getDebugLoc());
UV->addDef(Idx, MI->getOperand(0));
return true;
}
@@ -698,7 +705,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
LIS = &pass.getAnalysis<LiveIntervals>();
MDT = &pass.getAnalysis<MachineDominatorTree>();
- TRI = mf.getTarget().getRegisterInfo();
+ TRI = mf.getSubtarget().getRegisterInfo();
LS.initialize(mf);
DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< mf.getName() << " **********\n");
@@ -710,11 +717,25 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
return Changed;
}
+static void removeDebugValues(MachineFunction &mf) {
+ for (MachineBasicBlock &MBB : mf) {
+ for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ MBBI = MBB.erase(MBBI);
+ }
+ }
+}
+
bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
- if (!FunctionDIs.count(mf.getFunction()))
+ if (!FunctionDIs.count(mf.getFunction())) {
+ removeDebugValues(mf);
return false;
+ }
if (!pImpl)
pImpl = new LDVImpl(this);
return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
@@ -936,10 +957,13 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
if (Loc.isReg())
BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect, Loc.getReg(), offset, variable);
+ IsIndirect, Loc.getReg(), offset, Variable, Expression);
else
BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
- .addOperand(Loc).addImm(offset).addMetadata(variable);
+ .addOperand(Loc)
+ .addImm(offset)
+ .addMetadata(Variable)
+ .addMetadata(Expression);
}
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
@@ -979,7 +1003,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
if (!MF)
return;
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
userValues[i]->rewriteLocations(*VRM, *TRI);
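
DBG_VALUE machine instructions now carry the variable and its complex-address expression as two separate metadata operands, which is why handleDebugValue above checks for four operands. A condensed sketch of the frame-index form (MI, MBB, I, DL, StackSlot, Offset, and TII stand for whatever is in scope at the call site):

const MDNode *Var  = MI->getDebugVariable();    // the DIVariable
const MDNode *Expr = MI->getDebugExpression();  // the new expression operand
BuildMI(*MBB, I, DL, TII.get(TargetOpcode::DBG_VALUE))
    .addFrameIndex(StackSlot)
    .addImm(Offset)
    .addMetadata(Var)
    .addMetadata(Expr);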
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 7ec0d17..7e3b361 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -18,8 +18,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
-#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DebugInfo.h"
@@ -72,4 +72,4 @@ private:
} // namespace llvm
-#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#endif
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index ce8ce96..ddb0032 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -206,7 +206,7 @@ void LiveRange::RenumberValues() {
valnos.clear();
for (const_iterator I = begin(), E = end(); I != E; ++I) {
VNInfo *VNI = I->valno;
- if (!Seen.insert(VNI))
+ if (!Seen.insert(VNI).second)
continue;
assert(!VNI->isUnused() && "Unused valno used by live segment");
VNI->id = (unsigned)valnos.size();
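
This is the same mechanical change made throughout the commit: the small-set insert() now returns an (iterator, inserted) pair, so "was it new?" is read from .second rather than from the return value itself. A minimal sketch (ValNos is a placeholder range):

SmallPtrSet<VNInfo *, 8> Seen;
for (VNInfo *VNI : ValNos) {
  if (!Seen.insert(VNI).second)  // .second is false when VNI was already seen
    continue;
  // ... first visit of VNI ...
}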
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 1559560..1742e63 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -34,8 +34,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
#include <limits>
@@ -110,9 +110,8 @@ void LiveIntervals::releaseMemory() {
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &MF->getRegInfo();
- TM = &fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
AA = &getAnalysis<AliasAnalysis>();
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
@@ -380,12 +379,13 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
(void)ExtVNI;
assert(ExtVNI == VNI && "Unexpected existing value number");
// Is this a PHIDef we haven't seen before?
- if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+ if (!VNI->isPHIDef() || VNI->def != BlockStart ||
+ !UsedPHIs.insert(VNI).second)
continue;
// The PHI is live, make sure the predecessors are live-out.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- if (!LiveOut.insert(*PI))
+ if (!LiveOut.insert(*PI).second)
continue;
SlotIndex Stop = getMBBEndIdx(*PI);
// A predecessor is not required to have a live-out value for a PHI.
@@ -402,7 +402,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Make sure VNI is live-out from the predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- if (!LiveOut.insert(*PI))
+ if (!LiveOut.insert(*PI).second)
continue;
SlotIndex Stop = getMBBEndIdx(*PI);
assert(li->getVNInfoBefore(Stop) == VNI &&
@@ -785,7 +785,7 @@ private:
/// Update a single live range, assuming an instruction has been moved from
/// OldIdx to NewIdx.
void updateRange(LiveRange &LR, unsigned Reg) {
- if (!Updated.insert(&LR))
+ if (!Updated.insert(&LR).second)
return;
DEBUG({
dbgs() << " ";
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 67ab559..345d6c4 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -19,8 +19,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_LIVERANGECALC_H
-#define LLVM_CODEGEN_LIVERANGECALC_H
+#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H
+#define LLVM_LIB_CODEGEN_LIVERANGECALC_H
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index c27d630..a0fb712 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -411,8 +411,11 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
- DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
- << MRI.getRegClass(LI.reg)->getName() << '\n');
+ DEBUG({
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+ << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n';
+ });
VRAI.calculateSpillWeightAndHint(LI);
}
}
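
Register-class names are now obtained through TargetRegisterInfo::getRegClassName() rather than TargetRegisterClass::getName(), here and elsewhere in this commit. The replacement pattern in isolation (Reg, MRI, and TRI are placeholders for whatever is in scope):

dbgs() << "Inflated " << PrintReg(Reg) << " to "
       << TRI->getRegClassName(MRI.getRegClass(Reg)) << '\n';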
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index de2ce22..a8cae08 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -48,7 +47,7 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index b3161a4..8a6ac25 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <limits>
using namespace llvm;
@@ -49,7 +50,7 @@ void LiveStacks::releaseMemory() {
}
bool LiveStacks::runOnMachineFunction(MachineFunction &MF) {
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
// FIXME: No analysis is being done right now. We are relying on the
// register allocators to provide the information.
return false;
@@ -80,7 +81,7 @@ void LiveStacks::print(raw_ostream &OS, const Module*) const {
int Slot = I->first;
const TargetRegisterClass *RC = getIntervalRegClass(Slot);
if (RC)
- OS << " [" << RC->getName() << "]\n";
+ OS << " [" << TRI->getRegClassName(RC) << "]\n";
else
OS << " [Unknown]\n";
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 758b216..c4bca5f 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
@@ -497,17 +496,135 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
}
}
+void LiveVariables::runOnInstr(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &Defs) {
+ assert(!MI->isDebugValue());
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI->getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI->isPHI())
+ NumOperandsToProcess = 1;
+
+ // Clear kill and dead markers. LV will recompute them.
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ MO.setIsKill(false);
+ if (MO.readsReg())
+ UseRegs.push_back(MOReg);
+ } else /*MO.isDef()*/ {
+ MO.setIsDead(false);
+ DefRegs.push_back(MOReg);
+ }
+ }
+
+ MachineBasicBlock *MBB = MI->getParent();
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all masked registers. (Call clobbers).
+ for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
+ HandleRegMask(MI->getOperand(RegMasks[i]));
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegDef(MOReg, MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+}
+
+void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
+ EE = MBB->livein_end(); II != EE; ++II) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(*II, nullptr, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ DistanceMap.insert(std::make_pair(MI, Dist++));
+
+ runOnInstr(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->isLandingPad())
+ continue;
+ for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
+ LE = SuccMBB->livein_end(); LI != LE; ++LI) {
+ unsigned LReg = *LI;
+ if (!TRI->isInAllocatableClass(LReg))
+ // Ignore other live-ins, e.g. those that are live into landing pads.
+ LiveOuts.insert(LReg);
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
+ HandlePhysRegDef(i, nullptr, Defs);
+}
+
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MRI = &mf.getRegInfo();
- TRI = MF->getTarget().getRegisterInfo();
-
- unsigned NumRegs = TRI->getNumRegs();
- PhysRegDef = new MachineInstr*[NumRegs];
- PhysRegUse = new MachineInstr*[NumRegs];
- PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
- std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr);
- std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr);
+ TRI = MF->getSubtarget().getRegisterInfo();
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef.assign(NumRegs, nullptr);
+ PhysRegUse.assign(NumRegs, nullptr);
+ PHIVarInfo.resize(MF->getNumBlockIDs());
PHIJoins.clear();
// FIXME: LiveIntervals will be updated to remove its dependence on
@@ -525,124 +642,11 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock *Entry = MF->begin();
SmallPtrSet<MachineBasicBlock*,16> Visited;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MachineBasicBlock *MBB = *DFI;
-
- // Mark live-in registers as live-in.
- SmallVector<unsigned, 4> Defs;
- for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
- EE = MBB->livein_end(); II != EE; ++II) {
- assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
- "Cannot have a live-in virtual register!");
- HandlePhysRegDef(*II, nullptr, Defs);
- }
-
- // Loop over all of the instructions, processing them.
- DistanceMap.clear();
- unsigned Dist = 0;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- MachineInstr *MI = I;
- if (MI->isDebugValue())
- continue;
- DistanceMap.insert(std::make_pair(MI, Dist++));
-
- // Process all of the operands of the instruction...
- unsigned NumOperandsToProcess = MI->getNumOperands();
-
- // Unless it is a PHI node. In this case, ONLY process the DEF, not any
- // of the uses. They will be handled in other basic blocks.
- if (MI->isPHI())
- NumOperandsToProcess = 1;
-
- // Clear kill and dead markers. LV will recompute them.
- SmallVector<unsigned, 4> UseRegs;
- SmallVector<unsigned, 4> DefRegs;
- SmallVector<unsigned, 1> RegMasks;
- for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isRegMask()) {
- RegMasks.push_back(i);
- continue;
- }
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned MOReg = MO.getReg();
- if (MO.isUse()) {
- MO.setIsKill(false);
- if (MO.readsReg())
- UseRegs.push_back(MOReg);
- } else /*MO.isDef()*/ {
- MO.setIsDead(false);
- DefRegs.push_back(MOReg);
- }
- }
-
- // Process all uses.
- for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
- unsigned MOReg = UseRegs[i];
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
- HandleVirtRegUse(MOReg, MBB, MI);
- else if (!MRI->isReserved(MOReg))
- HandlePhysRegUse(MOReg, MI);
- }
-
- // Process all masked registers. (Call clobbers).
- for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
- HandleRegMask(MI->getOperand(RegMasks[i]));
-
- // Process all defs.
- for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
- unsigned MOReg = DefRegs[i];
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
- HandleVirtRegDef(MOReg, MI);
- else if (!MRI->isReserved(MOReg))
- HandlePhysRegDef(MOReg, MI, Defs);
- }
- UpdatePhysRegDefs(MI, Defs);
- }
-
- // Handle any virtual assignments from PHI nodes which might be at the
- // bottom of this basic block. We check all of our successor blocks to see
- // if they have PHI nodes, and if so, we simulate an assignment at the end
- // of the current block.
- if (!PHIVarInfo[MBB->getNumber()].empty()) {
- SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()];
-
- for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(),
- E = VarInfoVec.end(); I != E; ++I)
- // Mark it alive only in the block we are representing.
- MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
- MBB);
- }
-
- // MachineCSE may CSE instructions which write to non-allocatable physical
- // registers across MBBs. Remember if any reserved register is liveout.
- SmallSet<unsigned, 4> LiveOuts;
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB->isLandingPad())
- continue;
- for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
- LE = SuccMBB->livein_end(); LI != LE; ++LI) {
- unsigned LReg = *LI;
- if (!TRI->isInAllocatableClass(LReg))
- // Ignore other live-ins, e.g. those that are live into landing pads.
- LiveOuts.insert(LReg);
- }
- }
-
- // Loop over PhysRegDef / PhysRegUse, killing any registers that are
- // available at the end of the basic block.
- for (unsigned i = 0; i != NumRegs; ++i)
- if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
- HandlePhysRegDef(i, nullptr, Defs);
+ for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
+ runOnBlock(MBB, NumRegs);
- std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr);
- std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr);
+ PhysRegDef.assign(NumRegs, nullptr);
+ PhysRegUse.assign(NumRegs, nullptr);
}
// Convert and transfer the dead / killed information we have gathered into
@@ -664,9 +668,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
assert(Visited.count(&*i) != 0 && "unreachable basic block found");
#endif
- delete[] PhysRegDef;
- delete[] PhysRegUse;
- delete[] PHIVarInfo;
+ PhysRegDef.clear();
+ PhysRegUse.clear();
+ PHIVarInfo.clear();
return false;
}
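
The per-block body is factored out into runOnInstr/runOnBlock, and the hand-written df_ext_iterator loop becomes a range-based walk. The traversal idiom in isolation (depth_first_ext comes from llvm/ADT/DepthFirstIterator.h):

SmallPtrSet<MachineBasicBlock *, 16> Visited;
for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
  // Visits only blocks reachable from Entry, recording each in Visited so the
  // pass can later assert that no unreachable blocks exist.
  runOnBlock(MBB, NumRegs);
}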
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 36885e8..5c5712f 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -102,7 +103,7 @@ INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc",
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned LocalObjectCount = MFI->getObjectIndexEnd();
// If the target doesn't want/need this pass, or if there are no locals
@@ -183,7 +184,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *MFI = Fn.getFrameInfo();
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int64_t Offset = 0;
@@ -272,8 +273,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
bool UsedBaseReg = false;
MachineFrameInfo *MFI = Fn.getFrameInfo();
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 08fef5f..3058b1a 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -32,6 +32,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -54,7 +55,8 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
const TargetMachine &TM = MF->getTarget();
- const char *Prefix = TM.getDataLayout()->getPrivateGlobalPrefix();
+ const char *Prefix =
+ TM.getSubtargetImpl()->getDataLayout()->getPrivateGlobalPrefix();
CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));
@@ -290,7 +292,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\n';
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (!livein_empty()) {
if (Indexes) OS << '\t';
OS << " Live Ins:";
@@ -359,7 +361,7 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
bool LiveIn = isLiveIn(PhysReg);
iterator I = SkipPHIsAndLabels(begin()), E = end();
MachineRegisterInfo &MRI = getParent()->getRegInfo();
- const TargetInstrInfo &TII = *getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo();
// Look for an existing copy.
if (LiveIn)
@@ -390,7 +392,7 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
}
void MachineBasicBlock::updateTerminator() {
- const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
// A block with no successors has no concerns with fall-through edges.
if (this->succ_empty()) return;
@@ -645,7 +647,7 @@ bool MachineBasicBlock::canFallThrough() {
// Analyze the branches, if any, at the end of the block.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
// If we couldn't analyze the branch, examine the last instruction.
// If the block doesn't end in a known control barrier, assume fallthrough
@@ -690,7 +692,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// We may need to update this's terminator, but we can't do that if
// AnalyzeBranch fails. If this uses a jump table, we won't touch it.
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
@@ -795,7 +797,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
Cond.clear();
- MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, dl);
+ MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond,
+ dl);
if (Indexes) {
for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
@@ -823,7 +826,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addLiveIn(*I);
// Update LiveVariables.
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (LV) {
// Restore kills of virtual registers that were killed by the terminators.
while (!KilledRegs.empty()) {
@@ -903,31 +906,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
if (MachineDominatorTree *MDT =
- P->getAnalysisIfAvailable<MachineDominatorTree>()) {
- // Update dominator information.
- MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ);
-
- bool IsNewIDom = true;
- for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
- PI != E; ++PI) {
- MachineBasicBlock *PredBB = *PI;
- if (PredBB == NMBB)
- continue;
- if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) {
- IsNewIDom = false;
- break;
- }
- }
-
- // We know "this" dominates the newly created basic block.
- MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
-
- // If all the other predecessors of "Succ" are dominated by "Succ" itself
- // then the new block is the new immediate dominator of "Succ". Otherwise,
- // the new block doesn't dominate anything.
- if (IsNewIDom)
- MDT->changeImmediateDominator(SucccDTNode, NewDTNode);
- }
+ P->getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->recordSplitCriticalEdge(this, Succ, NMBB);
if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
if (MachineLoop *TIL = MLI->getLoopFor(this)) {
@@ -1086,7 +1066,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineBasicBlock::succ_iterator SI = succ_begin();
while (SI != succ_end()) {
const MachineBasicBlock *MBB = *SI;
- if (!SeenMBBs.insert(MBB) ||
+ if (!SeenMBBs.insert(MBB).second ||
(MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
// This is a superfluous edge, remove it.
SI = removeSuccessor(SI);
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 74af1e2..08fd200 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -812,7 +813,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
BE = L.block_end();
BI != BE; ++BI) {
BlockChain &Chain = *BlockToChain[*BI];
- if (!UpdatedPreds.insert(&Chain))
+ if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
@@ -913,7 +914,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
MachineBasicBlock *BB = &*FI;
BlockChain &Chain = *BlockToChain[BB];
- if (!UpdatedPreds.insert(&Chain))
+ if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
@@ -1111,8 +1112,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();
- TII = F.getTarget().getInstrInfo();
- TLI = F.getTarget().getTargetLowering();
+ TII = F.getSubtarget().getInstrInfo();
+ TLI = F.getSubtarget().getTargetLowering();
assert(BlockToChain.empty());
buildCFGChains(F);
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index c2ab76e..ae26967 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "machine-cse"
@@ -78,7 +79,8 @@ namespace {
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
- bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool PerformTrivialCopyPropagation(MachineInstr *MI,
+ MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator E) const;
@@ -112,8 +114,12 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
-bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
- MachineBasicBlock *MBB) {
+/// The source register of a COPY machine instruction can be propagated to all
+/// its users, and this propagation could increase the probability of finding
+/// common subexpressions. If the COPY has only one user, the COPY itself can
+/// be removed.
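+/// For example, given "%v1 = COPY %v0" and a user MI of %v1, MI is rewritten
+/// to use %v0 directly; the COPY itself is erased only when %v1 had a single
+/// non-debug use. (Illustrative register names, not taken from a real test.)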
+bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
bool Changed = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -122,10 +128,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (!MRI->hasOneNonDBGUse(Reg))
- // Only coalesce single use copies. This ensure the copy will be
- // deleted.
- continue;
+ bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI->isCopy())
continue;
@@ -153,10 +156,14 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
continue;
DEBUG(dbgs() << "Coalescing: " << *DefMI);
DEBUG(dbgs() << "*** to: " << *MI);
+ // Propagate SrcReg of copies to MI.
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
- DefMI->eraseFromParent();
- ++NumCoalesces;
+ // Coalesce single use copies.
+ if (OnlyOneUse) {
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ }
Changed = true;
}
@@ -453,13 +460,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool FoundCSE = VNT.count(MI);
if (!FoundCSE) {
- // Look for trivial copy coalescing opportunities.
- if (PerformTrivialCoalescing(MI, MBB)) {
+ // Use trivial copy propagation to find more CSE opportunities.
+ if (PerformTrivialCopyPropagation(MI, MBB)) {
Changed = true;
// After coalescing MI itself may become a copy.
if (MI->isCopyLike())
continue;
+
+ // Try again to see if CSE is possible.
FoundCSE = VNT.count(MI);
}
}
@@ -663,8 +672,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipOptnoneFunction(*MF.getFunction()))
return false;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
new file mode 100644
index 0000000..2931258
--- /dev/null
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -0,0 +1,435 @@
+//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The machine combiner pass uses machine trace metrics to ensure the combined
+// instructions do not lengthen the critical path or the resource depth.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "machine-combiner"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumInstCombined, "Number of machineinst combined");
+
+namespace {
+class MachineCombiner : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MCSchedModel SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+
+ TargetSchedModel TSchedModel;
+
+ /// OptSize - True if optimizing for code size.
+ bool OptSize;
+
+public:
+ static char ID;
+ MachineCombiner() : MachineFunctionPass(ID) {
+ initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const char *getPassName() const override { return "Machine InstCombiner"; }
+
+private:
+ bool doSubstitute(unsigned NewSize, unsigned OldSize);
+ bool combineInstructions(MachineBasicBlock *);
+ MachineInstr *getOperandDef(const MachineOperand &MO);
+ unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace);
+ unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace);
+ bool
+ preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
+ bool preservesResourceLen(MachineBasicBlock *MBB,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs);
+ void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
+};
+}
+
+char MachineCombiner::ID = 0;
+char &llvm::MachineCombinerID = MachineCombiner::ID;
+
+INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
+ "Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
+ false, false)
+
+void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
+ MachineInstr *DefInstr = nullptr;
+ // We need a virtual register definition.
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ DefInstr = MRI->getUniqueVRegDef(MO.getReg());
+ // PHIs have no depth, etc.
+ if (DefInstr && DefInstr->isPHI())
+ DefInstr = nullptr;
+ return DefInstr;
+}
+
+/// getDepth - Computes the depth of instructions in the vector \p InsInstrs.
+///
+/// \param InsInstrs is a vector of machine instructions
+/// \param InstrIdxForVirtReg is a dense map of virtual register to index
+/// of defining machine instruction in \p InsInstrs
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Depth of the last instruction in \p InsInstrs ("NewRoot")
+unsigned
+MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace) {
+
+ SmallVector<unsigned, 16> InstrDepth;
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+
+ // For each instruction in the new sequence, compute the depth based on its
+ // operands. Use the trace information when possible. For new operands that
+ // are tracked in the InstrIdxForVirtReg map, the depth is looked up in
+ // InstrDepth.
+ for (auto *InstrPtr : InsInstrs) { // for each Use
+ unsigned IDepth = 0;
+ DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
+ for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = InstrPtr->getOperand(i);
+ // Check for virtual register operand.
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+ if (!MO.isUse())
+ continue;
+ unsigned DepthOp = 0;
+ unsigned LatencyOp = 0;
+ DenseMap<unsigned, unsigned>::iterator II =
+ InstrIdxForVirtReg.find(MO.getReg());
+ if (II != InstrIdxForVirtReg.end()) {
+ // Operand is a new virtual register not in the trace.
+ assert(II->second < InstrDepth.size() && "Bad Index");
+ MachineInstr *DefInstr = InsInstrs[II->second];
+ assert(DefInstr &&
+ "There must be a definition for a new virtual register");
+ DepthOp = InstrDepth[II->second];
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ MachineInstr *DefInstr = getOperandDef(MO);
+ if (DefInstr) {
+ DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ }
+ }
+ IDepth = std::max(IDepth, DepthOp + LatencyOp);
+ }
+ InstrDepth.push_back(IDepth);
+ }
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ return InstrDepth[NewRootIdx];
+}
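+// Illustrative sketch (hypothetical cycle counts, not from any real target):
+// if NewRoot uses two virtual registers, one defined by an in-trace
+// instruction at depth 2 with an operand latency of 3 and one defined by a
+// newly inserted instruction recorded at depth 1 with an operand latency of 4,
+// the returned depth is max(2 + 3, 1 + 4) = 5.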
+
+/// getLatency - Computes instruction latency as the maximum latency over all
+/// of its defined operands.
+///
+/// \param Root is a machine instruction that could be replaced by NewRoot.
+/// It is used to compute more accurate latency information for NewRoot in
+/// case there is a dependent instruction in the same trace (\p BlockTrace).
+/// \param NewRoot is the instruction for which the latency is computed
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Latency of \p NewRoot
+unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace) {
+
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+
+ // Check each definition in NewRoot and compute the latency
+ unsigned NewRootLatency = 0;
+
+ for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = NewRoot->getOperand(i);
+ // Check for virtual register operand.
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+ if (!MO.isDef())
+ continue;
+ // Get the first instruction that uses MO
+ MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
+ RI++;
+ MachineInstr *UseMO = RI->getParent();
+ unsigned LatencyOp = 0;
+ if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
+ LatencyOp = TSchedModel.computeOperandLatency(
+ NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
+ UseMO->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
+ }
+ NewRootLatency = std::max(NewRootLatency, LatencyOp);
+ }
+ return NewRootLatency;
+}
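+// Illustrative sketch (hypothetical latencies): if one definition of NewRoot
+// has an in-trace user with an operand latency of 3 and another definition
+// falls back to a generic instruction latency of 2, NewRootLatency is
+// max(3, 2) = 3.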
+
+/// preservesCriticalPathLen - True when the new instruction sequence does not
+/// lengthen the critical path. The DAGCombine code sequence ends in MI
+/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
+/// necessary condition for the new sequence to replace the old sequence is
+/// that it cannot lengthen the critical path. This is decided by the formula
+/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack).
+/// The slack is the number of cycles Root can be delayed before the critical
+/// path becomes longer.
+bool MachineCombiner::preservesCriticalPathLen(
+ MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+
+ assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+ // NewRoot is the last instruction in the \p InsInstrs vector
+ // Get depth and latency of NewRoot
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ MachineInstr *NewRoot = InsInstrs[NewRootIdx];
+ unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
+
+ // Get depth, latency and slack of Root
+ unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
+ unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
+ unsigned RootSlack = BlockTrace.getInstrSlack(Root);
+
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ dbgs() << " NewRootDepth: " << NewRootDepth
+ << " NewRootLatency: " << NewRootLatency << "\n";
+ dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
+ << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " NewRootDepth + NewRootLatency "
+ << NewRootDepth + NewRootLatency << "\n";
+ dbgs() << " RootDepth + RootLatency + RootSlack "
+ << RootDepth + RootLatency + RootSlack << "\n";);
+
+ /// True when the new sequence does not lengthen the critical path.
+ return ((NewRootDepth + NewRootLatency) <=
+ (RootDepth + RootLatency + RootSlack));
+}
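+// Worked example with hypothetical cycle counts: NewRootDepth = 5 and
+// NewRootLatency = 4 give 9 for the new sequence; RootDepth = 3,
+// RootLatency = 4 and RootSlack = 2 give 9 for the old one. Since 9 <= 9,
+// the substitution is considered safe for the critical path.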
+
+/// Helper routine to convert instructions into their scheduling classes (SC).
+void MachineCombiner::instr2instrSC(
+ SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
+ for (auto *InstrPtr : Instrs) {
+ unsigned Opc = InstrPtr->getOpcode();
+ unsigned Idx = TII->get(Opc).getSchedClass();
+ const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
+ InstrsSC.push_back(SC);
+ }
+}
+/// preservesResourceLen - True when the new instructions do not increase
+/// resource length
+bool MachineCombiner::preservesResourceLen(
+ MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs) {
+
+ // Compute current resource length
+
+ //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+ SmallVector<const MachineBasicBlock *, 1> MBBarr;
+ MBBarr.push_back(MBB);
+ unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
+
+ // Deal with SC rather than Instructions.
+ SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
+ SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
+
+ instr2instrSC(InsInstrs, InsInstrsSC);
+ instr2instrSC(DelInstrs, DelInstrsSC);
+
+ ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
+ ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
+
+ // Compute new resource length
+ unsigned ResLenAfterCombine =
+ BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
+
+ DEBUG(dbgs() << "RESOURCE DATA: \n";
+ dbgs() << " resource len before: " << ResLenBeforeCombine
+ << " after: " << ResLenAfterCombine << "\n";);
+
+ return ResLenAfterCombine <= ResLenBeforeCombine;
+}
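+// Illustrative sketch (hypothetical lengths): if the trace's resource length
+// is 12 cycles before the substitution, and recomputing it with the inserted
+// scheduling classes added and the deleted ones removed yields 11, the check
+// passes because 11 <= 12.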
+
+/// \returns true when the new instruction sequence should be generated
+/// regardless of whether it lengthens the critical path or not.
+bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
+ if (OptSize && (NewSize < OldSize))
+ return true;
+ if (!TSchedModel.hasInstrSchedModel())
+ return true;
+ return false;
+}
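+// For instance, replacing a 3-instruction sequence by a 2-instruction one is
+// always accepted when optimizing for size, and any substitution is accepted
+// when no instruction scheduling model is available, since the latency and
+// resource checks above cannot be evaluated. (Hypothetical sizes.)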
+
+/// combineInstructions - Substitute a slow code sequence with a faster one by
+/// evaluating instruction combining patterns.
+/// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction
+/// combining based on machine trace metrics. Only combines a sequence of
+/// instructions when this neither lengthens the critical path nor increases
+/// resource pressure. When optimizing for code size, always combines when the
+/// new sequence is shorter.
+bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+
+ auto BlockIter = MBB->begin();
+
+ while (BlockIter != MBB->end()) {
+ auto &MI = *BlockIter++;
+
+ DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
+ SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
+ // The motivating example is:
+ //
+ // MUL Other MUL_op1 MUL_op2 Other
+ // \ / \ | /
+ // ADD/SUB => MADD/MSUB
+ // (=Root) (=NewRoot)
+
+ // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
+ // usually beneficial for code size it unfortunately can hurt performance
+ // when the ADD is on the critical path, but the MUL is not. With the
+ // substitution the MUL becomes part of the critical path (in form of the
+ // MADD) and can lengthen it on architectures where the MADD latency is
+ // longer than the ADD latency.
+ //
+ // For each instruction we check if it can be the root of a combiner
+ // pattern. Then for each pattern the new code sequence in the form of MIs is
+ // generated and evaluated. When the efficiency criteria (don't lengthen the
+ // critical path, don't use more resources) are met, the new sequence gets
+ // hooked up into the basic block before the old sequence is removed.
+ //
+ // The algorithm does not try to evaluate all patterns and pick the best.
+ // This is only an artificial restriction though. In practice there is
+ // mostly one pattern and hasPattern() can order patterns based on an
+ // internal cost heuristic.
+
+ if (TII->hasPattern(MI, Pattern)) {
+ for (auto P : Pattern) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
+ TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (!InsInstrs.size())
+ continue;
+ // Substitute when we optimize for code size and the new sequence has
+ // fewer instructions OR
+ // the new sequence neither lengthens the critical path nor increases
+ // resource pressure.
+ if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
+ (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
+ InstrIdxForVirtReg) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator) & MI,
+ (MachineInstr *)InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+
+ Changed = true;
+ ++NumInstCombined;
+
+ Traces->invalidate(MBB);
+ Traces->verifyAnalysis();
+ // Eagerly stop after the first pattern fired
+ break;
+ } else {
+ // Clean up the instructions of the alternative code sequence. There is
+ // no use for them.
+ for (auto *InstrPtr : InsInstrs) {
+ MachineFunction *MF = MBB->getParent();
+ MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
+ }
+ }
+ InstrIdxForVirtReg.clear();
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &STI =
+ MF.getTarget().getSubtarget<TargetSubtargetInfo>();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ SchedModel = STI.getSchedModel();
+ TSchedModel.init(SchedModel, &STI, TII);
+ MRI = &MF.getRegInfo();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = nullptr;
+
+ OptSize = MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+ DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
+ if (!TII->useMachineCombiner()) {
+ DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
+ return false;
+ }
+
+ bool Changed = false;
+
+ // Try to combine instructions.
+ for (auto &MBB : MF)
+ Changed |= combineInstructions(&MBB);
+
+ return Changed;
+}
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 3119a35..cbd6272 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "codegen-cp"
@@ -335,8 +336,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp
new file mode 100644
index 0000000..0bee846
--- /dev/null
+++ b/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -0,0 +1,54 @@
+//===- MachineDominanceFrontier.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/Analysis/DominanceFrontierImpl.h"
+#include "llvm/CodeGen/Passes.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+template class DominanceFrontierBase<MachineBasicBlock>;
+template class ForwardDominanceFrontierBase<MachineBasicBlock>;
+}
+
+
+char MachineDominanceFrontier::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier",
+ "Machine Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier",
+ "Machine Dominance Frontier Construction", true, true)
+
+MachineDominanceFrontier::MachineDominanceFrontier()
+ : MachineFunctionPass(ID),
+ Base() {
+ initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry());
+}
+
+char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID;
+
+bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Base.analyze(getAnalysis<MachineDominatorTree>().getBase());
+ return false;
+}
+
+void MachineDominanceFrontier::releaseMemory() {
+ Base.releaseMemory();
+}
+
+void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 04c8ecb..df60cf3 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -35,6 +35,8 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ CriticalEdgesToSplit.clear();
+ NewBBs.clear();
DT->recalculate(F);
return false;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 6138aef..8a2b610 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -36,6 +36,7 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "codegen"
@@ -52,17 +53,19 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
}
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
- unsigned FunctionNum, MachineModuleInfo &mmi,
- GCModuleInfo* gmi)
- : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
- if (TM.getRegisterInfo())
- RegInfo = new (Allocator) MachineRegisterInfo(TM);
+ unsigned FunctionNum, MachineModuleInfo &mmi)
+ : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()),
+ MMI(mmi) {
+ if (STI->getRegisterInfo())
+ RegInfo = new (Allocator) MachineRegisterInfo(this);
else
RegInfo = nullptr;
MFInfo = nullptr;
- FrameInfo =
- new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack"));
+ FrameInfo = new (Allocator)
+ MachineFrameInfo(STI->getFrameLowering()->getStackAlignment(),
+ STI->getFrameLowering()->isStackRealignable(),
+ !F->hasFnAttribute("no-realign-stack"));
if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::StackAlignment))
@@ -70,13 +73,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
getStackAlignment(AttributeSet::FunctionIndex));
ConstantPool = new (Allocator) MachineConstantPool(TM);
- Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
+ Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
- TM.getTargetLowering()->getPrefFunctionAlignment());
+ STI->getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
JumpTableInfo = nullptr;
@@ -229,10 +232,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *
MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
uint64_t s, unsigned base_alignment,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
const MDNode *Ranges) {
return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
- TBAAInfo, Ranges);
+ AAInfo, Ranges);
}
MachineMemOperand *
@@ -243,12 +246,12 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MachineMemOperand(MachinePointerInfo(MMO->getValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size,
- MMO->getBaseAlignment(), nullptr);
+ MMO->getBaseAlignment());
return new (Allocator)
MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size,
- MMO->getBaseAlignment(), nullptr);
+ MMO->getBaseAlignment());
}
MachineInstr::mmo_iterator
@@ -279,7 +282,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOStore,
(*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getTBAAInfo());
+ (*I)->getAAInfo());
Result[Index] = JustLoad;
}
++Index;
@@ -311,7 +314,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
(*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getTBAAInfo());
+ (*I)->getAAInfo());
Result[Index] = JustStore;
}
++Index;
@@ -350,7 +353,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
// Print Constant Pool
ConstantPool->print(OS);
- const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = getSubtarget().getRegisterInfo();
if (RegInfo && !RegInfo->livein_empty()) {
OS << "Function Live Ins: ";
@@ -459,7 +462,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
/// normal 'L' label is returned.
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
- const DataLayout *DL = getTarget().getDataLayout();
+ const DataLayout *DL = getSubtarget().getDataLayout();
assert(JumpTableInfo && "No jump tables");
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
@@ -474,7 +477,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
/// base.
MCSymbol *MachineFunction::getPICBaseSymbol() const {
- const DataLayout *DL = getTarget().getDataLayout();
+ const DataLayout *DL = getSubtarget().getDataLayout();
return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
Twine(getFunctionNumber())+"$pb");
}
@@ -483,15 +486,11 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
// MachineFrameInfo implementation
//===----------------------------------------------------------------------===//
-const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const {
- return TM.getFrameLowering();
-}
-
/// ensureMaxAlignment - Make sure the function is at least Align bytes
/// aligned.
void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
- if (!getFrameLowering()->isStackRealignable() || !RealignOption)
- assert(Align <= getFrameLowering()->getStackAlignment() &&
+ if (!StackRealignable || !RealignOption)
+ assert(Align <= StackAlignment &&
"For targets without stack realignment, Align is out of limit!");
if (MaxAlignment < Align) MaxAlignment = Align;
}
@@ -513,11 +512,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
bool isSS, const AllocaInst *Alloca) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
- Alignment =
- clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Alignment, getFrameLowering()->getStackAlignment());
- Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca));
+ Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
+ Alignment, StackAlignment);
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
+ !isSS));
int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
ensureMaxAlignment(Alignment);
@@ -530,9 +528,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
///
int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
unsigned Alignment) {
- Alignment = clampStackAlignment(
- !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
- getFrameLowering()->getStackAlignment());
+ Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
+ Alignment, StackAlignment);
CreateStackObject(Size, Alignment, true);
int Index = (int)Objects.size() - NumFixedObjects - 1;
ensureMaxAlignment(Alignment);
@@ -547,10 +544,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
const AllocaInst *Alloca) {
HasVarSizedObjects = true;
- Alignment = clampStackAlignment(
- !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
- getFrameLowering()->getStackAlignment());
- Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca));
+ Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
+ Alignment, StackAlignment);
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
ensureMaxAlignment(Alignment);
return (int)Objects.size()-NumFixedObjects-1;
}
@@ -561,20 +557,18 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
/// index with a negative value.
///
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable) {
+ bool Immutable, bool isAliased) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
// The alignment of the frame index can be determined from its offset from
// the incoming frame position. If the frame object is at offset 32 and
// the stack is guaranteed to be 16-byte aligned, then we know that the
// object is 16-byte aligned.
- unsigned StackAlign = getFrameLowering()->getStackAlignment();
- unsigned Align = MinAlign(SPOffset, StackAlign);
- Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Align, getFrameLowering()->getStackAlignment());
+ unsigned Align = MinAlign(SPOffset, StackAlignment);
+ Align = clampStackAlignment(!StackRealignable || !RealignOption, Align,
+ StackAlignment);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ false,
- /*Alloca*/ nullptr));
+ /*Alloca*/ nullptr, isAliased));
return -++NumFixedObjects;
}
@@ -582,15 +576,14 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
/// on the stack. Returns an index with a negative value.
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
int64_t SPOffset) {
- unsigned StackAlign = getFrameLowering()->getStackAlignment();
- unsigned Align = MinAlign(SPOffset, StackAlign);
- Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Align, getFrameLowering()->getStackAlignment());
+ unsigned Align = MinAlign(SPOffset, StackAlignment);
+ Align = clampStackAlignment(!StackRealignable || !RealignOption, Align,
+ StackAlignment);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
/*Immutable*/ true,
/*isSS*/ true,
- /*Alloca*/ nullptr));
+ /*Alloca*/ nullptr,
+ /*isAliased*/ false));
return -++NumFixedObjects;
}
@@ -600,7 +593,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
const MachineFunction *MF = MBB->getParent();
assert(MF && "MBB must be part of a MachineFunction");
const TargetMachine &TM = MF->getTarget();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
BitVector BV(TRI->getNumRegs());
// Before CSI is calculated, no registers are considered pristine. They can be
@@ -625,8 +618,8 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
}
unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
unsigned MaxAlign = getMaxAlignment();
int Offset = 0;
@@ -676,7 +669,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
- const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
OS << "Frame Objects:\n";
@@ -820,7 +813,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
void MachineConstantPoolValue::anchor() { }
const DataLayout *MachineConstantPool::getDataLayout() const {
- return TM.getDataLayout();
+ return TM.getSubtargetImpl()->getDataLayout();
}
Type *MachineConstantPoolEntry::getType() const {
@@ -836,6 +829,37 @@ unsigned MachineConstantPoolEntry::getRelocationInfo() const {
return Val.ConstVal->getRelocationInfo();
}
+SectionKind
+MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
+ SectionKind Kind;
+ switch (getRelocationInfo()) {
+ default:
+ llvm_unreachable("Unknown section kind");
+ case 2:
+ Kind = SectionKind::getReadOnlyWithRel();
+ break;
+ case 1:
+ Kind = SectionKind::getReadOnlyWithRelLocal();
+ break;
+ case 0:
+ switch (DL->getTypeAllocSize(getType())) {
+ case 4:
+ Kind = SectionKind::getMergeableConst4();
+ break;
+ case 8:
+ Kind = SectionKind::getMergeableConst8();
+ break;
+ case 16:
+ Kind = SectionKind::getMergeableConst16();
+ break;
+ default:
+ Kind = SectionKind::getMergeableConst();
+ break;
+ }
+ }
+ return Kind;
+}
+
MachineConstantPool::~MachineConstantPool() {
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
if (Constants[i].isMachineConstantPoolEntry())
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 46cd60a..f6f34ba 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -46,8 +46,7 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) {
bool MachineFunctionAnalysis::runOnFunction(Function &F) {
assert(!MF && "MachineFunctionAnalysis already initialized!");
MF = new MachineFunction(&F, TM, NextFnNum++,
- getAnalysis<MachineModuleInfo>(),
- getAnalysisIfAvailable<GCModuleInfo>());
+ getAnalysis<MachineModuleInfo>());
return false;
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 5122165..7ad0d94 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -39,6 +39,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -105,23 +106,41 @@ void MachineOperand::setIsDef(bool Val) {
IsDef = Val;
}
+// If this operand is currently a register operand, and if this is in a
+// function, deregister the operand from the register's use/def list.
+void MachineOperand::removeRegFromUses() {
+ if (!isReg() || !isOnRegUseList())
+ return;
+
+ if (MachineInstr *MI = getParent()) {
+ if (MachineBasicBlock *MBB = MI->getParent()) {
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
+ }
+ }
+}
+
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
- // If this operand is currently a register operand, and if this is in a
- // function, deregister the operand from the register's use/def list.
- if (isReg() && isOnRegUseList())
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent())
- MF->getRegInfo().removeRegOperandFromUseList(this);
+
+ removeRegFromUses();
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
}
+void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+ removeRegFromUses();
+
+ OpKind = MO_FPImmediate;
+ Contents.CFP = FPImm;
+}
+
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be an register already,
/// the setReg method should be used.
@@ -265,7 +284,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (const MachineBasicBlock *MBB = MI->getParent())
if (const MachineFunction *MF = MBB->getParent())
TM = &MF->getTarget();
- const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr;
+ const TargetRegisterInfo *TRI =
+ TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
switch (getType()) {
case MachineOperand::MO_Register:
@@ -429,11 +449,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
uint64_t s, unsigned int a,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
const MDNode *Ranges)
: PtrInfo(ptrinfo), Size(s),
Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
- TBAAInfo(TBAAInfo), Ranges(Ranges) {
+ AAInfo(AAInfo), Ranges(Ranges) {
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) &&
"invalid pointer value");
@@ -514,7 +534,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
OS << "(align=" << MMO.getAlignment() << ")";
// Print TBAA info.
- if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ if (const MDNode *TBAAInfo = MMO.getAAInfo().TBAA) {
OS << "(tbaa=";
if (TBAAInfo->getNumOperands() > 0)
TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
@@ -523,6 +543,34 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
OS << ")";
}
+ // Print AA scope info.
+ if (const MDNode *ScopeInfo = MMO.getAAInfo().Scope) {
+ OS << "(alias.scope=";
+ if (ScopeInfo->getNumOperands() > 0)
+ for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
+ ScopeInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false);
+ if (i != ie-1)
+ OS << ",";
+ }
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print AA noalias scope info.
+ if (const MDNode *NoAliasInfo = MMO.getAAInfo().NoAlias) {
+ OS << "(noalias=";
+ if (NoAliasInfo->getNumOperands() > 0)
+ for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
+ NoAliasInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false);
+ if (i != ie-1)
+ OS << ",";
+ }
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
// Print nontemporal info.
if (MMO.isNonTemporal())
OS << "(nontemporal)";
@@ -865,6 +913,27 @@ void MachineInstr::eraseFromParent() {
getParent()->erase(this);
}
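+/// Erase this instruction and, for every virtual register it defines, mark any
+/// remaining uses in DBG_VALUE instructions as undef so that stale debug info
+/// does not reference the deleted definition.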
+void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
+ assert(getParent() && "Not embedded in a basic block!");
+ MachineBasicBlock *MBB = getParent();
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Not embedded in a function!");
+
+ MachineInstr *MI = (MachineInstr *)this;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MRI.markUsesInDebugValueAsUndef(Reg);
+ }
+ MI->eraseFromParent();
+}
+
void MachineInstr::eraseFromBundle() {
assert(getParent() && "Not embedded in a basic block!");
getParent()->erase_instr(this);
@@ -1379,7 +1448,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If we have an AliasAnalysis, ask it whether the memory is constant.
if (AA && AA->pointsToConstantMemory(
AliasAnalysis::Location(V, (*I)->getSize(),
- (*I)->getTBAAInfo())))
+ (*I)->getAAInfo())))
continue;
}
@@ -1489,8 +1558,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
OS << " = ";
// Print the opcode name.
- if (TM && TM->getInstrInfo())
- OS << TM->getInstrInfo()->getName(getOpcode());
+ if (TM && TM->getSubtargetImpl()->getInstrInfo())
+ OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode());
else
OS << "UNKNOWN";
@@ -1538,17 +1607,17 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
// LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MF && isCall() &&
+ if (MRI && isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- if (MRI.use_empty(Reg)) {
+ if (MRI->use_empty(Reg)) {
bool HasAliasLive = false;
- for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
+ for (MCRegAliasIterator AI(
+ Reg, TM->getSubtargetImpl()->getRegisterInfo(), true);
AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
- if (!MRI.use_empty(AliasReg)) {
+ if (!MRI->use_empty(AliasReg)) {
HasAliasLive = true;
break;
}
@@ -1573,12 +1642,16 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (isDebugValue() && MO.isMetadata()) {
// Pretty print DBG_VALUE instructions.
const MDNode *MD = MO.getMetadata();
- if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2)))
- OS << "!\"" << MDS->getString() << '\"';
+ DIDescriptor DI(MD);
+ DIVariable DIV(MD);
+
+ if (DI.isVariable() && !DIV.getName().empty())
+ OS << "!\"" << DIV.getName() << '\"';
else
MO.print(OS, TM);
} else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
- OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+ OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName(
+ MO.getImm());
} else if (i == AsmDescOp && MO.isImm()) {
// Pretty print the inline asm operand descriptor.
OS << '$' << AsmOpCount++;
@@ -1595,9 +1668,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
unsigned RCID = 0;
if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
- if (TM)
- OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName();
- else
+ if (TM) {
+ const TargetRegisterInfo *TRI =
+ TM->getSubtargetImpl()->getRegisterInfo();
+ OS << ':'
+ << TRI->getRegClassName(TRI->getRegClass(RCID));
+ } else
OS << ":RC" << RCID;
}
@@ -1646,7 +1722,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (!HaveSemi) OS << ";"; HaveSemi = true;
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
+ OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC)
+ << ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClass(VirtRegs[j]) != RC) {
++j;
@@ -1672,6 +1749,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
OS << " ]";
}
}
+ if (isIndirectDebugValue())
+ OS << " indirect";
} else if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";";
OS << " dbg:";
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 962169e..0690f08 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -16,6 +16,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
namespace {
@@ -103,12 +104,12 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
assert(FirstMI != LastMI && "Empty bundle?");
MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
- const TargetMachine &TM = MBB.getParent()->getTarget();
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(),
- TII->get(TargetOpcode::BUNDLE));
+ MachineInstrBuilder MIB =
+ BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE));
Bundle.prepend(MIB);
SmallVector<unsigned, 32> LocalDefs;
@@ -140,7 +141,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
// Internal def is now killed.
KilledDefSet.insert(Reg);
} else {
- if (ExternUseSet.insert(Reg)) {
+ if (ExternUseSet.insert(Reg).second) {
ExternUses.push_back(Reg);
if (MO.isUndef())
UndefUseSet.insert(Reg);
@@ -157,7 +158,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!Reg)
continue;
- if (LocalDefSet.insert(Reg)) {
+ if (LocalDefSet.insert(Reg).second) {
LocalDefs.push_back(Reg);
if (MO.isDead()) {
DeadDefSet.insert(Reg);
@@ -173,7 +174,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (!MO.isDead()) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
- if (LocalDefSet.insert(SubReg))
+ if (LocalDefSet.insert(SubReg).second)
LocalDefs.push_back(SubReg);
}
}
@@ -185,7 +186,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
SmallSet<unsigned, 32> Added;
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
- if (Added.insert(Reg)) {
+ if (Added.insert(Reg).second) {
// If it's not live beyond end of the bundle, mark it dead.
bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 94cdab5..2ab0467 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -39,6 +39,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "machine-licm"
@@ -61,7 +62,6 @@ STATISTIC(NumPostRAHoisted,
namespace {
class MachineLICM : public MachineFunctionPass {
- const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetLoweringBase *TLI;
const TargetRegisterInfo *TRI;
@@ -142,9 +142,6 @@ namespace {
RegPressure.clear();
RegLimit.clear();
BackTrace.clear();
- for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
- CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
- CI->second.clear();
CSEMap.clear();
}
@@ -324,13 +321,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
return false;
Changed = FirstInLoop = false;
- TM = &MF.getTarget();
- TII = TM->getInstrInfo();
- TLI = TM->getTargetLowering();
- TRI = TM->getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TLI = MF.getSubtarget().getTargetLowering();
+ TRI = MF.getSubtarget().getRegisterInfo();
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
- InstrItins = TM->getInstrItineraryData();
+ InstrItins = MF.getSubtarget().getInstrItineraryData();
PreRegAlloc = MRI->isSSA();
@@ -822,7 +818,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- bool isNew = RegSeen.insert(Reg);
+ bool isNew = RegSeen.insert(Reg).second;
unsigned RCId, RCCost;
getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
if (MO.isDef())
@@ -854,7 +850,7 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- bool isNew = RegSeen.insert(Reg);
+ bool isNew = RegSeen.insert(Reg).second;
if (MO.isDef())
Defs.push_back(Reg);
else if (!isNew && isOperandKill(MO, MRI)) {
@@ -1299,15 +1295,7 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
const MachineInstr *MI = &*I;
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
- CI = CSEMap.find(Opcode);
- if (CI != CSEMap.end())
- CI->second.push_back(MI);
- else {
- std::vector<const MachineInstr*> CSEMIs;
- CSEMIs.push_back(MI);
- CSEMap.insert(std::make_pair(Opcode, CSEMIs));
- }
+ CSEMap[Opcode].push_back(MI);
}
}
@@ -1447,11 +1435,8 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Add to the CSE map.
if (CI != CSEMap.end())
CI->second.push_back(MI);
- else {
- std::vector<const MachineInstr*> CSEMIs;
- CSEMIs.push_back(MI);
- CSEMap.insert(std::make_pair(Opcode, CSEMIs));
- }
+ else
+ CSEMap[Opcode].push_back(MI);
}
++NumHoisted;
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 4976e35..eb3c0bf 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -270,7 +270,6 @@ MachineModuleInfo::~MachineModuleInfo() {
bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
- CompactUnwindEncoding = 0;
CurCallSite = 0;
CallsEHReturn = 0;
CallsUnwindInit = 0;
@@ -312,7 +311,6 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = 0;
CallsUnwindInit = 0;
- CompactUnwindEncoding = 0;
VariableDbgInfos.clear();
}
@@ -429,7 +427,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
///
void MachineModuleInfo::
addCatchTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalVariable *> TyInfo) {
+ ArrayRef<const GlobalValue *> TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
for (unsigned N = TyInfo.size(); N; --N)
LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
@@ -439,7 +437,7 @@ addCatchTypeInfo(MachineBasicBlock *LandingPad,
///
void MachineModuleInfo::
addFilterTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalVariable *> TyInfo) {
+ ArrayRef<const GlobalValue *> TyInfo) {
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
std::vector<unsigned> IdsInFilter(TyInfo.size());
for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
@@ -508,7 +506,7 @@ void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
/// function wide.
-unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) {
for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
if (TypeInfos[i] == TI) return i + 1;
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
new file mode 100644
index 0000000..5a5035e
--- /dev/null
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -0,0 +1,140 @@
+
+#include "llvm/CodeGen/MachineRegionInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
+
+#define DEBUG_TYPE "region"
+
+using namespace llvm;
+
+STATISTIC(numMachineRegions, "The # of machine regions");
+STATISTIC(numMachineSimpleRegions, "The # of simple machine regions");
+
+namespace llvm {
+template class RegionBase<RegionTraits<MachineFunction>>;
+template class RegionNodeBase<RegionTraits<MachineFunction>>;
+template class RegionInfoBase<RegionTraits<MachineFunction>>;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineRegion implementation
+//
+
+MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
+ MachineRegionInfo* RI,
+ MachineDominatorTree *DT, MachineRegion *Parent) :
+ RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {
+
+}
+
+MachineRegion::~MachineRegion() { }
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfo implementation
+//
+
+MachineRegionInfo::MachineRegionInfo() :
+ RegionInfoBase<RegionTraits<MachineFunction>>() {
+
+}
+
+MachineRegionInfo::~MachineRegionInfo() {
+
+}
+
+void MachineRegionInfo::updateStatistics(MachineRegion *R) {
+ ++numMachineRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple())
+ ++numMachineSimpleRegions;
+}
+
+void MachineRegionInfo::recalculate(MachineFunction &F,
+ MachineDominatorTree *DT_,
+ MachinePostDominatorTree *PDT_,
+ MachineDominanceFrontier *DF_) {
+ DT = DT_;
+ PDT = PDT_;
+ DF = DF_;
+
+ MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&F);
+
+ TopLevelRegion = new MachineRegion(Entry, nullptr, this, DT, nullptr);
+ updateStatistics(TopLevelRegion);
+ calculate(F);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfoPass implementation
+//
+
+MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) {
+ initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+MachineRegionInfoPass::~MachineRegionInfoPass() {
+
+}
+
+bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
+ releaseMemory();
+
+ auto DT = &getAnalysis<MachineDominatorTree>();
+ auto PDT = &getAnalysis<MachinePostDominatorTree>();
+ auto DF = &getAnalysis<MachineDominanceFrontier>();
+
+ RI.recalculate(F, DT, PDT, DF);
+ return false;
+}
+
+void MachineRegionInfoPass::releaseMemory() {
+ RI.releaseMemory();
+}
+
+void MachineRegionInfoPass::verifyAnalysis() const {
+ // Only do verification when the user asks for it; otherwise this expensive
+ // check will be invoked by PMDataManager::verifyPreservedAnalysis when
+ // a region pass (marked PreservedAll) finishes.
+ if (MachineRegionInfo::VerifyRegionInfo)
+ RI.verifyAnalysis();
+}
+
+void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
+ RI.print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineRegionInfoPass::dump() const {
+ RI.dump();
+}
+#endif
+
+char MachineRegionInfoPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(MachineRegionInfoPass, "regions",
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file so that they can be used from
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// link-time optimization.
+
+namespace llvm {
+ FunctionPass *createMachineRegionInfoPass() {
+ return new MachineRegionInfoPass();
+ }
+}
+
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index f560259..e9612f3 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -16,28 +16,22 @@
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
// Pin the vtable to this file.
void MachineRegisterInfo::Delegate::anchor() {}
-MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM)
- : TM(TM), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) {
+MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
+ : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs());
// Create the physreg use/def lists.
- PhysRegUseDefLists =
- new MachineOperand*[getTargetRegisterInfo()->getNumRegs()];
- memset(PhysRegUseDefLists, 0,
- sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs());
-}
-
-MachineRegisterInfo::~MachineRegisterInfo() {
- delete [] PhysRegUseDefLists;
+ PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr);
}
/// setRegClass - Set the register class of the specified virtual register.
@@ -67,7 +61,7 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg,
bool
MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
- const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterClass *NewRC =
getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC);
@@ -283,18 +277,25 @@ void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
+/// If ToReg is a physical register, any sub-register index on the operand is
+/// applied to it to obtain the final/proper physical register.
void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
assert(FromReg != ToReg && "Cannot replace a reg with itself");
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+
// TODO: This could be more efficient by bulk changing the operands.
for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
MachineOperand &O = *I;
++I;
- O.setReg(ToReg);
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ O.substPhysReg(ToReg, *TRI);
+ } else {
+ O.setReg(ToReg);
+ }
}
}
-
/// getVRegDef - Return the machine instr that defines the specified virtual
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
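The MachineRegisterInfo constructor change above replaces a manually allocated array (and the destructor that freed it) with a container that owns its storage. A small sketch of the same refactoring on a made-up class, assuming nothing about the real MachineOperand layout:

    #include <vector>

    struct Operand;  // opaque element type, stand-in for a real operand class

    // Before: raw array, explicit delete[] in the destructor.
    class UseDefListsOld {
      Operand **Lists;
    public:
      explicit UseDefListsOld(unsigned NumRegs) {
        Lists = new Operand *[NumRegs]();  // value-initialized to nullptr
      }
      ~UseDefListsOld() { delete[] Lists; }
    };

    // After: std::vector owns the storage, so the destructor disappears.
    class UseDefListsNew {
      std::vector<Operand *> Lists;
    public:
      explicit UseDefListsNew(unsigned NumRegs) : Lists(NumRegs, nullptr) {}
    };

    int main() {
      UseDefListsOld Old(64);
      UseDefListsNew New(64);
      (void)Old; (void)New;
      return 0;
    }

Resizing a vector of pointers to nullptr gives the same zero-initialized state the old memset provided, without any manual memory management.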
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index d9173a2..71a6eba 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -24,8 +24,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
using namespace llvm;
@@ -39,7 +39,7 @@ static AvailableValsTy &getAvailableVals(void *AV) {
MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
SmallVectorImpl<MachineInstr*> *NewPHI)
: AV(nullptr), InsertedPHIs(NewPHI) {
- TII = MF.getTarget().getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
}
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 44191f7..261942f 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -40,6 +40,9 @@ cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
cl::desc("Force bottom-up list scheduling"));
+cl::opt<bool>
+DumpCriticalPathLength("misched-dcpl", cl::Hidden,
+ cl::desc("Print critical path length to stdout"));
}
#ifndef NDEBUG
@@ -378,7 +381,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
bool IsPostRA = Scheduler.isPostRA();
// Visit all machine basic blocks.
@@ -451,6 +454,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
else dbgs() << "End";
dbgs() << " RegionInstrs: " << NumRegionInstrs
<< " Remaining: " << RemainingInstrs << "\n");
+ if (DumpCriticalPathLength) {
+ errs() << MF->getName();
+ errs() << ":BB# " << MBB->getNumber();
+ errs() << " " << MBB->getName() << " \n";
+ }
// Schedule a region: possibly reorder instructions.
// This invalidates 'RegionEnd' and 'I'.
@@ -2355,14 +2363,15 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
// Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
- const TargetMachine &TM = DAG->MF.getTarget();
if (!Top.HazardRec) {
Top.HazardRec =
- TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
}
if (!Bot.HazardRec) {
Bot.HazardRec =
- TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
}
}
@@ -2370,8 +2379,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) {
- const TargetMachine &TM = Context->MF->getTarget();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const MachineFunction &MF = *Begin->getParent()->getParent();
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
// Avoid setting up the register pressure tracker for small regions to save
// compile time. As a rough heuristic, only track pressure when the number of
@@ -2391,8 +2400,8 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
RegionPolicy.OnlyBottomUp = true;
// Allow the subtarget to override default policy.
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
+ NumRegionInstrs);
// After subtarget overrides, apply command line options.
if (!EnableRegPressure)
@@ -2460,7 +2469,10 @@ void GenericScheduler::registerRoots() {
if ((*I)->getDepth() > Rem.CriticalPath)
Rem.CriticalPath = (*I)->getDepth();
}
- DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+ DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
+ }
if (EnableCyclicPath) {
Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
@@ -2482,8 +2494,8 @@ static bool tryPressure(const PressureChange &TryP,
}
// If one candidate decreases and the other increases, go with it.
// Invalid candidates have UnitInc==0.
- if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
- Reason)) {
+ if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
+ Reason)) {
return true;
}
// If the candidates are decreasing pressure, reverse priority.
@@ -2885,10 +2897,10 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
// Initialize the HazardRecognizers. If itineraries don't exist, are empty,
// or are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
- const TargetMachine &TM = DAG->MF.getTarget();
if (!Top.HazardRec) {
Top.HazardRec =
- TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
}
}
@@ -2902,7 +2914,10 @@ void PostGenericScheduler::registerRoots() {
if ((*I)->getDepth() > Rem.CriticalPath)
Rem.CriticalPath = (*I)->getDepth();
}
- DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+ DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
+ }
}
/// Apply a set of heursitics to a new candidate for PostRA scheduling.
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 0ae495c..ba25bca 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -17,18 +17,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "machine-sink"
@@ -38,6 +41,12 @@ SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+UseBlockFreqInfo("machine-sink-bfi",
+ cl::desc("Use block frequency info to find successors to sink"),
+ cl::init(true), cl::Hidden);
+
+
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -46,14 +55,20 @@ namespace {
class MachineSinking : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI; // Machine register information
- MachineDominatorTree *DT; // Machine dominator tree
+ MachineRegisterInfo *MRI; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree
+ MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
+ const MachineBlockFrequencyInfo *MBFI;
AliasAnalysis *AA;
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
CEBCandidates;
+ // Remember which edges we are about to split.
+ // This is different from CEBCandidates, which only records edges that were
+ // considered for splitting; the edges in this set will actually be split.
+ SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit;
public:
static char ID; // Pass identification
@@ -68,9 +83,13 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
+ if (UseBlockFreqInfo)
+ AU.addRequired<MachineBlockFrequencyInfo>();
}
void releaseMemory() override {
@@ -82,10 +101,22 @@ namespace {
bool isWorthBreakingCriticalEdge(MachineInstr *MI,
MachineBasicBlock *From,
MachineBasicBlock *To);
- MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
- MachineBasicBlock *From,
- MachineBasicBlock *To,
- bool BreakPHIEdge);
+ /// \brief Postpone the splitting of the given critical
+ /// edge (\p From, \p To).
+ ///
+ /// We do not split the edges on the fly. Indeed, this invalidates
+ /// the dominance information and thus triggers a lot of updates
+ /// of that information underneath.
+ /// Instead, we postpone all the splits after each iteration of
+ /// the main loop. That way, the information is at least valid
+ /// for the lifetime of an iteration.
+ ///
+ /// \return True if the edge is marked as toSplit, false otherwise.
+ /// False can be returned if, for instance, this is not profitable.
+ bool PostponeSplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
bool SinkInstruction(MachineInstr *MI, bool &SawStore);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
@@ -135,6 +166,11 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
DEBUG(dbgs() << "*** to: " << *MI);
MRI->replaceRegWith(DstReg, SrcReg);
MI->eraseFromParent();
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ MRI->clearKillFlags(SrcReg);
+
++NumCoalesces;
return true;
}
@@ -213,12 +249,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "******** Machine Sinking ********\n");
- const TargetMachine &TM = MF.getTarget();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
+ MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
bool EverMadeChange = false;
@@ -228,10 +265,24 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// Process all basic blocks.
CEBCandidates.clear();
+ ToSplit.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
+ // If we have anything we marked as toSplit, split it now.
+ for (auto &Pair : ToSplit) {
+ auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, this);
+ if (NewSucc != nullptr) {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << Pair.first->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << Pair.second->getNumber() << '\n');
+ MadeChange = true;
+ ++NumSplit;
+ } else
+ DEBUG(dbgs() << " *** Not legal to break critical edge\n");
+ }
// If this iteration over the code changed anything, keep iterating.
if (!MadeChange) break;
EverMadeChange = true;
@@ -289,7 +340,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
// If the pass has already considered breaking this edge (during this pass
// through the function), then let's go ahead and break it. This means
// sinking multiple "cheap" instructions into the same block.
- if (!CEBCandidates.insert(std::make_pair(From, To)))
+ if (!CEBCandidates.insert(std::make_pair(From, To)).second)
return true;
if (!MI->isCopy() && !TII->isAsCheapAsAMove(MI))
@@ -328,21 +379,21 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
return false;
}
-MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
- MachineBasicBlock *FromBB,
- MachineBasicBlock *ToBB,
- bool BreakPHIEdge) {
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
- return nullptr;
+ return false;
// Avoid breaking back edge. From == To means backedge for single BB loop.
if (!SplitEdges || FromBB == ToBB)
- return nullptr;
+ return false;
// Check for backedges of more "complex" loops.
if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
LI->isLoopHeader(ToBB))
- return nullptr;
+ return false;
// It's not always legal to break critical edges and sink the computation
// to the edge.
@@ -389,11 +440,13 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
if (*PI == FromBB)
continue;
if (!DT->dominates(ToBB, *PI))
- return nullptr;
+ return false;
}
}
- return FromBB->SplitCriticalEdge(ToBB, this);
+ ToSplit.insert(std::make_pair(FromBB, ToBB));
+
+ return true;
}
static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
@@ -419,23 +472,6 @@ static void collectDebugValues(MachineInstr *MI,
}
}
-/// isPostDominatedBy - Return true if A is post dominated by B.
-static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
-
- // FIXME - Use real post dominator.
- if (A->succ_size() != 2)
- return false;
- MachineBasicBlock::succ_iterator I = A->succ_begin();
- if (B == *I)
- ++I;
- MachineBasicBlock *OtherSuccBlock = *I;
- if (OtherSuccBlock->succ_size() != 1 ||
- *(OtherSuccBlock->succ_begin()) != B)
- return false;
-
- return true;
-}
-
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -447,8 +483,13 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
return false;
// It is profitable if SuccToSinkTo does not post dominate current block.
- if (!isPostDominatedBy(MBB, SuccToSinkTo))
- return true;
+ if (!PDT->dominates(SuccToSinkTo, MBB))
+ return true;
+
+ // It is profitable to sink an instruction from a deeper loop to a shallower
+ // loop, even if the latter post-dominates the former (PR21115).
+ if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
+ return true;
// Check if only use in post dominated block is PHI instruction.
bool NonPHIUse = false;
@@ -539,14 +580,20 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
}
// Otherwise, we should look at all the successors and decide which one
- // we should sink to.
- // We give successors with smaller loop depth higher priority.
- SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
- // Sort Successors according to their loop depth.
+ // we should sink to. If we have reliable block frequency information
+ // (frequency != 0) available, give successors with smaller frequencies
+ // higher priority, otherwise prioritize smaller loop depths.
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+ // Sort Successors according to their loop depth or block frequency info.
std::stable_sort(
Succs.begin(), Succs.end(),
- [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) {
- return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+ [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
+ uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
+ bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
+ return HasBlockFreq ? LHSFreq < RHSFreq
+ : LI->getLoopDepth(L) < LI->getLoopDepth(R);
});
for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(),
E = Succs.end(); SI != E; ++SI) {
@@ -655,21 +702,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (!TryBreak)
DEBUG(dbgs() << "Sinking along critical edge.\n");
else {
- MachineBasicBlock *NewSucc =
- SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
- if (!NewSucc) {
+ // Mark this edge as to be split.
+ // If the edge can actually be split, the next iteration of the main loop
+ // will sink MI in the newly created block.
+ bool Status =
+ PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!Status)
DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
- "break critical edge\n");
- return false;
- } else {
- DEBUG(dbgs() << " *** Splitting critical edge:"
- " BB#" << ParentBlock->getNumber()
- << " -- BB#" << NewSucc->getNumber()
- << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
- SuccToSinkTo = NewSucc;
- ++NumSplit;
- BreakPHIEdge = false;
- }
+ "break critical edge\n");
+ // The instruction will not be sunk this time.
+ return false;
}
}
@@ -677,20 +719,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// BreakPHIEdge is true if all the uses are in the successor MBB being
// sunken into and they are all PHI nodes. In this case, machine-sink must
// break the critical edge first.
- MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
- SuccToSinkTo, BreakPHIEdge);
- if (!NewSucc) {
+ bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!Status)
DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
"break critical edge\n");
- return false;
- }
-
- DEBUG(dbgs() << " *** Splitting critical edge:"
- " BB#" << ParentBlock->getNumber()
- << " -- BB#" << NewSucc->getNumber()
- << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
- SuccToSinkTo = NewSucc;
- ++NumSplit;
+ // The instruction will not be sunk this time.
+ return false;
}
// Determine where to insert into. Skip phi nodes.
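The new ToSplit/PostponeSplitCriticalEdge machinery above is an instance of a common pattern: record the CFG edits you want while the analyses (here the dominator and post-dominator trees) are still valid, then perform them in a batch after the traversal. A rough standalone illustration of that deferral, using plain integers for blocks and a std::set in place of LLVM's SetVector (every name here is invented for the example):

    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    using Edge = std::pair<int, int>;  // (FromBB, ToBB), blocks as plain ids

    // Stand-in for the per-instruction legality/profitability checks.
    static bool worthSplitting(const Edge &E) { return E.first != E.second; }

    int main() {
      std::vector<Edge> Candidates = {{0, 1}, {1, 1}, {0, 2}, {0, 1}};
      std::set<Edge> ToSplit;  // deduplicates, like the SetVector in the pass

      // Pass 1: the CFG is not touched yet, so existing analyses stay valid;
      // we only remember which edges we would like to split.
      for (const Edge &E : Candidates)
        if (worthSplitting(E))
          ToSplit.insert(E);

      // Pass 2: after the traversal, actually split the recorded edges.
      // In the real pass this is where SplitCriticalEdge runs.
      for (const Edge &E : ToSplit)
        std::printf("splitting edge BB#%d -> BB#%d\n", E.first, E.second);
      return 0;
    }

The trade-off, visible in the rewritten SinkInstruction, is that an instruction whose edge is only marked for splitting is not sunk in the current iteration; the next pass over the function picks it up once the edge actually exists.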
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 1bbf0ad..2cf87eb 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -52,13 +52,13 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
- TII = MF->getTarget().getInstrInfo();
- TRI = MF->getTarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
const TargetSubtargetInfo &ST =
MF->getTarget().getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
ProcResourceCycles.resize(MF->getNumBlockIDs() *
SchedModel.getNumProcResourceKinds());
@@ -135,8 +135,7 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
"getResources() must be called before getProcResourceCycles()");
unsigned PRKinds = SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
- return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds,
- PRKinds);
+ return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}
@@ -256,8 +255,7 @@ MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
- return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
- PRKinds);
+ return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}
/// Get an array of processor resource heights for MBB. Indexed by processor
@@ -270,8 +268,7 @@ MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
- return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
- PRKinds);
+ return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
@@ -452,7 +449,7 @@ public:
}
// To is a new block. Mark the block as visited in case the CFG has cycles
// that MachineLoopInfo didn't recognize as a natural loop.
- return LB.Visited.insert(To);
+ return LB.Visited.insert(To).second;
}
};
}
@@ -1169,6 +1166,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
return DepCycle;
}
+/// When Bottom is set, include the instructions in the current block in the estimate.
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
// Find the limiting processor resource.
// Numbers have been pre-scaled to be comparable.
@@ -1185,7 +1183,9 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
// Convert to cycle count.
PRMax = TE.MTM.getCycles(PRMax);
+ // All instructions before the current block ...
unsigned Instrs = TBI.InstrDepth;
+ // ... plus the instructions in the current block, when Bottom is set.
if (Bottom)
Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
@@ -1194,44 +1194,72 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
return std::max(Instrs, PRMax);
}
-
-unsigned MachineTraceMetrics::Trace::
-getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks,
- ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const {
+unsigned MachineTraceMetrics::Trace::getResourceLength(
+ ArrayRef<const MachineBasicBlock *> Extrablocks,
+ ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
+ ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
// Add up resources above and below the center block.
ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
unsigned PRMax = 0;
- for (unsigned K = 0; K != PRDepths.size(); ++K) {
- unsigned PRCycles = PRDepths[K] + PRHeights[K];
- for (unsigned I = 0; I != Extrablocks.size(); ++I)
- PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
- for (unsigned I = 0; I != ExtraInstrs.size(); ++I) {
- const MCSchedClassDesc* SC = ExtraInstrs[I];
+
+ // Lambda computing the cycles a set of instructions contributes to one resource.
+ auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
+ unsigned ResourceIdx)
+ ->unsigned {
+ unsigned Cycles = 0;
+ for (unsigned I = 0; I != Instrs.size(); ++I) {
+ const MCSchedClassDesc *SC = Instrs[I];
if (!SC->isValid())
continue;
for (TargetSchedModel::ProcResIter
- PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
- PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
- if (PI->ProcResourceIdx != K)
+ PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+ PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if (PI->ProcResourceIdx != ResourceIdx)
continue;
- PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K));
+ Cycles +=
+ (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
}
}
+ return Cycles;
+ };
+
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (unsigned I = 0; I != Extrablocks.size(); ++I)
+ PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+ PRCycles += extraCycles(ExtraInstrs, K);
+ PRCycles -= extraCycles(RemoveInstrs, K);
PRMax = std::max(PRMax, PRCycles);
}
// Convert to cycle count.
PRMax = TE.MTM.getCycles(PRMax);
+ // Instrs: #instructions in current trace outside current block.
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ // Add instruction count from the extra blocks.
for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ Instrs += ExtraInstrs.size();
+ Instrs -= RemoveInstrs.size();
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
return std::max(Instrs, PRMax);
}
+bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
+ const MachineInstr *UseMI) const {
+ if (DefMI->getParent() == UseMI->getParent())
+ return true;
+
+ const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
+ const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
+
+ return DepTBI.isUsefulDominator(TBI);
+}
+
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
OS << getName() << " ensemble:\n";
for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
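getResourceLength above now takes both ExtraInstrs and RemoveInstrs, folding their per-resource cycles in through the local extraCycles lambda and adjusting the raw instruction count by their sizes; the result is still the maximum of the issue-width-limited instruction count and the busiest processor resource. A much simplified standalone model of that computation (all numbers and names are invented; this is not the real scheduling model):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      // Cycles already accounted for on each processor resource.
      std::vector<unsigned> BaseCycles   = {10, 6, 14};
      // Per-resource cycles of instructions we pretend to add / remove.
      std::vector<unsigned> ExtraCycles  = {2, 0, 1};
      std::vector<unsigned> RemoveCycles = {0, 3, 0};

      unsigned PRMax = 0;
      for (size_t K = 0; K != BaseCycles.size(); ++K) {
        unsigned Cycles = BaseCycles[K] + ExtraCycles[K] - RemoveCycles[K];
        PRMax = std::max(PRMax, Cycles);
      }

      // Instruction count, adjusted the same way, then divided by issue width.
      unsigned Instrs = 20 + 2 /*extra*/ - 1 /*removed*/;
      unsigned IssueWidth = 2;
      Instrs /= IssueWidth;

      // The length estimate is whichever limit dominates.
      unsigned Length = std::max(Instrs, PRMax);
      std::printf("resource length estimate: %u cycles\n", Length);
      return 0;
    }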
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 8515b0f..99f0583 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -46,6 +46,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
namespace {
@@ -214,9 +215,9 @@ namespace {
void report(const char *msg, const MachineBasicBlock *MBB,
const LiveInterval &LI);
void report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR);
+ const LiveRange &LR, unsigned Reg);
void report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR);
+ const LiveRange &LR, unsigned Reg);
void verifyInlineAsm(const MachineInstr *MI);
@@ -275,11 +276,12 @@ void MachineFunction::verify(Pass *p, const char *Banner) const {
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
raw_ostream *OutFile = nullptr;
if (OutFileName) {
- std::string ErrorInfo;
- OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+ std::error_code EC;
+ OutFile = new raw_fd_ostream(OutFileName, EC,
sys::fs::F_Append | sys::fs::F_Text);
- if (!ErrorInfo.empty()) {
- errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+ if (EC) {
+ errs() << "Error opening '" << OutFileName << "': " << EC.message()
+ << '\n';
exit(1);
}
@@ -292,8 +294,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
TM = &MF.getTarget();
- TII = TM->getInstrInfo();
- TRI = TM->getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
LiveVars = nullptr;
@@ -430,15 +432,17 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
- const LiveRange &LR) {
+ const LiveRange &LR, unsigned Reg) {
report(msg, MBB);
- *OS << "- liverange: " << LR << "\n";
+ *OS << "- liverange: " << LR << '\n';
+ *OS << "- register: " << PrintReg(Reg, TRI) << '\n';
}
void MachineVerifier::report(const char *msg, const MachineFunction *MF,
- const LiveRange &LR) {
+ const LiveRange &LR, unsigned Reg) {
report(msg, MF);
- *OS << "- liverange: " << LR << "\n";
+ *OS << "- liverange: " << LR << '\n';
+ *OS << "- register: " << PrintReg(Reg, TRI) << '\n';
}
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
@@ -905,7 +909,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
*OS << TRI->getName(Reg) << " is not a "
- << DRC->getName() << " register.\n";
+ << TRI->getRegClassName(DRC) << " register.\n";
}
}
} else {
@@ -916,13 +920,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TRI->getSubClassWithSubReg(RC, SubIdx);
if (!SRC) {
report("Invalid subregister index for virtual register", MO, MONum);
- *OS << "Register class " << RC->getName()
+ *OS << "Register class " << TRI->getRegClassName(RC)
<< " does not support subreg index " << SubIdx << "\n";
return;
}
if (RC != SRC) {
report("Invalid register class for subregister index", MO, MONum);
- *OS << "Register class " << RC->getName()
+ *OS << "Register class " << TRI->getRegClassName(RC)
<< " does not fully support subreg index " << SubIdx << "\n";
return;
}
@@ -944,8 +948,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
if (!RC->hasSuperClassEq(DRC)) {
report("Illegal virtual register for instruction", MO, MONum);
- *OS << "Expected a " << DRC->getName() << " register, but got a "
- << RC->getName() << " register\n";
+ *OS << "Expected a " << TRI->getRegClassName(DRC)
+ << " register, but got a " << TRI->getRegClassName(RC)
+ << " register\n";
}
}
}
@@ -1357,13 +1362,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
if (!DefVNI) {
- report("Valno not live at def and not marked unused", MF, LR);
+ report("Valno not live at def and not marked unused", MF, LR, Reg);
*OS << "Valno #" << VNI->id << '\n';
return;
}
if (DefVNI != VNI) {
- report("Live segment at def has different valno", MF, LR);
+ report("Live segment at def has different valno", MF, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " where valno #" << DefVNI->id << " is live\n";
return;
@@ -1371,7 +1376,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
if (!MBB) {
- report("Invalid definition index", MF, LR);
+ report("Invalid definition index", MF, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LR << '\n';
return;
@@ -1379,7 +1384,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (VNI->isPHIDef()) {
if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MBB, LR);
+ report("PHIDef value is not defined at MBB start", MBB, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< ", not at the beginning of BB#" << MBB->getNumber() << '\n';
}
@@ -1389,7 +1394,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
// Non-PHI def.
const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
if (!MI) {
- report("No instruction at def index", MBB, LR);
+ report("No instruction at def index", MBB, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
return;
}
@@ -1422,12 +1427,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
// DEF slots.
if (isEarlyClobber) {
if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MBB, LR);
+ report("Early clobber def must be at an early-clobber slot", MBB, LR,
+ Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
} else if (!VNI->def.isRegister()) {
report("Non-PHI, non-early clobber def must be at a register slot",
- MBB, LR);
+ MBB, LR, Reg);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
}
}
@@ -1441,31 +1447,31 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
assert(VNI && "Live segment has no valno");
if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
- report("Foreign valno in live segment", MF, LR);
+ report("Foreign valno in live segment", MF, LR, Reg);
*OS << S << " has a bad valno\n";
}
if (VNI->isUnused()) {
- report("Live segment valno is marked unused", MF, LR);
+ report("Live segment valno is marked unused", MF, LR, Reg);
*OS << S << '\n';
}
const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
if (!MBB) {
- report("Bad start of live segment, no basic block", MF, LR);
+ report("Bad start of live segment, no basic block", MF, LR, Reg);
*OS << S << '\n';
return;
}
SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
if (S.start != MBBStartIdx && S.start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB, LR);
+ report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg);
*OS << S << '\n';
}
const MachineBasicBlock *EndMBB =
LiveInts->getMBBFromIndex(S.end.getPrevSlot());
if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF, LR);
+ report("Bad end of live segment, no basic block", MF, LR, Reg);
*OS << S << '\n';
return;
}
@@ -1483,14 +1489,14 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const MachineInstr *MI =
LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB, LR);
+ report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg);
*OS << S << '\n';
return;
}
// The block slot must refer to a basic block boundary.
if (S.end.isBlock()) {
- report("Live segment ends at B slot of an instruction", EndMBB, LR);
+ report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg);
*OS << S << '\n';
}
@@ -1498,7 +1504,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Segment ends on the dead slot.
// That means there must be a dead def.
if (!SlotIndex::isSameInstr(S.start, S.end)) {
- report("Live segment ending at dead slot spans instructions", EndMBB, LR);
+ report("Live segment ending at dead slot spans instructions", EndMBB, LR,
+ Reg);
*OS << S << '\n';
}
}
@@ -1508,7 +1515,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (S.end.isEarlyClobber()) {
if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", EndMBB, LR);
+ "redefined by an EC def in the same instruction", EndMBB, LR, Reg);
*OS << S << '\n';
}
}
@@ -1566,7 +1573,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// All predecessors must have a live-out value.
if (!PVNI) {
- report("Register not marked live out of predecessor", *PI, LR);
+ report("Register not marked live out of predecessor", *PI, LR, Reg);
*OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
<< '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
<< PEnd << '\n';
@@ -1575,7 +1582,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Only PHI-defs can take different predecessor values.
if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI, LR);
+ report("Different value live out of predecessor", *PI, LR, Reg);
*OS << "Valno #" << PVNI->id << " live out of BB#"
<< (*PI)->getNumber() << '@' << PEnd
<< "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 95a2934..a1042e7 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "phi-opt"
@@ -66,7 +67,7 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
return false;
MRI = &Fn.getRegInfo();
- TII = Fn.getTarget().getInstrInfo();
+ TII = Fn.getSubtarget().getInstrInfo();
// Find dead PHI cycles and PHI cycles that can be replaced by a single
// value. InstCombine does these optimizations, but DAG legalization may
@@ -91,7 +92,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
unsigned DstReg = MI->getOperand(0).getReg();
// See if we already saw this register.
- if (!PHIsInCycle.insert(MI))
+ if (!PHIsInCycle.insert(MI).second)
return true;
// Don't scan crazily complex things.
@@ -136,7 +137,7 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
"PHI destination is not a virtual register");
// See if we already saw this register.
- if (!PHIsInCycle.insert(MI))
+ if (!PHIsInCycle.insert(MI).second)
return true;
// Don't scan crazily complex things.
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index c8d0819..def2e3d 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -30,7 +30,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -150,9 +150,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
Changed |= EliminatePHINodes(MF, *I);
// Remove dead IMPLICIT_DEF instructions.
- for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
- E = ImpDefs.end(); I != E; ++I) {
- MachineInstr *DefMI = *I;
+ for (MachineInstr *DefMI : ImpDefs) {
unsigned DefReg = DefMI->getOperand(0).getReg();
if (MRI->use_nodbg_empty(DefReg)) {
if (LIS)
@@ -240,7 +238,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Insert a register to register copy at the top of the current block (but
// after any remaining phi nodes) which copies the new incoming register
// into the phi node destination.
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
if (isSourceDefinedByImplicitDef(MPhi, MRI))
// If all sources of a PHI node are implicit_def, just emit an
// implicit_def instead of a copy.
@@ -369,7 +367,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Check to make sure we haven't already emitted the copy for this block.
// This can happen because PHI nodes may have multiple entries for the same
// basic block.
- if (!MBBsInsertedInto.insert(&opBlock))
+ if (!MBBsInsertedInto.insert(&opBlock).second)
continue; // If the copy has already been emitted, we're done.
// Find a safe location to insert the copy, this may be the first terminator
diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h
index 48234ae..b997d7a 100644
--- a/lib/CodeGen/PHIEliminationUtils.h
+++ b/lib/CodeGen/PHIEliminationUtils.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
-#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#ifndef LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 249b2d0..ec71d86 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
@@ -71,6 +72,8 @@ static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
cl::desc("Disable Codegen Prepare"));
static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining",
+ cl::Hidden, cl::desc("Disable Partial Libcall Inlining"));
static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
cl::desc("Print LLVM IR produced by the loop-reduce pass"));
static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
@@ -97,6 +100,10 @@ static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
+static cl::opt<bool> UseCFLAA("use-cfl-aa-in-codegen",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -374,7 +381,10 @@ void TargetPassConfig::addIRPasses() {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
+ if (UseCFLAA)
+ addPass(createCFLAliasAnalysisPass());
addPass(createTypeBasedAliasAnalysisPass());
+ addPass(createScopedNoAliasAAPass());
addPass(createBasicAliasAnalysisPass());
// Before running any passes, run the verifier to determine if the input
@@ -399,6 +409,9 @@ void TargetPassConfig::addIRPasses() {
// Prepare expensive constants for SelectionDAG.
if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting)
addPass(createConstantHoistingPass());
+
+ if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
+ addPass(createPartiallyInlineLibCallsPass());
}
/// Turn exception handling constructs into something the code generators can
@@ -416,7 +429,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// FALLTHROUGH
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- case ExceptionHandling::WinEH:
+ case ExceptionHandling::ItaniumWinEH:
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
@@ -433,6 +446,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
void TargetPassConfig::addCodeGenPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
addPass(createCodeGenPreparePass(TM));
+ addPass(createRewriteSymbolsPass());
}
/// Add common passes that perform LLVM IR to IR transforms in preparation for
@@ -601,6 +615,9 @@ void TargetPassConfig::addMachineSSAOptimization() {
printAndVerify("After Machine LICM, CSE and Sinking passes");
addPass(&PeepholeOptimizerID);
+ // Clean-up the dead code that may have been generated by peephole
+ // rewriting.
+ addPass(&DeadMachineInstructionElimID);
printAndVerify("After codegen peephole optimization pass");
}
@@ -675,6 +692,12 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
return createTargetRegisterAllocator(Optimized);
}
+/// Return true if the default global register allocator is in use and
+/// has not been overridden on the command line with '-regalloc=...'
+bool TargetPassConfig::usingDefaultRegAlloc() const {
+ return RegAlloc.getNumOccurrences() == 0;
+}
+
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
@@ -709,6 +732,7 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&TwoAddressInstructionPassID);
addPass(&RegisterCoalescerID);
+ printAndVerify("After Register Coalescing");
// PreRA instruction scheduling.
if (addPass(&MachineSchedulerID))
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 716cb1f..a296aea 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -46,7 +46,7 @@
// if it loads to virtual registers and the virtual register defined has
// a single use.
//
-// - Optimize Copies and Bitcast:
+// - Optimize Copies and Bitcast (more generally, target specific copies):
//
// Rewrite copies and bitcasts to avoid cross register bank copies
// when possible.
@@ -78,6 +78,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "peephole-opt"
@@ -92,7 +94,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
cl::desc("Disable the peephole optimizer"));
static cl::opt<bool>
-DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true),
+DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
cl::desc("Disable advanced copy optimization"));
STATISTIC(NumReuse, "Number of extension results reused");
@@ -100,12 +102,13 @@ STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
-STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized");
+STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
+STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
- const TargetMachine *TM;
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
@@ -129,9 +132,14 @@ namespace {
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ SmallPtrSetImpl<MachineInstr*> &LocalMIs);
bool optimizeSelect(MachineInstr *MI);
+ bool optimizeCondBranch(MachineInstr *MI);
bool optimizeCopyOrBitcast(MachineInstr *MI);
+ bool optimizeCoalescableCopy(MachineInstr *MI);
+ bool optimizeUncoalescableCopy(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool findNextSource(unsigned &Reg, unsigned &SubReg);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
@@ -140,6 +148,25 @@ namespace {
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool isLoadFoldable(MachineInstr *MI,
SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
+
+ /// \brief Check whether \p MI is understood by the register coalescer
+ /// but may require some rewriting.
+ bool isCoalescableCopy(const MachineInstr &MI) {
+ // SubregToRegs are not interesting, because they are already register
+ // coalescer friendly.
+ return MI.isCopy() || (!DisableAdvCopyOpt &&
+ (MI.isRegSequence() || MI.isInsertSubreg() ||
+ MI.isExtractSubreg()));
+ }
+
+ /// \brief Check whether \p MI is a copy-like instruction that is
+ /// not recognized by the register coalescer.
+ bool isUncoalescableCopy(const MachineInstr &MI) {
+ return MI.isBitcast() ||
+ (!DisableAdvCopyOpt &&
+ (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+ MI.isExtractSubregLike()));
+ }
};
/// \brief Helper class to track the possible sources of a value defined by
@@ -176,63 +203,87 @@ namespace {
/// the ValueTracker class but that would have complicated the code of
/// the users of this class.
bool UseAdvancedTracking;
- /// Optional MachineRegisterInfo used to perform some complex
+ /// MachineRegisterInfo used to perform tracking.
+ const MachineRegisterInfo &MRI;
+ /// Optional TargetInstrInfo used to perform some complex
/// tracking.
- const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
/// \brief Dispatcher to the right underlying implementation of
/// getNextSource.
- bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for Copy instructions.
- bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for Bitcast instructions.
- bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for RegSequence
/// instructions.
- bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for InsertSubreg
/// instructions.
- bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for ExtractSubreg
/// instructions.
- bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Specialized version of getNextSource for SubregToReg
/// instructions.
- bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg);
public:
- /// \brief Create a ValueTracker instance for the value defines by \p MI
- /// at the operand index \p DefIdx.
+ /// \brief Create a ValueTracker instance for the value defined by \p Reg.
/// \p DefSubReg represents the sub register index the value tracker will
- /// track. It does not need to match the sub register index used in \p MI.
+ /// track. It does not need to match the sub register index used in the
+ /// definition of \p Reg.
/// \p UseAdvancedTracking specifies whether or not the value tracker looks
/// through complex instructions. By default (false), it handles only copy
/// and bitcast instructions.
- /// \p MRI useful to perform some complex checks.
+ /// If \p Reg is a physical register, a value tracker constructed with
+ /// this constructor will not find any alternative source.
+ /// Indeed, when \p Reg is a physical register that constructor does not
+ /// know which definition of \p Reg it should track.
+ /// Use the next constructor to track a physical register.
+ ValueTracker(unsigned Reg, unsigned DefSubReg,
+ const MachineRegisterInfo &MRI,
+ bool UseAdvancedTracking = false,
+ const TargetInstrInfo *TII = nullptr)
+ : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
+ UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Def = MRI.getVRegDef(Reg);
+ DefIdx = MRI.def_begin(Reg).getOperandNo();
+ }
+ }
+
+ /// \brief Create a ValueTracker instance for the value defined by
+ /// the pair \p MI, \p DefIdx.
+ /// Unlike the other constructor, the value tracker produced by this one
+ /// may be able to find a new source when the definition is a physical
+ /// register.
+ /// This could be useful to rewrite target specific instructions into
+ /// generic copy instructions.
ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
+ const MachineRegisterInfo &MRI,
bool UseAdvancedTracking = false,
- const MachineRegisterInfo *MRI = nullptr)
+ const TargetInstrInfo *TII = nullptr)
: Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
- UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) {
- assert(Def->getOperand(DefIdx).isDef() &&
- Def->getOperand(DefIdx).isReg() &&
- "Definition does not match machine instruction");
- // Initially the value is in the defined register.
+ UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
+ assert(DefIdx < Def->getDesc().getNumDefs() &&
+ Def->getOperand(DefIdx).isReg() && "Invalid definition");
Reg = Def->getOperand(DefIdx).getReg();
}
/// \brief Following the use-def chain, get the next available source
/// for the tracked value.
- /// When the returned value is not nullptr, getReg() gives the register
+ /// When the returned value is not nullptr, \p SrcReg gives the register
/// that contains the tracked value.
/// \note The sub register index returned in \p SrcSubReg must be used
- /// on that getReg() to access the actual value.
+ /// on \p SrcReg to access the actual value.
/// \return Unless the returned value is nullptr (i.e., no source found),
- /// \p SrcIdx gives the index of the next source in the returned
- /// instruction and \p SrcSubReg the index to be used on that source to
- /// get the tracked value. When nullptr is returned, no alternative source
- /// has been found.
- const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \p SrcReg gives the register of the next source used in the returned
+ /// instruction and \p SrcSubReg the sub-register index to be used on that
+ /// source to get the tracked value. When nullptr is returned, no
+ /// alternative source has been found.
+ const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);
/// \brief Get the last register where the initial value can be found.
/// Initially this is the register of the definition.
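To make the intended use of the new ValueTracker interface concrete, here is a minimal sketch of a driving loop, assuming a virtual register Reg, a sub-register index SubReg, and the pass's MRI/TII members; the stopping predicate isGoodEnough is purely illustrative:

    unsigned SrcReg, SrcSubReg;
    ValueTracker Tracker(Reg, SubReg, *MRI, /*UseAdvancedTracking=*/true, TII);
    while (const MachineInstr *Src = Tracker.getNextSource(SrcReg, SrcSubReg)) {
      // At this point SrcReg:SrcSubReg holds the same value as Reg:SubReg.
      if (isGoodEnough(SrcReg, SrcSubReg)) // hypothetical stopping predicate
        break;
    }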
@@ -261,7 +312,7 @@ INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
/// debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
return false;
@@ -277,7 +328,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
// Ensure DstReg can get a register class that actually supports
// sub-registers. Don't change the class until we commit.
const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
- DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
+ DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
if (!DstRC)
return false;
@@ -286,8 +337,8 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
// register.
// If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
// SrcReg:SubIdx should be replaced.
- bool UseSrcSubIdx = TM->getRegisterInfo()->
- getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
+ bool UseSrcSubIdx =
+ TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
// The source has other uses. See if we can replace the other uses with use of
// the result of the extension.
@@ -447,6 +498,12 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
return true;
}
+/// \brief Check if a simpler conditional branch can be
+/// generated.
+bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
+ return TII->optimizeCondBranch(MI);
+}
+
/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
/// share the same register file.
static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
@@ -477,85 +534,34 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
}
-/// \brief Get the index of the definition and source for \p Copy
-/// instruction.
-/// \pre Copy.isCopy() or Copy.isBitcast().
-/// \return True if the Copy instruction has only one register source
-/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx
-/// are invalid.
-static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy,
- unsigned &DefIdx, unsigned &SrcIdx) {
- assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type.");
- if (Copy.isCopy()) {
- // Copy instruction are supposed to be: Def = Src.
- if (Copy.getDesc().getNumOperands() != 2)
- return false;
- DefIdx = 0;
- SrcIdx = 1;
- assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!");
- return true;
- }
- // Bitcast case.
- // Bitcasts with more than one def are not supported.
- if (Copy.getDesc().getNumDefs() != 1)
- return false;
- // Initialize SrcIdx to an undefined operand.
- SrcIdx = Copy.getDesc().getNumOperands();
- for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) {
- const MachineOperand &MO = Copy.getOperand(OpIdx);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef())
- DefIdx = OpIdx;
- else if (SrcIdx != EndOpIdx)
- // Multiple sources?
- return false;
- SrcIdx = OpIdx;
- }
- return true;
-}
-
-/// \brief Optimize a copy or bitcast instruction to avoid cross
-/// register bank copy. The optimization looks through a chain of
-/// copies and try to find a source that has a compatible register
-/// class.
-/// Two register classes are considered to be compatible if they share
-/// the same register bank.
-/// New copies issued by this optimization are register allocator
-/// friendly. This optimization does not remove any copy as it may
-/// overconstraint the register allocator, but replaces some when
-/// possible.
-/// \pre \p MI is a Copy (MI->isCopy() is true)
-/// \return True, when \p MI has been optimized. In that case, \p MI has
-/// been removed from its parent.
-bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
- unsigned DefIdx, SrcIdx;
- if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx))
+/// \brief Try to find the next source that shares the same register file
+/// for the value defined by \p Reg and \p SubReg.
+/// When true is returned, \p Reg and \p SubReg are updated with the
+/// register number and sub-register index of the new source.
+/// \return False if no alternative sources are available. True otherwise.
+bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) {
+ // Do not try to find a new source for a physical register.
+ // So far we do not have any motivating example for doing that.
+ // Thus, instead of maintaining untested code, we will revisit that if
+ // that changes at some point.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
return false;
- const MachineOperand &MODef = MI->getOperand(DefIdx);
- assert(MODef.isReg() && "Copies must be between registers.");
- unsigned Def = MODef.getReg();
-
- if (TargetRegisterInfo::isPhysicalRegister(Def))
- return false;
-
- const TargetRegisterClass *DefRC = MRI->getRegClass(Def);
- unsigned DefSubReg = MODef.getSubReg();
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+ unsigned DefSubReg = SubReg;
unsigned Src;
unsigned SrcSubReg;
bool ShouldRewrite = false;
- const TargetRegisterInfo &TRI = *TM->getRegisterInfo();
// Follow the chain of copies until we reach the top of the use-def chain
// or find a more suitable source.
- ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI);
+ ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII);
do {
- unsigned CopySrcIdx, CopySrcSubReg;
- if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg))
+ unsigned CopySrcReg, CopySrcSubReg;
+ if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg))
break;
- Src = ValTracker.getReg();
+ Src = CopySrcReg;
SrcSubReg = CopySrcSubReg;
// Do not extend the live-ranges of physical registers as they add
@@ -569,29 +575,411 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
// If this source does not incur a cross register bank copy, use it.
- ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
+ ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC,
SrcSubReg);
} while (!ShouldRewrite);
// If we did not find a more suitable source, there is nothing to optimize.
- if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
+ if (!ShouldRewrite || Src == Reg)
return false;
- // Rewrite the copy to avoid a cross register bank penalty.
- unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def :
- MRI->createVirtualRegister(DefRC);
- MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVR)
- .addReg(Src, 0, SrcSubReg);
- NewCopy->getOperand(0).setSubReg(DefSubReg);
-
- MRI->replaceRegWith(Def, NewVR);
- MRI->clearKillFlags(NewVR);
- // We extended the lifetime of Src.
- // Clear the kill flags to account for that.
- MRI->clearKillFlags(Src);
+ Reg = Src;
+ SubReg = SrcSubReg;
+ return true;
+}
+
+namespace {
+/// \brief Helper class to rewrite the arguments of a copy-like instruction.
+class CopyRewriter {
+protected:
+ /// The copy-like instruction.
+ MachineInstr &CopyLike;
+ /// The index of the source being rewritten.
+ unsigned CurrentSrcIdx;
+
+public:
+ CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}
+
+ virtual ~CopyRewriter() {}
+
+ /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
+ /// the related value that it affects (TrackReg, TrackSubReg).
+ /// A source is considered rewritable if its register class and the
+ /// register class of the related TrackReg may not be register
+ /// coalescer friendly. In other words, given a copy-like instruction,
+ /// not all of its arguments may be returned as rewritable sources, since
+ /// some arguments are known to be register coalescer friendly.
+ ///
+ /// Each call of this method moves the current source to the next
+ /// rewritable source.
+ /// For instance, let CopyLike be the instruction to rewrite.
+ /// CopyLike has one definition and one source:
+ /// dst.dstSubIdx = CopyLike src.srcSubIdx.
+ ///
+ /// The first call will give the first rewritable source, i.e.,
+ /// the only source this instruction has:
+ /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
+ /// This source defines the whole definition, i.e.,
+ /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
+ ///
+ /// The second and subsequent calls will return false, as there is only one
+ /// rewritable source.
+ ///
+ /// \return True if a rewritable source has been found, false otherwise.
+ /// The output arguments are valid if and only if true is returned.
+ virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) {
+ // If CurrentSrcIdx == 1, this means this function has already been
+ // called once. CopyLike has one definition and one argument, thus,
+ // there is nothing else to rewrite.
+ if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
+ return false;
+ // This is the first call to getNextRewritableSource.
+ // Move the CurrentSrcIdx to remember that we made that call.
+ CurrentSrcIdx = 1;
+ // The rewritable source is the argument.
+ const MachineOperand &MOSrc = CopyLike.getOperand(1);
+ SrcReg = MOSrc.getReg();
+ SrcSubReg = MOSrc.getSubReg();
+ // What we track are the alternative sources of the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+ return true;
+ }
+
+ /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
+ /// if possible.
+ /// \return True if the rewriting was possible, false otherwise.
+ virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
+ if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
+ return false;
+ MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
+ MOSrc.setReg(NewReg);
+ MOSrc.setSubReg(NewSubReg);
+ return true;
+ }
+};
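The contract above is easiest to see from the caller's side. A rough sketch of the expected driving loop follows (it mirrors what optimizeCoalescableCopy does further down; findBetterSource stands in for PeepholeOptimizer::findNextSource and is only illustrative):

    unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
    while (Rewriter.getNextRewritableSource(SrcReg, SrcSubReg,
                                            TrackReg, TrackSubReg)) {
      unsigned NewReg = TrackReg, NewSubReg = TrackSubReg;
      // Ask the value-tracking machinery for a coalescer-friendlier source.
      if (findBetterSource(NewReg, NewSubReg) && NewReg != SrcReg)
        Rewriter.RewriteCurrentSource(NewReg, NewSubReg);
    }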
+
+/// \brief Specialized rewriter for INSERT_SUBREG instruction.
+class InsertSubregRewriter : public CopyRewriter {
+public:
+ InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) {
+ assert(MI.isInsertSubreg() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
+ /// Src1 has the same register class as dst, hence, there is
+ /// nothing to rewrite.
+ /// Src2.src2SubIdx may not be register coalescer friendly.
+ /// Therefore, the first call to this method returns:
+ /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx).
+ ///
+ /// Subsequent calls will return false.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // If we already get the only source we can rewrite, return false.
+ if (CurrentSrcIdx == 2)
+ return false;
+ // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
+ CurrentSrcIdx = 2;
+ const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
+ SrcReg = MOInsertedReg.getReg();
+ SrcSubReg = MOInsertedReg.getSubReg();
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+
+ // We want to track something that is compatible with the
+ // partial definition.
+ TrackReg = MODef.getReg();
+ if (MODef.getSubReg())
+ // Bails if we have to compose sub-register indices.
+ return false;
+ TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
+ return true;
+ }
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ if (CurrentSrcIdx != 2)
+ return false;
+ // We are rewriting the inserted reg.
+ MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+ MO.setReg(NewReg);
+ MO.setSubReg(NewSubReg);
+ return true;
+ }
+};
+
+/// \brief Specialized rewriter for EXTRACT_SUBREG instruction.
+class ExtractSubregRewriter : public CopyRewriter {
+ const TargetInstrInfo &TII;
+
+public:
+ ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
+ : CopyRewriter(MI), TII(TII) {
+ assert(MI.isExtractSubreg() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
+ /// There is only one rewritable source: Src.subIdx,
+ /// which defines dst.dstSubIdx.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // If we already get the only source we can rewrite, return false.
+ if (CurrentSrcIdx == 1)
+ return false;
+ // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
+ CurrentSrcIdx = 1;
+ const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
+ SrcReg = MOExtractedReg.getReg();
+ // If we have to compose sub-register indices, bails out.
+ if (MOExtractedReg.getSubReg())
+ return false;
+
+ SrcSubReg = CopyLike.getOperand(2).getImm();
+
+ // We want to track something that is compatible with the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+ return true;
+ }
+
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ // The only source we can rewrite is the input register.
+ if (CurrentSrcIdx != 1)
+ return false;
+
+ CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
+
+ // If we find a source that does not require extracting anything,
+ // rewrite the operation with a copy.
+ if (!NewSubReg) {
+ // Move the current index to an invalid position.
+ // We do not want another call to this method to be able
+ // to do any change.
+ CurrentSrcIdx = -1;
+ // Rewrite the operation as a COPY.
+ // Get rid of the sub-register index.
+ CopyLike.RemoveOperand(2);
+ // Morph the operation into a COPY.
+ CopyLike.setDesc(TII.get(TargetOpcode::COPY));
+ return true;
+ }
+ CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
+ return true;
+ }
+};
+
+/// \brief Specialized rewriter for REG_SEQUENCE instruction.
+class RegSequenceRewriter : public CopyRewriter {
+public:
+ RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) {
+ assert(MI.isRegSequence() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
+ /// Each call will return a different source, walking all the available
+ /// sources.
+ ///
+ /// The first call returns:
+ /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx1).
+ ///
+ /// The second call returns:
+ /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx2).
+ ///
+ /// And so on, until all the sources have been traversed, then
+ /// it returns false.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
+
+ // If this is the first call, move to the first argument.
+ if (CurrentSrcIdx == 0) {
+ CurrentSrcIdx = 1;
+ } else {
+ // Otherwise, move to the next argument and check that it is valid.
+ CurrentSrcIdx += 2;
+ if (CurrentSrcIdx >= CopyLike.getNumOperands())
+ return false;
+ }
+ const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
+ SrcReg = MOInsertedReg.getReg();
+ // If we have to compose sub-register indices, bails out.
+ if ((SrcSubReg = MOInsertedReg.getSubReg()))
+ return false;
+
+ // We want to track something that is compatible with the related
+ // partial definition.
+ TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
+
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ // If we have to compose sub-registers, bails.
+ return MODef.getSubReg() == 0;
+ }
+
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ // We cannot rewrite out of bound operands.
+ // Moreover, rewritable sources are at odd positions.
+ if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
+ return false;
+
+ MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+ MO.setReg(NewReg);
+ MO.setSubReg(NewSubReg);
+ return true;
+ }
+};
+} // End namespace.
+
+/// \brief Get the appropriate CopyRewriter for \p MI.
+/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
+/// if no rewriter works for \p MI.
+static CopyRewriter *getCopyRewriter(MachineInstr &MI,
+ const TargetInstrInfo &TII) {
+ switch (MI.getOpcode()) {
+ default:
+ return nullptr;
+ case TargetOpcode::COPY:
+ return new CopyRewriter(MI);
+ case TargetOpcode::INSERT_SUBREG:
+ return new InsertSubregRewriter(MI);
+ case TargetOpcode::EXTRACT_SUBREG:
+ return new ExtractSubregRewriter(MI, TII);
+ case TargetOpcode::REG_SEQUENCE:
+ return new RegSequenceRewriter(MI);
+ }
+ llvm_unreachable(nullptr);
+}
+
+/// \brief Optimize generic copy instructions to avoid cross
+/// register bank copy. The optimization looks through a chain of
+/// copies and tries to find a source that has a compatible register
+/// class.
+/// Two register classes are considered to be compatible if they share
+/// the same register bank.
+/// New copies issued by this optimization are register allocator
+/// friendly. This optimization does not remove any copy as it may
+/// overconstrain the register allocator, but replaces some operands
+/// when possible.
+/// \pre isCoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been rewritten. False otherwise.
+bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
+ assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
+ assert(MI->getDesc().getNumDefs() == 1 &&
+ "Coalescer can understand multiple defs?!");
+ const MachineOperand &MODef = MI->getOperand(0);
+ // Do not rewrite physical definitions.
+ if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ return false;
+
+ bool Changed = false;
+ // Get the right rewriter for the current copy.
+ std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
+ // If none exists, bails out.
+ if (!CpyRewriter)
+ return false;
+ // Rewrite each rewritable source.
+ unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
+ while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
+ TrackSubReg)) {
+ unsigned NewSrc = TrackReg;
+ unsigned NewSubReg = TrackSubReg;
+ // Try to find a more suitable source.
+ // If we fail to do so, or if we just get back the source we already have,
+ // move to the next source.
+ if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
+ continue;
+ // Rewrite source.
+ if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
+ // We may have extended the live-range of NewSrc, account for that.
+ MRI->clearKillFlags(NewSrc);
+ Changed = true;
+ }
+ }
+ // TODO: We could have a clean-up method to tidy the instruction.
+ // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
+ // => v0 = COPY v1
+ // Currently we haven't seen a motivating example for that and we
+ // want to avoid untested code.
+ NumRewrittenCopies += Changed == true;
+ return Changed;
+}
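As a hypothetical illustration of the coalescable case (reusing the pseudo-MIR notation of the comments above), assume v0 and v2 live in one register bank while v1 lives in another:

    v1 = COPY v0     // cross-bank copy
    v2 = COPY v1     // cross-bank copy; v1 is the rewritable source

After optimizeCoalescableCopy rewrites the source operand of the second copy:

    v2 = COPY v0     // same-bank copy; the v1 copy may later become dead

No instruction is deleted here; only the operand is rewritten, leaving any clean-up to later passes.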
+
+/// \brief Optimize copy-like instructions to create
+/// register coalescer friendly instruction.
+/// The optimization tries to kill-off the \p MI by looking
+/// through a chain of copies to find a source that has a compatible
+/// register class.
+/// If such a source is found, it replaces \p MI with a generic COPY
+/// operation.
+/// \pre isUncoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been optimized. In that case, \p MI has
+/// been removed from its parent.
+/// All COPY instructions created are inserted in \p LocalMIs.
+bool PeepholeOptimizer::optimizeUncoalescableCopy(
+ MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
+ assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
+
+ // Check if we can rewrite all the values defined by this instruction.
+ SmallVector<
+ std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
+ 4> RewritePairs;
+ for (const MachineOperand &MODef : MI->defs()) {
+ if (MODef.isDead())
+ // We can ignore those.
+ continue;
+
+ // If a physical register is here, this is probably for a good reason.
+ // Do not rewrite that.
+ if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ return false;
+
+ // If we do not know how to rewrite this definition, there is no point
+ // in trying to kill this instruction.
+ TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg());
+ TargetInstrInfo::RegSubRegPair Src = Def;
+ if (!findNextSource(Src.Reg, Src.SubReg))
+ return false;
+ RewritePairs.push_back(std::make_pair(Def, Src));
+ }
+ // The change is possible for all defs, do it.
+ for (const auto &PairDefSrc : RewritePairs) {
+ const auto &Def = PairDefSrc.first;
+ const auto &Src = PairDefSrc.second;
+ // Rewrite the "copy" in a way the register coalescer understands.
+ assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
+ "We do not rewrite physical registers");
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
+ unsigned NewVR = MRI->createVirtualRegister(DefRC);
+ MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ NewVR).addReg(Src.Reg, 0, Src.SubReg);
+ NewCopy->getOperand(0).setSubReg(Def.SubReg);
+ if (Def.SubReg)
+ NewCopy->getOperand(0).setIsUndef();
+ LocalMIs.insert(NewCopy);
+ MRI->replaceRegWith(Def.Reg, NewVR);
+ MRI->clearKillFlags(NewVR);
+ // We extended the lifetime of Src.
+ // Clear the kill flags to account for that.
+ MRI->clearKillFlags(Src.Reg);
+ }
+ // MI is now dead.
MI->eraseFromParent();
- ++NumCopiesBitcasts;
+ ++NumUncoalescableCopies;
return true;
}
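A hypothetical example of the uncoalescable case, again in the notation used by the comments above (the target-specific opcode is made up; any instruction for which isRegSequenceLike(), isInsertSubregLike(), isExtractSubregLike(), or isBitcast() holds qualifies):

    v1 = TARGET_REGSEQ_LIKE ...      // uncoalescable, opaque to the coalescer

If a compatible source SrcReg:SrcSubReg is found for v1, the pass emits

    v2 = COPY SrcReg:SrcSubReg

rewrites all occurrences of v1 to use v2, and erases the original instruction, so the register coalescer only ever sees the generic COPY.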
@@ -673,8 +1061,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (DisablePeephole)
return false;
- TM = &MF.getTarget();
- TII = TM->getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
@@ -684,7 +1072,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = &*I;
bool SeenMoveImm = false;
- SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallPtrSet<MachineInstr*, 16> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
@@ -711,7 +1099,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->mayStore() || MI->isCall())
FoldAsLoadDefCandidates.clear();
- if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
+ if ((isUncoalescableCopy(*MI) &&
+ optimizeUncoalescableCopy(MI, LocalMIs)) ||
(MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
(MI->isSelect() && optimizeSelect(MI))) {
// MI is deleted.
@@ -720,6 +1109,17 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ if (MI->isConditionalBranch() && optimizeCondBranch(MI)) {
+ Changed = true;
+ continue;
+ }
+
+ if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
+ // MI is just rewritten.
+ Changed = true;
+ continue;
+ }
+
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
SeenMoveImm = true;
} else {
@@ -781,24 +1181,25 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
unsigned &SrcSubReg) {
assert(Def->isCopy() && "Invalid definition");
// Copy instructions are supposed to be: Def = Src.
// If someone breaks this assumption, bad things will happen everywhere.
- assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");
+ assert(Def->getNumOperands() == 2 && "Invalid number of operands");
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of src.
// Bails as we do not support composing subreg yet.
return false;
// Otherwise, we want the whole source.
- SrcIdx = 1;
- SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ const MachineOperand &Src = Def->getOperand(1);
+ SrcReg = Src.getReg();
+ SrcSubReg = Src.getSubReg();
return true;
}
-bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
unsigned &SrcSubReg) {
assert(Def->isBitcast() && "Invalid definition");
@@ -814,7 +1215,7 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
// Bails as we do not support composing subreg yet.
return false;
- SrcIdx = Def->getDesc().getNumOperands();
+ unsigned SrcIdx = Def->getNumOperands();
for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
++OpIdx) {
const MachineOperand &MO = Def->getOperand(OpIdx);
@@ -826,13 +1227,16 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
return false;
SrcIdx = OpIdx;
}
- SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ const MachineOperand &Src = Def->getOperand(SrcIdx);
+ SrcReg = Src.getReg();
+ SrcSubReg = Src.getSubReg();
return true;
}
-bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
unsigned &SrcSubReg) {
- assert(Def->isRegSequence() && "Invalid definition");
+ assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
+ "Invalid definition");
if (Def->getOperand(DefIdx).getSubReg())
// If we are composing subreg, bails out.
@@ -851,19 +1255,26 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
// turn that into an assertion.
return false;
+ if (!TII)
+ // We could handle the REG_SEQUENCE here, but we do not want to
+ // duplicate the code from the generic TII.
+ return false;
+
+ SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
+ if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
+ return false;
+
// We are looking at:
// Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
// Check if one of the operand defines the subreg we are interested in.
- for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
- OpIdx != EndOpIdx; OpIdx += 2) {
- const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
- assert(MOSubIdx.isImm() &&
- "One of the subindex of the reg_sequence is not an immediate");
- if (MOSubIdx.getImm() == DefSubReg) {
- assert(Def->getOperand(OpIdx).isReg() &&
- "One of the source of the reg_sequence is not a register");
- SrcIdx = OpIdx;
- SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ for (auto &RegSeqInput : RegSeqInputRegs) {
+ if (RegSeqInput.SubIdx == DefSubReg) {
+ if (RegSeqInput.SubReg)
+ // Bails if we have to compose sub registers.
+ return false;
+
+ SrcReg = RegSeqInput.Reg;
+ SrcSubReg = RegSeqInput.SubReg;
return true;
}
}
@@ -874,61 +1285,68 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
return false;
}
-bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
unsigned &SrcSubReg) {
- assert(Def->isInsertSubreg() && "Invalid definition");
+ assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
+ "Invalid definition");
+
if (Def->getOperand(DefIdx).getSubReg())
// If we are composing subreg, bails out.
// Same remark as getNextSourceFromRegSequence.
// I.e., this may be turned into an assert.
return false;
+ if (!TII)
+ // We could handle the INSERT_SUBREG here, but we do not want to
+ // duplicate the code from the generic TII.
+ return false;
+
+ TargetInstrInfo::RegSubRegPair BaseReg;
+ TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
+ if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
+ return false;
+
// We are looking at:
// Def = INSERT_SUBREG v0, v1, sub1
// There are two cases:
// 1. DefSubReg == sub1, get v1.
// 2. DefSubReg != sub1, the value may be available through v0.
- // #1 Check if the inserted register matches the require sub index.
- unsigned InsertedSubReg = Def->getOperand(3).getImm();
- if (InsertedSubReg == DefSubReg) {
- SrcIdx = 2;
- SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ // #1 Check if the inserted register matches the required sub index.
+ if (InsertedReg.SubIdx == DefSubReg) {
+ SrcReg = InsertedReg.Reg;
+ SrcSubReg = InsertedReg.SubReg;
return true;
}
// #2 Otherwise, if the sub register we are looking for is not partially
// defined by the inserted element, we can look through the main
// register (v0).
- // To check the overlapping we need a MRI and a TRI.
- if (!MRI)
- return false;
-
const MachineOperand &MODef = Def->getOperand(DefIdx);
- const MachineOperand &MOBase = Def->getOperand(1);
// If the result register (Def) and the base register (v0) do not
// have the same register class or if we have to compose
// subregisters, bails out.
- if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
- MOBase.getSubReg())
+ if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
+ BaseReg.SubReg)
return false;
- // Get the TRI and check if inserted sub register overlaps with the
- // sub register we are tracking.
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ // Get the TRI and check if the inserted sub-register overlaps with the
+ // sub-register we are tracking.
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
if (!TRI ||
(TRI->getSubRegIndexLaneMask(DefSubReg) &
- TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0)
+ TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
return false;
// At this point, the value is available in v0 via the same subreg
// we used for Def.
- SrcIdx = 1;
+ SrcReg = BaseReg.Reg;
SrcSubReg = DefSubReg;
return true;
}
-bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
unsigned &SrcSubReg) {
- assert(Def->isExtractSubreg() && "Invalid definition");
+ assert((Def->isExtractSubreg() ||
+ Def->isExtractSubregLike()) && "Invalid definition");
// We are looking at:
// Def = EXTRACT_SUBREG v0, sub0
@@ -937,17 +1355,26 @@ bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
if (DefSubReg)
return false;
+ if (!TII)
+ // We could handle the EXTRACT_SUBREG here, but we do not want to
+ // duplicate the code from the generic TII.
+ return false;
+
+ TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
+ if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
+ return false;
+
// Bails if we have to compose sub registers.
// Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
- if (Def->getOperand(1).getSubReg())
+ if (ExtractSubregInputReg.SubReg)
return false;
// Otherwise, the value is available in the v0.sub0.
- SrcIdx = 1;
- SrcSubReg = Def->getOperand(2).getImm();
+ SrcReg = ExtractSubregInputReg.Reg;
+ SrcSubReg = ExtractSubregInputReg.SubIdx;
return true;
}
-bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
unsigned &SrcSubReg) {
assert(Def->isSubregToReg() && "Invalid definition");
// We are looking at:
@@ -964,37 +1391,37 @@ bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
if (Def->getOperand(2).getSubReg())
return false;
- SrcIdx = 2;
+ SrcReg = Def->getOperand(2).getReg();
SrcSubReg = Def->getOperand(3).getImm();
return true;
}
-bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) {
+bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) {
assert(Def && "This method needs a valid definition");
assert(
(DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
if (Def->isCopy())
- return getNextSourceFromCopy(SrcIdx, SrcSubReg);
+ return getNextSourceFromCopy(SrcReg, SrcSubReg);
if (Def->isBitcast())
- return getNextSourceFromBitcast(SrcIdx, SrcSubReg);
+ return getNextSourceFromBitcast(SrcReg, SrcSubReg);
// All the remaining cases involve "complex" instructions.
// Bails if we did not ask for the advanced tracking.
if (!UseAdvancedTracking)
return false;
- if (Def->isRegSequence())
- return getNextSourceFromRegSequence(SrcIdx, SrcSubReg);
- if (Def->isInsertSubreg())
- return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg);
- if (Def->isExtractSubreg())
- return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg);
+ if (Def->isRegSequence() || Def->isRegSequenceLike())
+ return getNextSourceFromRegSequence(SrcReg, SrcSubReg);
+ if (Def->isInsertSubreg() || Def->isInsertSubregLike())
+ return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg);
+ if (Def->isExtractSubreg() || Def->isExtractSubregLike())
+ return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg);
if (Def->isSubregToReg())
- return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg);
+ return getNextSourceFromSubregToReg(SrcReg, SrcSubReg);
return false;
}
-const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
+const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg,
unsigned &SrcSubReg) {
// If we reach a point where we cannot move up in the use-def chain,
// there is nothing we can get.
@@ -1003,20 +1430,18 @@ const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
const MachineInstr *PrevDef = nullptr;
// Try to find the next source.
- if (getNextSourceImpl(SrcIdx, SrcSubReg)) {
+ if (getNextSourceImpl(SrcReg, SrcSubReg)) {
// Update definition, definition index, and subregister for the
// next call of getNextSource.
- const MachineOperand &MO = Def->getOperand(SrcIdx);
- assert(MO.isReg() && !MO.isDef() && "Source is invalid");
// Update the current register.
- Reg = MO.getReg();
+ Reg = SrcReg;
// Update the return value before moving up in the use-def chain.
PrevDef = Def;
// If we can still move up in the use-def chain, move to the next
// definition.
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
- Def = MRI->getVRegDef(Reg);
- DefIdx = MRI->def_begin(Reg).getOperandNo();
+ Def = MRI.getVRegDef(Reg);
+ DefIdx = MRI.def_begin(Reg).getOperandNo();
DefSubReg = SrcSubReg;
return PrevDef;
}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index db3933e..89e1d11 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -41,7 +41,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -98,6 +97,11 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ bool enablePostRAScheduler(
+ const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
};
char PostRAScheduler::ID = 0;
@@ -132,10 +136,10 @@ namespace {
public:
SchedulePostRATDList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- AliasAnalysis *AA, const RegisterClassInfo&,
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs);
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
~SchedulePostRATDList();
@@ -188,16 +192,17 @@ INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
"Post RA top-down list latency scheduler", false, false)
SchedulePostRATDList::SchedulePostRATDList(
- MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- AliasAnalysis *AA, const RegisterClassInfo &RCI,
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
-
- const TargetMachine &TM = MF.getTarget();
- const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
+
+ const InstrItineraryData *InstrItins =
+ MF.getSubtarget().getInstrItineraryData();
HazardRec =
- TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+ MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
+ InstrItins, this);
assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
MRI.tracksLiveness()) &&
@@ -245,13 +250,23 @@ void SchedulePostRATDList::dumpSchedule() const {
}
#endif
+bool PostRAScheduler::enablePostRAScheduler(
+ const TargetSubtargetInfo &ST,
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
+ Mode = ST.getAntiDepBreakMode();
+ ST.getCriticalPathRCs(CriticalPathRCs);
+ return ST.enablePostMachineScheduler() &&
+ OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
+}
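For reference, a target opts into this path through the TargetSubtargetInfo hooks queried above; a minimal sketch for a hypothetical target (class and register-class names are made up):

    bool MyTargetSubtarget::enablePostMachineScheduler() const { return true; }

    TargetSubtargetInfo::AntiDepBreakMode
    MyTargetSubtarget::getAntiDepBreakMode() const {
      // Break anti-dependencies only on the critical path.
      return ANTIDEP_CRITICAL;
    }

    void MyTargetSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
      CriticalPathRCs.clear();
      CriticalPathRCs.push_back(&MyTarget::GPR64RegClass); // hypothetical class
    }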
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (skipOptnoneFunction(*Fn.getFunction()))
return false;
- TII = Fn.getTarget().getInstrInfo();
+ TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
@@ -267,9 +282,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
} else {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
- const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
- CriticalPathRCs))
+ const TargetSubtargetInfo &ST =
+ Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
+ if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
+ AntiDepMode, CriticalPathRCs))
return false;
}
@@ -284,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
DEBUG(dbgs() << "PostRAScheduler\n");
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
+ SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
CriticalPathRCs);
// Loop over all of the basic blocks
@@ -543,10 +559,10 @@ void SchedulePostRATDList::ListScheduleTopDown() {
if (HT == ScheduleHazardRecognizer::NoHazard) {
if (HazardRec->ShouldPreferAnother(CurSUnit)) {
if (!NotPreferredSUnit) {
- // If this is the first non-preferred node for this cycle, then
- // record it and continue searching for a preferred node. If this
- // is not the first non-preferred node, then treat it as though
- // there had been a hazard.
+ // If this is the first non-preferred node for this cycle, then
+ // record it and continue searching for a preferred node. If this
+ // is not the first non-preferred node, then treat it as though
+ // there had been a hazard.
NotPreferredSUnit = CurSUnit;
continue;
}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 3129927..b153800 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -138,8 +139,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
assert(WorkList.empty() && "Inconsistent worklist state");
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index b98d210..06530b9 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -41,6 +41,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -110,8 +111,8 @@ typedef SmallSetVector<int, 8> StackObjSet;
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const Function* F = Fn.getFunction();
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
- const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
@@ -185,8 +186,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
- const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
- const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
@@ -239,8 +240,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
/// registers.
void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
- const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo();
- const TargetFrameLowering *TFI = F.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = F.getFrameInfo();
// Get the callee saved register list...
@@ -337,9 +338,9 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
if (CSI.empty())
return;
- const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
- const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
MachineBasicBlock::iterator I;
// Spill using target interface.
@@ -445,7 +446,7 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
StackProtector *SP = &getAnalysis<StackProtector>();
bool StackGrowsDown =
@@ -515,7 +516,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// incoming stack pointer if a frame pointer is required and is closer
// to the incoming rather than the final stack pointer.
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
TFI.isFPCloseToIncomingSP() &&
RegInfo->useFPForScavengingIndex(Fn) &&
@@ -670,7 +671,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
/// prolog and epilog code to the function.
///
void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
// Add prologue to the function...
TFI.emitPrologue(Fn);
@@ -710,8 +711,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
SmallPtrSet<MachineBasicBlock*, 8> Reachable;
// Iterate over the reachable blocks in DFS order.
- for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
- DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+ for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
DFI != DFE; ++DFI) {
int SPAdj = 0;
// Check the exit state of the DFS stack predecessor.
@@ -738,11 +738,11 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
int &SPAdj) {
- const TargetMachine &TM = Fn.getTarget();
- assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
- const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
+ assert(Fn.getSubtarget().getRegisterInfo() &&
+ "getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
@@ -837,7 +837,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
/// iterate over the vreg use list, which at this point only contains machine
/// operands for which eliminateFrameIndex need a new scratch reg.
-void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+void
+PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Run through the instructions and find any virtual registers.
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
@@ -888,12 +889,16 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
+ MachineRegisterInfo &MRI = Fn.getRegInfo();
Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+ // Make sure MRI now accounts for this register being used.
+ MRI.setPhysRegUsed(ScratchReg);
// Because this instruction was processed by the RS before this
// register was allocated, make sure that the RS now records the
// register as being used.
- RS->setUsed(ScratchReg);
+ RS->setRegUsed(ScratchReg);
}
}
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index 5a6d39a..f88b8ef 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -16,8 +16,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_PEI_H
-#define LLVM_CODEGEN_PEI_H
+#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
+#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SparseBitVector.h"
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 12b2c90..b1c341d 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -107,13 +107,9 @@ bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
}
bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
- // Negative frame indices are used for special things that don't
- // appear in LLVM IR. Non-negative indices may be used for things
- // like static allocas.
if (!MFI)
- return FI >= 0;
- // Spill slots should not alias others.
- return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI);
+ return true;
+ return MFI->isAliasedObjectIndex(FI);
}
bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 894aee7..122afd1 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#ifndef NDEBUG
#include "llvm/ADT/SparseBitVector.h"
@@ -102,7 +101,7 @@ void RegAllocBase::allocatePhysRegs() {
// register if possible and populate a list of new live intervals that
// result from splitting.
DEBUG(dbgs() << "\nselectOrSplit "
- << MRI->getRegClass(VirtReg->reg)->getName()
+ << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
<< ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
typedef SmallVector<unsigned, 4> VirtRegVec;
VirtRegVec SplitVRegs;
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index b333c36..bbd79cd 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -34,8 +34,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_REGALLOCBASE
-#define LLVM_CODEGEN_REGALLOCBASE
+#ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H
+#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
@@ -106,4 +106,4 @@ private:
} // end namespace llvm
-#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
+#endif
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index b722098..0090332 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -33,7 +33,6 @@
#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cstdlib>
#include <queue>
@@ -157,7 +156,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
}
void RABasic::releaseMemory() {
- SpillerInstance.reset(nullptr);
+ SpillerInstance.reset();
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 97b9f76..8fc10b4 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -33,7 +33,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -53,7 +53,6 @@ namespace {
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
isBulkSpilling(false) {}
private:
- const TargetMachine *TM;
MachineFunction *MF;
MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
@@ -298,7 +297,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
LiveDbgValueMap[LRI->VirtReg];
for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
MachineInstr *DBG = LRIDbgValues[li];
- const MDNode *MDPtr = DBG->getOperand(2).getMetadata();
+ const MDNode *Var = DBG->getDebugVariable();
+ const MDNode *Expr = DBG->getDebugExpression();
bool IsIndirect = DBG->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
DebugLoc DL;
@@ -308,10 +308,13 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
DL = (--EI)->getDebugLoc();
} else
DL = MI->getDebugLoc();
- MachineBasicBlock *MBB = DBG->getParent();
MachineInstr *NewDV =
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr);
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
}
@@ -545,7 +548,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
}
DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
- << RC->getName() << "\n");
+ << TRI->getRegClassName(RC) << "\n");
unsigned BestReg = 0, BestCost = spillImpossible;
for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
@@ -705,7 +708,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
continue;
if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
(MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
- if (ThroughRegs.insert(Reg))
+ if (ThroughRegs.insert(Reg).second)
DEBUG(dbgs() << ' ' << PrintReg(Reg));
}
}
@@ -862,13 +865,16 @@ void RAFast::AllocateBasicBlock() {
// Modify DBG_VALUE now that the value is in a spill slot.
bool IsIndirect = MI->isIndirectDebugValue();
uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *MDPtr =
- MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ const MDNode *Var = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock *MBB = MI->getParent();
MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL,
TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr);
+ .addFrameIndex(SS)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
DEBUG(dbgs() << "Modifying debug info due to spill:"
<< "\t" << *NewDV);
// Scan NewDV operands from the beginning.
@@ -1070,9 +1076,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
<< "********** Function: " << Fn.getName() << '\n');
MF = &Fn;
MRI = &MF->getRegInfo();
- TM = &Fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.clear();
@@ -1093,9 +1098,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
}
// Add the clobber lists for all the instructions we skipped earlier.
- for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
- I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
- if (const uint16_t *Defs = (*I)->getImplicitDefs())
+ for (const MCInstrDesc *Desc : SkippedInstrs)
+ if (const uint16_t *Defs = Desc->getImplicitDefs())
while (*Defs)
MRI->setPhysRegUsed(*Defs++);
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 901b993..8ef5dcd 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -486,7 +486,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
}
void RAGreedy::releaseMemory() {
- SpillerInstance.reset(nullptr);
+ SpillerInstance.reset();
ExtraRegInfo.clear();
GlobalCand.clear();
}
@@ -514,7 +514,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
- bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() &&
+ bool ForceGlobal = !ReverseLocal &&
(Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs());
if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
@@ -817,7 +817,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
unsigned MinCost = RegClassInfo.getMinCost(RC);
if (MinCost >= CostPerUseLimit) {
- DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
+ DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
<< ", no cheaper registers to be found.\n");
return 0;
}
@@ -967,14 +967,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
BCS[B].Exit = SpillPlacement::PrefSpill;
if (++B == GroupSize) {
- ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
- SpillPlacer->addConstraints(Array);
+ SpillPlacer->addConstraints(makeArrayRef(BCS, B));
B = 0;
}
}
- ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B);
- SpillPlacer->addConstraints(Array);
+ SpillPlacer->addConstraints(makeArrayRef(BCS, B));
SpillPlacer->addLinks(makeArrayRef(TBS, T));
}
@@ -1013,7 +1011,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Compute through constraints from the interference, or assume that all
// through blocks prefer spilling when forming compact regions.
- ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
+ auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
if (Cand.PhysReg)
addThroughConstraints(Cand.Intf, NewBlocks);
else
@@ -1791,9 +1789,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// instructions.
//
// Try to guess the size of the new interval.
- const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
- Uses[SplitBefore].distance(Uses[SplitAfter]) +
- (LiveBefore + LiveAfter)*SlotIndex::InstrDist);
+ const float EstWeight = normalizeSpillWeight(
+ blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
+ 1);
// Would this split be possible to allocate?
// Never allocate all gaps, we wouldn't be making progress.
DEBUG(dbgs() << " w=" << EstWeight);
@@ -2319,13 +2319,13 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
<< "********** Function: " << mf.getName() << '\n');
MF = &mf;
- const TargetMachine &TM = MF->getTarget();
- TRI = TM.getRegisterInfo();
- TII = TM.getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
RCI.runOnMachineFunction(mf);
EnableLocalReassign = EnableLocalReassignment ||
- TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel());
+ MF->getSubtarget().enableRALocalReassignment(
+ MF->getTarget().getOptLevel());
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
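
Two hunks above drop explicitly typed ArrayRef temporaries in favour of
makeArrayRef(BCS, B), leaning on template argument deduction. Below is a small
self-contained analogue; ArrayRefSketch and makeArrayRefSketch are invented
names for illustration and are not LLVM API.

#include <cstddef>

// Non-owning pointer-plus-length view, in the spirit of llvm::ArrayRef.
template <typename T> struct ArrayRefSketch {
  const T *Data;
  std::size_t Length;
};

// Factory whose template argument deduction spares callers from naming T,
// which is why the hunks above prefer makeArrayRef(BCS, B) over a typed
// temporary.
template <typename T>
ArrayRefSketch<T> makeArrayRefSketch(const T *Data, std::size_t Length) {
  return {Data, Length};
}

int main() {
  int BCS[8] = {};
  auto View = makeArrayRefSketch(BCS, 3); // element type deduced as int
  (void)View;
  return 0;
}
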
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index b8d2325..eb7e5633 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -49,9 +49,10 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <limits>
#include <memory>
+#include <queue>
#include <set>
#include <sstream>
#include <vector>
@@ -61,17 +62,17 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
static RegisterRegAlloc
-registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
createDefaultPBQPRegisterAllocator);
static cl::opt<bool>
-pbqpCoalescing("pbqp-coalescing",
+PBQPCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
#ifndef NDEBUG
static cl::opt<bool>
-pbqpDumpGraphs("pbqp-dump-graphs",
+PBQPDumpGraphs("pbqp-dump-graphs",
cl::desc("Dump graphs for each function/round in the compilation unit."),
cl::init(false), cl::Hidden);
#endif
@@ -88,8 +89,8 @@ public:
static char ID;
/// Construct a PBQP register allocator.
- RegAllocPBQP(std::unique_ptr<PBQPBuilder> &b, char *cPassID=nullptr)
- : MachineFunctionPass(ID), builder(b.release()), customPassID(cPassID) {
+ RegAllocPBQP(char *cPassID = nullptr)
+ : MachineFunctionPass(ID), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
@@ -117,301 +118,320 @@ private:
typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
typedef std::set<unsigned> RegSet;
- std::unique_ptr<PBQPBuilder> builder;
-
char *customPassID;
- MachineFunction *mf;
- const TargetMachine *tm;
- const TargetRegisterInfo *tri;
- const TargetInstrInfo *tii;
- MachineRegisterInfo *mri;
- const MachineBlockFrequencyInfo *mbfi;
-
- std::unique_ptr<Spiller> spiller;
- LiveIntervals *lis;
- LiveStacks *lss;
- VirtRegMap *vrm;
-
- RegSet vregsToAlloc, emptyIntervalVRegs;
+ RegSet VRegsToAlloc, EmptyIntervalVRegs;
/// \brief Finds the initial set of vreg intervals to allocate.
- void findVRegIntervalsToAlloc();
+ void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
+
+ /// \brief Constructs an initial graph.
+ void initializeGraph(PBQPRAGraph &G);
/// \brief Given a solved PBQP problem maps this solution back to a register
/// assignment.
- bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
- const PBQP::Solution &solution);
+ bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
+ const PBQP::Solution &Solution,
+ VirtRegMap &VRM,
+ Spiller &VRegSpiller);
/// \brief Postprocessing before final spilling. Sets basic block "live in"
/// variables.
- void finalizeAlloc() const;
+ void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM) const;
};
char RegAllocPBQP::ID = 0;
-} // End anonymous namespace.
-
-unsigned PBQPRAProblem::getVRegForNode(PBQPRAGraph::NodeId node) const {
- Node2VReg::const_iterator vregItr = node2VReg.find(node);
- assert(vregItr != node2VReg.end() && "No vreg for node.");
- return vregItr->second;
-}
-
-PBQPRAGraph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
- VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
- assert(nodeItr != vreg2Node.end() && "No node for vreg.");
- return nodeItr->second;
-
-}
+/// @brief Set spill costs for each node in the PBQP reg-alloc graph.
+class SpillCosts : public PBQPRAConstraint {
+public:
+ void apply(PBQPRAGraph &G) override {
+ LiveIntervals &LIS = G.getMetadata().LIS;
+
+ // A minimum spill cost, so that register constraints can be set
+ // without normalization in the [0.0, MinSpillCost) interval.
+ const PBQP::PBQPNum MinSpillCost = 10.0;
+
+ for (auto NId : G.nodeIds()) {
+ PBQP::PBQPNum SpillCost =
+ LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
+ if (SpillCost == 0.0)
+ SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
+ else
+ SpillCost += MinSpillCost;
+ PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
+ NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
+ G.setNodeCosts(NId, std::move(NodeCosts));
+ }
+ }
+};
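
SpillCosts above fills in each node's cost vector: option 0 is the spill
option (getSpillOptionIdx), options 1..N correspond to the allowed physical
registers, a zero-weight interval gets the smallest positive cost, and
non-zero weights are shifted up by MinSpillCost so register constraints can be
encoded below that floor. A plain-C++ sketch of just that cost-vector
bookkeeping, with made-up numbers and no PBQP types:

#include <cstdio>
#include <limits>
#include <vector>

int main() {
  // Index 0 is the spill option; indices 1..3 stand for allowed registers.
  std::vector<double> NodeCosts(1 + 3, 0.0);

  const double MinSpillCost = 10.0; // same floor as in the hunk above
  double IntervalWeight = 2.5;      // pretend spill weight of the interval

  double SpillCost = IntervalWeight;
  if (SpillCost == 0.0)
    SpillCost = std::numeric_limits<double>::min(); // tiny but non-zero
  else
    SpillCost += MinSpillCost;

  NodeCosts[0] = SpillCost;
  std::printf("spill option cost = %g\n", NodeCosts[0]); // prints 12.5
  return 0;
}
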
-const PBQPRAProblem::AllowedSet&
- PBQPRAProblem::getAllowedSet(unsigned vreg) const {
- AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
- assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
- const AllowedSet &allowedSet = allowedSetItr->second;
- return allowedSet;
-}
+/// @brief Add interference edges between overlapping vregs.
+class Interference : public PBQPRAConstraint {
+private:
-unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
- assert(isPRegOption(vreg, option) && "Not a preg option.");
- const AllowedSet& allowedSet = getAllowedSet(vreg);
- assert(option <= allowedSet.size() && "Option outside allowed set.");
- return allowedSet[option - 1];
-}
+ typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
+ typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
+ typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
-PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
- const MachineBlockFrequencyInfo *mbfi,
- const RegSet &vregs) {
+ // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
+ // for the fast interference graph construction algorithm. The last is there
+ // to save us from looking up node ids via the VRegToNode map in the graph
+ // metadata.
+ typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>
+ IntervalInfo;
- LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
- MachineRegisterInfo *mri = &mf->getRegInfo();
- const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+ static SlotIndex getStartPoint(const IntervalInfo &I) {
+ return std::get<0>(I)->segments[std::get<1>(I)].start;
+ }
- std::unique_ptr<PBQPRAProblem> p(new PBQPRAProblem());
- PBQPRAGraph &g = p->getGraph();
- RegSet pregs;
+ static SlotIndex getEndPoint(const IntervalInfo &I) {
+ return std::get<0>(I)->segments[std::get<1>(I)].end;
+ }
- // Collect the set of preg intervals, record that they're used in the MF.
- for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) {
- if (mri->def_empty(Reg))
- continue;
- pregs.insert(Reg);
- mri->setPhysRegUsed(Reg);
+ static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
+ return std::get<2>(I);
}
- // Iterate over vregs.
- for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
- vregItr != vregEnd; ++vregItr) {
- unsigned vreg = *vregItr;
- const TargetRegisterClass *trc = mri->getRegClass(vreg);
- LiveInterval *vregLI = &LIS->getInterval(vreg);
+ static bool lowestStartPoint(const IntervalInfo &I1,
+ const IntervalInfo &I2) {
+ // Condition reversed because priority queue has the *highest* element at
+ // the front, rather than the lowest.
+ return getStartPoint(I1) > getStartPoint(I2);
+ }
- // Record any overlaps with regmask operands.
- BitVector regMaskOverlaps;
- LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
+ static bool lowestEndPoint(const IntervalInfo &I1,
+ const IntervalInfo &I2) {
+ SlotIndex E1 = getEndPoint(I1);
+ SlotIndex E2 = getEndPoint(I2);
- // Compute an initial allowed set for the current vreg.
- typedef std::vector<unsigned> VRAllowed;
- VRAllowed vrAllowed;
- ArrayRef<MCPhysReg> rawOrder = trc->getRawAllocationOrder(*mf);
- for (unsigned i = 0; i != rawOrder.size(); ++i) {
- unsigned preg = rawOrder[i];
- if (mri->isReserved(preg))
- continue;
+ if (E1 < E2)
+ return true;
- // vregLI crosses a regmask operand that clobbers preg.
- if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg))
- continue;
+ if (E1 > E2)
+ return false;
- // vregLI overlaps fixed regunit interference.
- bool Interference = false;
- for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) {
- if (vregLI->overlaps(LIS->getRegUnit(*Units))) {
- Interference = true;
- break;
- }
- }
- if (Interference)
- continue;
-
- // preg is usable for this virtual register.
- vrAllowed.push_back(preg);
- }
-
- PBQP::Vector nodeCosts(vrAllowed.size() + 1, 0);
+ // If two intervals end at the same point, we need a way to break the tie or
+ // the set will assume they're actually equal and refuse to insert a
+ // "duplicate". Just compare the vregs - fast and guaranteed unique.
+ return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
+ }
- PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
- vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+ static bool isAtLastSegment(const IntervalInfo &I) {
+ return std::get<1>(I) == std::get<0>(I)->size() - 1;
+ }
- addSpillCosts(nodeCosts, spillCost);
+ static IntervalInfo nextSegment(const IntervalInfo &I) {
+ return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
+ }
- // Construct the node.
- PBQPRAGraph::NodeId nId = g.addNode(std::move(nodeCosts));
+public:
- // Record the mapping and allowed set in the problem.
- p->recordVReg(vreg, nId, vrAllowed.begin(), vrAllowed.end());
+ void apply(PBQPRAGraph &G) override {
+ // The following is loosely based on the linear scan algorithm introduced in
+ // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
+ // isn't linear, because the size of the active set isn't bound by the
+ // number of registers, but rather the size of the largest clique in the
+ // graph. Still, we expect this to be better than N^2.
+ LiveIntervals &LIS = G.getMetadata().LIS;
+
+ // Interference matrices are incredibly regular - they're only a function of
+ // the allowed sets, so we cache them to avoid the overhead of constructing
+ // and uniquing them.
+ IMatrixCache C;
+
+ typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
+ typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
+ decltype(&lowestStartPoint)> IntervalQueue;
+ IntervalSet Active(lowestEndPoint);
+ IntervalQueue Inactive(lowestStartPoint);
+
+ // Start by building the inactive set.
+ for (auto NId : G.nodeIds()) {
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ LiveInterval &LI = LIS.getInterval(VReg);
+ assert(!LI.empty() && "PBQP graph contains node for empty interval");
+ Inactive.push(std::make_tuple(&LI, 0, NId));
+ }
- }
+ while (!Inactive.empty()) {
+ // Tentatively grab the "next" interval - this choice may be overridden
+ // below.
+ IntervalInfo Cur = Inactive.top();
+
+ // Retire any active intervals that end before Cur starts.
+ IntervalSet::iterator RetireItr = Active.begin();
+ while (RetireItr != Active.end() &&
+ (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
+ // If this interval has subsequent segments, add the next one to the
+ // inactive list.
+ if (!isAtLastSegment(*RetireItr))
+ Inactive.push(nextSegment(*RetireItr));
+
+ ++RetireItr;
+ }
+ Active.erase(Active.begin(), RetireItr);
+
+ // One of the newly retired segments may actually start before the
+ // Cur segment, so re-grab the front of the inactive list.
+ Cur = Inactive.top();
+ Inactive.pop();
+
+ // At this point we know that Cur overlaps all active intervals. Add the
+ // interference edges.
+ PBQP::GraphBase::NodeId NId = getNodeId(Cur);
+ for (const auto &A : Active) {
+ PBQP::GraphBase::NodeId MId = getNodeId(A);
+
+ // Check that we haven't already added this edge
+ // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
+ // It might be better to replace this with a local bit-matrix.
+ if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+ continue;
- for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
- vr1Itr != vrEnd; ++vr1Itr) {
- unsigned vr1 = *vr1Itr;
- const LiveInterval &l1 = lis->getInterval(vr1);
- const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
-
- for (RegSet::const_iterator vr2Itr = std::next(vr1Itr); vr2Itr != vrEnd;
- ++vr2Itr) {
- unsigned vr2 = *vr2Itr;
- const LiveInterval &l2 = lis->getInterval(vr2);
- const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
-
- assert(!l2.empty() && "Empty interval in vreg set?");
- if (l1.overlaps(l2)) {
- PBQP::Matrix edgeCosts(vr1Allowed.size()+1, vr2Allowed.size()+1, 0);
- addInterferenceCosts(edgeCosts, vr1Allowed, vr2Allowed, tri);
-
- g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
- std::move(edgeCosts));
+ // This is a new edge - add it to the graph.
+ createInterferenceEdge(G, NId, MId, C);
}
+
+ // Finally, add Cur to the Active set.
+ Active.insert(Cur);
}
}
- return p.release();
-}
+private:
-void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
- PBQP::PBQPNum spillCost) {
- costVec[0] = spillCost;
-}
+ void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId, IMatrixCache &C) {
-void PBQPBuilder::addInterferenceCosts(
- PBQP::Matrix &costMat,
- const PBQPRAProblem::AllowedSet &vr1Allowed,
- const PBQPRAProblem::AllowedSet &vr2Allowed,
- const TargetRegisterInfo *tri) {
- assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
- assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+ const TargetRegisterInfo &TRI =
+ *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
- for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
- unsigned preg1 = vr1Allowed[i];
+ const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
+ const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
- for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
- unsigned preg2 = vr2Allowed[j];
+ // Try looking the edge costs up in the IMatrixCache first.
+ IMatrixKey K(&NRegs, &MRegs);
+ IMatrixCache::iterator I = C.find(K);
+ if (I != C.end()) {
+ G.addEdgeBypassingCostAllocator(NId, MId, I->second);
+ return;
+ }
- if (tri->regsOverlap(preg1, preg2)) {
- costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ for (unsigned I = 0; I != NRegs.size(); ++I) {
+ unsigned PRegN = NRegs[I];
+ for (unsigned J = 0; J != MRegs.size(); ++J) {
+ unsigned PRegM = MRegs[J];
+ if (TRI.regsOverlap(PRegN, PRegM))
+ M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
}
}
+
+ PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
+ C[K] = G.getEdgeCostsPtr(EId);
}
-}
+};
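
The apply() method above sweeps intervals in order of increasing start point
while keeping an "active" set ordered by end point, and adds an interference
edge for every pair that is live at the same time. Below is a deliberately
simplified, self-contained sketch of that sweep: single-segment intervals,
invented data, and plain standard containers instead of the PBQP graph.

#include <cstdio>
#include <queue>
#include <set>
#include <vector>

struct Seg { int Start, End, Id; };

int main() {
  // Highest priority = lowest start point, hence the reversed comparison.
  auto ByStart = [](const Seg &A, const Seg &B) { return A.Start > B.Start; };
  // The active set is ordered by end point; ties are broken on Id so that
  // distinct intervals are never treated as duplicates.
  auto ByEnd = [](const Seg &A, const Seg &B) {
    return A.End != B.End ? A.End < B.End : A.Id < B.Id;
  };

  std::priority_queue<Seg, std::vector<Seg>, decltype(ByStart)> Inactive(ByStart);
  std::set<Seg, decltype(ByEnd)> Active(ByEnd);

  for (Seg S : {Seg{0, 4, 0}, Seg{2, 6, 1}, Seg{5, 7, 2}})
    Inactive.push(S);

  while (!Inactive.empty()) {
    Seg Cur = Inactive.top();
    Inactive.pop();
    // Retire every active interval that ends at or before Cur's start.
    while (!Active.empty() && Active.begin()->End <= Cur.Start)
      Active.erase(Active.begin());
    // Everything still active overlaps Cur: record an interference edge.
    for (const Seg &A : Active)
      std::printf("interference: %d <-> %d\n", A.Id, Cur.Id);
    Active.insert(Cur);
  }
  return 0;
}
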
-PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
- const LiveIntervals *lis,
- const MachineBlockFrequencyInfo *mbfi,
- const RegSet &vregs) {
- std::unique_ptr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs));
- PBQPRAGraph &g = p->getGraph();
+class Coalescing : public PBQPRAConstraint {
+public:
+ void apply(PBQPRAGraph &G) override {
+ MachineFunction &MF = G.getMetadata().MF;
+ MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
+ CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo());
+
+ // Scan the machine function and add a coalescing cost whenever CoalescerPair
+ // gives the OK.
+ for (const auto &MBB : MF) {
+ for (const auto &MI : MBB) {
+
+ // Skip not-coalescable or already coalesced copies.
+ if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
+ continue;
- const TargetMachine &tm = mf->getTarget();
- CoalescerPair cp(*tm.getRegisterInfo());
+ unsigned DstReg = CP.getDstReg();
+ unsigned SrcReg = CP.getSrcReg();
- // Scan the machine function and add a coalescing cost whenever CoalescerPair
- // gives the Ok.
- for (const auto &mbb : *mf) {
- for (const auto &mi : mbb) {
- if (!cp.setRegisters(&mi)) {
- continue; // Not coalescable.
- }
+ const float Scale = 1.0f / MBFI.getEntryFreq();
+ PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
- if (cp.getSrcReg() == cp.getDstReg()) {
- continue; // Already coalesced.
- }
+ if (CP.isPhys()) {
+ if (!MF.getRegInfo().isAllocatable(DstReg))
+ continue;
- unsigned dst = cp.getDstReg(),
- src = cp.getSrcReg();
+ PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
- const float copyFactor = 0.5; // Cost of copy relative to load. Current
- // value plucked randomly out of the air.
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
+ G.getNodeMetadata(NId).getAllowedRegs();
- PBQP::PBQPNum cBenefit =
- copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, &mi);
+ unsigned PRegOpt = 0;
+ while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg)
+ ++PRegOpt;
- if (cp.isPhys()) {
- if (!mf->getRegInfo().isAllocatable(dst)) {
- continue;
- }
-
- const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
- unsigned pregOpt = 0;
- while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
- ++pregOpt;
- }
- if (pregOpt < allowed.size()) {
- ++pregOpt; // +1 to account for spill option.
- PBQPRAGraph::NodeId node = p->getNodeForVReg(src);
- llvm::dbgs() << "Reading node costs for node " << node << "\n";
- llvm::dbgs() << "Source node: " << &g.getNodeCosts(node) << "\n";
- PBQP::Vector newCosts(g.getNodeCosts(node));
- addPhysRegCoalesce(newCosts, pregOpt, cBenefit);
- g.setNodeCosts(node, newCosts);
- }
- } else {
- const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
- const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
- PBQPRAGraph::NodeId node1 = p->getNodeForVReg(dst);
- PBQPRAGraph::NodeId node2 = p->getNodeForVReg(src);
- PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2);
- if (edge == g.invalidEdgeId()) {
- PBQP::Matrix costs(allowed1->size() + 1, allowed2->size() + 1, 0);
- addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit);
- g.addEdge(node1, node2, costs);
+ if (PRegOpt < Allowed.size()) {
+ PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
+ NewCosts[PRegOpt + 1] -= CBenefit;
+ G.setNodeCosts(NId, std::move(NewCosts));
+ }
} else {
- if (g.getEdgeNode1Id(edge) == node2) {
- std::swap(node1, node2);
- std::swap(allowed1, allowed2);
+ PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
+ PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
+ &G.getNodeMetadata(N1Id).getAllowedRegs();
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
+ &G.getNodeMetadata(N2Id).getAllowedRegs();
+
+ PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
+ if (EId == G.invalidEdgeId()) {
+ PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
+ Allowed2->size() + 1, 0);
+ addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
+ G.addEdge(N1Id, N2Id, std::move(Costs));
+ } else {
+ if (G.getEdgeNode1Id(EId) == N2Id) {
+ std::swap(N1Id, N2Id);
+ std::swap(Allowed1, Allowed2);
+ }
+ PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
+ addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
+ G.setEdgeCosts(EId, std::move(Costs));
}
- PBQP::Matrix costs(g.getEdgeCosts(edge));
- addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit);
- g.setEdgeCosts(edge, costs);
}
}
}
}
- return p.release();
-}
-
-void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
- unsigned pregOption,
- PBQP::PBQPNum benefit) {
- costVec[pregOption] += -benefit;
-}
-
-void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
- PBQP::Matrix &costMat,
- const PBQPRAProblem::AllowedSet &vr1Allowed,
- const PBQPRAProblem::AllowedSet &vr2Allowed,
- PBQP::PBQPNum benefit) {
-
- assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
- assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
-
- for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
- unsigned preg1 = vr1Allowed[i];
- for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
- unsigned preg2 = vr2Allowed[j];
+private:
- if (preg1 == preg2) {
- costMat[i + 1][j + 1] += -benefit;
+ void addVirtRegCoalesce(
+ PBQPRAGraph::RawMatrix &CostMat,
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
+ PBQP::PBQPNum Benefit) {
+ assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
+ assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
+ for (unsigned I = 0; I != Allowed1.size(); ++I) {
+ unsigned PReg1 = Allowed1[I];
+ for (unsigned J = 0; J != Allowed2.size(); ++J) {
+ unsigned PReg2 = Allowed2[J];
+ if (PReg1 == PReg2)
+ CostMat[I + 1][J + 1] -= Benefit;
}
}
}
-}
+};
+
+} // End anonymous namespace.
+
+// Out-of-line destructor/anchor for PBQPRAConstraint.
+PBQPRAConstraint::~PBQPRAConstraint() {}
+void PBQPRAConstraint::anchor() {}
+void PBQPRAConstraintList::anchor() {}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesCFG();
@@ -437,118 +457,197 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
MachineFunctionPass::getAnalysisUsage(au);
}
-void RegAllocPBQP::findVRegIntervalsToAlloc() {
+void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
+ LiveIntervals &LIS) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
// Iterate over all live ranges.
- for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (mri->reg_nodbg_empty(Reg))
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
continue;
- LiveInterval *li = &lis->getInterval(Reg);
+ LiveInterval &LI = LIS.getInterval(Reg);
// If this live interval is non-empty we will use pbqp to allocate it.
// Empty intervals we allocate in a simple post-processing stage in
// finalizeAlloc.
- if (!li->empty()) {
- vregsToAlloc.insert(li->reg);
+ if (!LI.empty()) {
+ VRegsToAlloc.insert(LI.reg);
} else {
- emptyIntervalVRegs.insert(li->reg);
+ EmptyIntervalVRegs.insert(LI.reg);
+ }
+ }
+}
+
+static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
+ const MachineFunction &MF) {
+ const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSR[i] != 0; ++i)
+ if (TRI.regsOverlap(reg, CSR[i]))
+ return true;
+ return false;
+}
+
+void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
+ MachineFunction &MF = G.getMetadata().MF;
+
+ LiveIntervals &LIS = G.getMetadata().LIS;
+ const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
+ const TargetRegisterInfo &TRI =
+ *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+
+ for (auto VReg : VRegsToAlloc) {
+ const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
+ LiveInterval &VRegLI = LIS.getInterval(VReg);
+
+ // Record any overlaps with regmask operands.
+ BitVector RegMaskOverlaps;
+ LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
+
+ // Compute an initial allowed set for the current vreg.
+ std::vector<unsigned> VRegAllowed;
+ ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
+ for (unsigned I = 0; I != RawPRegOrder.size(); ++I) {
+ unsigned PReg = RawPRegOrder[I];
+ if (MRI.isReserved(PReg))
+ continue;
+
+ // vregLI crosses a regmask operand that clobbers preg.
+ if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
+ continue;
+
+ // vregLI overlaps fixed regunit interference.
+ bool Interference = false;
+ for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) {
+ if (VRegLI.overlaps(LIS.getRegUnit(*Units))) {
+ Interference = true;
+ break;
+ }
+ }
+ if (Interference)
+ continue;
+
+ // preg is usable for this virtual register.
+ VRegAllowed.push_back(PReg);
}
+
+ PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
+
+ // Tweak cost of callee-saved registers, as using them forces spilling and
+ // restoring them. This would only happen in the prologue / epilogue though.
+ for (unsigned i = 0; i != VRegAllowed.size(); ++i)
+ if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
+ NodeCosts[1 + i] += 1.0;
+
+ PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
+ G.getNodeMetadata(NId).setVReg(VReg);
+ G.getNodeMetadata(NId).setAllowedRegs(
+ G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
+ G.getMetadata().setNodeIdForVReg(VReg, NId);
}
}
-bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
- const PBQP::Solution &solution) {
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
+ const PBQP::Solution &Solution,
+ VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
+ MachineFunction &MF = G.getMetadata().MF;
+ LiveIntervals &LIS = G.getMetadata().LIS;
+ const TargetRegisterInfo &TRI =
+ *MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ (void)TRI;
+
// Set to true if we have any spills
- bool anotherRoundNeeded = false;
+ bool AnotherRoundNeeded = false;
// Clear the existing allocation.
- vrm->clearAllVirt();
+ VRM.clearAllVirt();
- const PBQPRAGraph &g = problem.getGraph();
// Iterate over the nodes mapping the PBQP solution to a register
// assignment.
- for (auto NId : g.nodeIds()) {
- unsigned vreg = problem.getVRegForNode(NId);
- unsigned alloc = solution.getSelection(NId);
-
- if (problem.isPRegOption(vreg, alloc)) {
- unsigned preg = problem.getPRegForOption(vreg, alloc);
- DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> "
- << tri->getName(preg) << "\n");
- assert(preg != 0 && "Invalid preg selected.");
- vrm->assignVirt2Phys(vreg, preg);
- } else if (problem.isSpillOption(vreg, alloc)) {
- vregsToAlloc.erase(vreg);
- SmallVector<unsigned, 8> newSpills;
- LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
- spiller->spill(LRE);
-
- DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: "
+ for (auto NId : G.nodeIds()) {
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ unsigned AllocOption = Solution.getSelection(NId);
+
+ if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
+ unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
+ << TRI.getName(PReg) << "\n");
+ assert(PReg != 0 && "Invalid preg selected.");
+ VRM.assignVirt2Phys(VReg, PReg);
+ } else {
+ VRegsToAlloc.erase(VReg);
+ SmallVector<unsigned, 8> NewSpills;
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM);
+ VRegSpiller.spill(LRE);
+
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
<< LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
- for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
- itr != end; ++itr) {
- LiveInterval &li = lis->getInterval(*itr);
- assert(!li.empty() && "Empty spill range.");
- DEBUG(dbgs() << PrintReg(li.reg, tri) << " ");
- vregsToAlloc.insert(li.reg);
+ for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
+ I != E; ++I) {
+ LiveInterval &LI = LIS.getInterval(*I);
+ assert(!LI.empty() && "Empty spill range.");
+ DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
+ VRegsToAlloc.insert(LI.reg);
}
DEBUG(dbgs() << ")\n");
// We need another round if spill intervals were added.
- anotherRoundNeeded |= !LRE.empty();
- } else {
- llvm_unreachable("Unknown allocation option.");
+ AnotherRoundNeeded |= !LRE.empty();
}
}
- return !anotherRoundNeeded;
+ return !AnotherRoundNeeded;
}
+void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
+ LiveIntervals &LIS,
+ VirtRegMap &VRM) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
-void RegAllocPBQP::finalizeAlloc() const {
// First allocate registers for the empty intervals.
for (RegSet::const_iterator
- itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
- itr != end; ++itr) {
- LiveInterval *li = &lis->getInterval(*itr);
+ I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end();
+ I != E; ++I) {
+ LiveInterval &LI = LIS.getInterval(*I);
- unsigned physReg = mri->getSimpleHint(li->reg);
+ unsigned PReg = MRI.getSimpleHint(LI.reg);
- if (physReg == 0) {
- const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
- physReg = liRC->getRawAllocationOrder(*mf).front();
+ if (PReg == 0) {
+ const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
+ PReg = RC.getRawAllocationOrder(MF).front();
}
- vrm->assignVirt2Phys(li->reg, physReg);
+ VRM.assignVirt2Phys(LI.reg, PReg);
}
}
-bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
+ unsigned NumInstr) {
+ // All intervals have a spill weight that is mostly proportional to the number
+ // of uses, with uses in loops having a bigger weight.
+ return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1);
+}
- mf = &MF;
- tm = &mf->getTarget();
- tri = tm->getRegisterInfo();
- tii = tm->getInstrInfo();
- mri = &mf->getRegInfo();
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineBlockFrequencyInfo &MBFI =
+ getAnalysis<MachineBlockFrequencyInfo>();
- lis = &getAnalysis<LiveIntervals>();
- lss = &getAnalysis<LiveStacks>();
- mbfi = &getAnalysis<MachineBlockFrequencyInfo>();
+ calculateSpillWeightsAndHints(LIS, MF, getAnalysis<MachineLoopInfo>(), MBFI,
+ normalizePBQPSpillWeight);
- calculateSpillWeightsAndHints(*lis, MF, getAnalysis<MachineLoopInfo>(),
- *mbfi);
+ VirtRegMap &VRM = getAnalysis<VirtRegMap>();
- vrm = &getAnalysis<VirtRegMap>();
- spiller.reset(createInlineSpiller(*this, MF, *vrm));
+ std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
- mri->freezeReservedRegs(MF);
+ MF.getRegInfo().freezeReservedRegs(MF);
- DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n");
+ DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
// Allocator main loop:
//
@@ -560,72 +659,72 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// This process is continued till no more spills are generated.
// Find the vreg intervals in need of allocation.
- findVRegIntervalsToAlloc();
+ findVRegIntervalsToAlloc(MF, LIS);
#ifndef NDEBUG
- const Function* func = mf->getFunction();
- std::string fqn =
- func->getParent()->getModuleIdentifier() + "." +
- func->getName().str();
+ const Function &F = *MF.getFunction();
+ std::string FullyQualifiedName =
+ F.getParent()->getModuleIdentifier() + "." + F.getName().str();
#endif
// If there are non-empty intervals allocate them using pbqp.
- if (!vregsToAlloc.empty()) {
+ if (!VRegsToAlloc.empty()) {
- bool pbqpAllocComplete = false;
- unsigned round = 0;
+ const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl();
+ std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
+ llvm::make_unique<PBQPRAConstraintList>();
+ ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
+ ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
+ if (PBQPCoalescing)
+ ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
+ ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
- while (!pbqpAllocComplete) {
- DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
+ bool PBQPAllocComplete = false;
+ unsigned Round = 0;
- std::unique_ptr<PBQPRAProblem> problem(
- builder->build(mf, lis, mbfi, vregsToAlloc));
+ while (!PBQPAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
+
+ PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
+ initializeGraph(G);
+ ConstraintsRoot->apply(G);
#ifndef NDEBUG
- if (pbqpDumpGraphs) {
- std::ostringstream rs;
- rs << round;
- std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
- std::string tmp;
- raw_fd_ostream os(graphFileName.c_str(), tmp, sys::fs::F_Text);
- DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
- << graphFileName << "\"\n");
- problem->getGraph().dump(os);
+ if (PBQPDumpGraphs) {
+ std::ostringstream RS;
+ RS << Round;
+ std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
+ ".pbqpgraph";
+ std::error_code EC;
+ raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
+ DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
+ << GraphFileName << "\"\n");
+ G.dumpToStream(OS);
}
#endif
- PBQP::Solution solution =
- PBQP::RegAlloc::solve(problem->getGraph());
-
- pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
-
- ++round;
+ PBQP::Solution Solution = PBQP::RegAlloc::solve(G);
+ PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
+ ++Round;
}
}
// Finalise allocation, allocate empty ranges.
- finalizeAlloc();
- vregsToAlloc.clear();
- emptyIntervalVRegs.clear();
+ finalizeAlloc(MF, LIS, VRM);
+ VRegsToAlloc.clear();
+ EmptyIntervalVRegs.clear();
- DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
return true;
}
-FunctionPass *
-llvm::createPBQPRegisterAllocator(std::unique_ptr<PBQPBuilder> &builder,
- char *customPassID) {
- return new RegAllocPBQP(builder, customPassID);
+FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
+ return new RegAllocPBQP(customPassID);
}
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
- std::unique_ptr<PBQPBuilder> Builder;
- if (pbqpCoalescing)
- Builder.reset(new PBQPBuilderWithCoalescing());
- else
- Builder.reset(new PBQPBuilder());
- return createPBQPRegisterAllocator(Builder);
+ return createPBQPRegisterAllocator();
}
#undef DEBUG_TYPE
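
The rewrite above retires the monolithic PBQPBuilder in favour of small
PBQPRAConstraint passes (SpillCosts, Interference, optional Coalescing, plus
whatever the subtarget supplies) collected into a PBQPRAConstraintList and
applied to the graph on every allocation round. A compact sketch of that
composition pattern follows; the types are invented placeholders, not the real
PBQP classes.

#include <memory>
#include <utility>
#include <vector>

struct Graph { /* node and edge costs would live here */ };

struct Constraint {
  virtual ~Constraint() = default;
  virtual void apply(Graph &G) = 0;
};

struct SpillCosts : Constraint { void apply(Graph &) override {} };
struct Interference : Constraint { void apply(Graph &) override {} };

struct ConstraintList : Constraint {
  std::vector<std::unique_ptr<Constraint>> List;
  void add(std::unique_ptr<Constraint> C) {
    if (C) // a null entry (e.g. no custom subtarget constraint) is skipped
      List.push_back(std::move(C));
  }
  void apply(Graph &G) override {
    for (auto &C : List)
      C->apply(G);
  }
};

int main() {
  ConstraintList Root;
  Root.add(std::make_unique<SpillCosts>());
  Root.add(std::make_unique<Interference>());
  Graph G;
  Root.apply(G); // analogous to ConstraintsRoot->apply(G) each round above
  return 0;
}
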
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 8b5445c..e0d1aa2 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -20,7 +20,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -38,8 +37,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
MF = &mf;
// Allocate new array the first time we see a new target.
- if (MF->getTarget().getRegisterInfo() != TRI) {
- TRI = MF->getTarget().getRegisterInfo();
+ if (MF->getSubtarget().getRegisterInfo() != TRI) {
+ TRI = MF->getSubtarget().getRegisterInfo();
RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
unsigned NumPSets = TRI->getNumRegPressureSets();
PSetLimits.reset(new unsigned[NumPSets]);
@@ -138,7 +137,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
RCI.LastCostChange = LastCostChange;
DEBUG({
- dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
+ dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = [";
for (unsigned I = 0; I != RCI.NumRegs; ++I)
dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 65b0528..2d2dc92 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -94,11 +94,11 @@ namespace {
/// blocks exclusively containing copies.
bool JoinSplitEdges;
- /// WorkList - Copy instructions yet to be coalesced.
+ /// Copy instructions yet to be coalesced.
SmallVector<MachineInstr*, 8> WorkList;
SmallVector<MachineInstr*, 8> LocalWorkList;
- /// ErasedInstrs - Set of instruction pointers that have been erased, and
+ /// Set of instruction pointers that have been erased, and
/// that may be present in WorkList.
SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
@@ -114,21 +114,21 @@ namespace {
/// LiveRangeEdit callback.
void LRE_WillEraseInstruction(MachineInstr *MI) override;
- /// coalesceLocals - coalesce the LocalWorkList.
+ /// Coalesce the LocalWorkList.
void coalesceLocals();
- /// joinAllIntervals - join compatible live intervals
+ /// Join compatible live intervals
void joinAllIntervals();
- /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// Coalesce copies in the specified MBB, putting
/// copies that cannot yet be coalesced into WorkList.
void copyCoalesceInMBB(MachineBasicBlock *MBB);
- /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return
+ /// Try to coalesce all copies in CurrList. Return
/// true if any progress was made.
bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
- /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns
/// true if the copy was successfully coalesced away. If it is not
/// currently possible to coalesce this interval, but it may be possible if
@@ -136,7 +136,7 @@ namespace {
/// 'Again'.
bool joinCopy(MachineInstr *TheCopy, bool &Again);
- /// joinIntervals - Attempt to join these two intervals. On failure, this
+ /// Attempt to join these two intervals. On failure, this
/// returns false. The output "SrcInt" will not have been modified, so we
/// can use this information below to update aliases.
bool joinIntervals(CoalescerPair &CP);
@@ -147,39 +147,39 @@ namespace {
/// Attempt joining with a reserved physreg.
bool joinReservedPhysReg(CoalescerPair &CP);
- /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// We found a non-trivially-coalescable copy. If
/// the source value number is defined by a copy from the destination reg
/// see if we can merge these two destination reg valno# into a single
/// value number, eliminating a copy.
bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
- /// hasOtherReachingDefs - Return true if there are definitions of IntB
+ /// Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
VNInfo *AValNo, VNInfo *BValNo);
- /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// We found a non-trivially-coalescable copy.
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
- /// reMaterializeTrivialDef - If the source of a copy is defined by a
+ /// If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerializing the definition.
bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
bool &IsDefCopy);
- /// canJoinPhys - Return true if a physreg copy should be joined.
+ /// Return true if a physreg copy should be joined.
bool canJoinPhys(const CoalescerPair &CP);
- /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ /// Replace all defs and uses of SrcReg with DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
- /// eliminateUndefCopy - Handle copies of undef values.
+ /// Handle copies of undef values.
bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
public:
@@ -192,10 +192,10 @@ namespace {
void releaseMemory() override;
- /// runOnMachineFunction - pass entry point
+ /// This is the pass entry point.
bool runOnMachineFunction(MachineFunction&) override;
- /// print - Implement the dump method.
+ /// Implement the dump method.
void print(raw_ostream &O, const Module* = nullptr) const override;
};
} /// end anonymous namespace
@@ -407,7 +407,7 @@ void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
ErasedInstrs.insert(MI);
}
-/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// We found a non-trivially-coalescable copy with IntA
/// being the source and IntB being the dest, thus this defines a value number
/// in IntB. If the source value number (in IntA) is defined by a copy from B,
/// see if we can merge these two pieces of B into a single value number,
@@ -512,7 +512,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
return true;
}
-/// hasOtherReachingDefs - Return true if there are definitions of IntB
+/// Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
LiveInterval &IntB,
@@ -542,7 +542,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
return false;
}
-/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// We found a non-trivially-coalescable copy with
/// IntA being the source and IntB being the dest, thus this defines a value
/// number in IntB. If the source value number (in IntA) is defined by a
/// commutable instruction and its other operand is coalesced to the copy dest
@@ -725,7 +725,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return true;
}
-/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerializing the definition.
bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
MachineInstr *CopyMI,
@@ -904,7 +904,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
return true;
}
-/// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef>
+/// ProcessImplicitDefs may leave some copies of <undef>
/// values, it only removes local variables. When we have a copy like:
///
/// %vreg1 = COPY %vreg2<undef>
@@ -944,11 +944,10 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
return true;
}
-/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
-/// update the subregister number if it is not zero. If DstReg is a
-/// physical register and the existing subregister number of the def / use
-/// being updated is not zero, make sure to set it to the correct physical
-/// subregister.
+/// Replace all defs and uses of SrcReg with DstReg and update the subregister
+/// number if it is not zero. If DstReg is a physical register and the existing
+/// subregister number of the def / use being updated is not zero, make sure to
+/// set it to the correct physical subregister.
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
unsigned DstReg,
unsigned SubIdx) {
@@ -966,7 +965,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// the UseMI operands removes them from the SrcReg use-def chain, but when
// SrcReg is DstReg we could encounter UseMI twice if it has multiple
// operands mentioning the virtual register.
- if (SrcReg == DstReg && !Visited.insert(UseMI))
+ if (SrcReg == DstReg && !Visited.insert(UseMI).second)
continue;
SmallVector<unsigned,8> Ops;
@@ -1003,7 +1002,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
}
}
-/// canJoinPhys - Return true if a copy involving a physreg should be joined.
+/// Return true if a copy involving a physreg should be joined.
bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
@@ -1021,7 +1020,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
return false;
}
-/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
/// possible to coalesce this interval, but it may be possible if other
@@ -1037,6 +1036,22 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
return false;
}
+ if (CP.getNewRC()) {
+ auto SrcRC = MRI->getRegClass(CP.getSrcReg());
+ auto DstRC = MRI->getRegClass(CP.getDstReg());
+ unsigned SrcIdx = CP.getSrcIdx();
+ unsigned DstIdx = CP.getDstIdx();
+ if (CP.isFlipped()) {
+ std::swap(SrcIdx, DstIdx);
+ std::swap(SrcRC, DstRC);
+ }
+ if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
+ CP.getNewRC())) {
+ DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
+ return false;
+ }
+ }
+
// Dead code elimination. This really should be handled by MachineDCE, but
// sometimes dead copies slip through, and we can't generate invalid live
// ranges.
@@ -1090,9 +1105,14 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
return false;
}
} else {
+ // When possible, let DstReg be the larger interval.
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
+ LIS->getInterval(CP.getDstReg()).size())
+ CP.flip();
+
DEBUG({
- dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName()
- << " with ";
+ dbgs() << "\tConsidering merging to "
+ << TRI->getRegClassName(CP.getNewRC()) << " with ";
if (CP.getDstIdx() && CP.getSrcIdx())
dbgs() << PrintReg(CP.getDstReg()) << " in "
<< TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
@@ -1102,11 +1122,6 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
<< PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
});
-
- // When possible, let DstReg be the larger interval.
- if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
- LIS->getInterval(CP.getDstReg()).size())
- CP.flip();
}
// Okay, attempt to join these two intervals. On failure, this returns false.
@@ -1171,7 +1186,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
- dbgs() << "\tJoined. Result = ";
+ dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
+ << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
+ dbgs() << "\tResult = ";
if (CP.isPhys())
dbgs() << PrintReg(CP.getDstReg(), TRI);
else
@@ -1423,7 +1440,7 @@ public:
/// Add erased instructions to ErasedInstrs.
/// Add foreign virtual registers to ShrinkRegs if their live range ended at
/// the erased instrs.
- void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs);
/// Get the value assignments suitable for passing to LiveInterval::join.
@@ -1936,7 +1953,7 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
-void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
@@ -2035,8 +2052,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
return true;
}
-/// joinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false.
+/// Attempt to join these two intervals. On failure, this returns false.
bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
}
@@ -2208,8 +2224,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &fn.getRegInfo();
TM = &fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
+ TRI = TM->getSubtargetImpl()->getRegisterInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
@@ -2250,7 +2266,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
continue;
if (MRI->recomputeRegClass(Reg, *TM)) {
DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
- << MRI->getRegClass(Reg)->getName() << '\n');
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
++NumInflated;
}
}
@@ -2261,7 +2277,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
return true;
}
-/// print - Implement the dump method.
+/// Implement the dump method.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
LIS->print(O, m);
}
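
Several call sites in this commit, Visited.insert(UseMI).second above and
ThroughRegs.insert(Reg).second in RegAllocFast, adapt to SmallPtrSet::insert
now returning an iterator/bool pair rather than a plain bool. A self-contained
illustration of the idiom using std::set, which follows the same return
convention:

#include <cstdio>
#include <set>

int main() {
  std::set<unsigned> Visited;
  const unsigned Regs[] = {5, 7, 5};
  for (unsigned Reg : Regs) {
    // insert() returns {iterator, bool}; .second is true only when the
    // element was not already present, mirroring the .second checks above.
    if (Visited.insert(Reg).second)
      std::printf("first visit of %u\n", Reg);
    else
      std::printf("already visited %u\n", Reg);
  }
  return 0;
}
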
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index e57ceab..04067a1 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
-#define LLVM_CODEGEN_REGISTER_COALESCER_H
+#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
+#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
namespace llvm {
@@ -22,38 +22,36 @@ namespace llvm {
class TargetRegisterClass;
class TargetInstrInfo;
- /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// A helper class for register coalescers. When deciding if
/// two registers can be coalesced, CoalescerPair can determine if a copy
/// instruction would become an identity copy after coalescing.
class CoalescerPair {
const TargetRegisterInfo &TRI;
- /// DstReg - The register that will be left after coalescing. It can be a
+ /// The register that will be left after coalescing. It can be a
/// virtual or physical register.
unsigned DstReg;
- /// SrcReg - the virtual register that will be coalesced into dstReg.
+ /// The virtual register that will be coalesced into dstReg.
unsigned SrcReg;
- /// DstIdx - The sub-register index of the old DstReg in the new coalesced
- /// register.
+ /// The sub-register index of the old DstReg in the new coalesced register.
unsigned DstIdx;
- /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced
- /// register.
+ /// The sub-register index of the old SrcReg in the new coalesced register.
unsigned SrcIdx;
- /// Partial - True when the original copy was a partial subregister copy.
+ /// True when the original copy was a partial subregister copy.
bool Partial;
- /// CrossClass - True when both regs are virtual, and newRC is constrained.
+ /// True when both regs are virtual and newRC is constrained.
bool CrossClass;
- /// Flipped - True when DstReg and SrcReg are reversed from the original
+ /// True when DstReg and SrcReg are reversed from the original
/// copy instruction.
bool Flipped;
- /// NewRC - The register class of the coalesced register, or NULL if DstReg
+ /// The register class of the coalesced register, or NULL if DstReg
/// is a physreg. This register class may be a super-register of both
/// SrcReg and DstReg.
const TargetRegisterClass *NewRC;
@@ -70,49 +68,47 @@ namespace llvm {
: TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
- /// setRegisters - set registers to match the copy instruction MI. Return
+ /// Set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
bool setRegisters(const MachineInstr*);
- /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// Swap SrcReg and DstReg. Return false if swapping is impossible
/// because DstReg is a physical register, or SubIdx is set.
bool flip();
- /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// Return true if MI is a copy instruction that will become
/// an identity copy after coalescing.
bool isCoalescable(const MachineInstr*) const;
- /// isPhys - Return true if DstReg is a physical register.
+ /// Return true if DstReg is a physical register.
bool isPhys() const { return !NewRC; }
- /// isPartial - Return true if the original copy instruction did not copy
+ /// Return true if the original copy instruction did not copy
/// the full register, but was a subreg operation.
bool isPartial() const { return Partial; }
- /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// Return true if DstReg is virtual and NewRC is a smaller
/// register class than DstReg's.
bool isCrossClass() const { return CrossClass; }
- /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// Return true when getSrcReg is the register being defined by
/// the original copy instruction.
bool isFlipped() const { return Flipped; }
- /// getDstReg - Return the register (virtual or physical) that will remain
+ /// Return the register (virtual or physical) that will remain
/// after coalescing.
unsigned getDstReg() const { return DstReg; }
- /// getSrcReg - Return the virtual register that will be coalesced away.
+ /// Return the virtual register that will be coalesced away.
unsigned getSrcReg() const { return SrcReg; }
- /// getDstIdx - Return the subregister index that DstReg will be coalesced
- /// into, or 0.
+ /// Return the subregister index that DstReg will be coalesced into, or 0.
unsigned getDstIdx() const { return DstIdx; }
- /// getSrcIdx - Return the subregister index that SrcReg will be coalesced
- /// into, or 0.
+ /// Return the subregister index that SrcReg will be coalesced into, or 0.
unsigned getSrcIdx() const { return SrcIdx; }
- /// getNewRC - Return the register class of the coalesced register.
+ /// Return the register class of the coalesced register.
const TargetRegisterClass *getNewRC() const { return NewRC; }
};
} // End llvm namespace
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 617e459..9925efb 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -185,7 +184,7 @@ void RegPressureTracker::init(const MachineFunction *mf,
reset();
MF = mf;
- TRI = MF->getTarget().getRegisterInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
RCI = rci;
MRI = &MF->getRegInfo();
MBB = mbb;
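
The RegisterPressure change above is one instance of a wider pattern in this patch: register and instruction info is fetched from the per-function subtarget (MF->getSubtarget()) rather than the TargetMachine, so functions built for different subtargets in one module see the right tables. A minimal, self-contained sketch of that pattern; the types below are invented stand-ins, not the LLVM classes:

#include <cassert>

struct RegisterInfo { unsigned NumRegs; };

struct Subtarget {
  RegisterInfo RI;
  const RegisterInfo *getRegisterInfo() const { return &RI; }
};

struct MachineFn {
  Subtarget ST;  // each function carries its own subtarget description
  const Subtarget &getSubtarget() const { return ST; }
};

// A tracker caches its pointers from the function it is initialized with,
// not from a process-wide target object.
struct PressureTracker {
  const RegisterInfo *TRI = nullptr;
  void init(const MachineFn &MF) { TRI = MF.getSubtarget().getRegisterInfo(); }
};

int main() {
  MachineFn F1{{{32}}}, F2{{{64}}};  // e.g. built for two different CPU variants
  PressureTracker T;
  T.init(F1); assert(T.TRI->NumRegs == 32);
  T.init(F2); assert(T.TRI->NumRegs == 64);
}
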
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 72b6285..7626dd2 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -24,24 +24,16 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
-/// setUsed - Set the register and its sub-registers as being used.
-void RegScavenger::setUsed(unsigned Reg) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- RegsAvailable.reset(*SubRegs);
-}
-
-bool RegScavenger::isAliasUsed(unsigned Reg) const {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (isUsed(*AI, *AI == Reg))
- return true;
- return false;
+/// Set the register units of this register as used.
+void RegScavenger::setRegUsed(unsigned Reg) {
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ RegUnitsAvailable.reset(*RUI);
}
void RegScavenger::initRegState() {
@@ -51,8 +43,8 @@ void RegScavenger::initRegState() {
I->Restore = nullptr;
}
- // All registers started out unused.
- RegsAvailable.set();
+ // All register units start out unused.
+ RegUnitsAvailable.set();
if (!MBB)
return;
@@ -60,22 +52,21 @@ void RegScavenger::initRegState() {
// Live-in registers are in use.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- setUsed(*I);
+ setRegUsed(*I);
// Pristine CSRs are also unavailable.
BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB);
for (int I = PR.find_first(); I>0; I = PR.find_next(I))
- setUsed(I);
+ setRegUsed(I);
}
void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
MachineFunction &MF = *mbb->getParent();
- const TargetMachine &TM = MF.getTarget();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
+ assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
// It is not possible to use the register scavenger after late optimization
@@ -85,17 +76,11 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
// Self-initialize.
if (!MBB) {
- NumPhysRegs = TRI->getNumRegs();
- RegsAvailable.resize(NumPhysRegs);
- KillRegs.resize(NumPhysRegs);
- DefRegs.resize(NumPhysRegs);
-
- // Create callee-saved registers bitvector.
- CalleeSavedRegs.resize(NumPhysRegs);
- const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
- if (CSRegs != nullptr)
- for (unsigned i = 0; CSRegs[i]; ++i)
- CalleeSavedRegs.set(CSRegs[i]);
+ NumRegUnits = TRI->getNumRegUnits();
+ RegUnitsAvailable.resize(NumRegUnits);
+ KillRegUnits.resize(NumRegUnits);
+ DefRegUnits.resize(NumRegUnits);
+ TmpRegUnits.resize(NumRegUnits);
}
MBB = mbb;
@@ -104,10 +89,9 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
Tracking = false;
}
-void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- BV.set(*SubRegs);
+void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) {
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ BV.set(*RUI);
}
void RegScavenger::determineKillsAndDefs() {
@@ -122,12 +106,25 @@ void RegScavenger::determineKillsAndDefs() {
// predicated, conservatively assume "kill" markers do not actually kill the
  // register. Similarly, ignore "dead" markers.
bool isPred = TII->isPredicated(MI);
- KillRegs.reset();
- DefRegs.reset();
+ KillRegUnits.reset();
+ DefRegUnits.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isRegMask())
- (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask());
+ if (MO.isRegMask()) {
+
+ TmpRegUnits.clear();
+ for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) {
+ for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
+ if (MO.clobbersPhysReg(*RURI)) {
+ TmpRegUnits.set(RU);
+ break;
+ }
+ }
+ }
+
+ // Apply the mask.
+ (isPred ? DefRegUnits : KillRegUnits) |= TmpRegUnits;
+ }
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -139,13 +136,13 @@ void RegScavenger::determineKillsAndDefs() {
if (MO.isUndef())
continue;
if (!isPred && MO.isKill())
- addRegWithSubRegs(KillRegs, Reg);
+ addRegUnits(KillRegUnits, Reg);
} else {
assert(MO.isDef());
if (!isPred && MO.isDead())
- addRegWithSubRegs(KillRegs, Reg);
+ addRegUnits(KillRegUnits, Reg);
else
- addRegWithSubRegs(DefRegs, Reg);
+ addRegUnits(DefRegUnits, Reg);
}
}
}
@@ -158,8 +155,8 @@ void RegScavenger::unprocess() {
determineKillsAndDefs();
// Commit the changes.
- setUsed(KillRegs);
- setUnused(DefRegs);
+ setUsed(KillRegUnits);
+ setUnused(DefRegUnits);
}
if (MBBI == MBB->begin()) {
@@ -208,7 +205,7 @@ void RegScavenger::forward() {
if (MO.isUse()) {
if (MO.isUndef())
continue;
- if (!isUsed(Reg)) {
+ if (!isRegUsed(Reg)) {
// Check if it's partial live: e.g.
// D0 = insert_subreg D0<undef>, S0
// ... D0
@@ -219,15 +216,23 @@ void RegScavenger::forward() {
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- if (isUsed(*SubRegs)) {
+ if (isRegUsed(*SubRegs)) {
SubUsed = true;
break;
}
- if (!SubUsed) {
+ bool SuperUsed = false;
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
+ if (isRegUsed(*SR)) {
+ SuperUsed = true;
+ break;
+ }
+ }
+ if (!SubUsed && !SuperUsed) {
MBB->getParent()->verify(nullptr, "In Register Scavenger");
llvm_unreachable("Using an undefined register!");
}
(void)SubUsed;
+ (void)SuperUsed;
}
} else {
assert(MO.isDef());
@@ -243,23 +248,23 @@ void RegScavenger::forward() {
#endif // NDEBUG
// Commit the changes.
- setUnused(KillRegs);
- setUsed(DefRegs);
+ setUnused(KillRegUnits);
+ setUsed(DefRegUnits);
}
-void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
- used = RegsAvailable;
- used.flip();
- if (includeReserved)
- used |= MRI->getReservedRegs();
- else
- used.reset(MRI->getReservedRegs());
+bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
+ if (includeReserved && isReserved(Reg))
+ return true;
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ if (!RegUnitsAvailable.test(*RUI))
+ return true;
+ return false;
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (!isAliasUsed(*I)) {
+ if (!isRegUsed(*I)) {
DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
"\n");
return *I;
@@ -273,13 +278,13 @@ BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
BitVector Mask(TRI->getNumRegs());
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (!isAliasUsed(*I))
+ if (!isRegUsed(*I))
Mask.set(*I);
return Mask;
}
/// findSurvivorReg - Return the candidate register that is unused for the
-/// longest after StargMII. UseMI is set to the instruction where the search
+/// longest after StartMII. UseMI is set to the instruction where the search
/// stopped.
///
/// No more than InstrLimit instructions are inspected.
@@ -375,9 +380,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
}
// Try to find a register that's unused if there is one, as then we won't
- // have to spill. Search explicitly rather than masking out based on
- // RegsAvailable, as RegsAvailable does not take aliases into account.
- // That's what getRegsAvailable() is for.
+ // have to spill.
BitVector Available = getRegsAvailable(RC);
Available &= Candidates;
if (Available.any())
@@ -388,7 +391,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
// If we found an unused register there is no reason to spill it.
- if (!isAliasUsed(SReg)) {
+ if (!isRegUsed(SReg)) {
DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
return SReg;
}
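
The scavenger rewrite above drops whole-register bitvectors plus explicit alias iteration (isAliasUsed) in favor of register-unit bitvectors, where overlapping registers share units and overlap is handled implicitly. A small self-contained sketch of the idea; the registers and unit assignments below are made up for illustration:

#include <bitset>
#include <cassert>
#include <vector>

constexpr unsigned NumUnits = 4;
struct Reg { std::vector<unsigned> Units; };

const Reg D0 = {{0, 1}};   // wide register covering units 0 and 1
const Reg S0 = {{0}};      // its low half
const Reg S1 = {{1}};      // its high half
const Reg D1 = {{2, 3}};   // an unrelated register

struct Scavenger {
  std::bitset<NumUnits> UnitsAvailable{~0ULL};  // all units start out free

  void setRegUsed(const Reg &R) {               // cf. RegScavenger::setRegUsed
    for (unsigned U : R.Units) UnitsAvailable.reset(U);
  }
  bool isRegUsed(const Reg &R) const {          // cf. RegScavenger::isRegUsed
    for (unsigned U : R.Units)
      if (!UnitsAvailable.test(U)) return true;
    return false;
  }
};

int main() {
  Scavenger SC;
  SC.setRegUsed(S0);          // using a sub-register...
  assert(SC.isRegUsed(D0));   // ...makes the overlapping wide register used too,
  assert(!SC.isRegUsed(S1));  // while the other half stays free,
  assert(!SC.isRegUsed(D1));  // and unrelated registers are unaffected.
}
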
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6a2a080..6f8b337 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -21,6 +21,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -35,11 +36,9 @@ static cl::opt<bool> StressSchedOpt(
void SchedulingPriorityQueue::anchor() { }
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
- : TM(mf.getTarget()),
- TII(TM.getInstrInfo()),
- TRI(TM.getRegisterInfo()),
- MF(mf), MRI(mf.getRegInfo()),
- EntrySU(), ExitSU() {
+ : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()),
+ TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf),
+ MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
#ifndef NDEBUG
StressSched = StressSchedOpt;
#endif
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0f8b21c..d8d8422 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -50,12 +50,11 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
- const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt,
+ const MachineLoopInfo *mli,
bool IsPostRAFlag,
bool RemoveKillFlags,
LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
CanHandleTerminators(false), FirstDbgValue(nullptr) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
@@ -64,7 +63,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
"Virtual registers must be removed prior to PostRA scheduling");
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -110,7 +109,7 @@ static void getUnderlyingObjects(const Value *V,
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
V = *I;
- if (!Visited.insert(V))
+ if (!Visited.insert(V).second)
continue;
if (Operator::getOpcode(V) == Instruction::IntToPtr) {
const Value *O =
@@ -512,9 +511,18 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
MachineInstr *MIa,
MachineInstr *MIb) {
+ const MachineFunction *MF = MIa->getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
  // Cover a trivial case - no edge is needed to itself.
if (MIa == MIb)
return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if ((MIa->mayLoad() || MIa->mayStore()) &&
+ (MIb->mayLoad() || MIb->mayStore()))
+ if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+ return false;
// FIXME: Need to handle multiple memory operands to support all targets.
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
@@ -563,9 +571,9 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
AliasAnalysis::AliasResult AAResult = AA->alias(
AliasAnalysis::Location(MMOa->getValue(), Overlapa,
- UseTBAA ? MMOa->getTBAAInfo() : nullptr),
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
AliasAnalysis::Location(MMOb->getValue(), Overlapb,
- UseTBAA ? MMOb->getTBAAInfo() : nullptr));
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != AliasAnalysis::NoAlias);
}
@@ -575,12 +583,12 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
static unsigned
iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
- SmallPtrSet<const SUnit*, 16> &Visited) {
+ SmallPtrSetImpl<const SUnit*> &Visited) {
if (!SUa || !SUb || SUb == ExitSU)
return *Depth;
// Remember visited nodes.
- if (!Visited.insert(SUb))
+ if (!Visited.insert(SUb).second)
return *Depth;
// If there is _some_ dependency already in place, do not
// descend any further.
@@ -656,7 +664,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
bool isNormalMemory = false) {
// If this is a false dependency,
  // do not add the edge, but remember the rejected node.
- if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
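
The new call to TII->areMemAccessesTriviallyDisjoint lets the target prove two memory instructions independent before the generic checks run. A hedged sketch of the kind of reasoning such a hook typically performs when both accesses are base plus constant offset; the MemAccess type and its fields are hypothetical, not LLVM API:

#include <cassert>
#include <cstdint>

struct MemAccess {
  int BaseReg;      // register holding the base address
  int64_t Offset;   // constant byte offset from that base
  uint64_t Size;    // access size in bytes
};

static bool triviallyDisjoint(const MemAccess &A, const MemAccess &B) {
  if (A.BaseReg != B.BaseReg)
    return false;                    // different bases: stay conservative
  int64_t LoA = A.Offset, HiA = A.Offset + (int64_t)A.Size;
  int64_t LoB = B.Offset, HiB = B.Offset + (int64_t)B.Size;
  return HiA <= LoB || HiB <= LoA;   // half-open byte ranges do not overlap
}

int main() {
  MemAccess St = {5, 0, 8};   // store of 8 bytes at [base+0, base+8)
  MemAccess Ld = {5, 8, 8};   // load of 8 bytes at [base+8, base+16)
  MemAccess Ov = {5, 4, 8};   // load overlapping the store
  assert(triviallyDisjoint(St, Ld));   // no chain edge needed
  assert(!triviallyDisjoint(St, Ov));  // keep the chain edge
}
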
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index f59c6cf..b2e4617 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -21,7 +21,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <fstream>
using namespace llvm;
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 004c685..38833a4 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -78,7 +78,7 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II,
DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
else {
// A nonempty itinerary must have a SchedModel.
- IssueWidth = ItinData->SchedModel->IssueWidth;
+ IssueWidth = ItinData->SchedModel.IssueWidth;
DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
<< ScoreboardDepth << '\n');
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7c42e4d..a1291ed 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17,7 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -32,7 +34,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -76,6 +77,10 @@ namespace {
"slicing"),
cl::init(false));
+ static cl::opt<bool>
+ MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner may split indexing from loads"));
+
//------------------------------ DAGCombiner ---------------------------------//
class DAGCombiner {
@@ -87,56 +92,70 @@ namespace {
bool LegalTypes;
bool ForCodeSize;
- // Worklist of all of the nodes that need to be simplified.
- //
- // This has the semantics that when adding to the worklist,
- // the item added must be next to be processed. It should
- // also only appear once. The naive approach to this takes
- // linear time.
- //
- // To reduce the insert/remove time to logarithmic, we use
- // a set and a vector to maintain our worklist.
- //
- // The set contains the items on the worklist, but does not
- // maintain the order they should be visited.
- //
- // The vector maintains the order nodes should be visited, but may
- // contain duplicate or removed nodes. When choosing a node to
- // visit, we pop off the order stack until we find an item that is
- // also in the contents set. All operations are O(log N).
- SmallPtrSet<SDNode*, 64> WorkListContents;
- SmallVector<SDNode*, 64> WorkListOrder;
+ /// \brief Worklist of all of the nodes that need to be simplified.
+ ///
+ /// This must behave as a stack -- new nodes to process are pushed onto the
+ /// back and when processing we pop off of the back.
+ ///
+ /// The worklist will not contain duplicates but may contain null entries
+ /// due to nodes being deleted from the underlying DAG.
+ SmallVector<SDNode *, 64> Worklist;
+
+ /// \brief Mapping from an SDNode to its position on the worklist.
+ ///
+ /// This is used to find and remove nodes from the worklist (by nulling
+ /// them) when they are deleted from the underlying DAG. It relies on
+ /// stable indices of nodes within the worklist.
+ DenseMap<SDNode *, unsigned> WorklistMap;
+
+ /// \brief Set of nodes which have been combined (at least once).
+ ///
+ /// This is used to allow us to reliably add any operands of a DAG node
+ /// which have not yet been combined to the worklist.
+ SmallPtrSet<SDNode *, 64> CombinedNodes;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
- /// AddUsersToWorkList - When an instruction is simplified, add all users of
- /// the instruction to the work lists because they might get more simplified
- /// now.
- ///
- void AddUsersToWorkList(SDNode *N) {
+ /// When an instruction is simplified, add all users of the instruction to
+ /// the work lists because they might get more simplified now.
+ void AddUsersToWorklist(SDNode *N) {
for (SDNode *Node : N->uses())
- AddToWorkList(Node);
+ AddToWorklist(Node);
}
- /// visit - call the node-specific routine that knows how to fold each
- /// particular type of node.
+ /// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);
public:
- /// AddToWorkList - Add to the work list making sure its instance is at the
- /// back (next to be processed.)
- void AddToWorkList(SDNode *N) {
- WorkListContents.insert(N);
- WorkListOrder.push_back(N);
+ /// Add to the worklist making sure its instance is at the back (next to be
+ /// processed.)
+ void AddToWorklist(SDNode *N) {
+ // Skip handle nodes as they can't usefully be combined and confuse the
+ // zero-use deletion strategy.
+ if (N->getOpcode() == ISD::HANDLENODE)
+ return;
+
+ if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
+ Worklist.push_back(N);
}
- /// removeFromWorkList - remove all instances of N from the worklist.
- ///
- void removeFromWorkList(SDNode *N) {
- WorkListContents.erase(N);
+ /// Remove all instances of N from the worklist.
+ void removeFromWorklist(SDNode *N) {
+ CombinedNodes.erase(N);
+
+ auto It = WorklistMap.find(N);
+ if (It == WorklistMap.end())
+ return; // Not in the worklist.
+
+ // Null out the entry rather than erasing it to avoid a linear operation.
+ Worklist[It->second] = nullptr;
+ WorklistMap.erase(It);
}
+ void deleteAndRecombine(SDNode *N);
+ bool recursivelyDeleteUnusedNodes(SDNode *N);
+
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
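
The comments above describe the new worklist layout: a vector provides LIFO order, and a map from node to its index gives constant-time membership tests and removal by nulling the slot so indices stay stable. A standalone sketch of that structure using standard containers; Node stands in for SDNode:

#include <cassert>
#include <unordered_map>
#include <vector>

struct Node {};

struct Worklist {
  std::vector<Node *> Order;               // visit order, may contain tombstones
  std::unordered_map<Node *, size_t> Map;  // node -> index in Order

  void add(Node *N) {
    if (Map.emplace(N, Order.size()).second)  // ignore duplicate adds
      Order.push_back(N);
  }
  void remove(Node *N) {
    auto It = Map.find(N);
    if (It == Map.end()) return;
    Order[It->second] = nullptr;              // null out instead of erasing
    Map.erase(It);
  }
  Node *pop() {                               // skip tombstones from the back
    while (!Order.empty()) {
      Node *N = Order.back();
      Order.pop_back();
      if (N) { Map.erase(N); return N; }
    }
    return nullptr;
  }
};

int main() {
  Node A, B, C;
  Worklist WL;
  WL.add(&A); WL.add(&B); WL.add(&C); WL.add(&B);  // duplicate add is a no-op
  WL.remove(&C);                                   // a deleted node leaves a tombstone
  assert(WL.pop() == &B);
  assert(WL.pop() == &A);
  assert(WL.pop() == nullptr);
}
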
@@ -154,9 +173,9 @@ namespace {
private:
- /// SimplifyDemandedBits - Check the specified integer node value to see if
- /// it can be simplified or if things it uses can be simplified by bit
- /// propagation. If so, return true.
+ /// Check the specified integer node value to see if it can be simplified or
+ /// if things it uses can be simplified by bit propagation.
+ /// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
@@ -167,6 +186,7 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
+ SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
/// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
@@ -192,7 +212,7 @@ namespace {
SDValue Trunc, SDValue ExtLoad, SDLoc DL,
ISD::NodeType ExtType);
- /// combine - call the node-specific routine that knows how to fold each
+ /// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
SDValue combine(SDNode *N);
@@ -256,6 +276,7 @@ namespace {
SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
+ SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
@@ -269,6 +290,8 @@ namespace {
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
+ SDValue visitFMINNUM(SDNode *N);
+ SDValue visitFMAXNUM(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -304,7 +327,12 @@ namespace {
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
+ SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue BuildReciprocalEstimate(SDValue Op);
+ SDValue BuildRsqrtEstimate(SDValue Op);
+ SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
+ SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -321,17 +349,16 @@ namespace {
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
- /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+ /// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases);
- /// isAlias - Return true if there is any possibility that the two addresses
- /// overlap.
+ /// Return true if there is any possibility that the two addresses overlap.
bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
- /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
- /// looking for a better chain (aliasing node.)
+ /// Walk up chain skipping non-aliasing memory nodes, looking for a better
+ /// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
/// Merge consecutive store operations into a wide store.
@@ -359,13 +386,13 @@ namespace {
FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
}
- /// Run - runs the dag combiner on all nodes in the work list
+  /// Runs the DAG combiner on all nodes in the worklist.
void Run(CombineLevel AtLevel);
SelectionDAG &getDAG() const { return DAG; }
- /// getShiftAmountTy - Returns a type large enough to hold any valid
- /// shift amount - before type legalization these can be huge.
+ /// Returns a type large enough to hold any valid shift amount - before type
+ /// legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
@@ -374,15 +401,14 @@ namespace {
: TLI.getPointerTy();
}
- /// isTypeLegal - This method returns true if we are running before type
- /// legalization or if the specified VT is legal.
+ /// This method returns true if we are running before type legalization or
+ /// if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
if (!LegalTypes) return true;
return TLI.isTypeLegal(VT);
}
- /// getSetCCResultType - Convenience wrapper around
- /// TargetLowering::getSetCCResultType
+ /// Convenience wrapper around TargetLowering::getSetCCResultType
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(*DAG.getContext(), VT);
}
@@ -391,16 +417,16 @@ namespace {
namespace {
-/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
-class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+class WorklistRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
- explicit WorkListRemover(DAGCombiner &dc)
+ explicit WorklistRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
- DC.removeFromWorkList(N);
+ DC.removeFromWorklist(N);
}
};
}
@@ -410,11 +436,11 @@ public:
//===----------------------------------------------------------------------===//
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
- ((DAGCombiner*)DC)->AddToWorkList(N);
+ ((DAGCombiner*)DC)->AddToWorklist(N);
}
void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
- ((DAGCombiner*)DC)->removeFromWorkList(N);
+ ((DAGCombiner*)DC)->removeFromWorklist(N);
}
SDValue TargetLowering::DAGCombinerInfo::
@@ -442,9 +468,24 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Helper Functions
//===----------------------------------------------------------------------===//
-/// isNegatibleForFree - Return 1 if we can compute the negated form of the
-/// specified expression for the same cost as the expression itself, or 2 if we
-/// can compute the negated form more cheaply than the expression itself.
+void DAGCombiner::deleteAndRecombine(SDNode *N) {
+ removeFromWorklist(N);
+
+ // If the operands of this node are only used by the node, they will now be
+ // dead. Make sure to re-visit them and recursively delete dead nodes.
+ for (const SDValue &Op : N->ops())
+ // For an operand generating multiple values, one of the values may
+ // become dead allowing further simplification (e.g. split index
+ // arithmetic from an indexed load).
+ if (Op->hasOneUse() || Op->getNumValues() > 1)
+ AddToWorklist(Op.getNode());
+
+ DAG.DeleteNode(N);
+}
+
+/// Return 1 if we can compute the negated form of the specified expression for
+/// the same cost as the expression itself, or 2 if we can compute the negated
+/// form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
@@ -507,10 +548,10 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
}
}
-/// GetNegatedExpression - If isNegatibleForFree returns true, this function
-/// returns the newly negated expression.
+/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations, unsigned Depth = 0) {
+ const TargetOptions &Options = DAG.getTarget().Options;
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
@@ -527,12 +568,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- assert(DAG.getTarget().Options.UnsafeFPMath);
+ assert(Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -544,7 +584,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(DAG.getTarget().Options.UnsafeFPMath);
+ assert(Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -557,12 +597,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
+ assert(!Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -587,7 +626,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
}
-// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
@@ -606,15 +645,19 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
return false;
+ if (TLI.getBooleanContents(N.getValueType()) ==
+ TargetLowering::UndefinedBooleanContent)
+ return false;
+
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(4);
return true;
}
-// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
-// one use. If this is true, it allows the users to invert the operation for
-// free when it is profitable to do so.
+/// Return true if this is a SetCC-equivalent operation with only one use.
+/// If this is true, it allows the users to invert the operation for free when
+/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
@@ -622,7 +665,7 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
+/// Returns true if N is a BUILD_VECTOR node whose
/// elements are all the same constant or undefined.
static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
@@ -643,7 +686,7 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
- if(BV && BV->isConstant())
+ if (BV && BV->isConstant())
return BV;
return nullptr;
}
@@ -669,6 +712,23 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) {
return nullptr;
}
+/// \brief Returns the SDNode if it is a constant splat BuildVector or constant
+/// float.
+static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+
+ if (CN && UndefElements.none())
+ return CN;
+ }
+
+ return nullptr;
+}
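+
+// Note that isConstOrConstSplatFP only reports a splat when
+// UndefElements.none(), i.e. every lane of the build-vector is defined and
+// carries the same value. A small sketch of that acceptance test, modelling
+// undef lanes with std::nullopt (the helper below is illustrative only, not
+// the LLVM routine):
+//
+//   #include <cassert>
+//   #include <optional>
+//   #include <vector>
+//
+//   static std::optional<double>
+//   getStrictConstantSplat(const std::vector<std::optional<double>> &Lanes) {
+//     if (Lanes.empty())
+//       return std::nullopt;
+//     for (const auto &L : Lanes) {
+//       if (!L)
+//         return std::nullopt;        // an undef lane disqualifies the splat
+//       if (*L != *Lanes.front())
+//         return std::nullopt;        // lanes disagree: not a splat
+//     }
+//     return Lanes.front();
+//   }
+//
+//   int main() {
+//     assert(getStrictConstantSplat({1.0, 1.0, 1.0, 1.0}) == 1.0);
+//     assert(!getStrictConstantSplat({1.0, 1.0, std::nullopt, 1.0}));
+//     assert(!getStrictConstantSplat({1.0, 2.0, 1.0, 1.0}));
+//   }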
+
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
@@ -687,7 +747,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
if (!OpNode.getNode())
return SDValue();
- AddToWorkList(OpNode.getNode());
+ AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
@@ -708,7 +768,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
if (!OpNode.getNode())
return SDValue();
- AddToWorkList(OpNode.getNode());
+ AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
}
}
@@ -730,14 +790,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
if (To[i].getNode()) {
- AddToWorkList(To[i].getNode());
- AddUsersToWorkList(To[i].getNode());
+ AddToWorklist(To[i].getNode());
+ AddUsersToWorklist(To[i].getNode());
}
}
}
@@ -745,14 +805,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
- if (N->use_empty()) {
- // Nodes can be reintroduced into the worklist. Make sure we do not
- // process a node that has been replaced.
- removeFromWorkList(N);
-
- // Finally, since the node is now dead, remove it from the graph.
- DAG.DeleteNode(N);
- }
+ if (N->use_empty())
+ deleteAndRecombine(N);
return SDValue(N, 0);
}
@@ -760,32 +814,22 @@ void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
- AddToWorkList(TLO.New.getNode());
- AddUsersToWorkList(TLO.New.getNode());
+ AddToWorklist(TLO.New.getNode());
+ AddUsersToWorklist(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
- if (TLO.Old.getNode()->use_empty()) {
- removeFromWorkList(TLO.Old.getNode());
-
- // If the operands of this node are only used by the node, they will now
- // be dead. Make sure to visit them first to delete dead nodes early.
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
- if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
- AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
-
- DAG.DeleteNode(TLO.Old.getNode());
- }
+ if (TLO.Old.getNode()->use_empty())
+ deleteAndRecombine(TLO.Old.getNode());
}
-/// SimplifyDemandedBits - Check the specified integer node value to see if
-/// it can be simplified or if things it uses can be simplified by bit
-/// propagation. If so, return true.
+/// Check the specified integer node value to see if it can be simplified or if
+/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownZero, KnownOne;
@@ -793,7 +837,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
return false;
// Revisit the node.
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
// Replace the old value with the new one.
++NodesCombined;
@@ -817,12 +861,11 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
dbgs() << "\nWith: ";
Trunc.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
- removeFromWorkList(Load);
- DAG.DeleteNode(Load);
- AddToWorkList(Trunc.getNode());
+ deleteAndRecombine(Load);
+ AddToWorklist(Trunc.getNode());
}
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
@@ -872,7 +915,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
- AddToWorkList(NewOp.getNode());
+ AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
@@ -887,16 +930,16 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
- AddToWorkList(NewOp.getNode());
+ AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}
-/// PromoteIntBinOp - Promote the specified integer binary operation if the
-/// target indicates it is beneficial. e.g. On x86, it's usually better to
-/// promote i16 operations to i32 since i16 instructions are longer.
+/// Promote the specified integer binary operation if the target indicates it is
+/// beneficial, e.g., on x86 it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
@@ -934,9 +977,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
return SDValue();
}
- AddToWorkList(NN0.getNode());
+ AddToWorklist(NN0.getNode());
if (NN1.getNode())
- AddToWorkList(NN1.getNode());
+ AddToWorklist(NN1.getNode());
if (Replace0)
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
@@ -952,9 +995,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
return SDValue();
}
-/// PromoteIntShiftOp - Promote the specified integer shift operation if the
-/// target indicates it is beneficial. e.g. On x86, it's usually better to
-/// promote i16 operations to i32 since i16 instructions are longer.
+/// Promote the specified integer shift operation if the target indicates it is
+/// beneficial, e.g., on x86 it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
@@ -986,7 +1029,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!N0.getNode())
return SDValue();
- AddToWorkList(N0.getNode());
+ AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
@@ -1066,17 +1109,45 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
dbgs() << "\nTo: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
- removeFromWorkList(N);
- DAG.DeleteNode(N);
- AddToWorkList(Result.getNode());
+ deleteAndRecombine(N);
+ AddToWorklist(Result.getNode());
return true;
}
return false;
}
+/// \brief Recursively delete a node which has no uses and any operands for
+/// which it is the only use.
+///
+/// Note that this both deletes the nodes and removes them from the worklist.
+/// It also adds any nodes that have had a user deleted to the worklist, as they
+/// may now have only one use and be subject to other combines.
+bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
+ if (!N->use_empty())
+ return false;
+
+ SmallSetVector<SDNode *, 16> Nodes;
+ Nodes.insert(N);
+ do {
+ N = Nodes.pop_back_val();
+ if (!N)
+ continue;
+
+ if (N->use_empty()) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Nodes.insert(N->getOperand(i).getNode());
+
+ removeFromWorklist(N);
+ DAG.DeleteNode(N);
+ } else {
+ AddToWorklist(N);
+ }
+ } while (!Nodes.empty());
+ return true;
+}
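
recursivelyDeleteUnusedNodes works outward from a dead node: deleting it may leave operands with no remaining users, and those are considered in turn, while still-live nodes simply go back to the main worklist. A self-contained sketch of that traversal; Node and its manual use counting are invented here, since in LLVM the DAG itself tracks uses:

#include <cassert>
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
  int NumUses = 0;
  bool Deleted = false;
};

static bool recursivelyDeleteUnusedNodes(Node *N) {
  if (N->NumUses != 0)
    return false;
  std::vector<Node *> Pending;
  std::set<Node *> InPending;          // keeps Pending free of duplicates
  auto Push = [&](Node *P) {
    if (InPending.insert(P).second)
      Pending.push_back(P);
  };
  Push(N);
  while (!Pending.empty()) {
    Node *Cur = Pending.back();
    Pending.pop_back();
    InPending.erase(Cur);
    if (Cur->NumUses != 0)
      continue;                        // still live; the real code re-adds it to the worklist
    for (Node *Op : Cur->Operands) {
      --Op->NumUses;                   // Cur no longer uses Op
      Push(Op);                        // Op may have just become dead
    }
    Cur->Deleted = true;
  }
  return true;
}

int main() {
  Node Leaf, Mid, Root;
  Mid.Operands = {&Leaf};  Leaf.NumUses = 1;
  Root.Operands = {&Mid};  Mid.NumUses = 1;
  assert(recursivelyDeleteUnusedNodes(&Root));  // Root is dead, so the chain is dead
  assert(Root.Deleted && Mid.Deleted && Leaf.Deleted);
}
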
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
@@ -1088,44 +1159,69 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
+ // Early exit if this basic block is in an optnone function.
+ AttributeSet FnAttrs =
+ DAG.getMachineFunction().getFunction()->getAttributes();
+ if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeNone))
+ return;
+
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
- AddToWorkList(I);
+ AddToWorklist(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
- // The root of the dag may dangle to deleted nodes until the dag combiner is
- // done. Set it to null to avoid confusion.
- DAG.setRoot(SDValue());
-
// while the worklist isn't empty, find a node and
// try and combine it.
- while (!WorkListContents.empty()) {
+ while (!WorklistMap.empty()) {
SDNode *N;
- // The WorkListOrder holds the SDNodes in order, but it may contain
- // duplicates.
- // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
- // worklist *should* contain, and check the node we want to visit is should
- // actually be visited.
+ // The Worklist holds the SDNodes in order, but it may contain null entries.
do {
- N = WorkListOrder.pop_back_val();
- } while (!WorkListContents.erase(N));
+ N = Worklist.pop_back_val();
+ } while (!N);
+
+ bool GoodWorklistEntry = WorklistMap.erase(N);
+ (void)GoodWorklistEntry;
+ assert(GoodWorklistEntry &&
+ "Found a worklist entry without a corresponding map entry!");
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
- if (N->use_empty() && N != &Dummy) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- AddToWorkList(N->getOperand(i).getNode());
-
- DAG.DeleteNode(N);
+ if (recursivelyDeleteUnusedNodes(N))
continue;
+
+ WorklistRemover DeadNodes(*this);
+
+ // If this combine is running after legalizing the DAG, re-legalize any
+ // nodes pulled off the worklist.
+ if (Level == AfterLegalizeDAG) {
+ SmallSetVector<SDNode *, 16> UpdatedNodes;
+ bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
+
+ for (SDNode *LN : UpdatedNodes) {
+ AddToWorklist(LN);
+ AddUsersToWorklist(LN);
+ }
+ if (!NIsValid)
+ continue;
}
+ DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+
+ // Add any operands of the new node which have not yet been combined to the
+ // worklist as well. Because the worklist uniques things already, this
+ // won't repeatedly process the same operand.
+ CombinedNodes.insert(N);
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (!CombinedNodes.count(N->getOperand(i).getNode()))
+ AddToWorklist(N->getOperand(i).getNode());
+
SDValue RV = combine(N);
if (!RV.getNode())
@@ -1144,15 +1240,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << "\nReplacing.3 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- RV.getNode()->dump(&DAG);
- dbgs() << '\n');
+ DEBUG(dbgs() << " ... into: ";
+ RV.getNode()->dump(&DAG));
// Transfer debug value.
DAG.TransferDbgValues(SDValue(N, 0), RV);
- WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
@@ -1163,26 +1255,14 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
}
// Push the new node and any users onto the worklist
- AddToWorkList(RV.getNode());
- AddUsersToWorkList(RV.getNode());
-
- // Add any uses of the old node to the worklist in case this node is the
- // last one that uses them. They may become dead after this node is
- // deleted.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- AddToWorkList(N->getOperand(i).getNode());
+ AddToWorklist(RV.getNode());
+ AddUsersToWorklist(RV.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
- // something else needing this node.
- if (N->use_empty()) {
- // Nodes can be reintroduced into the worklist. Make sure we do not
- // process a node that has been replaced.
- removeFromWorkList(N);
-
- // Finally, since the node is now dead, remove it from the graph.
- DAG.DeleteNode(N);
- }
+ // something else needing this node. This will also take care of adding any
+ // operands which have lost a user to the worklist.
+ recursivelyDeleteUnusedNodes(N);
}
// If the root changed (e.g. it was a dead load, update the root).
@@ -1244,6 +1324,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
+ case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
@@ -1255,6 +1336,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FMINNUM: return visitFMINNUM(N);
+ case ISD::FMAXNUM: return visitFMAXNUM(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
@@ -1347,8 +1430,8 @@ SDValue DAGCombiner::combine(SDNode *N) {
return RV;
}
-/// getInputChainForNode - Given a node, return its input chain if it has one,
-/// otherwise return a null sd operand.
+/// Given a node, return its input chain if it has one, otherwise return a null
+/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
if (unsigned NumOps = N->getNumOperands()) {
if (N->getOperand(0).getValueType() == MVT::Other)
@@ -1402,7 +1485,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// Queue up for processing.
TFs.push_back(Op.getNode());
// Clean up in case the token factor is removed.
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
Changed = true;
break;
}
@@ -1410,7 +1493,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
default:
// Only add if it isn't already in the list.
- if (SeenOps.insert(Op.getNode()))
+ if (SeenOps.insert(Op.getNode()).second)
Ops.push_back(Op);
else
Changed = true;
@@ -1440,44 +1523,21 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
// First add the users of this node to the work list so that they
// can be tried again once they have new operands.
- AddUsersToWorkList(N);
+ AddUsersToWorklist(N);
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
} while (!N->use_empty());
- removeFromWorkList(N);
- DAG.DeleteNode(N);
+ deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-static
-SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
- SelectionDAG &DAG) {
- EVT VT = N0.getValueType();
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
- ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
-
- if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
- isa<ConstantSDNode>(N00.getOperand(1))) {
- // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
- N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
- DAG.getNode(ISD::SHL, SDLoc(N00), VT,
- N00.getOperand(0), N01),
- DAG.getNode(ISD::SHL, SDLoc(N01), VT,
- N00.getOperand(1), N01));
- return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
- }
-
- return SDValue();
-}
-
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1594,16 +1654,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
}
- // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
- if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
- SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
- if (Result.getNode()) return Result;
- }
- if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
- SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
- if (Result.getNode()) return Result;
- }
-
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL &&
N1.getOperand(0).getOpcode() == ISD::SUB)
@@ -1647,6 +1697,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
+ // add X, (sextinreg Y i1) -> sub X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDLoc DL(N);
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
+ }
+ }
+
return SDValue();
}
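
The new (add X, (sextinreg Y i1)) fold is plain boolean arithmetic: sign-extending an i1 yields 0 for Y == 0 and -1 (all ones) for Y == 1, so adding the extension equals subtracting (Y & 1), and symmetrically for sub. A quick check of the identity:

#include <cassert>
#include <cstdint>

static int32_t sextI1(uint32_t Y) { return (Y & 1) ? -1 : 0; }  // models sext of an i1 bit

int main() {
  for (uint32_t Y = 0; Y <= 1; ++Y)
    for (int32_t X : {-7, 0, 42}) {
      assert(X + sextI1(Y) == X - (int32_t)(Y & 1));  // add X, sext(Y) == sub X, (Y & 1)
      assert(X - sextI1(Y) == X + (int32_t)(Y & 1));  // sub X, sext(Y) == add X, (Y & 1)
    }
}
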
@@ -1812,6 +1873,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
VT);
}
+ // sub X, (sextinreg Y i1) -> add X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDLoc DL(N);
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
+ }
+ }
+
return SDValue();
}
@@ -1933,7 +2005,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1)))) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
N1, N0.getOperand(1));
- AddToWorkList(C3.getNode());
+ AddToWorklist(C3.getNode());
return DAG.getNode(ISD::MUL, SDLoc(N), VT,
N0.getOperand(0), C3);
}
@@ -2016,9 +2088,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
(-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
- if (TLI.isPow2DivCheap())
+ if (TLI.isPow2SDivCheap())
return SDValue();
+ // Target-specific implementation of sdiv x, pow2.
+ SDValue Res = BuildSDIVPow2(N);
+ if (Res.getNode())
+ return Res;
+
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
@@ -2026,7 +2103,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
DAG.getConstant(VT.getScalarSizeInBits() - 1,
getShiftAmountTy(N0.getValueType())));
- AddToWorkList(SGN.getNode());
+ AddToWorklist(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL =
@@ -2034,8 +2111,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
DAG.getConstant(VT.getScalarSizeInBits() - lg2,
getShiftAmountTy(SGN.getValueType())));
SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
- AddToWorkList(SRL.getNode());
- AddToWorkList(ADD.getNode()); // Divide by pow2
+ AddToWorklist(SRL.getNode());
+ AddToWorklist(ADD.getNode()); // Divide by pow2
SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
@@ -2044,7 +2121,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (N1C->getAPIntValue().isNonNegative())
return SRA;
- AddToWorkList(SRA.getNode());
+ AddToWorklist(SRA.getNode());
return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
}
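
The shift sequence built above for sdiv by a power of two has a direct scalar reading: splat the sign bit, shift it right logically to form a bias of 2^lg2 - 1 for negative dividends, add the bias, shift arithmetically, and negate if the divisor was negative. A hedged sketch for 32-bit values (assumes 1 <= Lg2 <= 31 and arithmetic right shift on signed ints, which the SRA node guarantees but plain C++ only provides in practice):

#include <cassert>
#include <cstdint>

static int32_t sdivPow2(int32_t X, unsigned Lg2, bool NegDivisor) {
  const unsigned Bits = 32;
  int32_t Sgn = X >> (Bits - 1);                            // 0 or -1 (splatted sign bit)
  int32_t Bias = (int32_t)((uint32_t)Sgn >> (Bits - Lg2));  // 0 or 2^Lg2 - 1
  int32_t Sra = (X + Bias) >> Lg2;                          // divide, rounding toward zero
  return NegDivisor ? -Sra : Sra;
}

int main() {
  assert(sdivPow2( 7, 2, false) ==  7 /  4);
  assert(sdivPow2(-7, 2, false) == -7 /  4);   // -1, not -2: rounds toward zero
  assert(sdivPow2(-7, 3, true)  == -7 / -8);   // divisor -8
  assert(sdivPow2(20, 2, true)  == 20 / -4);
}
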
@@ -2096,7 +2173,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
ADDVT));
- AddToWorkList(Add.getNode());
+ AddToWorklist(Add.getNode());
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
}
}
@@ -2138,13 +2215,13 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
// X%C to the equivalent of X-X/C*C.
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
- AddToWorkList(Div.getNode());
+ AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
- AddToWorkList(Mul.getNode());
+ AddToWorklist(Mul.getNode());
return Sub;
}
}
@@ -2181,7 +2258,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
VT));
- AddToWorkList(Add.getNode());
+ AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
}
}
@@ -2191,13 +2268,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
// X%C to the equivalent of X-X/C*C.
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
- AddToWorkList(Div.getNode());
+ AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
- AddToWorkList(Mul.getNode());
+ AddToWorklist(Mul.getNode());
return Sub;
}
}
@@ -2286,10 +2363,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return SDValue();
}
-/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
-/// compute two values. LoOp and HiOp give the opcodes for the two computations
-/// that are being performed. Return true if a simplification was made.
-///
+/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
+/// give the opcodes for the two computations that are being performed. Return
+/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
@@ -2297,8 +2373,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
if (!HiExists &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
- SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
- ArrayRef<SDUse>(N->op_begin(), N->op_end()));
+ SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
return CombineTo(N, Res, Res);
}
@@ -2307,8 +2382,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
if (!LoExists &&
(!LegalOperations ||
TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
- SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
- ArrayRef<SDUse>(N->op_begin(), N->op_end()));
+ SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
return CombineTo(N, Res, Res);
}
@@ -2318,9 +2392,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
// If the two computed results can be simplified separately, separate them.
if (LoExists) {
- SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
- ArrayRef<SDUse>(N->op_begin(), N->op_end()));
- AddToWorkList(Lo.getNode());
+ SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+ AddToWorklist(Lo.getNode());
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
@@ -2329,9 +2402,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
}
if (HiExists) {
- SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
- ArrayRef<SDUse>(N->op_begin(), N->op_end()));
- AddToWorkList(Hi.getNode());
+ SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+ AddToWorklist(Hi.getNode());
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
@@ -2436,8 +2508,8 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
return SDValue();
}
-/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
-/// two operands of the same opcode, try to simplify it.
+/// If this is a binary operator with two operands of the same opcode, try to
+/// simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
EVT VT = N0.getValueType();
@@ -2470,7 +2542,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
}
@@ -2484,7 +2556,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
N0.getOperand(0).getValueType(),
N0.getOperand(0), N1.getOperand(0));
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
ORNode, N0.getOperand(1));
}
@@ -2509,7 +2581,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
return BC;
}
}
@@ -2556,7 +2628,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0->getOperand(0), N1->getOperand(0));
- AddToWorkList(NewNode.getNode());
+ AddToWorklist(NewNode.getNode());
return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
&SVN0->getMask()[0]);
}
@@ -2577,7 +2649,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0->getOperand(1), N1->getOperand(1));
- AddToWorkList(NewNode.getNode());
+ AddToWorklist(NewNode.getNode());
return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
&SVN0->getMask()[0]);
}
@@ -2603,9 +2675,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return N0;
+      // Do not return N0, because an undef node may exist in N0.
+ return DAG.getConstant(
+ APInt::getNullValue(
+ N0.getValueType().getScalarType().getSizeInBits()),
+ N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
+      // Do not return N1, because an undef node may exist in N1.
+ return DAG.getConstant(
+ APInt::getNullValue(
+ N1.getValueType().getScalarType().getSizeInBits()),
+ N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
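As a short worked illustration of the new comments above: ISD::isBuildVectorAllZeros also matches build_vectors that contain undef lanes, e.g. N1 = (BUILD_VECTOR 0, undef). Returning N1 for (and x, N1) would keep that undef lane in the result, whereas the fold is only correct if it produces the strict constant (BUILD_VECTOR 0, 0), which is why a fresh zero constant of the element width is materialized instead. The all-ones case in visitOR below needs the same treatment, hence the matching change there.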
@@ -2768,21 +2848,21 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
LR.getValueType(), LL, RL);
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
// fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
LR.getValueType(), LL, RL);
- AddToWorkList(ANDNode.getNode());
+ AddToWorklist(ANDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
}
// fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
LR.getValueType(), LL, RL);
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
}
@@ -2795,7 +2875,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
cast<ConstantSDNode>(RR)->isNullValue()))) {
SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
LL, DAG.getConstant(1, LL.getValueType()));
- AddToWorkList(ADDNode.getNode());
+ AddToWorklist(ADDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ADDNode,
DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
}
@@ -2843,7 +2923,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
- AddToWorkList(N);
+ AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -2863,7 +2943,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
- AddToWorkList(N);
+ AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -2894,7 +2974,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
- AddToWorkList(N);
+ AddToWorklist(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -2921,7 +3001,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
Alignment = MinAlign(Alignment, PtrOff);
}
- AddToWorkList(NewPtr.getNode());
+ AddToWorklist(NewPtr.getNode());
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
@@ -2929,8 +3009,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
LN0->getChain(), NewPtr,
LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- Alignment, LN0->getTBAAInfo());
- AddToWorkList(N);
+ LN0->isInvariant(), Alignment, LN0->getAAInfo());
+ AddToWorklist(N);
CombineTo(LN0, Load, Load.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -2976,8 +3056,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue();
}
-/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16
-///
+/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits) {
if (!LegalOperations)
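For reference, the scalar identity this matcher relies on can be checked in isolation. The following standalone snippet (plain C++, not LLVM code) shows the 16-bit essence of the pattern, where exchanging the two bytes of a halfword is exactly a byte swap:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint16_t A = 0xABCD;
      // (a >> 8) | (a << 8): the two bytes of the halfword trade places.
      uint16_t Swapped = (uint16_t)((A >> 8) | (A << 8));
      assert(Swapped == 0xCDAB); // i.e. bswap16(A)
      return 0;
    }

When the same pattern appears in a wider type with only the low halfword demanded, the combiner instead produces (bswap a) >> 16, which is the form named in the comment.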
@@ -3082,10 +3161,13 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
return Res;
}
-/// isBSwapHWordElement - Return true if the specified node is an element
-/// that makes up a 32-bit packed halfword byteswap. i.e.
-/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
-static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
+/// Return true if the specified node is an element that makes up a 32-bit
+/// packed halfword byteswap.
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
+static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
if (!N.getNode()->hasOneUse())
return false;
@@ -3152,8 +3234,11 @@ static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
return true;
}
-/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
-/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// Match a 32-bit packed halfword bswap. That is
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!LegalOperations)
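Likewise, the full 32-bit identity behind MatchBSwapHWord can be sanity-checked with ordinary integer code; this is only an illustration of the algebra, independent of the DAG machinery:

    #include <cassert>
    #include <cstdint>

    static uint32_t bswap32(uint32_t X) {
      return (X >> 24) | ((X >> 8) & 0xff00u) | ((X << 8) & 0xff0000u) | (X << 24);
    }
    static uint32_t rotl32(uint32_t X, unsigned N) {
      return (X << N) | (X >> (32 - N));
    }

    int main() {
      uint32_t X = 0xAABBCCDDu;
      uint32_t Pattern = ((X & 0x000000ffu) << 8) | ((X & 0x0000ff00u) >> 8) |
                         ((X & 0x00ff0000u) << 8) | ((X & 0xff000000u) >> 8);
      assert(Pattern == rotl32(bswap32(X), 16)); // both are 0xBBAADDCC here
      return 0;
    }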
@@ -3165,7 +3250,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
- SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr);
// Look for either
// (or (or (and), (and)), (or (and), (and)))
// (or (or (or (and), (and)), (and)), (and))
@@ -3173,6 +3257,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
+ SDNode *Parts[4] = {};
if (N1.getOpcode() == ISD::OR &&
N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
@@ -3246,15 +3331,25 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
- return N0;
+      // Do not return N0, because an undef node may exist in N0.
+ return DAG.getConstant(
+ APInt::getAllOnesValue(
+ N0.getValueType().getScalarType().getSizeInBits()),
+ N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
- return N1;
+      // Do not return N1, because an undef node may exist in N1.
+ return DAG.getConstant(
+ APInt::getAllOnesValue(
+ N1.getValueType().getScalarType().getSizeInBits()),
+ N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
// Do this only if the resulting shuffle is legal.
if (isa<ShuffleVectorSDNode>(N0) &&
isa<ShuffleVectorSDNode>(N1) &&
+      // Avoid folding a node with an illegal type.
+ TLI.isTypeLegal(VT) &&
N0->getOperand(1) == N1->getOperand(1) &&
ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
bool CanFold = true;
@@ -3366,7 +3461,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
(Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
LR.getValueType(), LL, RL);
- AddToWorkList(ORNode.getNode());
+ AddToWorklist(ORNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
}
// fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
@@ -3375,7 +3470,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
(Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
LR.getValueType(), LL, RL);
- AddToWorkList(ANDNode.getNode());
+ AddToWorklist(ANDNode.getNode());
return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
}
}
@@ -3438,7 +3533,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return SDValue();
}
-/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND) {
if (isa<ConstantSDNode>(Op.getOperand(1))) {
@@ -3735,7 +3830,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return RXOR;
// fold !(x cc y) -> (x !cc y)
- if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
isInt);
@@ -3761,7 +3856,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue V = N0.getOperand(0);
V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
DAG.getConstant(1, V.getValueType()));
- AddToWorkList(V.getNode());
+ AddToWorklist(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
}
@@ -3773,7 +3868,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
}
}
@@ -3785,7 +3880,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+ AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
}
}
@@ -3794,7 +3889,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N0->getOperand(1) == N1) {
SDValue X = N0->getOperand(0);
SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
- AddToWorkList(NotX.getNode());
+ AddToWorklist(NotX.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
}
// fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
@@ -3828,8 +3923,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return SDValue();
}
-/// visitShiftByConstant - Handle transforms common to the three shifts, when
-/// the shift amount is a constant.
+/// Handle transforms common to the three shifts, when the shift amount is a
+/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// We can't and shouldn't fold opaque constants.
if (Amt->isOpaque())
@@ -4059,7 +4154,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT CountVT = NewOp0.getOperand(1).getValueType();
SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
NewOp0, DAG.getConstant(c2, CountVT));
- AddToWorkList(NewSHL.getNode());
+ AddToWorklist(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
@@ -4101,6 +4196,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
HiBitsMask);
}
+ // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // Variant of version done on multiply, except mul by a power of 2 is turned
+ // into a shift.
+ APInt Val;
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ (isa<ConstantSDNode>(N0.getOperand(1)) ||
+ isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+ SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+ SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+ }
+
if (N1C) {
SDValue NewSHL = visitShiftByConstant(N, N1C);
if (NewSHL.getNode())
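The new (shl (add x, c1), c2) fold distributes the shift over the addition; because both sides are computed modulo 2^bitwidth, the rewrite is exact even on wrap-around. A minimal check of the identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Vals[] = {0u, 1u, 0x40000000u, 0xFFFFFFFFu};
      for (uint32_t X : Vals)
        assert(((X + 3u) << 2) == ((X << 2) + (3u << 2))); // (x+3)<<2 == (x<<2)+12
      return 0;
    }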
@@ -4345,7 +4452,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
N0.getOperand(0),
DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
- AddToWorkList(SmallShift.getNode());
+ AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
@@ -4387,7 +4494,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (ShAmt) {
Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
}
return DAG.getNode(ISD::XOR, SDLoc(N), VT,
@@ -4439,12 +4546,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N->hasOneUse()) {
SDNode *Use = *N->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
- AddToWorkList(Use);
+ AddToWorklist(Use);
else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
        // Also look past the truncate.
Use = *Use->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
- AddToWorkList(Use);
+ AddToWorklist(Use);
}
}
@@ -4545,7 +4652,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
N0, DAG.getConstant(1, VT0));
XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
N0, DAG.getConstant(1, VT0));
- AddToWorkList(XORNode.getNode());
+ AddToWorklist(XORNode.getNode());
if (VT.bitsGT(VT0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
@@ -4553,13 +4660,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold (select C, 0, X) -> (and (not C), X)
if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
- AddToWorkList(NOTNode.getNode());
+ AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
}
// fold (select C, X, 1) -> (or (not C), X)
if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
- AddToWorkList(NOTNode.getNode());
+ AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
}
// fold (select C, X, 0) -> (and C, X)
@@ -4582,7 +4689,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC) {
if ((!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
- TLI.isOperationLegal(ISD::SELECT_CC, VT))
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
@@ -4616,12 +4723,17 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
- MVT VT = N->getSimpleValueType(0);
+ EVT VT = N->getValueType(0);
int NumElems = VT.getVectorNumElements();
assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
RHS.getOpcode() == ISD::CONCAT_VECTORS &&
Cond.getOpcode() == ISD::BUILD_VECTOR);
+  // CONCAT_VECTORS can take an arbitrary number of operands. We only care
+  // about the two-operand case here.
+ if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
+ return SDValue();
+
// We're sure we have an even number of elements due to the
// concat_vectors we have as arguments to vselect.
// Skip BV elements until we find one that's not an UNDEF
@@ -4690,8 +4802,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
ISD::SRA, DL, VT, LHS,
DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
- AddToWorkList(Shift.getNode());
- AddToWorkList(Add.getNode());
+ AddToWorklist(Shift.getNode());
+ AddToWorklist(Add.getNode());
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
}
@@ -4718,8 +4830,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// Add the new VSELECT nodes to the work list in case they need to be split
// again.
- AddToWorkList(Lo.getNode());
- AddToWorkList(Hi.getNode());
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
@@ -4761,7 +4873,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, SDLoc(N), false);
if (SCC.getNode()) {
- AddToWorkList(SCC.getNode());
+ AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
if (!SCCC->isNullValue())
@@ -4958,7 +5070,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorkList(oye);
+ AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5134,14 +5246,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
SDLoc DL(N);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- SDValue SetCC = DAG.getSetCC(DL,
- SetCCVT,
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
N0.getOperand(0), N0.getOperand(1), CC);
- EVT SelectVT = getSetCCResultType(VT);
- return DAG.getSelect(DL, VT,
- DAG.getSExtOrTrunc(SetCC, DL, SelectVT),
+ return DAG.getSelect(DL, VT, SetCC,
NegOne, DAG.getConstant(0, VT));
-
}
}
}
@@ -5239,7 +5347,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorkList(oye);
+ AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5257,7 +5365,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorkList(oye);
+ AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5265,10 +5373,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
- AddToWorkList(Op.getNode());
+ AddToWorklist(Op.getNode());
}
return DAG.getZeroExtendInReg(Op, SDLoc(N),
N0.getValueType().getScalarType());
@@ -5487,7 +5595,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorkList(oye);
+ AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -5528,8 +5636,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
@@ -5614,9 +5721,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue();
}
-/// GetDemandedBits - See if the specified operand can be simplified with the
-/// knowledge that only the bits specified by Mask are used. If so, return the
-/// simpler operand, otherwise return a null SDValue.
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by Mask are used. If so, return the simpler operand,
+/// otherwise return a null SDValue.
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
@@ -5657,11 +5764,11 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
return SDValue();
}
-/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
-/// bits and then truncated to a narrower type and where N is a multiple
-/// of number of bits of the narrower type, transform it to a narrower load
-/// from address + N / num of bits of new type. If the result is to be
-/// extended, also fold the extension to form a extending load.
+/// If the result of a wider load is shifted right by N bits and then truncated
+/// to a narrower type, where N is a multiple of the number of bits in the
+/// narrower type, transform it to a narrower load from address + N / (number of
+/// bits in the new type). If the result is to be extended, also fold the
+/// extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
@@ -5786,22 +5893,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
PtrType, LN0->getBasePtr(),
DAG.getConstant(PtrOff, PtrType));
- AddToWorkList(NewPtr.getNode());
+ AddToWorklist(NewPtr.getNode());
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
+ LN0->isInvariant(), NewAlign, LN0->getAAInfo());
else
Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- NewAlign, LN0->getTBAAInfo());
+ LN0->isInvariant(), NewAlign, LN0->getAAInfo());
// Replace the old load's chain with the new load's chain.
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
// Shift the result left, if we've swallowed a left shift.
@@ -5900,7 +6007,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- AddToWorkList(ExtLoad.getNode());
+ AddToWorklist(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
@@ -6022,6 +6129,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
+ if (N0.getOpcode() == ISD::SELECT) {
+ EVT SrcVT = N0.getValueType();
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
+ TLI.isTruncateFree(SrcVT, VT)) {
+ SDLoc SL(N0);
+ SDValue Cond = N0.getOperand(0);
+ SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+ }
+ }
+
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
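The trunc-of-select fold above simply pushes the truncate into both arms of the select; the DAG-level profitability is what the isTruncateFree and legality checks guard. A scalar sketch of the equivalence:

    #include <cassert>
    #include <cstdint>

    static uint16_t truncOfSelect(bool C, uint32_t A, uint32_t B) {
      return (uint16_t)(C ? A : B);           // trunc (select c, a, b)
    }
    static uint16_t selectOfTrunc(bool C, uint32_t A, uint32_t B) {
      return C ? (uint16_t)A : (uint16_t)B;   // select c, (trunc a), (trunc b)
    }

    int main() {
      const bool Conds[] = {true, false};
      for (bool C : Conds)
        assert(truncOfSelect(C, 0x12345678u, 0x9ABCDEF0u) ==
               selectOfTrunc(C, 0x12345678u, 0x9ABCDEF0u));
      return 0;
    }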
@@ -6121,7 +6241,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
continue;
}
SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
- AddToWorkList(NV.getNode());
+ AddToWorklist(NV.getNode());
Opnds.push_back(NV);
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
@@ -6143,7 +6263,7 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
return Elt.getOperand(Elt.getResNo()).getNode();
}
-/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
@@ -6209,7 +6329,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// Ideally this won't happen very often, because instcombine
// and the earlier dagcombine runs (where illegal nodes are
// permitted) should have folded most of them already.
- DAG.DeleteNode(Res.getNode());
+ deleteAndRecombine(Res.getNode());
}
}
@@ -6238,12 +6358,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->isInvariant(), OrigAlign,
- LN0->getTBAAInfo());
- AddToWorkList(N);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::BITCAST, SDLoc(N0),
- N0.getValueType(), Load),
- Load.getValue(1));
+ LN0->getAAInfo());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
}
}
@@ -6257,7 +6373,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
!VT.isVector() && !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
N0.getOperand(0));
- AddToWorkList(NewConv.getNode());
+ AddToWorklist(NewConv.getNode());
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
@@ -6280,34 +6396,34 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
IntXVT, N0.getOperand(1));
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
// If X has a different width than the result/lhs, sext it or truncate it.
unsigned VTWidth = VT.getSizeInBits();
if (OrigXWidth < VTWidth) {
X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
X = DAG.getNode(ISD::SRL, SDLoc(X),
X.getValueType(), X,
DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
}
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, VT));
- AddToWorkList(X.getNode());
+ AddToWorklist(X.getNode());
SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
Cst, DAG.getConstant(~SignBit, VT));
- AddToWorkList(Cst.getNode());
+ AddToWorklist(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
}
@@ -6328,9 +6444,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
return CombineConsecutiveLoads(N, VT);
}
-/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
-/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
-/// destination element value type.
+/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
+/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
@@ -6363,7 +6478,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
DstEltVT, Op));
- AddToWorkList(Ops.back().getNode());
+ AddToWorklist(Ops.back().getNode());
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
@@ -6466,6 +6581,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
@@ -6476,193 +6592,143 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
+
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
- // fold (fadd A, 0) -> A
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
- N1CFP->getValueAPF().isZero())
- return N0;
+
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
+
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
- // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
- N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(1)))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, SDLoc(N), VT,
- N0.getOperand(1), N1));
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+    // No FP constant should be created after legalization, as the Instruction
+    // Selection pass has a hard time dealing with FP constants.
+ bool AllowNewConst = (Level < AfterLegalizeDAG);
- // No FP constant should be created after legalization as Instruction
- // Selection pass has hard time in dealing with FP constant.
- //
- // We don't need test this condition for transformation like following, as
- // the DAG being transformed implies it is legal to take FP constant as
- // operand.
- //
- // (fadd (fmul c, x), x) -> (fmul c+1, x)
- //
- bool AllowNewFpConst = (Level < AfterLegalizeDAG);
-
- // If allow, fold (fadd (fneg x), x) -> 0.0
- if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
- N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
- return DAG.getConstantFP(0.0, VT);
-
- // If allow, fold (fadd x, (fneg x)) -> 0.0
- if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
- N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
- return DAG.getConstantFP(0.0, VT);
-
- // In unsafe math mode, we can fold chains of FADD's of the same value
- // into multiplications. This transform is not safe in general because
- // we are reducing the number of rounding steps.
- if (DAG.getTarget().Options.UnsafeFPMath &&
- TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
- !N0CFP && !N1CFP) {
- if (N0.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
-
- // (fadd (fmul c, x), x) -> (fmul x, c+1)
- if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP00, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, NewCFP);
- }
+ // fold (fadd A, 0) -> A
+ if (N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
- // (fadd (fmul x, c), x) -> (fmul x, c+1)
- if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, NewCFP);
- }
+ // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ N0.getOperand(1), N1));
+
+ // If allowed, fold (fadd (fneg x), x) -> 0.0
+ if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+ return DAG.getConstantFP(0.0, VT);
+
+ // If allowed, fold (fadd x, (fneg x)) -> 0.0
+ if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+ return DAG.getConstantFP(0.0, VT);
+
+ // We can fold chains of FADD's of the same value into multiplications.
+ // This transform is not safe in general because we are reducing the number
+ // of rounding steps.
+ if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+
+ // (fadd (fmul x, c), x) -> (fmul x, c+1)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
+ }
- // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2)
- if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
- N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(1) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP00, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(1), NewCFP);
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ N0.getOperand(0), NewCFP);
+ }
}
- // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
- if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
- N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), NewCFP);
- }
- }
+ if (N1.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
- if (N1.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+ // (fadd x, (fmul x, c)) -> (fmul x, c+1)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
+ }
- // (fadd x, (fmul c, x)) -> (fmul x, c+1)
- if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP10, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, NewCFP);
+ // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
+ if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N0.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
+ }
}
- // (fadd x, (fmul x, c)) -> (fmul x, c+1)
- if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, NewCFP);
+ if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul x, 3.0)
+ if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1))
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ N1, DAG.getConstantFP(3.0, VT));
}
-
- // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2)
- if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
- N0.getOperand(0) == N0.getOperand(1) &&
- N1.getOperand(1) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP10, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1.getOperand(1), NewCFP);
+ if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0)
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ N0, DAG.getConstantFP(3.0, VT));
}
- // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
- if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
+ if (AllowNewConst &&
+ N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
- N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1.getOperand(0), NewCFP);
- }
- }
-
- if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- // (fadd (fadd x, x), x) -> (fmul x, 3.0)
- if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
- (N0.getOperand(0) == N1))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, DAG.getConstantFP(3.0, VT));
- }
-
- if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
- if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
- N1.getOperand(0) == N0)
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, DAG.getConstantFP(3.0, VT));
+ N0.getOperand(0), DAG.getConstantFP(4.0, VT));
}
-
- // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
- if (AllowNewFpConst &&
- N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
- N0.getOperand(0) == N0.getOperand(1) &&
- N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(0) == N1.getOperand(0))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0),
- DAG.getConstantFP(4.0, VT));
- }
+ } // enable-unsafe-fp-math
// FADD -> FMA combines:
- if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
- DAG.getTarget().Options.UnsafeFPMath) &&
- DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+ if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1), N1);
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
+ if (N1.getOpcode() == ISD::FMUL &&
+ (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, SDLoc(N), VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
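The FADD -> FMA combine (and the relaxed one-use check when aggressive fusion is enabled) replaces two rounded operations with a single fused one, which is why it is gated on fast FP-op fusion or unsafe math. A small host-side illustration of the rounding difference, using the standard std::fma rather than anything LLVM-specific (exact values depend on the FP environment):

    #include <cmath>
    #include <cstdio>

    int main() {
      double Eps = std::ldexp(1.0, -30);        // 2^-30
      double X = 1.0 + Eps, Y = 1.0 - Eps, Z = -1.0;
      double Separate = X * Y + Z;              // product rounded, then the sum
      double Fused = std::fma(X, Y, Z);         // single rounding, like ISD::FMA
      std::printf("separate=%g fused=%g\n", Separate, Fused);
      // With IEEE-754 doubles, x*y rounds to exactly 1.0, so the separate form
      // prints 0 while the fused form keeps the tiny -2^-60 residue.
      return 0;
    }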
@@ -6673,10 +6739,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
@@ -6687,60 +6754,60 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
- // fold (fsub A, 0) -> A
- if (DAG.getTarget().Options.UnsafeFPMath &&
- N1CFP && N1CFP->getValueAPF().isZero())
- return N0;
- // fold (fsub 0, B) -> -B
- if (DAG.getTarget().Options.UnsafeFPMath &&
- N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
- return GetNegatedExpression(N1, DAG, LegalOperations);
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, dl, VT, N1);
- }
+
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
- // If 'unsafe math' is enabled, fold
- // (fsub x, x) -> 0.0 &
- // (fsub x, (fadd x, y)) -> (fneg y) &
- // (fsub x, (fadd y, x)) -> (fneg y)
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->getValueAPF().isZero())
+ return N0;
+
+ // (fsub 0, B) -> -B
+ if (N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
+ }
+
+ // (fsub x, x) -> 0.0
if (N0 == N1)
return DAG.getConstantFP(0.0f, VT);
+ // (fsub x, (fadd x, y)) -> (fneg y)
+ // (fsub x, (fadd y, x)) -> (fneg y)
if (N1.getOpcode() == ISD::FADD) {
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
- if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
- &DAG.getTarget().Options))
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
return GetNegatedExpression(N11, DAG, LegalOperations);
- if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
- &DAG.getTarget().Options))
+ if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
return GetNegatedExpression(N10, DAG, LegalOperations);
}
}
// FSUB -> FMA combines:
- if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
- DAG.getTarget().Options.UnsafeFPMath) &&
- DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+ if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, dl, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, dl, VT, N1));
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
+ if (N1.getOpcode() == ISD::FMUL &&
+ (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
return DAG.getNode(ISD::FMA, dl, VT,
DAG.getNode(ISD::FNEG, dl, VT,
N1.getOperand(0)),
@@ -6749,7 +6816,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG &&
N0.getOperand(0).getOpcode() == ISD::FMUL &&
- N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
+ TLI.enableAggressiveFMAFusion(VT))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(ISD::FMA, dl, VT,
@@ -6764,47 +6832,82 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
+ // This just handles C1 * C2 for vectors. Other vector folds are below.
SDValue FoldedVOp = SimplifyVBinOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
+ if (FoldedVOp.getNode())
+ return FoldedVOp;
+ // Canonicalize vector constant to RHS.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR &&
+ N1.getOpcode() != ISD::BUILD_VECTOR)
+ if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
+ if (BV0->isConstant())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
}
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
+
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
- // fold (fmul A, 0) -> 0
- if (DAG.getTarget().Options.UnsafeFPMath &&
- N1CFP && N1CFP->getValueAPF().isZero())
- return N1;
- // fold (fmul A, 0) -> 0, vector edition.
- if (DAG.getTarget().Options.UnsafeFPMath &&
- ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
+
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
return N0;
+
+ if (Options.UnsafeFPMath) {
+ // fold (fmul A, 0) -> 0
+ if (N1CFP && N1CFP->getValueAPF().isZero())
+ return N1;
+
+ // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (N0.getOpcode() == ISD::FMUL) {
+ // Fold scalars or any vector constants (not just splats).
+ // This fold is done in general by InstCombine, but extra fmul insts
+ // may have been generated during lowering.
+ SDValue N01 = N0.getOperand(1);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
+ if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+ (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+ SDLoc SL(N);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+ }
+ }
+
+ // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
+ // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
+    // during an early run of DAGCombiner, it can prevent folding with fmuls
+ // inserted during lowering.
+ if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
+ SDLoc SL(N);
+ const SDValue Two = DAG.getConstantFP(2.0, VT);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
+ }
+ }
+
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
+
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
- &DAG.getTarget().Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
- &DAG.getTarget().Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -6814,14 +6917,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
}
- // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
- if (DAG.getTarget().Options.UnsafeFPMath &&
- N1CFP && N0.getOpcode() == ISD::FMUL &&
- N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(1), N1));
-
return SDValue();
}
@@ -6833,8 +6928,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ // Constant fold FMA.
+ if (isa<ConstantFPSDNode>(N0) &&
+ isa<ConstantFPSDNode>(N1) &&
+ isa<ConstantFPSDNode>(N2)) {
+ return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
+ }
+
+ if (Options.UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
@@ -6850,7 +6953,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FMUL &&
N0 == N2.getOperand(0) &&
N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
@@ -6860,7 +6963,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
- if (DAG.getTarget().Options.UnsafeFPMath &&
+ if (Options.UnsafeFPMath &&
N0.getOpcode() == ISD::FMUL && N1CFP &&
N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
return DAG.getNode(ISD::FMA, dl, VT,
@@ -6878,19 +6981,19 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
- AddToWorkList(RHSNeg.getNode());
+ AddToWorklist(RHSNeg.getNode());
return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
}
}
// (fma x, c, x) -> (fmul x, (c+1))
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2)
+ if (Options.UnsafeFPMath && N1CFP && N0 == N2)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
N1, DAG.getConstantFP(1.0, VT)));
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
- if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
@@ -6906,7 +7009,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
if (VT.isVector()) {
@@ -6918,30 +7022,79 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
- // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
- if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
- // Compute the reciprocal 1.0 / c2.
- APFloat N1APF = N1CFP->getValueAPF();
- APFloat Recip(N1APF.getSemantics(), 1); // 1.0
- APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
- // Only do the transform if the reciprocal is a legal fp immediate that
- // isn't too nasty (eg NaN, denormal, ...).
- if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
- (!LegalOperations ||
- // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
- // backend)... we should handle this gracefully after Legalize.
- // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
- TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(Recip, VT)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
- DAG.getConstantFP(Recip, VT));
+ if (Options.UnsafeFPMath) {
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
+
+ // If this FDIV is part of a reciprocal square root, it may be folded
+ // into a target-specific square root estimate instruction.
+ if (N1.getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ } else if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ } else if (N1.getOpcode() == ISD::FP_ROUND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+ RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ } else if (N1.getOpcode() == ISD::FMUL) {
+ // Look through an FMUL. Even though this won't remove the FDIV directly,
+ // it's still worthwhile to get rid of the FSQRT if possible.
+ SDValue SqrtOp;
+ SDValue OtherOp;
+ if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(0);
+ OtherOp = N1.getOperand(1);
+ } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(1);
+ OtherOp = N1.getOperand(0);
+ }
+ if (SqrtOp.getNode()) {
+ // We found a FSQRT, so try to make this fold:
+ // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
+ if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
+ }
+ }
+
+ // Fold into a reciprocal estimate and multiply instead of a real divide.
+ if (SDValue RV = BuildReciprocalEstimate(N1)) {
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+ }
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
- &DAG.getTarget().Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
- &DAG.getTarget().Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
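The new FDIV rewrites all lean on the algebraic identity x / (y * sqrt(z)) == x * (rsqrt(z) / y); with a true reciprocal square root the two forms agree up to rounding, and the unsafe-math gate exists because the target's rsqrt/reciprocal estimate is only approximate. A quick check of the identity itself, using libm rather than an estimate:

    #include <cassert>
    #include <cmath>

    int main() {
      double X = 3.0, Y = 2.0, Z = 9.0;
      double Direct = X / (Y * std::sqrt(Z));             // x / (y * sqrt(z))
      double Rewritten = X * ((1.0 / std::sqrt(Z)) / Y);  // x * (rsqrt(z) / y)
      assert(std::fabs(Direct - Rewritten) < 1e-12);      // equal up to rounding
      return 0;
    }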
@@ -6968,6 +7121,31 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFSQRT(SDNode *N) {
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
+ if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
+ EVT VT = RV.getValueType();
+ RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
+ AddToWorklist(RV.getNode());
+
+ // Unfortunately, RV is now NaN if the input was exactly 0.
+ // Select out this case and force the answer to 0.
+ SDValue Zero = DAG.getConstantFP(0.0, VT);
+ SDValue ZeroCmp =
+ DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+ AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
+ SDLoc(N), VT, ZeroCmp, Zero, RV);
+ return RV;
+ }
+ }
+ return SDValue();
+}
+
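The select against zero in the new visitFSQRT exists because the rewrite sqrt(x) = x * rsqrt(x) breaks down at x == 0: the reciprocal square root is +inf there, and 0 * inf is NaN under IEEE-754, so the combiner forces the answer back to 0. Illustrated with plain libm arithmetic:

    #include <cmath>
    #include <cstdio>

    int main() {
      double X = 0.0;
      double Rsqrt = 1.0 / std::sqrt(X);  // +inf when X == 0
      double Naive = X * Rsqrt;           // 0 * inf == NaN; hence the select to 0
      std::printf("naive sqrt(0) = %f, isnan = %d\n", Naive, (int)std::isnan(Naive));
      return 0;
    }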
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -7162,7 +7340,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
- AddToWorkList(Tmp.getNode());
+ AddToWorklist(Tmp.getNode());
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
Tmp, N0.getOperand(1));
}
@@ -7213,8 +7391,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -7231,6 +7408,43 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP)
+ return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7240,24 +7454,36 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (FoldedVOp.getNode()) return FoldedVOp;
}
+ // Constant fold FNEG.
+ if (isa<ConstantFPSDNode>(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));
+
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
- // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+ // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
- if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
- !VT.isVector() &&
- N0.getNode()->hasOneUse() &&
- N0.getOperand(0).getValueType().isInteger()) {
+ if (!TLI.isFNegFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST &&
+ N0.getNode()->hasOneUse()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x80... per scalar element
+ // and splat it.
+ SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x80...
+ SignMask = APInt::getSignBit(IntVT.getSizeInBits());
+ }
Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
- DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
- AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BITCAST, SDLoc(N),
- VT, Int);
+ DAG.getConstant(SignMask, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
}
}
@@ -7279,45 +7505,50 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- EVT VT = N->getValueType(0);
-
- // fold (fceil c1) -> fceil(c1)
- if (N0CFP)
- return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
-
- return SDValue();
-}
+ SDValue N1 = N->getOperand(1);
+ const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- EVT VT = N->getValueType(0);
+ if (N0CFP && N1CFP) {
+ const APFloat &C0 = N0CFP->getValueAPF();
+ const APFloat &C1 = N1CFP->getValueAPF();
+ return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
+ }
- // fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP)
- return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+ if (N0CFP) {
+ EVT VT = N->getValueType(0);
+ // Canonicalize to constant on RHS.
+ return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
+ }
return SDValue();
}
-SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- EVT VT = N->getValueType(0);
+ SDValue N1 = N->getOperand(1);
+ const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
- // fold (ffloor c1) -> ffloor(c1)
- if (N0CFP)
- return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+ if (N0CFP && N1CFP) {
+ const APFloat &C0 = N0CFP->getValueAPF();
+ const APFloat &C1 = N1CFP->getValueAPF();
+ return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
+ }
+
+ if (N0CFP) {
+ EVT VT = N->getValueType(0);
+ // Canonicalize to constant on RHS.
+ return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
+ }
return SDValue();
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
if (VT.isVector()) {
@@ -7326,30 +7557,40 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
}
// fold (fabs c1) -> fabs(c1)
- if (N0CFP)
+ if (isa<ConstantFPSDNode>(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+
// fold (fabs (fabs x)) -> (fabs x)
if (N0.getOpcode() == ISD::FABS)
return N->getOperand(0);
+
// fold (fabs (fneg x)) -> (fabs x)
// fold (fabs (fcopysign x, y)) -> (fabs x)
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
- // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+ // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
// constant pool values.
if (!TLI.isFAbsFree(VT) &&
- N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
- N0.getOperand(0).getValueType().isInteger() &&
- !N0.getOperand(0).getValueType().isVector()) {
+ N0.getOpcode() == ISD::BITCAST &&
+ N0.getNode()->hasOneUse()) {
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x7f... per scalar element
+ // and splat it.
+ SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x7f...
+ SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
+ }
Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
- DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
- AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BITCAST, SDLoc(N),
- N->getValueType(0), Int);
+ DAG.getConstant(SignMask, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
}
}
@@ -7429,15 +7670,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// will convert it back to (X & C1) >> C2.
CombineTo(N, NewBRCond, false);
// Truncate is dead.
- if (Trunc) {
- removeFromWorkList(Trunc);
- DAG.DeleteNode(Trunc);
- }
+ if (Trunc)
+ deleteAndRecombine(Trunc);
// Replace the uses of SRL with SETCC
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- removeFromWorkList(N1.getNode());
- DAG.DeleteNode(N1.getNode());
+ deleteAndRecombine(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -7464,10 +7702,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
dbgs() << "\nWith: ";
Tmp.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
- removeFromWorkList(TheXor);
- DAG.DeleteNode(TheXor);
+ deleteAndRecombine(TheXor);
return DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, Tmp, N2);
}
@@ -7495,10 +7732,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- removeFromWorkList(N1.getNode());
- DAG.DeleteNode(N1.getNode());
+ deleteAndRecombine(N1.getNode());
return DAG.getNode(ISD::BRCOND, SDLoc(N),
MVT::Other, Chain, SetCC, N2);
}
@@ -7523,7 +7759,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), SDLoc(N),
false);
- if (Simp.getNode()) AddToWorkList(Simp.getNode());
+ if (Simp.getNode()) AddToWorklist(Simp.getNode());
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
@@ -7535,9 +7771,8 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
-/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
-/// uses N as its base pointer and that N may be folded in the load / store
-/// addressing mode.
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -7576,12 +7811,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
}
-/// CombineToPreIndexedLoadStore - Try turning a load / store into a
-/// pre-indexed load / store when the base pointer is an add or subtract
-/// and it has other uses besides the load / store. After the
-/// transformation, the new indexed load / store has effectively folded
-/// the add / subtract in and all of its other uses are redirected to the
-/// new load / store.
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
@@ -7733,7 +7967,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
@@ -7742,7 +7976,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
}
// Finally, since the node is now dead, remove it from the graph.
- DAG.DeleteNode(N);
+ deleteAndRecombine(N);
if (Swapped)
std::swap(BasePtr, Offset);
@@ -7792,23 +8026,20 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
SDLoc(OtherUses[i]),
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
- removeFromWorkList(OtherUses[i]);
- DAG.DeleteNode(OtherUses[i]);
+ deleteAndRecombine(OtherUses[i]);
}
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
- removeFromWorkList(Ptr.getNode());
- DAG.DeleteNode(Ptr.getNode());
+ deleteAndRecombine(Ptr.getNode());
return true;
}
-/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
-/// add / sub of the base pointer node into a post-indexed load / store.
-/// The transformation folded the add / subtract into the new indexed
-/// load / store effectively and all of its uses are redirected to the
-/// new load / store.
+/// Try to combine a load/store with an add/sub of the base pointer node into a
+/// post-indexed load/store. The transformation effectively folds the
+/// add/subtract into the new indexed load/store and redirects all of its uses
+/// to the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
@@ -7903,7 +8134,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
@@ -7912,13 +8143,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
}
// Finally, since the node is now dead, remove it from the graph.
- DAG.DeleteNode(N);
+ deleteAndRecombine(N);
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
Result.getValue(isLoad ? 1 : 0));
- removeFromWorkList(Op);
- DAG.DeleteNode(Op);
+ deleteAndRecombine(Op);
return true;
}
}
@@ -7927,6 +8157,30 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
+/// \brief Return the base-pointer arithmetic from an indexed \p LD.
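+/// For example (illustrative only), a PRE_INC or POST_INC load with base
+/// pointer BP and increment Inc yields (add BP, Inc); the DEC modes yield the
+/// corresponding (sub BP, Inc).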
+SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ assert(AM != ISD::UNINDEXED);
+ SDValue BP = LD->getOperand(1);
+ SDValue Inc = LD->getOperand(2);
+
+ // Some backends use TargetConstants for load offsets, but don't expect
+ // TargetConstants in general ADD nodes. We can convert these constants into
+ // regular Constants (if the constant is not opaque).
+ assert((Inc.getOpcode() != ISD::TargetConstant ||
+ !cast<ConstantSDNode>(Inc)->isOpaque()) &&
+ "Cannot split out indexing using opaque target constants");
+ if (Inc.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
+ ConstInc->getValueType(0));
+ }
+
+ unsigned Opc =
+ (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
+ return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
+}
+
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -7950,33 +8204,46 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << "\nWith chain: ";
Chain.getNode()->dump(&DAG);
dbgs() << "\n");
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
- if (N->use_empty()) {
- removeFromWorkList(N);
- DAG.DeleteNode(N);
- }
+ if (N->use_empty())
+ deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
+
+ // If this load has an opaque TargetConstant offset, then we cannot split
+ // the indexing into an add/sub directly (that TargetConstant may not be
+ // valid for a different type of node, and we cannot convert an opaque
+ // target constant into a regular constant).
+ bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
+ cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
+
+ if (!N->hasAnyUseOfValue(0) &&
+ ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ SDValue Index;
+ if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
+ Index = SplitIndexingFromLoad(LD);
+ // Try to fold the base pointer arithmetic into subsequent loads and
+ // stores.
+ AddUsersToWorklist(N);
+ } else
+ Index = DAG.getUNDEF(N->getValueType(1));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
- DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
- removeFromWorkList(N);
- DAG.DeleteNode(N);
+ deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -8004,15 +8271,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), Align,
- LD->getTBAAInfo());
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), Align, LD->getAAInfo());
return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
- TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
#ifndef NDEBUG
if (CombinerAAOnlyFunc.getNumOccurrences() &&
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
@@ -8042,7 +8309,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
MVT::Other, Chain, ReplLoad.getValue(1));
// Make sure the new and old chains are cleaned up.
- AddToWorkList(Token.getNode());
+ AddToWorklist(Token.getNode());
// Replace uses with load result and token factor. Don't add users
// to work list.
@@ -8342,7 +8609,7 @@ struct LoadedSlice {
// At this point, we know that we perform a cross-register-bank copy.
// Check if it is expensive.
- const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
// Assume bitcasts are cheap, unless both register classes do not
// explicitly share a common sub class.
if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
@@ -8606,9 +8873,9 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
return true;
}
-/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
-/// load is having specific bytes cleared out. If so, return the byte size
-/// being masked out and the shift amount.
+/// Check to see if V is (and load (ptr), imm), where the load has specific
+/// bytes cleared out. If so, return the byte size being masked out and the
+/// shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
@@ -8681,9 +8948,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
}
-/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
-/// provides a value as specified by MaskInfo. If so, replace the specified
-/// store with a narrower store of truncated IVal.
+/// Check to see if IVal is something that provides a value as specified by
+/// MaskInfo. If so, replace the specified store with a narrower store of
+/// truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue IVal, StoreSDNode *St,
@@ -8738,10 +9005,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
}
-/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
-/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
-/// of the loaded bits, try narrowing the load and store if it would end up
-/// being a win for performance or code size.
+/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
+/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
+/// narrowing the load and store if it would end up being a win for performance
+/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
if (ST->isVolatile())
@@ -8841,7 +9108,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), NewAlign,
- LD->getTBAAInfo());
+ LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
@@ -8849,10 +9116,10 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
ST->getPointerInfo().getWithOffset(PtrOff),
false, false, NewAlign);
- AddToWorkList(NewPtr.getNode());
- AddToWorkList(NewLD.getNode());
- AddToWorkList(NewVal.getNode());
- WorkListRemover DeadNodes(*this);
+ AddToWorklist(NewPtr.getNode());
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewVal.getNode());
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
@@ -8862,10 +9129,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
}
-/// TransformFPLoadStorePair - For a given floating point load / store pair,
-/// if the load value isn't used by any other operations, then consider
-/// transforming the pair to integer load / store operations if the target
-/// deems the transformation profitable.
+/// For a given floating point load / store pair, if the load value isn't used
+/// by any other operations, then consider transforming the pair to integer
+/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -8907,9 +9173,9 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
ST->getPointerInfo(),
false, false, STAlign);
- AddToWorkList(NewLD.getNode());
- AddToWorkList(NewST.getNode());
- WorkListRemover DeadNodes(*this);
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewST.getNode());
+ WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
@@ -9039,7 +9305,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
return false;
// Only look at ends of store sequences.
- SDValue Chain = SDValue(St, 1);
+ SDValue Chain = SDValue(St, 0);
if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
return false;
@@ -9070,7 +9336,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
StoreSDNode *Index = St;
while (Index) {
// If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 1)->hasOneUse())
+ if (Index != St && !SDValue(Index, 0)->hasOneUse())
break;
// Find the base pointer and offset for this memory node.
@@ -9301,8 +9567,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Since we know that St is redundant, just iterate.
while (!St->use_empty())
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- removeFromWorkList(St);
- DAG.DeleteNode(St);
+ deleteAndRecombine(St);
}
return true;
@@ -9361,6 +9626,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (LoadNodes.size() < 2)
return false;
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother.
+ unsigned RequiredAlignment;
+ if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ St->getAlignment() >= RequiredAlignment)
+ return false;
+
// Scan the memory operations on the chain and find the first non-consecutive
// load memory address. These variables hold the index in the store node
// array.
@@ -9476,8 +9748,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
- removeFromWorkList(St);
- DAG.DeleteNode(St);
+ deleteAndRecombine(St);
}
return true;
@@ -9503,7 +9774,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), OrigAlign,
- ST->getTBAAInfo());
+ ST->getAAInfo());
}
// Turn 'store undef, Ptr' -> nothing.
@@ -9557,19 +9828,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
- ST->getAlignment(), TBAAInfo);
+ ST->getAlignment(), AAInfo);
Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
- Alignment, TBAAInfo);
+ Alignment, AAInfo);
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
St0, St1);
}
@@ -9586,7 +9857,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align,
- ST->getTBAAInfo());
+ ST->getAAInfo());
}
}
@@ -9596,8 +9867,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (NewST.getNode())
return NewST;
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
- TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
#ifndef NDEBUG
if (CombinerAAOnlyFunc.getNumOccurrences() &&
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
@@ -9625,7 +9896,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
MVT::Other, Chain, ReplStore);
// Make sure the new and old chains are cleaned up.
- AddToWorkList(Token.getNode());
+ AddToWorklist(Token.getNode());
// Don't add users to work list.
return CombineTo(N, Token, false);
@@ -9647,7 +9918,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
APInt::getLowBitsSet(
Value.getValueType().getScalarType().getSizeInBits(),
ST->getMemoryVT().getScalarType().getSizeInBits()));
- AddToWorkList(Value.getNode());
+ AddToWorklist(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
Ptr, ST->getMemoryVT(), ST->getMemOperand());
@@ -9674,6 +9945,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // If this is a store followed by a store with the same value to the same
+ // location, then the store is dead/noop.
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+ if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
+ ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
+ ST1->isUnindexed() && !ST1->isVolatile()) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
@@ -9741,7 +10023,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
InVec.getOperand(0), InVal, EltNo);
- AddToWorkList(NewOp.getNode());
+ AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
}
@@ -9829,32 +10111,32 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
? ISD::ZEXTLOAD
: ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
- NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
- OriginalLoad->isNonTemporal(), Align,
- OriginalLoad->getTBAAInfo());
+ Load = DAG.getExtLoad(
+ ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
+ VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
} else {
Load = DAG.getLoad(
VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
- OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
if (ResultVT.bitsLT(VecEltVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
else
Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
}
- WorkListRemover DeadNodes(*this);
+ WorklistRemover DeadNodes(*this);
SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
- AddToWorkList(Load.getNode());
- AddUsersToWorkList(Load.getNode()); // Add users too
+ AddToWorklist(Load.getNode());
+ AddUsersToWorklist(Load.getNode()); // Add users too
// Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorkList(EVE);
+ AddToWorklist(EVE);
++OpsNarrowed;
return SDValue(EVE, 0);
}
@@ -9952,7 +10234,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
- ISD::isNormalLoad(InVec.getNode())) {
+ ISD::isNormalLoad(InVec.getNode()) &&
+ !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
SDValue Index = N->getOperand(1);
if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
@@ -10135,7 +10418,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
- AddToWorkList(BV.getNode());
+ AddToWorklist(BV.getNode());
// Bitcast to the desired type.
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
@@ -10201,7 +10484,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
Opnds.push_back(In.getOperand(0));
}
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
- AddToWorkList(BV.getNode());
+ AddToWorklist(BV.getNode());
return DAG.getNode(Opcode, dl, VT, BV);
}
@@ -10227,9 +10510,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
// at most two distinct vectors, turn this into a shuffle node.
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
// May only combine to shuffle after legalize if shuffle is legal.
- if (LegalOperations &&
- !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
SDValue VecIn1, VecIn2;
@@ -10319,10 +10605,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
VecIn1.getValueType() != VT)
return SDValue();
- // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
- if (!isTypeLegal(VT))
- return SDValue();
-
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[2];
Ops[0] = VecIn1;
@@ -10513,6 +10795,92 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
}
+static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
+ SDValue V, SelectionDAG &DAG) {
+ SDLoc DL(V);
+ EVT VT = V.getValueType();
+
+ switch (V.getOpcode()) {
+ default:
+ return V;
+
+ case ISD::CONCAT_VECTORS: {
+ EVT OpVT = V->getOperand(0).getValueType();
+ int OpSize = OpVT.getVectorNumElements();
+ SmallBitVector OpUsedElements(OpSize, false);
+ bool FoundSimplification = false;
+ SmallVector<SDValue, 4> NewOps;
+ NewOps.reserve(V->getNumOperands());
+ for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
+ SDValue Op = V->getOperand(i);
+ bool OpUsed = false;
+ for (int j = 0; j < OpSize; ++j)
+ if (UsedElements[i * OpSize + j]) {
+ OpUsedElements[j] = true;
+ OpUsed = true;
+ }
+ NewOps.push_back(
+ OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
+ : DAG.getUNDEF(OpVT));
+ FoundSimplification |= Op == NewOps.back();
+ OpUsedElements.reset();
+ }
+ if (FoundSimplification)
+ V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
+ return V;
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue BaseV = V->getOperand(0);
+ SDValue SubV = V->getOperand(1);
+ auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ if (!IdxN)
+ return V;
+
+ int SubSize = SubV.getValueType().getVectorNumElements();
+ int Idx = IdxN->getZExtValue();
+ bool SubVectorUsed = false;
+ SmallBitVector SubUsedElements(SubSize, false);
+ for (int i = 0; i < SubSize; ++i)
+ if (UsedElements[i + Idx]) {
+ SubVectorUsed = true;
+ SubUsedElements[i] = true;
+ UsedElements[i + Idx] = false;
+ }
+
+ // Now recurse on both the base and sub vectors.
+ SDValue SimplifiedSubV =
+ SubVectorUsed
+ ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
+ : DAG.getUNDEF(SubV.getValueType());
+    SDValue SimplifiedBaseV =
+        simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
+ if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
+ V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
+ return V;
+ }
+ }
+}
+
+static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
+ SDValue N1, SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ int NumElts = VT.getVectorNumElements();
+ SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
+ for (int M : SVN->getMask())
+ if (M >= 0 && M < NumElts)
+ N0UsedElements[M] = true;
+ else if (M >= NumElts)
+ N1UsedElements[M - NumElts] = true;
+
+ SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
+ SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
+ if (S0 == N0 && S1 == N1)
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
+}
+
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
@@ -10665,6 +11033,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // There are various patterns used to build up a vector from smaller vectors,
+ // subvectors, or elements. Scan chains of these and replace unused insertions
+ // or components with undef.
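+  // For example (illustrative), in a shuffle of concat(A, B) whose mask only
+  // reads the elements that came from A, the B operand can be replaced with
+  // undef, which often unblocks further combines.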
+ if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
+ return S;
+
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.getOpcode() == ISD::UNDEF ||
@@ -10699,7 +11073,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Idx = OtherSV->getMaskElt(Idx);
Mask.push_back(Idx);
}
-
+
+      // Check if all indices in Mask are Undef. If so, propagate Undef.
+ bool isUndefMask = true;
+ for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+ isUndefMask &= Mask[i] < 0;
+
+ if (isUndefMask)
+ return DAG.getUNDEF(VT);
+
bool CommuteOperands = false;
if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
// To be valid, the combine shuffle mask should only reference elements
@@ -10738,12 +11120,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// The combined shuffle must map each index to itself.
IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
}
-
+
if (IsIdentityMask) {
if (CommuteOperands)
// optimize shuffle(shuffle(x, y), undef) -> y.
return OtherSV->getOperand(1);
-
+
// optimize shuffle(shuffle(x, undef), undef) -> x
// optimize shuffle(shuffle(x, y), undef) -> x
return OtherSV->getOperand(0);
@@ -10751,16 +11133,134 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// It may still be beneficial to combine the two shuffles if the
// resulting shuffle is legal.
+ if (TLI.isTypeLegal(VT)) {
+ if (!CommuteOperands) {
+ if (TLI.isShuffleMaskLegal(Mask, VT))
+ // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+ &Mask[0]);
+ } else {
+ // Compute the commuted shuffle mask.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElts)
+ Mask[i] = idx + NumElts;
+ else
+ Mask[i] = idx - NumElts;
+ }
+
+ if (TLI.isShuffleMaskLegal(Mask, VT))
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1,
+ &Mask[0]);
+ }
+ }
+ }
+
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ TLI.isTypeLegal(VT)) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+      // Commute the operands of this shuffle so that the next rule
+      // will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+ // Don't try to fold shuffles with illegal type.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) {
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = OtherSV->getOperand(0);
+ SDValue SV1 = OtherSV->getOperand(1);
+ bool HasSameOp0 = N1 == SV0;
+ bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
+ if (!HasSameOp0 && !IsSV1Undef && N1 != SV1)
+ // Early exit.
+ return SDValue();
+
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ if (Idx < (int)NumElts) {
+ Idx = OtherSV->getMaskElt(Idx);
+ if (IsSV1Undef && Idx >= (int) NumElts)
+ Idx = -1; // Propagate Undef.
+ } else
+ Idx = HasSameOp0 ? Idx - NumElts : Idx;
+
+ Mask.push_back(Idx);
+ }
+
+    // Check if all indices in Mask are Undef. If so, propagate Undef.
+ bool isUndefMask = true;
+ for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+ isUndefMask &= Mask[i] < 0;
+
+ if (isUndefMask)
+ return DAG.getUNDEF(VT);
+
+ // Avoid introducing shuffles with illegal mask.
+ if (TLI.isShuffleMaskLegal(Mask, VT)) {
+ if (IsSV1Undef)
+ // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
+ }
+
+ // Compute the commuted shuffle mask.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElts)
+ Mask[i] = idx + NumElts;
+ else
+ Mask[i] = idx - NumElts;
+ }
+
if (TLI.isShuffleMaskLegal(Mask, VT)) {
- if (!CommuteOperands)
- // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
- // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
- return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
- &Mask[0]);
-
- // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
- return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
- &Mask[0]);
+ if (IsSV1Undef)
+ // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1, SV0, &Mask[0]);
+ // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV1, SV0, &Mask[0]);
}
}
@@ -10794,8 +11294,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return SDValue();
}
-/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
-/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// Returns a vector_shuffle if it is able to transform an AND to a
+/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
@@ -10817,7 +11317,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
- Indices.push_back(NumElts);
+ Indices.push_back(NumElts+i);
else
return SDValue();
}
@@ -10841,7 +11341,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
}
-/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
"SimplifyVBinOp only works on vectors!");
@@ -10896,7 +11396,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
FoldOp.getOpcode() != ISD::ConstantFP)
break;
Ops.push_back(FoldOp);
- AddToWorkList(FoldOp.getNode());
+ AddToWorklist(FoldOp.getNode());
}
if (Ops.size() == LHS.getNumOperands())
@@ -10918,7 +11418,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue UndefVector = LHS.getOperand(1);
SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
LHS.getOperand(0), RHS.getOperand(0));
- AddUsersToWorkList(N);
+ AddUsersToWorklist(N);
return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
&SVN0->getMask()[0]);
}
@@ -10927,7 +11427,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}
-/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG.
+/// Visit a unary vector operation, like FABS/FNEG.
SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
"SimplifyVUnaryOp only works on vectors!");
@@ -10950,7 +11450,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
FoldOp.getOpcode() != ISD::ConstantFP)
break;
Ops.push_back(FoldOp);
- AddToWorkList(FoldOp.getNode());
+ AddToWorklist(FoldOp.getNode());
}
if (Ops.size() != N0.getNumOperands())
@@ -10977,9 +11477,9 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
SCC.getOperand(4));
- AddToWorkList(SETCC.getNode());
- return DAG.getSelect(SDLoc(SCC), SCC.getValueType(),
- SCC.getOperand(2), SCC.getOperand(3), SETCC);
+ AddToWorklist(SETCC.getNode());
+ return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+ SCC.getOperand(2), SCC.getOperand(3));
}
return SCC;
@@ -10987,12 +11487,11 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
return SDValue();
}
-/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
-/// are the two values being selected between, see if we can simplify the
-/// select. Callers of this should assume that TheSelect is deleted if this
-/// returns true. As such, they should return the appropriate thing (e.g. the
-/// node) back to the top-level of the DAG combiner loop to avoid it being
-/// looked at.
+/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
+/// being selected between, see if we can simplify the select. Callers of this
+/// should assume that TheSelect is deleted if this returns true. As such, they
+/// should return the appropriate thing (e.g. the node) back to the top-level of
+/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
@@ -11071,22 +11570,27 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
}
SDValue Load;
+ // It is safe to replace the two loads if they have different alignments,
+ // but the new load must be the minimum (most restrictive) alignment of the
+ // inputs.
+ bool isInvariant = LLD->isInvariant() & RLD->isInvariant();
+ unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
Load = DAG.getLoad(TheSelect->getValueType(0),
SDLoc(TheSelect),
- // FIXME: Discards pointer and TBAA info.
+ // FIXME: Discards pointer and AA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
- LLD->isInvariant(), LLD->getAlignment());
+ isInvariant, Alignment);
} else {
Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
RLD->getExtensionType() : LLD->getExtensionType(),
SDLoc(TheSelect),
TheSelect->getValueType(0),
- // FIXME: Discards pointer and TBAA info.
+ // FIXME: Discards pointer and AA info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->getMemoryVT(), LLD->isVolatile(),
- LLD->isNonTemporal(), LLD->getAlignment());
+ LLD->isNonTemporal(), isInvariant, Alignment);
}
// Users of the select now use the result of the load.
@@ -11102,7 +11606,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
return false;
}
-/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3,
@@ -11118,7 +11622,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// Determine if the condition we're dealing with is constant
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, DL, false);
- if (SCC.getNode()) AddToWorkList(SCC.getNode());
+ if (SCC.getNode()) AddToWorklist(SCC.getNode());
ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
// fold select_cc true, x, y -> x
@@ -11186,13 +11690,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
SDValue Cond = DAG.getSetCC(DL,
getSetCCResultType(N0.getValueType()),
N0, N1, CC);
- AddToWorkList(Cond.getNode());
+ AddToWorklist(Cond.getNode());
SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
Cond, One, Zero);
- AddToWorkList(CstOffset.getNode());
+ AddToWorklist(CstOffset.getNode());
CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
CstOffset);
- AddToWorkList(CPIdx.getNode());
+ AddToWorklist(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
false, false, Alignment);
@@ -11217,11 +11721,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
getShiftAmountTy(N0.getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
XType, N0, ShCt);
- AddToWorkList(Shift.getNode());
+ AddToWorklist(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorkList(Shift.getNode());
+ AddToWorklist(Shift.getNode());
}
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
@@ -11231,11 +11735,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
XType, N0,
DAG.getConstant(XType.getSizeInBits()-1,
getShiftAmountTy(N0.getValueType())));
- AddToWorkList(Shift.getNode());
+ AddToWorklist(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorkList(Shift.getNode());
+ AddToWorklist(Shift.getNode());
}
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
@@ -11305,8 +11809,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
N2.getValueType(), SCC);
}
- AddToWorkList(SCC.getNode());
- AddToWorkList(Temp.getNode());
+ AddToWorklist(SCC.getNode());
+ AddToWorklist(Temp.getNode());
if (N2C->getAPIntValue() == 1)
return Temp;
@@ -11385,8 +11889,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
getShiftAmountTy(N0.getValueType())));
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
XType, N0, Shift);
- AddToWorkList(Shift.getNode());
- AddToWorkList(Add.getNode());
+ AddToWorklist(Shift.getNode());
+ AddToWorklist(Add.getNode());
return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
}
}
@@ -11394,7 +11898,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
return SDValue();
}
-/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
SDValue N1, ISD::CondCode Cond,
SDLoc DL, bool foldBooleans) {
@@ -11403,10 +11907,10 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
-/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
-/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
@@ -11421,14 +11925,33 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
for (SDNode *N : Built)
- AddToWorkList(N);
+ AddToWorklist(N);
return S;
}
-/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
-/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
+/// DAG expression that will generate the same value by right shifting.
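+/// For example (illustrative, 32-bit): x / 8 becomes
+///   (x + ((x >> 31) >>u 29)) >> 3,
+/// i.e. negative values are biased by 7 before the arithmetic shift so the
+/// result rounds toward zero.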
+SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (!C->getAPIntValue())
+ return SDValue();
+
+ std::vector<SDNode *> Built;
+ SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
+
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+}
+
+/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
+/// expression that will generate the same value by multiplying by a magic
+/// number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
@@ -11443,13 +11966,145 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
for (SDNode *N : Built)
- AddToWorkList(N);
+ AddToWorklist(N);
return S;
}
-/// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself. Provides base object and offset as
-// results.
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // Expose the DAG combiner to the target combiner implementations.
+ TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+
+ unsigned Iterations = 0;
+ if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
+ if (Iterations) {
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
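+      // As an illustrative example, with A = 3 and X_0 = 0.25:
+      //   X_1 = 0.25   * (2 - 3*0.25)   = 0.3125
+      //   X_2 = 0.3125 * (2 - 3*0.3125) = 0.33203125
+      // converging quadratically toward 1/3.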
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue FPOne = DAG.getConstantFP(1.0, VT);
+
+ AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+ AddToWorklist(Est.getNode());
+ }
+ }
+ return Est;
+ }
+
+ return SDValue();
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+/// As a result, we precompute A/2 prior to the iteration loop.
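+/// (Derivation sketch: with F(X) = 1/X^2 - A and F'(X) = -2/X^3,
+///  X_{i+1} = X_i - F(X_i)/F'(X_i) = X_i + X_i (1 - A X_i^2) / 2
+///          = X_i (1.5 - A X_i^2 / 2).)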
+SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
+ unsigned Iterations) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, VT);
+
+ // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+ AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+ AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+ AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+ AddToWorklist(Est.getNode());
+ }
+ return Est;
+}
+
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
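+/// (This is the same update as above, algebraically refactored so that the
+///  only FP constants needed are -0.5 and -3.0:
+///  X_i (1.5 - A X_i^2 / 2) = (-0.5 * X_i) * (A * X_i * X_i - 3.0).)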
+SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
+ unsigned Iterations) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, VT);
+
+ // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+ AddToWorklist(HalfEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+ AddToWorklist(Est.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+ AddToWorklist(Est.getNode());
+
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+ AddToWorklist(Est.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+ AddToWorklist(Est.getNode());
+ }
+ return Est;
+}
+
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // Expose the DAG combiner to the target combiner implementations.
+ TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+ unsigned Iterations = 0;
+ bool UseOneConstNR = false;
+ if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+ AddToWorklist(Est.getNode());
+ if (Iterations) {
+ Est = UseOneConstNR ?
+ BuildRsqrtNROneConst(Op, Est, Iterations) :
+ BuildRsqrtNRTwoConst(Op, Est, Iterations);
+ }
+ return Est;
+ }
+
+ return SDValue();
+}
+
+/// Return true if base is a frame index, which is known not to alias with
+/// anything but itself. Provides base object and offset as results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
@@ -11485,8 +12140,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
return isa<FrameIndexSDNode>(Base);
}
-/// isAlias - Return true if there is any possibility that the two addresses
-/// overlap.
+/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
// If they are the same then they must be aliases.
if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
@@ -11545,8 +12199,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
return false;
}
- bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
- TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+ bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
+ ? CombinerGlobalAA
+ : DAG.getSubtarget().useAA();
#ifndef NDEBUG
if (CombinerAAOnlyFunc.getNumOccurrences() &&
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
@@ -11564,10 +12219,10 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
AliasAnalysis::AliasResult AAResult =
AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
Overlap1,
- UseTBAA ? Op0->getTBAAInfo() : nullptr),
+ UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
Overlap2,
- UseTBAA ? Op1->getTBAAInfo() : nullptr));
+ UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
@@ -11576,7 +12231,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
return true;
}
-/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases) {
@@ -11612,7 +12267,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
}
// Don't bother if we've been before.
- if (!Visited.insert(Chain.getNode()))
+ if (!Visited.insert(Chain.getNode()).second)
continue;
switch (Chain.getOpcode()) {
@@ -11687,10 +12342,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// like register copies will interfere with trivial cases).
SmallVector<const SDNode *, 16> Worklist;
- for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(),
- IE = Visited.end(); I != IE; ++I)
- if (*I != OriginalChain.getNode())
- Worklist.push_back(*I);
+ for (const SDNode *N : Visited)
+ if (N != OriginalChain.getNode())
+ Worklist.push_back(N);
while (!Worklist.empty()) {
const SDNode *M = Worklist.pop_back_val();
@@ -11701,7 +12355,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
for (SDNode::use_iterator UI = M->use_begin(),
UIE = M->use_end(); UI != UIE; ++UI)
- if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) {
+ if (UI.getUse().getValueType() == MVT::Other &&
+ Visited.insert(*UI).second) {
if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
// We've not visited this use, and we care about it (it could have an
// ordering dependency with the original node).
@@ -11717,8 +12372,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
}
}
-/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
-/// for a better chain (aliasing node.)
+/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
+/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
@@ -11737,11 +12392,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
-// SelectionDAG::Combine - This is the entry point for the file.
-//
+/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
CodeGenOpt::Level OptLevel) {
- /// run - This is the main entry point to this class.
- ///
+ /// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 445572a..8facbc2 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,6 +39,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
@@ -64,19 +65,32 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "isel"
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
- "target-independent selector");
+ "target-independent selector");
STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
- "target-specific selector");
+ "target-specific selector");
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
-/// startNewBlock - Set the current block to which generated machine
-/// instructions will be appended, and clear the local CSE map.
-///
+void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned AttrIdx) {
+ IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
+ IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
+ IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
+ IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
+ IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
+ IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
+ IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+ Alignment = CS->getParamAlignment(AttrIdx);
+}
+
+/// Set the current block to which generated machine instructions will be
+/// appended, and clear the local CSE map.
void FastISel::startNewBlock() {
LocalValueMap.clear();
@@ -89,18 +103,19 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
-bool FastISel::LowerArguments() {
+bool FastISel::lowerArguments() {
if (!FuncInfo.CanLowerReturn)
// Fallback to SDISel argument lowering code to deal with sret pointer
// parameter.
return false;
- if (!FastLowerArguments())
+ if (!fastLowerArguments())
return false;
// Enter arguments into ValueMap for uses in non-entry BBs.
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
- E = FuncInfo.Fn->arg_end(); I != E; ++I) {
+ E = FuncInfo.Fn->arg_end();
+ I != E; ++I) {
DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
assert(VI != LocalValueMap.end() && "Missed an argument?");
FuncInfo.ValueMap[I] = VI->second;
@@ -112,22 +127,30 @@ void FastISel::flushLocalValueMap() {
LocalValueMap.clear();
LastLocalValue = EmitStartPt;
recomputeInsertPt();
+ SavedInsertPt = FuncInfo.InsertPt;
}
-bool FastISel::hasTrivialKill(const Value *V) const {
+bool FastISel::hasTrivialKill(const Value *V) {
// Don't consider constants or arguments to have trivial kills.
const Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return false;
// No-op casts are trivially coalesced by fast-isel.
- if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (const auto *Cast = dyn_cast<CastInst>(I))
if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) &&
!hasTrivialKill(Cast->getOperand(0)))
return false;
+ // Even if the value has only one use in the LLVM IR, it is possible that
+ // FastISel might fold the use into another instruction and then there is
+ // more than one use at the Machine Instruction level.
+ unsigned Reg = lookUpRegForValue(V);
+ if (Reg && !MRI.use_empty(Reg))
+ return false;
+
// GEPs with all zero indices are trivially coalesced by fast-isel.
- if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(I))
if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
return false;
@@ -160,7 +183,7 @@ unsigned FastISel::getRegForValue(const Value *V) {
// Look up the value to see if we already have a register for it.
unsigned Reg = lookUpRegForValue(V);
- if (Reg != 0)
+ if (Reg)
return Reg;
// In bottom-up mode, just create the virtual register which will be used
@@ -181,29 +204,24 @@ unsigned FastISel::getRegForValue(const Value *V) {
return Reg;
}
-/// materializeRegForValue - Helper for getRegForValue. This function is
-/// called when the value isn't already available in a register and must
-/// be materialized with new instructions.
-unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
unsigned Reg = 0;
-
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getValue().getActiveBits() <= 64)
- Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
- } else if (isa<AllocaInst>(V)) {
- Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
- } else if (isa<ConstantPointerNull>(V)) {
+ Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V))
+ Reg = fastMaterializeAlloca(cast<AllocaInst>(V));
+ else if (isa<ConstantPointerNull>(V))
// Translate this as an integer zero so that it can be
// local-CSE'd with actual integer zeros.
- Reg =
- getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext())));
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- if (CF->isNullValue()) {
- Reg = TargetMaterializeFloatZero(CF);
- } else {
+ Reg = getRegForValue(
+ Constant::getNullValue(DL.getIntPtrType(V->getContext())));
+ else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue())
+ Reg = fastMaterializeFloatZero(CF);
+ else
// Try to emit the constant directly.
- Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
- }
+ Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF);
if (!Reg) {
// Try to emit the constant by using an integer constant with a cast.
@@ -213,22 +231,22 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
uint64_t x[2];
uint32_t IntBitWidth = IntVT.getSizeInBits();
bool isExact;
- (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
if (isExact) {
APInt IntVal(IntBitWidth, x);
unsigned IntegerReg =
- getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
if (IntegerReg != 0)
- Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
- IntegerReg, /*Kill=*/false);
+ Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
+ /*Kill=*/false);
}
}
- } else if (const Operator *Op = dyn_cast<Operator>(V)) {
- if (!SelectOperator(Op, Op->getOpcode()))
+ } else if (const auto *Op = dyn_cast<Operator>(V)) {
+ if (!selectOperator(Op, Op->getOpcode()))
if (!isa<Instruction>(Op) ||
- !TargetSelectInstruction(cast<Instruction>(Op)))
+ !fastSelectInstruction(cast<Instruction>(Op)))
return 0;
Reg = lookUpRegForValue(Op);
} else if (isa<UndefValue>(V)) {
@@ -236,15 +254,26 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
+ return Reg;
+}
+
+/// Helper for getRegForValue. This function is called when the value isn't
+/// already available in a register and must be materialized with new
+/// instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+ // Give the target-specific code a try first.
+ if (isa<Constant>(V))
+ Reg = fastMaterializeConstant(cast<Constant>(V));
- // If target-independent code couldn't handle the value, give target-specific
- // code a try.
- if (!Reg && isa<Constant>(V))
- Reg = TargetMaterializeConstant(cast<Constant>(V));
+ // If target-specific code couldn't or didn't want to handle the value, then
+ // give target-independent code a try.
+ if (!Reg)
+ Reg = materializeConstant(V, VT);
// Don't cache constant materializations in the general ValueMap.
// To do so would require tracking what uses they dominate.
- if (Reg != 0) {
+ if (Reg) {
LocalValueMap[V] = Reg;
LastLocalValue = MRI.getVRegDef(Reg);
}
@@ -262,13 +291,7 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
return LocalValueMap[V];
}
-/// UpdateValueMap - Update the value map to include the new mapping for this
-/// instruction, or insert an extra copy to get the result in a previous
-/// determined register.
-/// NOTE: This is only necessary because we might select a block that uses
-/// a value before we select the block that defines the value. It might be
-/// possible to fix this by selecting blocks in reverse postorder.
-void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
if (!isa<Instruction>(I)) {
LocalValueMap[I] = Reg;
return;
@@ -281,7 +304,7 @@ void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
else if (Reg != AssignedReg) {
// Arrange for uses of AssignedReg to be replaced by uses of Reg.
for (unsigned i = 0; i < NumRegs; i++)
- FuncInfo.RegFixups[AssignedReg+i] = Reg+i;
+ FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
AssignedReg = Reg;
}
@@ -299,13 +322,12 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
MVT PtrVT = TLI.getPointerTy();
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
if (IdxVT.bitsLT(PtrVT)) {
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
- IdxN, IdxNIsKill);
+ IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN,
+ IdxNIsKill);
IdxNIsKill = true;
- }
- else if (IdxVT.bitsGT(PtrVT)) {
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
- IdxN, IdxNIsKill);
+ } else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN =
+ fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
IdxNIsKill = true;
}
return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
@@ -327,7 +349,7 @@ void FastISel::recomputeInsertPt() {
void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
- assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!");
while (I != E) {
MachineInstr *Dead = &*I;
++I;
@@ -342,7 +364,7 @@ FastISel::SavePoint FastISel::enterLocalValueArea() {
DebugLoc OldDL = DbgLoc;
recomputeInsertPt();
DbgLoc = DebugLoc();
- SavePoint SP = { OldInsertPt, OldDL };
+ SavePoint SP = {OldInsertPt, OldDL};
return SP;
}
@@ -355,10 +377,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
DbgLoc = OldInsertPt.DL;
}
-/// SelectBinaryOp - Select and emit code for a binary operator instruction,
-/// which has an opcode which directly corresponds to the given ISD opcode.
-///
-bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
+bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
if (VT == MVT::Other || !VT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
@@ -371,9 +390,8 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
if (!TLI.isTypeLegal(VT)) {
// MVT::i1 is special. Allow AND, OR, or XOR because they
// don't require additional zeroing, which makes them easy.
- if (VT == MVT::i1 &&
- (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
- ISDOpcode == ISD::XOR))
+ if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
VT = TLI.getTypeToTransformTo(I->getContext(), VT);
else
return false;
@@ -381,38 +399,36 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
// Check if the first operand is a constant, and handle it as "ri". At -O0,
// we don't have anything that canonicalizes operand order.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
unsigned Op1 = getRegForValue(I->getOperand(1));
- if (Op1 == 0) return false;
-
+ if (!Op1)
+ return false;
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
- unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1,
- Op1IsKill, CI->getZExtValue(),
- VT.getSimpleVT());
- if (ResultReg == 0) return false;
+ unsigned ResultReg =
+ fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
+ CI->getZExtValue(), VT.getSimpleVT());
+ if (!ResultReg)
+ return false;
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-
unsigned Op0 = getRegForValue(I->getOperand(0));
- if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail.
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
-
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// Check if the second operand is a constant and handle it appropriately.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t Imm = CI->getZExtValue();
// Transform "sdiv exact X, 8" -> "sra X, 3".
if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
- cast<BinaryOperator>(I)->isExact() &&
- isPowerOf2_64(Imm)) {
+ cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) {
Imm = Log2_64(Imm);
ISDOpcode = ISD::SRA;
}
@@ -424,54 +440,49 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::AND;
}
- unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
- if (ResultReg == 0) return false;
+ if (!ResultReg)
+ return false;
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// Check if the second operand is a constant float.
- if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
- unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
ISDOpcode, Op0, Op0IsKill, CF);
- if (ResultReg != 0) {
+ if (ResultReg) {
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
unsigned Op1 = getRegForValue(I->getOperand(1));
- if (Op1 == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
return false;
-
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
// Now we have both operands in registers. Emit the instruction.
- unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode,
- Op0, Op0IsKill,
- Op1, Op1IsKill);
- if (ResultReg == 0)
+ unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
+ if (!ResultReg)
// Target-specific code wasn't able to find a machine opcode for
// the given ISD opcode and type. Halt "fast" selection and bail.
return false;
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool FastISel::SelectGetElementPtr(const User *I) {
+bool FastISel::selectGetElementPtr(const User *I) {
unsigned N = getRegForValue(I->getOperand(0));
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
-
bool NIsKill = hasTrivialKill(I->getOperand(0));
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
@@ -481,18 +492,18 @@ bool FastISel::SelectGetElementPtr(const User *I) {
uint64_t MaxOffs = 2048;
Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
- for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
- E = I->op_end(); OI != E; ++OI) {
+ for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1,
+ E = I->op_end();
+ OI != E; ++OI) {
const Value *Idx = *OI;
- if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+ if (auto *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
if (TotalOffs >= MaxOffs) {
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
TotalOffs = 0;
@@ -503,15 +514,15 @@ bool FastISel::SelectGetElementPtr(const User *I) {
Ty = cast<SequentialType>(Ty)->getElementType();
// If this is a constant subscript, handle it quickly.
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->isZero()) continue;
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
// N = N + Offset
TotalOffs +=
- DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+ DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
if (TotalOffs >= MaxOffs) {
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
TotalOffs = 0;
@@ -519,9 +530,8 @@ bool FastISel::SelectGetElementPtr(const User *I) {
continue;
}
if (TotalOffs) {
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
TotalOffs = 0;
@@ -532,43 +542,37 @@ bool FastISel::SelectGetElementPtr(const User *I) {
std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
unsigned IdxN = Pair.first;
bool IdxNIsKill = Pair.second;
- if (IdxN == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
if (ElementSize != 1) {
- IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
- if (IdxN == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
IdxNIsKill = true;
}
- N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
}
}
if (TotalOffs) {
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
}
// We successfully emitted code for the given LLVM Instruction.
- UpdateValueMap(I, N);
+ updateValueMap(I, N);
return true;
}
-/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands
-/// to a stackmap or patchpoint machine instruction.
bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
const CallInst *CI, unsigned StartIdx) {
for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
Value *Val = CI->getArgOperand(i);
// Check for constants and encode them with a StackMaps::ConstantOp prefix.
- if (auto *C = dyn_cast<ConstantInt>(Val)) {
+ if (const auto *C = dyn_cast<ConstantInt>(Val)) {
Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
} else if (isa<ConstantPointerNull>(Val)) {
@@ -585,16 +589,15 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
return false;
} else {
unsigned Reg = getRegForValue(Val);
- if (Reg == 0)
+ if (!Reg)
return false;
Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
}
}
-
return true;
}
-bool FastISel::SelectStackmap(const CallInst *I) {
+bool FastISel::selectStackmap(const CallInst *I) {
// void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
// [live variables...])
assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
@@ -621,7 +624,7 @@ bool FastISel::SelectStackmap(const CallInst *I) {
assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
"Expected a constant integer.");
const auto *NumBytes =
- cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
// Push live variables for the stack map (skipping the first two arguments
@@ -637,13 +640,13 @@ bool FastISel::SelectStackmap(const CallInst *I) {
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
for (unsigned i = 0; ScratchRegs[i]; ++i)
Ops.push_back(MachineOperand::CreateReg(
- ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
- /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+ ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+ /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(0);
+ .addImm(0);
// Issue STACKMAP.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -654,7 +657,8 @@ bool FastISel::SelectStackmap(const CallInst *I) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
- .addImm(0).addImm(0);
+ .addImm(0)
+ .addImm(0);
// Inform the Frame Information that we have a stackmap in this function.
FuncInfo.MF->getFrameInfo()->setHasStackMap();
@@ -662,11 +666,370 @@ bool FastISel::SelectStackmap(const CallInst *I) {
return true;
}
-bool FastISel::SelectCall(const User *I) {
+/// \brief Lower an argument list according to the target calling convention.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
+ unsigned NumArgs, const Value *Callee,
+ bool ForceRetVoidTy, CallLoweringInfo &CLI) {
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ ImmutableCallSite CS(CI);
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ ArgI != ArgE; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, AttrI);
+ Args.push_back(Entry);
+ }
+
+ Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext())
+ : CI->getType();
+ CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::selectPatchpoint(const CallInst *I) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+ CallingConv::ID CC = I->getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !I->getType()->isVoidTy();
+ Value *Callee = I->getOperand(PatchPointOpers::TargetPos);
+
+ // Get the real number of arguments participating in the call <numArgs>
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
+ "Expected a constant integer.");
+ const auto *NumArgsVal =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos));
+ unsigned NumArgs = NumArgsVal->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // This includes all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ CallLoweringInfo CLI;
+ if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
+ return false;
+
+ assert(CLI.Call && "No call instruction specified.");
+
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add an explicit result reg if we use the anyreg calling convention.
+ if (IsAnyRegCC && HasDef) {
+ assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
+ CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
+ CLI.NumResultRegs = 1;
+ Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true));
+ }
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Assume that the callee is a constant address or null pointer.
+ // FIXME: handle function symbols in the future.
+ uint64_t CalleeAddr;
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee))
+ CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
+ if (C->getOpcode() == Instruction::IntToPtr)
+ CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ else
+ llvm_unreachable("Unsupported ConstantExpr.");
+ } else if (isa<ConstantPointerNull>(Callee))
+ CalleeAddr = 0;
+ else
+ llvm_unreachable("Unsupported callee address.");
+
+ Ops.push_back(MachineOperand::CreateImm(CalleeAddr));
+
+ // Adjust <numArgs> to account for any arguments that have been passed on
+ // the stack instead.
+ unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size();
+ Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs));
+
+ // Add the calling convention
+ Ops.push_back(MachineOperand::CreateImm((unsigned)CC));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (IsAnyRegCC) {
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
+ unsigned Reg = getRegForValue(I->getArgOperand(i));
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ }
+ }
+
+ // Push the arguments from the call instruction.
+ for (auto Reg : CLI.OutRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+
+ // Push live variables for the stack map.
+ if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
+ return false;
+
+ // Push the register mask info.
+ Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC)));
+
+ // Add scratch registers as implicit def and early clobber.
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+ /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+
+ // Add implicit defs (return values).
+ for (auto Reg : CLI.InRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true,
+ /*IsImpl=*/true));
+
+ // Insert the patchpoint instruction before the call generated by the target.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
+ TII.get(TargetOpcode::PATCHPOINT));
+
+ for (auto &MO : Ops)
+ MIB.addOperand(MO);
+
+ MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ // Delete the original call instruction.
+ CLI.Call->eraseFromParent();
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+
+ if (CLI.NumResultRegs)
+ updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
+ return true;
+}
+
+/// Returns an AttributeSet representing the attributes applied to the return
+/// value of the given call.
+static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
+ Attrs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
+ unsigned NumArgs) {
+ ImmutableCallSite CS(CI);
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FTy->getReturnType();
+
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, ArgI + 1);
+ Args.push_back(Entry);
+ }
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FTy, SymName, std::move(Args), CS, NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
+ // Handle the incoming return values from the call.
+ CLI.clearIns();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, CLI.RetTy, RetTys);
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(
+ CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ // FIXME: sret demotion isn't supported yet - bail out.
+ if (!CanLowerReturn)
+ return false;
+
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
+ // Handle all of the outgoing arguments.
+ CLI.clearOuts();
+ for (auto &Arg : CLI.getArgs()) {
+ Type *FinalType = Arg.Ty;
+ if (Arg.IsByVal)
+ FinalType = cast<PointerType>(Arg.Ty)->getElementType();
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg);
+
+ ISD::ArgFlagsTy Flags;
+ if (Arg.IsZExt)
+ Flags.setZExt();
+ if (Arg.IsSExt)
+ Flags.setSExt();
+ if (Arg.IsInReg)
+ Flags.setInReg();
+ if (Arg.IsSRet)
+ Flags.setSRet();
+ if (Arg.IsByVal)
+ Flags.setByVal();
+ if (Arg.IsInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling in
+ // the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.IsByVal || Arg.IsInAlloca) {
+ PointerType *Ty = cast<PointerType>(Arg.Ty);
+ Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
+ // For ByVal, alignment should come from the frontend. The backend will
+ // guess if this info is not there, but there are cases it cannot get right.
+ unsigned FrameAlign = Arg.Alignment;
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (Arg.IsNest)
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ CLI.OutVals.push_back(Arg.Val);
+ CLI.OutFlags.push_back(Flags);
+ }
+
+ if (!fastLowerCall(CLI))
+ return false;
+
+ // Set all unused physreg defs as dead.
+ assert(CLI.Call && "No call instruction specified.");
+ CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ if (CLI.NumResultRegs && CLI.CS)
+ updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
+
+ return true;
+}
+
+bool FastISel::lowerCall(const CallInst *CI) {
+ ImmutableCallSite CS(CI);
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FuncTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FuncTy->getReturnType();
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Args.push_back(Entry);
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within fastLowerCall.
+ bool IsTailCall = CI->isTailCall();
+ if (IsTailCall && !isInTailCallPosition(CS, TM))
+ IsTailCall = false;
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
+ .setTailCall(IsTailCall);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::selectCall(const User *I) {
const CallInst *Call = cast<CallInst>(I);
// Handle simple inline asms.
if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ // If the inline asm has side effects, then make sure that no local value
+ // lives across by flushing the local value map.
+ if (IA->hasSideEffects())
+ flushLocalValueMap();
+
// Don't attempt to handle constraints.
if (!IA->getConstraintString().empty())
return false;
@@ -679,34 +1042,46 @@ bool FastISel::SelectCall(const User *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::INLINEASM))
- .addExternalSymbol(IA->getAsmString().c_str())
- .addImm(ExtraInfo);
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
return true;
}
MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
ComputeUsesVAFloatArgument(*Call, &MMI);
- const Function *F = Call->getCalledFunction();
- if (!F) return false;
+ // Handle intrinsic function calls.
+ if (const auto *II = dyn_cast<IntrinsicInst>(Call))
+ return selectIntrinsicCall(II);
+
+ // Usually, it does not make sense to initialize a value, make an unrelated
+ // function call and then use the value, because the value tends to be
+ // spilled on the stack. So we move the pointer to the last local value to
+ // the beginning of the block, so that all the values which have already
+ // been materialized appear after the call. It also makes sense to skip
+ // intrinsics since they tend to be inlined.
+ flushLocalValueMap();
- // Handle selected intrinsic function calls.
- switch (F->getIntrinsicID()) {
- default: break;
- // At -O0 we don't care about the lifetime intrinsics.
+ return lowerCall(Call);
+}
+
+bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ // At -O0 we don't care about the lifetime intrinsics.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
- // The donothing intrinsic does, well, nothing.
+ // The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
return true;
-
case Intrinsic::dbg_declare: {
- const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
DIVariable DIVar(DI->getVariable());
assert((!DIVar || DIVar.isVariable()) &&
- "Variable in DbgDeclareInst should be either null or a DIVariable.");
- if (!DIVar ||
- !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ "Variable in DbgDeclareInst should be either null or a DIVariable.");
+ if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) {
DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
@@ -719,11 +1094,11 @@ bool FastISel::SelectCall(const User *I) {
unsigned Offset = 0;
Optional<MachineOperand> Op;
- if (const Argument *Arg = dyn_cast<Argument>(Address))
+ if (const auto *Arg = dyn_cast<Argument>(Address))
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Op = MachineOperand::CreateFI(Offset);
+ Op = MachineOperand::CreateFI(Offset);
if (!Op)
if (unsigned Reg = lookUpRegForValue(Address))
Op = MachineOperand::CreateReg(Reg, false);
@@ -750,13 +1125,14 @@ bool FastISel::SelectCall(const User *I) {
Op->setIsDebug(true);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
- DI->getVariable());
+ DI->getVariable(), DI->getExpression());
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
.addOperand(*Op)
.addImm(0)
- .addMetadata(DI->getVariable());
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
@@ -766,33 +1142,41 @@ bool FastISel::SelectCall(const User *I) {
}
case Intrinsic::dbg_value: {
// This form of DBG_VALUE is target-independent.
- const DbgValueInst *DI = cast<DbgValueInst>(Call);
+ const DbgValueInst *DI = cast<DbgValueInst>(II);
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(0U).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ .addReg(0U)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addCImm(CI).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
+ .addCImm(CI)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addImm(CI->getZExtValue()).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
- } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ .addImm(CI->getZExtValue())
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addFPImm(CF).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
+ .addFPImm(CF)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
} else if (unsigned Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
bool IsIndirect = DI->getOffset() != 0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect,
- Reg, DI->getOffset(), DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
+ DI->getOffset(), DI->getVariable(), DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
@@ -801,46 +1185,38 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
case Intrinsic::objectsize: {
- ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
+ ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
- Constant *ResCI = ConstantInt::get(Call->getType(), Res);
+ Constant *ResCI = ConstantInt::get(II->getType(), Res);
unsigned ResultReg = getRegForValue(ResCI);
- if (ResultReg == 0)
+ if (!ResultReg)
return false;
- UpdateValueMap(Call, ResultReg);
+ updateValueMap(II, ResultReg);
return true;
}
case Intrinsic::expect: {
- unsigned ResultReg = getRegForValue(Call->getArgOperand(0));
- if (ResultReg == 0)
+ unsigned ResultReg = getRegForValue(II->getArgOperand(0));
+ if (!ResultReg)
return false;
- UpdateValueMap(Call, ResultReg);
+ updateValueMap(II, ResultReg);
return true;
}
case Intrinsic::experimental_stackmap:
- return SelectStackmap(Call);
+ return selectStackmap(II);
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ return selectPatchpoint(II);
}
- // Usually, it does not make sense to initialize a value,
- // make an unrelated function call and use the value, because
- // it tends to be spilled on the stack. So, we move the pointer
- // to the last local value to the beginning of the block, so that
- // all the values which have already been materialized,
- // appear after the call. It also makes sense to skip intrinsics
- // since they tend to be inlined.
- if (!isa<IntrinsicInst>(Call))
- flushLocalValueMap();
-
- // An arbitrary call. Bail.
- return false;
+ return fastLowerIntrinsicCall(II);
}
-bool FastISel::SelectCast(const User *I, unsigned Opcode) {
+bool FastISel::selectCast(const User *I, unsigned Opcode) {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
- if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
- DstVT == MVT::Other || !DstVT.isSimple())
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other ||
+ !DstVT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
@@ -859,24 +1235,22 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
- unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
- DstVT.getSimpleVT(),
- Opcode,
- InputReg, InputRegIsKill);
+ unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ Opcode, InputReg, InputRegIsKill);
if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool FastISel::SelectBitCast(const User *I) {
+bool FastISel::selectBitCast(const User *I) {
// If the bitcast doesn't change the type, just use the operand value.
if (I->getType() == I->getOperand(0)->getType()) {
unsigned Reg = getRegForValue(I->getOperand(0));
- if (Reg == 0)
+ if (!Reg)
return false;
- UpdateValueMap(I, Reg);
+ updateValueMap(I, Reg);
return true;
}
@@ -891,17 +1265,15 @@ bool FastISel::SelectBitCast(const User *I) {
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DstVT = DstEVT.getSimpleVT();
unsigned Op0 = getRegForValue(I->getOperand(0));
- if (Op0 == 0)
- // Unhandled operand. Halt "fast" selection and bail.
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
-
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT == DstVT) {
- const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
- const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
if (SrcClass == DstClass) {
ResultReg = createResultReg(DstClass);
@@ -912,28 +1284,27 @@ bool FastISel::SelectBitCast(const User *I) {
// If the reg-reg copy failed, select a BITCAST opcode.
if (!ResultReg)
- ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
+ ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool
-FastISel::SelectInstruction(const Instruction *I) {
+bool FastISel::selectInstruction(const Instruction *I) {
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(I))
- if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+ if (!handlePHINodesInSuccessorBlocks(I->getParent()))
return false;
DbgLoc = I->getDebugLoc();
- MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+ SavedInsertPt = FuncInfo.InsertPt;
- if (const CallInst *Call = dyn_cast<CallInst>(I)) {
+ if (const auto *Call = dyn_cast<CallInst>(I)) {
const Function *F = Call->getCalledFunction();
LibFunc::Func Func;
@@ -951,40 +1322,39 @@ FastISel::SelectInstruction(const Instruction *I) {
}
// First, try doing target-independent selection.
- if (SelectOperator(I, I->getOpcode())) {
- ++NumFastIselSuccessIndependent;
- DbgLoc = DebugLoc();
- return true;
- }
- // Remove dead code. However, ignore call instructions since we've flushed
- // the local value map and recomputed the insert point.
- if (!isa<CallInst>(I)) {
+ if (!SkipTargetIndependentISel) {
+ if (selectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
+ DbgLoc = DebugLoc();
+ return true;
+ }
+ // Remove dead code.
recomputeInsertPt();
if (SavedInsertPt != FuncInfo.InsertPt)
removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ SavedInsertPt = FuncInfo.InsertPt;
}
-
// Next, try calling the target to attempt to handle the instruction.
- SavedInsertPt = FuncInfo.InsertPt;
- if (TargetSelectInstruction(I)) {
+ if (fastSelectInstruction(I)) {
++NumFastIselSuccessTarget;
DbgLoc = DebugLoc();
return true;
}
- // Check for dead code and remove as necessary.
+ // Remove dead code.
recomputeInsertPt();
if (SavedInsertPt != FuncInfo.InsertPt)
removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
DbgLoc = DebugLoc();
+ // Undo phi node updates, because they will be added again by SelectionDAG.
+ if (isa<TerminatorInst>(I))
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
}
-/// FastEmitBranch - Emit an unconditional branch to the given block,
-/// unless it is the immediate (fall-through) successor, and update
-/// the CFG.
-void
-FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
+/// Emit an unconditional branch to the given block, unless it is the immediate
+/// (fall-through) successor, and update the CFG.
+void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// For more accurate line information if this is the only instruction
@@ -1002,54 +1372,51 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
FuncInfo.MBB->addSuccessor(MSucc, BranchWeight);
}
-/// SelectFNeg - Emit an FNeg operation.
-///
-bool
-FastISel::SelectFNeg(const User *I) {
+/// Emit an FNeg operation.
+bool FastISel::selectFNeg(const User *I) {
unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
- if (OpReg == 0) return false;
-
+ if (!OpReg)
+ return false;
bool OpRegIsKill = hasTrivialKill(I);
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(I->getType());
- unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
- ISD::FNEG, OpReg, OpRegIsKill);
- if (ResultReg != 0) {
- UpdateValueMap(I, ResultReg);
+ unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
+ OpReg, OpRegIsKill);
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
return true;
}
// Bitcast the value to integer, twiddle the sign bit with xor,
// and then bitcast it back to floating-point.
- if (VT.getSizeInBits() > 64) return false;
+ if (VT.getSizeInBits() > 64)
+ return false;
EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
if (!TLI.isTypeLegal(IntVT))
return false;
- unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
ISD::BITCAST, OpReg, OpRegIsKill);
- if (IntReg == 0)
+ if (!IntReg)
return false;
- unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
- IntReg, /*Kill=*/true,
- UINT64_C(1) << (VT.getSizeInBits()-1),
- IntVT.getSimpleVT());
- if (IntResultReg == 0)
+ unsigned IntResultReg = fastEmit_ri_(
+ IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
+ if (!IntResultReg)
return false;
- ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
- ISD::BITCAST, IntResultReg, /*Kill=*/true);
- if (ResultReg == 0)
+ ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
+ IntResultReg, /*IsKill=*/true);
+ if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool
-FastISel::SelectExtractValue(const User *U) {
+bool FastISel::selectExtractValue(const User *U) {
const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
if (!EVI)
return false;
@@ -1085,55 +1452,54 @@ FastISel::SelectExtractValue(const User *U) {
for (unsigned i = 0; i < VTIndex; i++)
ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
- UpdateValueMap(EVI, ResultReg);
+ updateValueMap(EVI, ResultReg);
return true;
}
-bool
-FastISel::SelectOperator(const User *I, unsigned Opcode) {
+bool FastISel::selectOperator(const User *I, unsigned Opcode) {
switch (Opcode) {
case Instruction::Add:
- return SelectBinaryOp(I, ISD::ADD);
+ return selectBinaryOp(I, ISD::ADD);
case Instruction::FAdd:
- return SelectBinaryOp(I, ISD::FADD);
+ return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
- return SelectBinaryOp(I, ISD::SUB);
+ return selectBinaryOp(I, ISD::SUB);
case Instruction::FSub:
// FNeg is currently represented in LLVM IR as a special case of FSub.
if (BinaryOperator::isFNeg(I))
- return SelectFNeg(I);
- return SelectBinaryOp(I, ISD::FSUB);
+ return selectFNeg(I);
+ return selectBinaryOp(I, ISD::FSUB);
case Instruction::Mul:
- return SelectBinaryOp(I, ISD::MUL);
+ return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
- return SelectBinaryOp(I, ISD::FMUL);
+ return selectBinaryOp(I, ISD::FMUL);
case Instruction::SDiv:
- return SelectBinaryOp(I, ISD::SDIV);
+ return selectBinaryOp(I, ISD::SDIV);
case Instruction::UDiv:
- return SelectBinaryOp(I, ISD::UDIV);
+ return selectBinaryOp(I, ISD::UDIV);
case Instruction::FDiv:
- return SelectBinaryOp(I, ISD::FDIV);
+ return selectBinaryOp(I, ISD::FDIV);
case Instruction::SRem:
- return SelectBinaryOp(I, ISD::SREM);
+ return selectBinaryOp(I, ISD::SREM);
case Instruction::URem:
- return SelectBinaryOp(I, ISD::UREM);
+ return selectBinaryOp(I, ISD::UREM);
case Instruction::FRem:
- return SelectBinaryOp(I, ISD::FREM);
+ return selectBinaryOp(I, ISD::FREM);
case Instruction::Shl:
- return SelectBinaryOp(I, ISD::SHL);
+ return selectBinaryOp(I, ISD::SHL);
case Instruction::LShr:
- return SelectBinaryOp(I, ISD::SRL);
+ return selectBinaryOp(I, ISD::SRL);
case Instruction::AShr:
- return SelectBinaryOp(I, ISD::SRA);
+ return selectBinaryOp(I, ISD::SRA);
case Instruction::And:
- return SelectBinaryOp(I, ISD::AND);
+ return selectBinaryOp(I, ISD::AND);
case Instruction::Or:
- return SelectBinaryOp(I, ISD::OR);
+ return selectBinaryOp(I, ISD::OR);
case Instruction::Xor:
- return SelectBinaryOp(I, ISD::XOR);
+ return selectBinaryOp(I, ISD::XOR);
case Instruction::GetElementPtr:
- return SelectGetElementPtr(I);
+ return selectGetElementPtr(I);
case Instruction::Br: {
const BranchInst *BI = cast<BranchInst>(I);
@@ -1141,7 +1507,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
if (BI->isUnconditional()) {
const BasicBlock *LLVMSucc = BI->getSuccessor(0);
MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
- FastEmitBranch(MSucc, BI->getDebugLoc());
+ fastEmitBranch(MSucc, BI->getDebugLoc());
return true;
}
@@ -1152,7 +1518,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
case Instruction::Unreachable:
if (TM.Options.TrapUnreachable)
- return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
+ return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
else
return true;
@@ -1165,38 +1531,39 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
return false;
case Instruction::Call:
- return SelectCall(I);
+ return selectCall(I);
case Instruction::BitCast:
- return SelectBitCast(I);
+ return selectBitCast(I);
case Instruction::FPToSI:
- return SelectCast(I, ISD::FP_TO_SINT);
+ return selectCast(I, ISD::FP_TO_SINT);
case Instruction::ZExt:
- return SelectCast(I, ISD::ZERO_EXTEND);
+ return selectCast(I, ISD::ZERO_EXTEND);
case Instruction::SExt:
- return SelectCast(I, ISD::SIGN_EXTEND);
+ return selectCast(I, ISD::SIGN_EXTEND);
case Instruction::Trunc:
- return SelectCast(I, ISD::TRUNCATE);
+ return selectCast(I, ISD::TRUNCATE);
case Instruction::SIToFP:
- return SelectCast(I, ISD::SINT_TO_FP);
+ return selectCast(I, ISD::SINT_TO_FP);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
if (DstVT.bitsGT(SrcVT))
- return SelectCast(I, ISD::ZERO_EXTEND);
+ return selectCast(I, ISD::ZERO_EXTEND);
if (DstVT.bitsLT(SrcVT))
- return SelectCast(I, ISD::TRUNCATE);
+ return selectCast(I, ISD::TRUNCATE);
unsigned Reg = getRegForValue(I->getOperand(0));
- if (Reg == 0) return false;
- UpdateValueMap(I, Reg);
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
return true;
}
case Instruction::ExtractValue:
- return SelectExtractValue(I);
+ return selectExtractValue(I);
case Instruction::PHI:
llvm_unreachable("FastISel shouldn't visit PHI nodes!");
@@ -1207,83 +1574,72 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
}
-FastISel::FastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo)
- : FuncInfo(funcInfo),
- MF(funcInfo.MF),
- MRI(FuncInfo.MF->getRegInfo()),
- MFI(*FuncInfo.MF->getFrameInfo()),
- MCP(*FuncInfo.MF->getConstantPool()),
- TM(FuncInfo.MF->getTarget()),
- DL(*TM.getDataLayout()),
- TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()),
- TRI(*TM.getRegisterInfo()),
- LibInfo(libInfo) {
-}
+FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo,
+ bool SkipTargetIndependentISel)
+ : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()),
+ TII(*MF->getSubtarget().getInstrInfo()),
+ TLI(*MF->getSubtarget().getTargetLowering()),
+ TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
FastISel::~FastISel() {}
-bool FastISel::FastLowerArguments() {
+bool FastISel::fastLowerArguments() { return false; }
+
+bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
+
+bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
return false;
}
-unsigned FastISel::FastEmit_(MVT, MVT,
- unsigned) {
- return 0;
-}
+unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
-unsigned FastISel::FastEmit_r(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/) {
+unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/) {
return 0;
}
-unsigned FastISel::FastEmit_rr(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/,
- unsigned /*Op1*/, bool /*Op1IsKill*/) {
+unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/) {
return 0;
}
-unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
return 0;
}
-unsigned FastISel::FastEmit_f(MVT, MVT,
- unsigned, const ConstantFP * /*FPImm*/) {
+unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
+ const ConstantFP * /*FPImm*/) {
return 0;
}
-unsigned FastISel::FastEmit_ri(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/,
- uint64_t /*Imm*/) {
+unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, uint64_t /*Imm*/) {
return 0;
}
-unsigned FastISel::FastEmit_rf(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/,
+unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/,
const ConstantFP * /*FPImm*/) {
return 0;
}
-unsigned FastISel::FastEmit_rri(MVT, MVT,
- unsigned,
- unsigned /*Op0*/, bool /*Op0IsKill*/,
- unsigned /*Op1*/, bool /*Op1IsKill*/,
- uint64_t /*Imm*/) {
+unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/, uint64_t /*Imm*/) {
return 0;
}
-/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
-/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// This method is a wrapper of fastEmit_ri. It first tries to emit an
+/// instruction with an immediate operand using fastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
-/// FastEmit_rr instead.
-unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm, MVT ImmType) {
+/// fastEmit_rr instead.
+unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm, MVT ImmType) {
// If this is a multiply by a power of two, emit this as a shift left.
if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
Opcode = ISD::SHL;
@@ -1301,30 +1657,29 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
return 0;
// First check if immediate type is legal. If not, we can't use the ri form.
- unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
- if (ResultReg != 0)
+ unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg)
return ResultReg;
- unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
- if (MaterialReg == 0) {
+ unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ if (!MaterialReg) {
// This is a bit ugly/slow, but failing here means falling out of
// fast-isel, which would be very slow.
- IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
- VT.getSizeInBits());
+ IntegerType *ITy =
+ IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits());
MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
- assert (MaterialReg != 0 && "Unable to materialize imm.");
- if (MaterialReg == 0) return 0;
+ if (!MaterialReg)
+ return 0;
}
- return FastEmit_rr(VT, VT, Opcode,
- Op0, Op0IsKill,
- MaterialReg, /*Kill=*/true);
+ return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg,
+ /*IsKill=*/true);
}
-unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
+unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
return MRI.createVirtualRegister(RC);
}
-unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II,
- unsigned Op, unsigned OpNum) {
+unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+ unsigned OpNum) {
if (TargetRegisterInfo::isVirtualRegister(Op)) {
const TargetRegisterClass *RegClass =
TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
@@ -1340,8 +1695,8 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II,
return Op;
}
-unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
- const TargetRegisterClass* RC) {
+unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC) {
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
@@ -1349,9 +1704,9 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill) {
+unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1359,10 +1714,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill));
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
@@ -1370,10 +1725,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill) {
+unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1382,23 +1737,23 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill));
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- unsigned Op2, bool Op2IsKill) {
+unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, unsigned Op2,
+ bool Op2IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1408,48 +1763,46 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addReg(Op2, Op2IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill));
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addReg(Op2, Op2IsKill * RegState::Kill);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm) {
+unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
- RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF);
- MRI.constrainRegClass(Op0, RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Imm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Imm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm1, uint64_t Imm2) {
+unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm1,
+ uint64_t Imm2) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1457,24 +1810,23 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Imm1)
- .addImm(Imm2);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Imm1)
- .addImm(Imm2);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- const ConstantFP *FPImm) {
+unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, const ConstantFP *FPImm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1482,23 +1834,22 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addFPImm(FPImm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addFPImm(FPImm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addFPImm(FPImm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addFPImm(FPImm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm) {
+unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1507,25 +1858,25 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
+unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm1, uint64_t Imm2) {
+ unsigned Op0, bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, uint64_t Imm1,
+ uint64_t Imm2) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
unsigned ResultReg = createResultReg(RC);
@@ -1534,28 +1885,30 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm1).addImm(Imm2);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm1).addImm(Imm2);
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm) {
+unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addImm(Imm);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1564,41 +1917,41 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm1, uint64_t Imm2) {
+unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, uint64_t Imm1,
+ uint64_t Imm2) {
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addImm(Imm1).addImm(Imm2);
+ .addImm(Imm1)
+ .addImm(Imm2);
else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1)
+ .addImm(Imm2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
-unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
- unsigned Op0, bool Op0IsKill,
- uint32_t Idx) {
+unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
+ bool Op0IsKill, uint32_t Idx) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx);
return ResultReg;
}
-/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
-/// with all but the least significant bit set to zero.
-unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
- return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+/// Emit MachineInstrs to compute the value of Op with all but the least
+/// significant bit set to zero.
+unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
}
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
@@ -1607,22 +1960,24 @@ unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
/// nodes as input. We cannot just directly add them, because expansion
/// might result in multiple MBB's for one BB. As such, the start of the
/// BB might correspond to a different MBB than the end.
-bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TerminatorInst *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+ FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
// Check successor nodes' PHI nodes that expect a constant to be available
// from this block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
- if (!isa<PHINode>(SuccBB->begin())) continue;
+ if (!isa<PHINode>(SuccBB->begin()))
+ continue;
MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
- if (!SuccsHandled.insert(SuccMBB)) continue;
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
@@ -1630,10 +1985,11 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
for (BasicBlock::const_iterator I = SuccBB->begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ const auto *PN = dyn_cast<PHINode>(I); ++I) {
// Ignore dead phi's.
- if (PN->use_empty()) continue;
+ if (PN->use_empty())
+ continue;
// Only handle legal types. Two interesting things to note here. First,
// by bailing out early, we may leave behind some dead instructions,
@@ -1644,10 +2000,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
// Handle integer promotions, though, because they're common and easy.
- if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
- VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
- else {
- FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
}
}
@@ -1657,12 +2011,12 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Set the DebugLoc for the copy. Prefer the location of the operand
// if there is one; use the location of the PHI otherwise.
DbgLoc = PN->getDebugLoc();
- if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();
unsigned Reg = getRegForValue(PHIOp);
- if (Reg == 0) {
- FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ if (!Reg) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
}
FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
@@ -1675,17 +2029,17 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
assert(LI->hasOneUse() &&
- "tryToFoldLoad expected a LoadInst with a single use");
+ "tryToFoldLoad expected a LoadInst with a single use");
// We know that the load has a single use, but don't know what it is. If it
// isn't one of the folded instructions, then we can't succeed here. Handle
// this by scanning the single-use users of the load until we get to FoldInst.
- unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
const Instruction *TheUser = LI->user_back();
- while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
// Stay in the right block.
TheUser->getParent() == FoldInst->getParent() &&
- --MaxUsers) { // Don't scan too far.
+ --MaxUsers) { // Don't scan too far.
// If there are multiple or no uses of this instruction, then bail out.
if (!TheUser->hasOneUse())
return false;
@@ -1707,7 +2061,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
// then there actually was no reference to it. Perhaps the load is referenced
// by a dead instruction.
unsigned LoadReg = getRegForValue(LI);
- if (LoadReg == 0)
+ if (!LoadReg)
return false;
// We can't fold if this vreg has no uses or more than one use. Multiple uses
@@ -1765,19 +2119,20 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
Flags = MachineMemOperand::MOStore;
Ptr = SI->getPointerOperand();
ValTy = SI->getValueOperand()->getType();
- } else {
+ } else
return nullptr;
- }
- bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr;
- bool IsInvariant = I->getMetadata("invariant.load") != nullptr;
- const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa);
+ bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr;
+ bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
- if (Alignment == 0) // Ensure that codegen never sees alignment 0.
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0.
Alignment = DL.getABITypeAlignment(ValTy);
- unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy);
+ unsigned Size = DL.getTypeStoreSize(ValTy);
if (IsVolatile)
Flags |= MachineMemOperand::MOVolatile;
@@ -1787,5 +2142,45 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
Flags |= MachineMemOperand::MOInvariant;
return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
- Alignment, TBAAInfo, Ranges);
+ Alignment, AAInfo, Ranges);
+}
+
+CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const {
+ // If both operands are the same, then try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (CI->getOperand(0) != CI->getOperand(1))
+ return Predicate;
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid predicate!");
+ case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
+
+ case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
}
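
The new optimizeCmpPredicate helper above folds a compare whose two operands are the same value down to a predicate with a known (or nearly known) result. Below is a minimal standalone sketch of the same idea, not part of the patch; the enum and function names are illustrative stand-ins for the CmpInst predicates. For integers, x == x is always true and x != x is always false; for floats, ordered comparisons collapse to an "is ordered" (not NaN) check and unordered ones to an "is NaN" check, because NaN is the only value for which x == x can fail.

// Standalone illustration only; Pred and foldSameOperandCompare are
// hypothetical stand-ins, not the LLVM types used in the patch.
#include <cassert>

enum class Pred {
  ICmpEq, ICmpNe,     // integer compares
  FCmpOEq, FCmpUNE,   // float compares (ordered equal / unordered not-equal)
  AlwaysTrue, AlwaysFalse, IsOrdered, IsUnordered
};

// Fold "cmp x, x" to a simpler predicate.
Pred foldSameOperandCompare(Pred P) {
  switch (P) {
  case Pred::ICmpEq:  return Pred::AlwaysTrue;   // x == x always holds for ints
  case Pred::ICmpNe:  return Pred::AlwaysFalse;  // x != x never holds for ints
  case Pred::FCmpOEq: return Pred::IsOrdered;    // true exactly when x is not NaN
  case Pred::FCmpUNE: return Pred::IsUnordered;  // true exactly when x is NaN
  default:            return P;                  // remaining cases omitted here
  }
}

int main() {
  assert(foldSameOperandCompare(Pred::ICmpEq)  == Pred::AlwaysTrue);
  assert(foldSameOperandCompare(Pred::FCmpOEq) == Pred::IsOrdered);
  return 0;
}
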
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index ae124e8..86b9542 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -36,6 +36,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -55,58 +56,71 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
return false;
}
+static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+ // For the users of the source value being used for compare instruction, if
+ // the number of signed predicate is greater than unsigned predicate, we
+ // prefer to use SIGN_EXTEND.
+ //
+ // With this optimization, we would be able to reduce some redundant sign or
+ // zero extension instruction, and eventually more machine CSE opportunities
+ // can be exposed.
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ unsigned NumOfSigned = 0, NumOfUnsigned = 0;
+ for (const User *U : V->users()) {
+ if (const auto *CI = dyn_cast<CmpInst>(U)) {
+ NumOfSigned += CI->isSigned();
+ NumOfUnsigned += CI->isUnsigned();
+ }
+ }
+ if (NumOfSigned > NumOfUnsigned)
+ ExtendKind = ISD::SIGN_EXTEND;
+
+ return ExtendKind;
+}
+
void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
SelectionDAG *DAG) {
- const TargetLowering *TLI = TM.getTargetLowering();
-
Fn = &fn;
MF = &mf;
+ TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI);
CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(),
- Outs, Fn->getContext());
+ Fn->isVarArg(), Outs, Fn->getContext());
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
Function::const_iterator BB = Fn->begin(), EB = Fn->end();
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- // Don't fold inalloca allocas or other dynamic allocas into the initial
- // stack frame allocation, even if they are in the entry block.
- if (!AI->isStaticAlloca())
- continue;
-
- if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
- Type *Ty = AI->getAllocatedType();
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
- unsigned Align =
- std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
- AI->getAlignment());
-
- TySize *= CUI->getZExtValue(); // Get total allocated size.
- if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
-
- StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
- }
- }
-
for (; BB != EB; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
- // Look for dynamic allocas.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- if (!AI->isStaticAlloca()) {
+ // Static allocas can be folded into the initial stack frame adjustment.
+ if (AI->isStaticAlloca()) {
+ const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
+ Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+ StaticAllocaMap[AI] =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
+
+ } else {
unsigned Align = std::max(
(unsigned)TLI->getDataLayout()->getPrefTypeAlignment(
AI->getAllocatedType()),
AI->getAlignment());
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ MF->getSubtarget().getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
// Inform the Frame Information that we have variable-sized objects.
@@ -126,9 +140,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
- std::pair<unsigned, const TargetRegisterClass*> PhysReg =
- TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
- Op.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
+ Op.ConstraintVT);
if (PhysReg.first == SP)
MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
}
@@ -136,6 +150,21 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
+ // Look for calls to the @llvm.va_start intrinsic. We can omit some
+ // prologue boilerplate for variadic functions that don't examine their
+ // arguments.
+ if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ MF->getFrameInfo()->setHasVAStart(true);
+ }
+
+ // If we have a musttail call in a variadic function, we need to ensure we
+ // forward implicit register parameters.
+ if (const auto *CI = dyn_cast<CallInst>(I)) {
+ if (CI->isMustTailCall() && Fn->isVarArg())
+ MF->getFrameInfo()->setHasMustTailInVarArgFunc(true);
+ }
+
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
@@ -166,13 +195,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
StaticAllocaMap.find(AI);
if (SI != StaticAllocaMap.end()) { // Check for VLAs.
int FI = SI->second;
- MMI.setVariableDbgInfo(DI->getVariable(),
+ MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
FI, DI->getDebugLoc());
}
}
}
}
}
+
+ // Decide the preferred extend type for a value.
+ PreferredExtendType[I] = getPreferredExtendForValue(I);
}
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
@@ -208,7 +240,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0; i != NumRegisters; ++i)
BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
PHIReg += NumRegisters;
@@ -241,12 +273,13 @@ void FunctionLoweringInfo::clear() {
ArgDbgValues.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
+ PreferredExtendType.clear();
}
/// CreateReg - Allocate a single virtual register for the given type.
unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
- return RegInfo->
- createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT));
+ return RegInfo->createVirtualRegister(
+ MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
}
/// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -257,7 +290,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
/// will assign registers for each member or element.
///
unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, Ty, ValueVTs);
@@ -306,8 +339,6 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
if (!Ty->isIntegerTy() || Ty->isVectorTy())
return;
- const TargetLowering *TLI = TM.getTargetLowering();
-
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, Ty, ValueVTs);
assert(ValueVTs.size() == 1 &&
@@ -452,7 +483,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
// Gather all the type infos for this landing pad and pass them along to
// MachineModuleInfo.
- std::vector<const GlobalVariable *> TyInfo;
+ std::vector<const GlobalValue *> TyInfo;
unsigned N = I.getNumArgOperands();
for (unsigned i = N - 1; i > 1; --i) {
@@ -510,14 +541,14 @@ void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
Value *Val = I.getClause(i - 1);
if (I.isCatch(i - 1)) {
MMI.addCatchTypeInfo(MBB,
- dyn_cast<GlobalVariable>(Val->stripPointerCasts()));
+ dyn_cast<GlobalValue>(Val->stripPointerCasts()));
} else {
// Add filters in a list.
Constant *CVal = cast<Constant>(Val);
- SmallVector<const GlobalVariable*, 4> FilterList;
+ SmallVector<const GlobalValue*, 4> FilterList;
for (User::op_iterator
II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
- FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts()));
+ FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
MMI.addFilterTypeInfo(MBB, FilterList);
}
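
The getPreferredExtendForValue helper added near the top of this file's diff chooses between ANY_EXTEND and SIGN_EXTEND by counting how many of a value's users are signed versus unsigned compares. A small standalone sketch of that counting heuristic follows (illustrative types, not the LLVM classes):

// Standalone sketch; Extend, CmpKind, and preferredExtend are hypothetical
// stand-ins for the ISD node types and CmpInst queries used in the patch.
#include <cassert>
#include <vector>

enum class Extend { Any, Sign };
enum class CmpKind { Signed, Unsigned, Other };

// Prefer sign-extension when signed compare users outnumber unsigned ones.
Extend preferredExtend(const std::vector<CmpKind> &Users) {
  unsigned NumSigned = 0, NumUnsigned = 0;
  for (CmpKind K : Users) {
    NumSigned   += (K == CmpKind::Signed);
    NumUnsigned += (K == CmpKind::Unsigned);
  }
  return NumSigned > NumUnsigned ? Extend::Sign : Extend::Any;
}

int main() {
  // Two signed compares and one unsigned compare -> sign-extend.
  assert(preferredExtend({CmpKind::Signed, CmpKind::Signed, CmpKind::Unsigned}) ==
         Extend::Sign);
  // No compare users at all -> any-extend is fine.
  assert(preferredExtend({CmpKind::Other}) == Extend::Any);
  return 0;
}
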
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 7c124b8..a65f33e 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -27,7 +27,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "instr-emitter"
@@ -265,12 +265,16 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
MIB.addReg(VRBase, RegState::Define);
}
- SDValue Op(Node, i);
- if (IsClone)
- VRBaseMap.erase(Op);
- bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Node emitted out of order - early");
+ // If this def corresponds to a result of the SDNode insert the VRBase into
+ // the lookup map.
+ if (i < NumResults) {
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
}
}
@@ -402,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
- Align = TM->getDataLayout()->getPrefTypeAlignment(Type);
+ Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type);
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = TM->getDataLayout()->getTypeAllocSize(Type);
+ Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type);
}
}
@@ -643,14 +647,18 @@ MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap) {
uint64_t Offset = SD->getOffset();
- MDNode* MDPtr = SD->getMDPtr();
+ MDNode *Var = SD->getVariable();
+ MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr);
+ .addFrameIndex(SD->getFrameIx())
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
}
// Otherwise, we're going to create an instruction here.
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
@@ -696,7 +704,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MIB.addReg(0U, RegState::Debug);
}
- MIB.addMetadata(MDPtr);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
return &*MIB;
}
@@ -859,9 +868,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
-#ifdef NDEBUG
if (II.hasPostISelHook())
-#endif
TLI->AdjustInstrPostInstrSelection(MIB, Node);
}
@@ -1013,11 +1020,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/// at the given position in the given block.
InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
MachineBasicBlock::iterator insertpos)
- : MF(mbb->getParent()),
- MRI(&MF->getRegInfo()),
- TM(&MF->getTarget()),
- TII(TM->getInstrInfo()),
- TRI(TM->getRegisterInfo()),
- TLI(TM->getTargetLowering()),
- MBB(mbb), InsertPos(insertpos) {
-}
+ : MF(mbb->getParent()), MRI(&MF->getRegInfo()),
+ TII(MF->getSubtarget().getInstrInfo()),
+ TRI(MF->getSubtarget().getRegisterInfo()),
+ TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
+ InsertPos(insertpos) {}
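
One behavioral change in this file is in CreateVirtualRegisters: a register is still created for every def, but it is only recorded in VRBaseMap when the def index corresponds to an actual SDNode result (i < NumResults). Here is a standalone sketch of that guarded-insert pattern, with hypothetical stand-in types rather than the real SDNode/VRBaseMap machinery:

// Illustrative only: the map key and helper below are invented for this sketch.
#include <cassert>
#include <map>
#include <utility>

// Map from (node id, result number) to the virtual register holding it.
using ResultRegMap = std::map<std::pair<int, unsigned>, unsigned>;

// Create a register per def, but only map the ones that are real results.
ResultRegMap recordResultRegs(int NodeId, unsigned NumDefs, unsigned NumResults,
                              unsigned FirstVReg) {
  ResultRegMap VRBaseMap;
  for (unsigned i = 0; i != NumDefs; ++i) {
    unsigned VRBase = FirstVReg + i; // stand-in for createVirtualRegister()
    if (i < NumResults)              // extra defs are created but never looked up
      VRBaseMap[{NodeId, i}] = VRBase;
  }
  return VRBaseMap;
}

int main() {
  ResultRegMap M = recordResultRegs(/*NodeId=*/7, /*NumDefs=*/3,
                                    /*NumResults=*/2, /*FirstVReg=*/100);
  assert(M.size() == 2 && M.at({7, 1}) == 101);
  return 0;
}
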
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 920dda8..7b86f7d 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef INSTREMITTER_H
-#define INSTREMITTER_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -29,7 +29,6 @@ class SDDbgValue;
class InstrEmitter {
MachineFunction *MF;
MachineRegisterInfo *MRI;
- const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const TargetLowering *TLI;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c0e8c8c..5d17a5f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,8 +34,11 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "legalizedag"
+
//===----------------------------------------------------------------------===//
/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
/// hacks on it until the target machine can handle it. This involves
@@ -48,16 +52,17 @@ using namespace llvm;
/// will attempt merge setcc and brc instructions into brcc's.
///
namespace {
-class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
+class SelectionDAGLegalize {
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- /// LegalizePosition - The iterator for walking through the node list.
- SelectionDAG::allnodes_iterator LegalizePosition;
+ /// \brief The set of nodes which have already been legalized. We hold a
+ /// reference to it in order to update as necessary on node deletion.
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes;
- /// LegalizedNodes - The set of nodes which have already been legalized.
- SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ /// \brief A set of all the nodes updated during legalization.
+ SmallSetVector<SDNode *, 16> *UpdatedNodes;
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(*DAG.getContext(), VT);
@@ -66,14 +71,16 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
// Libcall insertion helpers.
public:
- explicit SelectionDAGLegalize(SelectionDAG &DAG);
+ SelectionDAGLegalize(SelectionDAG &DAG,
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes,
+ SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr)
+ : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
+ LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {}
- void LegalizeDAG();
-
-private:
- /// LegalizeOp - Legalizes the given operation.
+ /// \brief Legalizes the given operation.
void LegalizeOp(SDNode *Node);
+private:
SDValue OptimizeFloatStore(StoreSDNode *ST);
void LegalizeLoadOps(SDNode *Node);
@@ -145,37 +152,49 @@ private:
void ExpandNode(SDNode *Node);
void PromoteNode(SDNode *Node);
- void ForgetNode(SDNode *N) {
- LegalizedNodes.erase(N);
- if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
- ++LegalizePosition;
- }
-
public:
- // DAGUpdateListener implementation.
- void NodeDeleted(SDNode *N, SDNode *E) override {
- ForgetNode(N);
- }
- void NodeUpdated(SDNode *N) override {}
-
// Node replacement helpers
void ReplacedNode(SDNode *N) {
- if (N->use_empty()) {
- DAG.RemoveDeadNode(N);
- } else {
- ForgetNode(N);
- }
+ LegalizedNodes.erase(N);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(N);
}
void ReplaceNode(SDNode *Old, SDNode *New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ assert(Old->getNumValues() == New->getNumValues() &&
+ "Replacing one node with another that produces a different number "
+ "of values!");
DAG.ReplaceAllUsesWith(Old, New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i));
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New);
ReplacedNode(Old);
}
void ReplaceNode(SDValue Old, SDValue New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
DAG.ReplaceAllUsesWith(Old, New);
+ DAG.TransferDbgValues(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
ReplacedNode(Old.getNode());
}
void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
+
DAG.ReplaceAllUsesWith(Old, New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ DEBUG(dbgs() << (i == 0 ? " with: "
+ : " and: ");
+ New[i]->dump(&DAG));
+ DAG.TransferDbgValues(SDValue(Old, i), New[i]);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New[i].getNode());
+ }
ReplacedNode(Old);
}
};
@@ -213,40 +232,6 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
}
-SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
- : SelectionDAG::DAGUpdateListener(dag),
- TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
- DAG(dag) {
-}
-
-void SelectionDAGLegalize::LegalizeDAG() {
- DAG.AssignTopologicalOrder();
-
- // Visit all the nodes. We start in topological order, so that we see
- // nodes with their original operands intact. Legalization can produce
- // new nodes which may themselves need to be legalized. Iterate until all
- // nodes have been legalized.
- for (;;) {
- bool AnyLegalized = false;
- for (LegalizePosition = DAG.allnodes_end();
- LegalizePosition != DAG.allnodes_begin(); ) {
- --LegalizePosition;
-
- SDNode *N = LegalizePosition;
- if (LegalizedNodes.insert(N)) {
- AnyLegalized = true;
- LegalizeOp(N);
- }
- }
- if (!AnyLegalized)
- break;
-
- }
-
- // Remove dead nodes now.
- DAG.RemoveDeadNodes();
-}
-
/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
/// a load from the constant pool.
SDValue
@@ -270,7 +255,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
EVT OrigVT = VT;
EVT SVT = VT;
- while (SVT != MVT::f32) {
+ while (SVT != MVT::f32 && SVT != MVT::f16) {
SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
// Only do this if the target has a native EXTLOAD instruction from
@@ -291,7 +276,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
DAG.getEntryNode(),
CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, Alignment);
+ VT, false, false, false, Alignment);
return Result;
}
SDValue Result =
@@ -377,7 +362,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
MachinePointerInfo(),
- MemVT, false, false, 0);
+ MemVT, false, false, false, 0);
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo()
@@ -385,7 +370,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
MemVT, ST->isVolatile(),
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset),
- ST->getTBAAInfo()));
+ ST->getAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
@@ -417,7 +402,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
- Alignment, ST->getTBAAInfo());
+ Alignment, ST->getAAInfo());
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -476,7 +461,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(),
MinAlign(LD->getAlignment(), Offset),
- LD->getTBAAInfo());
+ LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
MachinePointerInfo(), false, false, 0));
@@ -494,8 +479,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->getPointerInfo().getWithOffset(Offset),
MemVT, LD->isVolatile(),
LD->isNonTemporal(),
+ LD->isInvariant(),
MinAlign(LD->getAlignment(), Offset),
- LD->getTBAAInfo());
+ LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
@@ -508,7 +494,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Finally, perform the original load only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
- MachinePointerInfo(), LoadedVT, false, false, 0);
+ MachinePointerInfo(), LoadedVT, false, false, false,
+ 0);
// Callers expect a MERGE_VALUES node.
ValResult = Load;
@@ -538,25 +525,27 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isLittleEndian()) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(), Alignment,
+ LD->getAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
- LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(),
+ MinAlign(Alignment, IncrementSize), LD->getAAInfo());
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(), Alignment,
+ LD->getAAInfo());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
- LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(),
+ MinAlign(Alignment, IncrementSize), LD->getAAInfo());
}
// aggregate the two parts
@@ -659,7 +648,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
SDLoc dl(ST);
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
@@ -668,7 +657,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
}
if (CFP->getValueType(0) == MVT::f64) {
@@ -677,7 +666,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
}
if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
@@ -690,13 +679,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (TLI.isBigEndian()) std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, Alignment, AAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U),
- TBAAInfo);
+ AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -714,7 +703,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
@@ -731,10 +720,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// If this is an unaligned store and the target doesn't support it,
// expand it.
unsigned AS = ST->getAddressSpace();
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
+ unsigned Align = ST->getAlignment();
+ if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
+ if (Align < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node),
DAG, TLI, this);
}
@@ -754,7 +744,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr,
ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, Alignment, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -777,7 +767,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
NVT, isVolatile, isNonTemporal, Alignment,
- TBAAInfo);
+ AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -799,7 +789,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
RoundVT,
isVolatile, isNonTemporal, Alignment,
- TBAAInfo);
+ AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -811,7 +801,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
} else {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
@@ -821,7 +811,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
TLI.getShiftAmountTy(Value.getValueType())));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
RoundVT, isVolatile, isNonTemporal, Alignment,
- TBAAInfo);
+ AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -830,7 +820,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
}
// The order of the stores doesn't matter.
@@ -842,12 +832,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
unsigned AS = ST->getAddressSpace();
+ unsigned Align = ST->getAlignment();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
+ if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
+ if (Align < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
@@ -868,7 +859,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -893,13 +884,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
unsigned AS = LD->getAddressSpace();
+ unsigned Align = LD->getAlignment();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) {
+ if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
+ if (Align < ABIAlignment){
ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
}
}
@@ -928,6 +920,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
assert(RVal.getNode() != Node && "Load must be completely replaced");
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(RVal.getNode());
+ UpdatedNodes->insert(RChain.getNode());
+ }
ReplacedNode(Node);
}
return;
@@ -938,7 +934,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ bool isInvariant = LD->isInvariant();
+ AAMDNodes AAInfo = LD->getAAInfo();
if (SrcWidth != SrcVT.getStoreSizeInBits() &&
// Some targets pretend to have an i1 loading operation, and actually
@@ -965,7 +962,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
SDValue Result =
DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ NVT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ AAInfo);
Ch = Result.getValue(1); // The chain.
@@ -1002,7 +1000,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
Chain, Ptr,
LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, isInvariant, Alignment, AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1010,8 +1008,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ ExtraVT, isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1031,7 +1029,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the top RoundWidth bits.
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, isInvariant, Alignment, AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1040,8 +1038,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ ExtraVT, isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1080,12 +1078,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// it, expand it.
EVT MemVT = LD->getMemoryVT();
unsigned AS = LD->getAddressSpace();
- if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) {
+ unsigned Align = LD->getAlignment();
+ if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) {
Type *Ty =
LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
TLI.getDataLayout()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
+ if (Align < ABIAlignment){
ExpandUnalignedLoad(cast<LoadSDNode>(Node),
DAG, TLI, Value, Chain);
}
@@ -1148,6 +1147,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
assert(Value.getNode() != Node && "Load must be completely replaced");
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Value.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
ReplacedNode(Node);
}
}
@@ -1155,6 +1158,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
/// LegalizeOp - Return a legal replacement for the given operation, with
/// all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
+
if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
return;
@@ -1186,6 +1191,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action != TargetLowering::Promote)
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::FP_TO_FP16:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
@@ -1334,10 +1340,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
if (NewNode != Node) {
- DAG.ReplaceAllUsesWith(Node, NewNode);
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
- ReplacedNode(Node);
+ ReplaceNode(Node, NewNode);
Node = NewNode;
}
switch (Action) {
@@ -1348,19 +1351,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
// a complete mess.
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Res.getNode()) {
- SmallVector<SDValue, 8> ResultVals;
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
- if (e == 1)
- ResultVals.push_back(Res);
- else
- ResultVals.push_back(Res.getValue(i));
- }
- if (Res.getNode() != Node || Res.getResNo() != 0) {
- DAG.ReplaceAllUsesWith(Node, ResultVals.data());
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
- ReplacedNode(Node);
+ if (!(Res.getNode() != Node || Res.getResNo() != 0))
+ return;
+
+ if (Node->getNumValues() == 1) {
+ // We can just directly replace this node with the lowered value.
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
}
+
+ SmallVector<SDValue, 8> ResultVals;
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Res.getValue(i));
+ ReplaceNode(Node, ResultVals.data());
return;
}
}
@@ -1448,7 +1451,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
Vec.getValueType().getVectorElementType(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
@@ -1483,7 +1486,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
StackPtr);
// Store the subvector.
- Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr,
MachinePointerInfo(), false, false, 0);
// Finally, load the updated vector.
@@ -1623,7 +1626,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ DAG.getSubtarget().getFrameLowering()->getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
@@ -1797,7 +1801,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
- PtrInfo, SlotVT, false, false, DestAlign);
+ PtrInfo, SlotVT, false, false, false, DestAlign);
}
SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1877,7 +1881,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
ShuffleVec.data());
else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
return false;
- NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices));
+ NewIntermedVals.push_back(
+ std::make_pair(Shuffle, std::move(FinalIndices)));
}
// If we had an odd number of defined values, then append the last
@@ -2580,7 +2585,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, Alignment);
+ MVT::f32, false, false, false, Alignment);
HandleSDNode Handle(Load);
LegalizeOp(Load.getNode());
FudgeInReg = Handle.getValue();
@@ -2782,7 +2787,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// x = x | (x >>32); // for 64-bit input
// return popcount(~x);
//
- // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+ // Ref: "Hacker's Delight" by Henry Warren
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT);
unsigned len = VT.getSizeInBits();
@@ -2801,7 +2806,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
- // see also http://www.hackersdelight.org/HDcode/ntz.cc
+ // Ref: "Hacker's Delight" by Henry Warren
EVT VT = Op.getValueType();
SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNOT(dl, Op, VT),
@@ -3153,65 +3158,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(0), Node->getValueType(0), dl);
Results.push_back(Tmp1);
break;
- case ISD::FP_TO_SINT: {
- EVT VT = Node->getOperand(0).getValueType();
- EVT NVT = Node->getValueType(0);
-
- // FIXME: Only f32 to i64 conversions are supported.
- if (VT != MVT::f32 || NVT != MVT::i64)
- break;
-
- // Expand f32 -> i64 conversion
- // This algorithm comes from compiler-rt's implementation of fixsfdi:
- // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits());
- SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT);
- SDValue ExponentLoBit = DAG.getConstant(23, IntVT);
- SDValue Bias = DAG.getConstant(127, IntVT);
- SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()),
- IntVT);
- SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT);
- SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT);
-
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
-
- SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
- DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT)));
- SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
-
- SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
- DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT)));
- Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
-
- SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
- DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
- DAG.getConstant(0x00800000, IntVT));
-
- R = DAG.getZExtOrTrunc(R, dl, NVT);
-
-
- R = DAG.getSelectCC(dl, Exponent, ExponentLoBit,
- DAG.getNode(ISD::SHL, dl, NVT, R,
- DAG.getZExtOrTrunc(
- DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
- dl, TLI.getShiftAmountTy(IntVT))),
- DAG.getNode(ISD::SRL, dl, NVT, R,
- DAG.getZExtOrTrunc(
- DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
- dl, TLI.getShiftAmountTy(IntVT))),
- ISD::SETGT);
-
- SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
- DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
- Sign);
-
- Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT),
- DAG.getConstant(0, NVT), Ret, ISD::SETLT));
+ case ISD::FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
break;
- }
case ISD::FP_TO_UINT: {
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
@@ -3450,6 +3400,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
+ case ISD::FMINNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
+ break;
+ case ISD::FMAXNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
+ break;
case ISD::FSQRT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
@@ -3568,12 +3528,38 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
- case ISD::FP16_TO_FP32:
- Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ case ISD::FADD:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+ break;
+ case ISD::FMUL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
break;
- case ISD::FP32_TO_FP16:
- Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
+ case ISD::FP16_TO_FP: {
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ break;
+ }
+
+ // We can extend to types bigger than f32 in two steps without changing the
+ // result. Since "f16 -> f32" is much more commonly available, give CodeGen
+ // the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
break;
+ }
+ case ISD::FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
case ISD::ConstantFP: {
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
@@ -3584,12 +3570,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FSUB: {
EVT VT = Node->getValueType(0);
- assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
- TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
- "Don't know how to expand this FP subtraction!");
- Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
- Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
- Results.push_back(Tmp1);
+ if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ } else {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+ }
break;
}
case ISD::SUB: {
@@ -3844,9 +3834,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
DAG.getIntPtrConstant(1));
// Ret is a node with an illegal type. Because such things are not
- // generally permitted during this phase of legalization, delete the
- // node. The above EXTRACT_ELEMENT nodes should have been folded.
- DAG.DeleteNode(Ret.getNode());
+ // generally permitted during this phase of legalization, make sure the
+ // node has no more uses. The above EXTRACT_ELEMENT nodes should have been
+ // folded.
+ assert(Ret->use_empty() &&
+           "Unexpected uses of illegally typed value from expanded lib call.");
}
if (isSigned) {
@@ -3907,7 +3899,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
MachinePointerInfo::getJumpTable(), MemVT,
- false, false, 0);
+ false, false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
// For PIC, the sequence is:
@@ -4217,6 +4209,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
// use the new one.
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Tmp2.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
ReplacedNode(Node);
break;
}
@@ -4293,6 +4289,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
case ISD::FPOW: {
@@ -4323,7 +4322,55 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
// SelectionDAG::Legalize - This is the entry point for the file.
//
void SelectionDAG::Legalize() {
- /// run - This is the main entry point to this class.
- ///
- SelectionDAGLegalize(*this).LegalizeDAG();
+ AssignTopologicalOrder();
+
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes);
+
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ for (;;) {
+ bool AnyLegalized = false;
+ for (auto NI = allnodes_end(); NI != allnodes_begin();) {
+ --NI;
+
+ SDNode *N = NI;
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ continue;
+ }
+
+ if (LegalizedNodes.insert(N).second) {
+ AnyLegalized = true;
+ Legalizer.LegalizeOp(N);
+
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ }
+ }
+ }
+ if (!AnyLegalized)
+ break;
+
+ }
+
+ // Remove dead nodes now.
+ RemoveDeadNodes();
+}
+
+bool SelectionDAG::LegalizeOp(SDNode *N,
+ SmallSetVector<SDNode *, 16> &UpdatedNodes) {
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes);
+
+ // Directly insert the node in question, and legalize it. This will recurse
+ // as needed through operands.
+ LegalizedNodes.insert(N);
+ Legalizer.LegalizeOp(N);
+
+ return LegalizedNodes.count(N);
}
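
The CTLZ/CTTZ expansion comments above and the removed FP_TO_SINT expansion (now routed through TLI.expandFP_TO_SINT) are easier to follow outside DAG-node form. Below is a minimal plain-C++ sketch of the same algorithms, assuming 32-bit inputs and an f32 -> i64 conversion; the helper names are illustrative only and are not part of this patch.

  #include <cstdint>
  #include <cstring>

  // CTLZ/CTTZ expansion ("Hacker's Delight"): smear the leading one bit to
  // the right and popcount the complement; count trailing zeros as
  // popcount(~x & (x - 1)).
  static unsigned popcount32(uint32_t X) {
    unsigned N = 0;
    for (; X; X &= X - 1)
      ++N;                              // clears the lowest set bit each pass
    return N;
  }
  static unsigned ctlz32(uint32_t X) {
    X |= X >> 1; X |= X >> 2; X |= X >> 4; X |= X >> 8; X |= X >> 16;
    return popcount32(~X);
  }
  static unsigned cttz32(uint32_t X) { return popcount32(~X & (X - 1)); }

  // fixsfdi-style f32 -> i64 conversion, matching the expansion that moved
  // into TLI.expandFP_TO_SINT. Inputs outside the i64 range are undefined,
  // as with the real conversion.
  static int64_t fptosi_f32_i64(float F) {
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof(Bits));
    int Exponent = int((Bits & 0x7F800000u) >> 23) - 127;
    int64_t Sign = (Bits & 0x80000000u) ? -1 : 0;       // all ones or zero
    uint64_t R = (Bits & 0x007FFFFFu) | 0x00800000u;    // mantissa + hidden bit
    if (Exponent < 0)
      return 0;                                         // |F| < 1 truncates to 0
    R = Exponent > 23 ? R << (Exponent - 23) : R >> (23 - Exponent);
    return (int64_t(R) ^ Sign) - Sign;                  // apply the sign
  }
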
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6b8fec6..4591e79 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -68,6 +68,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
+ case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
@@ -85,7 +87,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
- case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
+ case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
@@ -153,6 +155,32 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask);
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32,
+ RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80,
+ RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128),
+ NVT, Ops, 2, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32,
+ RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80,
+ RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128),
+ NVT, Ops, 2, false, SDLoc(N)).first;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
@@ -373,23 +401,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
+
+ // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+ // entirely possible for both f16 and f32 to be legal, so use the fully
+ // hard-float FP_EXTEND rather than FP16_TO_FP.
+ if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
+ Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
+ if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
+ SoftenFloatResult(Op.getNode(), 0);
+ }
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
+ Op = GetSoftenedFloat(Op);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
// nodes?
-SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
+ EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
- SDLoc(N)).first;
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1,
+ false, SDLoc(N)).first;
+ if (N->getValueType(0) == MVT::f32)
+ return Res32;
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f16) {
+ // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a
+ // storage-only type get a chance to select things.
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op);
+ }
+
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
@@ -498,6 +551,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ if (N->getValueType(0) == MVT::f16)
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
+
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
@@ -520,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
L->getPointerInfo(), NVT, L->isVolatile(),
L->isNonTemporal(), false, L->getAlignment(),
- L->getTBAAInfo());
+ L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -533,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
L->isNonTemporal(), false, L->getAlignment(),
- L->getTBAAInfo());
+ L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -625,10 +681,11 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
+ case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
- case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
@@ -654,11 +711,32 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
GetSoftenedFloat(N->getOperand(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
+ // If we get here, the result must be legal but the source illegal.
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+ if (SVT == MVT::f16)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op);
+
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
+
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+}
+
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ // We actually deal with the partially-softened FP_TO_FP16 node too, which
+ // returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
+ assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16);
+
EVT SVT = N->getOperand(0).getValueType();
EVT RVT = N->getValueType(0);
+ EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
- RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
@@ -704,13 +782,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
- EVT RVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -813,6 +884,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
+ case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
@@ -876,6 +949,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
ISD::SETEQ);
}
+void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index cffb0a1..b73bb0a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -99,7 +99,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
- case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+ case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
case ISD::AND:
case ISD::OR:
@@ -225,10 +225,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(),
N->getFailureOrdering(), N->getSynchScope());
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- unsigned ChainOp = N->getNumValues() - 1;
- ReplaceValueWith(SDValue(N, ChainOp), Res.getValue(ChainOp));
+  // Update the uses of N with the newly created Res.
+ for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
+ ReplaceValueWith(SDValue(N, i), Res.getValue(i));
return Res;
}
@@ -402,7 +401,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
DAG.getValueType(N->getValueType(0).getScalarType()));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -827,7 +826,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
- case ISD::FP16_TO_FP32:
+ case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
@@ -863,7 +862,26 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
switch (CCCode) {
default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
- case ISD::SETNE:
+ case ISD::SETNE: {
+ SDValue OpL = GetPromotedInteger(NewLHS);
+ SDValue OpR = GetPromotedInteger(NewRHS);
+
+      // We would prefer to promote the comparison operand with sign
+      // extension if we find the operand is actually a truncation of an
+      // AssertSext. With this optimization, we can avoid inserting a real
+      // truncate instruction, which is eventually redundant.
+ if (OpL->getOpcode() == ISD::AssertSext &&
+ cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
+ OpR->getOpcode() == ISD::AssertSext &&
+ cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
+ NewLHS = OpL;
+ NewRHS = OpR;
+ } else {
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ }
+ break;
+ }
case ISD::SETUGE:
case ISD::SETUGT:
case ISD::SETULE:
@@ -947,7 +965,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
- "Legal vector of one illegal element?");
+ "Legal vector of one illegal element?");
// Promote the inserted value. The type does not need to match the
// vector element type. Check that any extra bits introduced will be
@@ -1861,7 +1879,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
bool isInvariant = N->isInvariant();
- const MDNode *TBAAInfo = N->getTBAAInfo();
+ AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1870,7 +1888,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
- MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ MemVT, isVolatile, isNonTemporal, isInvariant,
+ Alignment, AAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -1893,7 +1912,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, isInvariant, Alignment,
- TBAAInfo);
+ AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -1905,8 +1924,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1924,7 +1943,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, isInvariant, Alignment,
+ AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1933,8 +1953,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ isVolatile, isNonTemporal, isInvariant,
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2711,7 +2731,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
- const MDNode *TBAAInfo = N->getTBAAInfo();
+ AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
SDValue Lo, Hi;
@@ -2721,7 +2741,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), isVolatile, isNonTemporal,
- Alignment, TBAAInfo);
+ Alignment, AAInfo);
}
if (TLI.isLittleEndian()) {
@@ -2729,7 +2749,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
GetExpandedInteger(N->getValue(), Lo, Hi);
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2742,7 +2762,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2770,7 +2790,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Store both the high bits and maybe some of the low bits.
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
- HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ HiVT, isVolatile, isNonTemporal, Alignment, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -2780,7 +2800,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -2855,7 +2875,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
FudgePtr,
MachinePointerInfo::getConstantPool(),
MVT::f32,
- false, false, Alignment);
+ false, false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
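
The SETEQ/SETNE change above keeps the sign-extended promotions when both operands come from AssertSext nodes of the original narrow type, because equality is preserved under sign extension. A small self-contained C++ illustration of that reasoning (the concrete values are arbitrary examples, not from the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // If both promoted operands are known sign extensions of the narrow type
    // (what AssertSext guarantees), an i32 equality compare gives the same
    // answer as the original i16 compare, so no truncate is needed.
    int16_t A = -5, B = -5;
    int32_t PromotedA = A, PromotedB = B;               // sign extension
    assert((PromotedA == PromotedB) == (A == B));

    // Without that guarantee, the high bits of the promoted values could
    // differ, which is why the fallback path zero-extends both sides first.
    return 0;
  }
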
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d0ca6f8..30f412b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SELECTIONDAG_LEGALIZETYPES_H
-#define SELECTIONDAG_LEGALIZETYPES_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -122,8 +122,8 @@ public:
explicit DAGTypeLegalizer(SelectionDAG &dag)
: TLI(dag.getTargetLoweringInfo()), DAG(dag),
ValueTypeActions(TLI.getValueTypeActions()) {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
+ static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
}
/// run - This is the main entry point for the type legalizer. This does a
@@ -237,7 +237,7 @@ private:
SDValue PromoteIntRes_CTTZ(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
- SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
@@ -387,6 +387,8 @@ private:
SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SoftenFloatRes_FABS(SDNode *N);
+ SDValue SoftenFloatRes_FMINNUM(SDNode *N);
+ SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
@@ -403,7 +405,7 @@ private:
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
- SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
@@ -425,10 +427,10 @@ private:
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
- SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -450,6 +452,8 @@ private:
void ExpandFloatResult(SDNode *N, unsigned ResNo);
void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 7e2f7b6..38829b6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -256,13 +256,13 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ AAMDNodes AAInfo = LD->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
isVolatile, isNonTemporal, isInvariant, Alignment,
- TBAAInfo);
+ AAInfo);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -271,7 +271,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -470,7 +470,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
bool isNonTemporal = St->isNonTemporal();
- const MDNode *TBAAInfo = St->getTBAAInfo();
+ AAMDNodes AAInfo = St->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -482,14 +482,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), TBAAInfo);
+ MinAlign(Alignment, IncrementSize), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
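
ExpandRes_NormalLoad and ExpandOp_NormalStore above split one too-wide memory access into two accesses of the new, legal width: one at the base pointer and one at base plus IncrementSize, with the halves swapped for big-endian part ordering. A minimal plain-C++ sketch of that layout, assuming a 128-bit value split into two 64-bit halves; the struct and function names are illustrative only:

  #include <cstdint>
  #include <cstring>

  struct Halves { uint64_t Lo, Hi; };                   // expanded value parts

  // Expand a 128-bit load into two 64-bit loads (IncrementSize = 8 bytes).
  Halves expandLoad(const unsigned char *Ptr, bool BigEndian) {
    uint64_t First, Second;
    std::memcpy(&First, Ptr, 8);                        // at the base pointer
    std::memcpy(&Second, Ptr + 8, 8);                   // at base + IncrementSize
    // Little-endian keeps the low half first in memory; big-endian swaps.
    return BigEndian ? Halves{Second, First} : Halves{First, Second};
  }

  // Expand the matching 128-bit store into two 64-bit stores.
  void expandStore(unsigned char *Ptr, Halves V, bool BigEndian) {
    uint64_t First = BigEndian ? V.Hi : V.Lo;
    uint64_t Second = BigEndian ? V.Lo : V.Hi;
    std::memcpy(Ptr, &First, 8);
    std::memcpy(Ptr + 8, &Second, 8);
  }
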
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 507e7ff..b5af7b7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -199,12 +199,29 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
- if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
- if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
+ switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getMemoryVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
- Changed = true;
- return LegalizeOp(ExpandLoad(Op));
- }
+ case TargetLowering::Custom:
+ if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
+ Changed = true;
+ if (Lowered->getNumValues() != Op->getNumValues()) {
+ // This expanded to something other than the load. Assume the
+ // lowering code took care of any chain values, and just handle the
+ // returned value.
+ assert(Result.getValue(1).use_empty() &&
+ "There are still live users of the old chain!");
+ return LegalizeOp(Lowered);
+ } else {
+ return TranslateLegalizeResults(Op, Lowered);
+ }
+ }
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
} else if (Op.getOpcode() == ISD::STORE) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT StVT = ST->getMemoryVT();
@@ -273,6 +290,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_TO_UINT:
case ISD::FNEG:
case ISD::FABS:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
case ISD::FCOPYSIGN:
case ISD::FSQRT:
case ISD::FSIN:
@@ -353,9 +372,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
}
- // The rest of the time, vector "promotion" is basically just bitcasting and
- // doing the operation in a different type. For example, x86 promotes
- // ISD::AND on v2i32 to v1i64.
+ // There are currently two cases of vector promotion:
+  // 1) Bitcasting a vector of integers to a different vector type of the
+  //    same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64.
+  // 2) Extending a vector of floats to a vector of the same number of larger
+  //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
@@ -365,14 +386,23 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
- Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ if (Op.getOperand(j)
+ .getValueType()
+ .getVectorElementType()
+ .isFloatingPoint())
+ Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
-
- return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+ if (VT.isFloatingPoint() ||
+ (VT.isVector() && VT.getVectorElementType().isFloatingPoint()))
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0));
+ else
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
@@ -480,7 +510,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), LD->getAlignment(),
- LD->getTBAAInfo());
+ LD->getAAInfo());
} else {
EVT LoadVT = WideVT;
while (RemainingBytes < LoadBytes) {
@@ -490,8 +520,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Offset),
LoadVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment(),
- LD->getTBAAInfo());
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment(), LD->getAAInfo());
}
RemainingBytes -= LoadBytes;
@@ -561,8 +591,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Op.getNode()->getValueType(0).getScalarType(),
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment(), LD->getTBAAInfo());
+ LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment(), LD->getAAInfo());
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
@@ -593,7 +623,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
unsigned NumElem = StVT.getVectorNumElements();
// The type of the data we want to save
@@ -621,7 +651,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
DAG.getConstant(Stride, BasePTR.getValueType()));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 71240fc..27f63d2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -106,6 +106,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOPYSIGN:
case ISD::FDIV:
case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+
case ISD::FPOW:
case ISD::FREM:
case ISD::FSUB:
@@ -223,7 +226,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->isInvariant(), N->getOriginalAlignment(),
- N->getTBAAInfo());
+ N->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -234,7 +237,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
// Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
EVT DestVT = N->getValueType(0).getVectorElementType();
- SDValue Op = GetScalarizedVector(N->getOperand(0));
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDLoc DL(N);
+ // The result needs scalarizing, but it's not a given that the source does.
+ // This is a workaround for targets where it's impossible to scalarize the
+ // result of a conversion, because the source type is legal.
+ // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
+ // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
+ // legal and was not scalarized.
+ // See the similar logic in ScalarizeVecRes_VSETCC
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ }
return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
}
@@ -408,6 +427,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::TRUNCATE:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::CONCAT_VECTORS:
@@ -451,11 +474,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
N->getValueType(0), Elt);
}
-/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
-/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Do the operation on the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
- "Unexected vector type!");
+ "Unexpected vector type!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
N->getValueType(0).getScalarType(), Elt);
@@ -509,12 +532,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->getAlignment(), N->getAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment(), N->getTBAAInfo());
+ N->getOriginalAlignment(), N->getAAInfo());
}
/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs
@@ -627,6 +650,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOPYSIGN:
case ISD::FSUB:
case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
@@ -868,6 +893,10 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
return;
}
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return;
+
// Spill the vector to the stack.
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
@@ -923,14 +952,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ AAMDNodes AAInfo = LD->getAAInfo();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- isInvariant, Alignment, TBAAInfo);
+ isInvariant, Alignment, AAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -938,7 +967,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment,
- TBAAInfo);
+ AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1349,6 +1378,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
Idx.getValueType())), 0);
}
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return SDValue();
+
// Store the vector to the stack.
EVT EltVT = VecVT.getVectorElementType();
SDLoc dl(N);
@@ -1359,7 +1392,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
- MachinePointerInfo(), EltVT, false, false, 0);
+ MachinePointerInfo(), EltVT, false, false, false, 0);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1374,7 +1407,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
bool isNT = N->isNonTemporal();
- const MDNode *TBAAInfo = N->getTBAAInfo();
+ AAMDNodes AAInfo = N->getAAInfo();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
@@ -1385,10 +1418,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (isTruncating)
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- LoMemVT, isVol, isNT, Alignment, TBAAInfo);
+ LoMemVT, isVol, isNT, Alignment, AAInfo);
else
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- isVol, isNT, Alignment, TBAAInfo);
+ isVol, isNT, Alignment, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -1397,11 +1430,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVol, isNT, Alignment, TBAAInfo);
+ HiMemVT, isVol, isNT, Alignment, AAInfo);
else
Hi = DAG.getStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- isVol, isNT, Alignment, TBAAInfo);
+ isVol, isNT, Alignment, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
@@ -1575,6 +1608,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::OR:
case ISD::SUB:
case ISD::XOR:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
Res = WidenVecRes_Binary(N);
break;
@@ -2737,7 +2772,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
bool isInvariant = LD->isInvariant();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ AAMDNodes AAInfo = LD->getAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2748,7 +2783,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
isVolatile, isNonTemporal, isInvariant, Align,
- TBAAInfo);
+ AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2793,7 +2828,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset), isVolatile,
isNonTemporal, isInvariant, MinAlign(Align, Increment),
- TBAAInfo);
+ AAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector()) {
SmallVector<SDValue, 16> Loads;
@@ -2809,7 +2844,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset), isVolatile,
isNonTemporal, isInvariant, MinAlign(Align, Increment),
- TBAAInfo);
+ AAInfo);
LdChain.push_back(L.getValue(1));
}
@@ -2889,7 +2924,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const MDNode *TBAAInfo = LD->getTBAAInfo();
+ bool isInvariant = LD->isInvariant();
+ AAMDNodes AAInfo = LD->getAAInfo();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
@@ -2901,7 +2937,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
LD->getPointerInfo(),
- LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo);
+ LdEltVT, isVolatile, isNonTemporal, isInvariant,
+ Align, AAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
@@ -2911,7 +2948,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
BasePtr.getValueType()));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
- isVolatile, isNonTemporal, Align, TBAAInfo);
+ isVolatile, isNonTemporal, isInvariant, Align,
+ AAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -2934,7 +2972,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -2961,7 +2999,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
- MinAlign(Align, Offset), TBAAInfo));
+ MinAlign(Align, Offset), AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
@@ -2981,7 +3019,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo().getWithOffset(Offset),
isVolatile, isNonTemporal,
- MinAlign(Align, Offset), TBAAInfo));
+ MinAlign(Align, Offset), AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -3003,7 +3041,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- const MDNode *TBAAInfo = ST->getTBAAInfo();
+ AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -3027,7 +3065,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo(), StEltVT,
isVolatile, isNonTemporal, Align,
- TBAAInfo));
+ AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
@@ -3038,7 +3076,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
ST->getPointerInfo().getWithOffset(Offset),
StEltVT, isVolatile, isNonTemporal,
- MinAlign(Align, Offset), TBAAInfo));
+ MinAlign(Align, Offset), AAInfo));
}
}
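
The recurring TBAAInfo -> AAInfo change above replaces a single TBAA metadata pointer with an aggregated alias-metadata argument that is threaded unchanged through every split/widened memory operation. The following stand-alone sketch illustrates the shape of that change with invented types (MDStub, AliasMDBundle, emitHalfLoad are placeholders, not the real llvm::AAMDNodes API):

    // Hypothetical stand-in: one bundle of alias metadata instead of a
    // single TBAA node, passed to both halves of a split memory access.
    #include <cstdio>

    struct MDStub {};                        // placeholder for a metadata node

    struct AliasMDBundle {                   // hypothetical analogue of AAMDNodes
      const MDStub *TBAA;
      const MDStub *Scope;
      const MDStub *NoAlias;
      AliasMDBundle() : TBAA(nullptr), Scope(nullptr), NoAlias(nullptr) {}
    };

    // Before: emitHalfLoad(..., const MDStub *TBAAInfo)
    // After:  one bundle is threaded through unchanged to both halves.
    static void emitHalfLoad(const char *Half, const AliasMDBundle &AAInfo) {
      std::printf("%s load: tbaa=%p scope=%p noalias=%p\n", Half,
                  (const void *)AAInfo.TBAA, (const void *)AAInfo.Scope,
                  (const void *)AAInfo.NoAlias);
    }

    int main() {
      MDStub TBAA, Scope;
      AliasMDBundle AAInfo;
      AAInfo.TBAA = &TBAA;                   // !tbaa
      AAInfo.Scope = &Scope;                 // !alias.scope
      emitHalfLoad("lo", AAInfo);            // split loads/stores share the
      emitHalfLoad("hi", AAInfo);            // same alias metadata
      return 0;
    }

The point of the bundle is that call sites stay mechanical: wherever a TBAA pointer used to be forwarded, the whole bundle is forwarded instead.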
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 624003f..db38b76 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -40,32 +41,29 @@ static cl::opt<signed> RegPressureThreshold(
"dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
cl::desc("Track reg pressure and switch priority to in-depth"));
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
+ : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TLI = IS->TLI;
+ TII = STI.getInstrInfo();
+ ResourcesModel = TII->CreateTargetScheduleState(STI);
+ // This hard requirement could be relaxed, but for now
+ // do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end();
+ I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
-ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
- Picker(this),
- InstrItins(IS->getTargetLowering()->getTargetMachine().getInstrItineraryData())
-{
- TII = IS->getTargetLowering()->getTargetMachine().getInstrInfo();
- TRI = IS->getTargetLowering()->getTargetMachine().getRegisterInfo();
- TLI = IS->getTargetLowering();
-
- const TargetMachine &tm = (*IS->MF).getTarget();
- ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr);
- // This hard requirement could be relaxed, but for now
- // do not let it procede.
- assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
-
- unsigned NumRC = TRI->getNumRegClasses();
- RegLimit.resize(NumRC);
- RegPressure.resize(NumRC);
- std::fill(RegLimit.begin(), RegLimit.end(), 0);
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
-
- ParallelLiveRanges = 0;
- HorizontalVerticalBalance = 0;
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
}
unsigned
@@ -319,7 +317,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) {
// If packet is now full, reset the state so in the next cycle
// we start fresh.
- if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
+ if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
ResourcesModel->clearResources();
Packet.clear();
}
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index ee54292..bce69d7 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
-#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugLoc.h"
@@ -44,7 +44,8 @@ private:
const Value *Const; // valid for constants
unsigned FrameIx; // valid for stack objects
} u;
- MDNode *mdPtr;
+ MDNode *Var;
+ MDNode *Expr;
bool IsIndirect;
uint64_t Offset;
DebugLoc DL;
@@ -52,69 +53,72 @@ private:
bool Invalid;
public:
// Constructor for non-constants.
- SDDbgValue(MDNode *mdP, SDNode *N, unsigned R,
- bool indir, uint64_t off, DebugLoc dl,
- unsigned O) : mdPtr(mdP), IsIndirect(indir),
- Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir,
+ uint64_t off, DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = SDNODE;
u.s.Node = N;
u.s.ResNo = R;
}
// Constructor for constants.
- SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
- unsigned O) :
- mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off,
+ DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = CONST;
u.Const = C;
}
// Constructor for frame indices.
- SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
- mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O),
- Invalid(false) {
+ SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl,
+ unsigned O)
+ : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = FRAMEIX;
u.FrameIx = FI;
}
// Returns the kind.
- DbgValueKind getKind() { return kind; }
+ DbgValueKind getKind() const { return kind; }
- // Returns the MDNode pointer.
- MDNode *getMDPtr() { return mdPtr; }
+ // Returns the MDNode pointer for the variable.
+ MDNode *getVariable() const { return Var; }
+
+ // Returns the MDNode pointer for the expression.
+ MDNode *getExpression() const { return Expr; }
// Returns the SDNode* for a register ref
- SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; }
+ SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
// Returns the ResNo for a register ref
- unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
+ unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
// Returns the Value* for a constant
- const Value *getConst() { assert (kind==CONST); return u.Const; }
+ const Value *getConst() const { assert (kind==CONST); return u.Const; }
// Returns the FrameIx for a stack object
- unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
+ unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
// Returns whether this is an indirect value.
- bool isIndirect() { return IsIndirect; }
+ bool isIndirect() const { return IsIndirect; }
// Returns the offset.
- uint64_t getOffset() { return Offset; }
+ uint64_t getOffset() const { return Offset; }
// Returns the DebugLoc.
- DebugLoc getDebugLoc() { return DL; }
+ DebugLoc getDebugLoc() const { return DL; }
// Returns the SDNodeOrder. This is the order of the preceding node in the
// input.
- unsigned getOrder() { return Order; }
+ unsigned getOrder() const { return Order; }
// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
// property. A SDDbgValue is invalid if the SDNode that produces the value is
// deleted.
void setIsInvalidated() { Invalid = true; }
- bool isInvalidated() { return Invalid; }
+ bool isInvalidated() const { return Invalid; }
};
} // end llvm namespace
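
The SDNodeDbgValue.h hunks split the single mdPtr member into separate variable and expression nodes and make the read-only accessors const. A minimal sketch of that interface shape, with a placeholder Metadata type rather than the real MDNode:

    #include <cstdio>

    struct Metadata {};                      // placeholder for llvm::MDNode

    class DbgValueRecord {
      const Metadata *Var;                   // which source variable
      const Metadata *Expr;                  // how to compute its location
      unsigned long long Offset;

    public:
      DbgValueRecord(const Metadata *Var, const Metadata *Expr,
                     unsigned long long Offset)
          : Var(Var), Expr(Expr), Offset(Offset) {}

      // Read-only accessors are const, so const records stay fully usable.
      const Metadata *getVariable() const { return Var; }
      const Metadata *getExpression() const { return Expr; }
      unsigned long long getOffset() const { return Offset; }
    };

    int main() {
      Metadata V, E;
      const DbgValueRecord R(&V, &E, 8);
      std::printf("var=%p expr=%p off=%llu\n", (const void *)R.getVariable(),
                  (const void *)R.getExpression(), R.getOffset());
      return 0;
    }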
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 4d8c2c7..61a3fd7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -221,7 +221,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue)
return nullptr;
else if (VT == MVT::Other)
@@ -229,7 +229,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (VT == MVT::Glue)
return nullptr;
}
@@ -431,17 +431,23 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
-static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
- unsigned NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
- if (Reg == *ImpDef)
- break;
- ++NumRes;
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
}
- return N->getValueType(NumRes);
+ return N->getSimpleValueType(NumRes);
}
/// CheckForLiveRegDef - Return true and update live register vector if the
@@ -454,7 +460,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
bool Added = false;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
- if (RegAdded.insert(*AI)) {
+ if (RegAdded.insert(*AI).second) {
LRegs.push_back(*AI);
Added = true;
}
@@ -572,7 +578,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
- EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
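
In getPhysicalRegisterVT above, the result index of a physical-register def is found by starting after the explicit defs and walking the zero-terminated implicit-def list until the register is hit; the patch only adds a shortcut for CopyFromReg nodes, which have no MCInstrDesc. The sketch below shows just the generic walk with invented data (not the real MCInstrDesc):

    #include <cstdint>
    #include <cstdio>

    // Count explicit defs first, then advance once per implicit def until
    // the requested register is reached.
    static unsigned findPhysRegResultIdx(unsigned NumExplicitDefs,
                                         const uint16_t *ImplicitDefs,
                                         unsigned Reg) {
      unsigned Idx = NumExplicitDefs;
      for (const uint16_t *I = ImplicitDefs; *I; ++I) {
        if (*I == Reg)
          break;
        ++Idx;
      }
      return Idx;
    }

    int main() {
      const uint16_t ImpDefs[] = {7, 9, 12, 0};   // hypothetical register ids
      // Two explicit defs; reg 9 is the second implicit def, so its value is
      // result number 2 + 1 = 3.
      std::printf("result index = %u\n", findPhysRegResultIdx(2, ImpDefs, 9));
      return 0;
    }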
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 13cfae7..8b54e656 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -30,8 +30,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -166,12 +166,11 @@ public:
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
Topo(SUnits, nullptr) {
- const TargetMachine &tm = mf.getTarget();
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
if (DisableSchedCycles || !NeedLatency)
HazardRec = new ScheduleHazardRecognizer();
else
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
- tm.getSubtargetImpl(), this);
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
~ScheduleDAGRRList() {
@@ -946,7 +945,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue)
return nullptr;
else if (VT == MVT::Other)
@@ -954,7 +953,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (VT == MVT::Glue)
return nullptr;
}
@@ -1189,17 +1188,23 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
-static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
- unsigned NumRes = MCID.getNumDefs();
- for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
- if (Reg == *ImpDef)
- break;
- ++NumRes;
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
}
- return N->getValueType(NumRes);
+ return N->getSimpleValueType(NumRes);
}
/// CheckForLiveRegDef - Return true and update live register vector if the
@@ -1218,7 +1223,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
if (LiveRegDefs[*AliasI] == SU) continue;
// Add Reg to the set of interfering live regs.
- if (RegAdded.insert(*AliasI)) {
+ if (RegAdded.insert(*AliasI).second) {
LRegs.push_back(*AliasI);
}
}
@@ -1235,7 +1240,7 @@ static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
if (!LiveRegDefs[i]) continue;
if (LiveRegDefs[i] == SU) continue;
if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
- if (RegAdded.insert(i))
+ if (RegAdded.insert(i).second)
LRegs.push_back(i);
}
}
@@ -1310,7 +1315,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
SDNode *Gen = LiveRegGens[CallResource]->getNode();
while (SDNode *Glued = Gen->getGluedNode())
Gen = Glued;
- if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+ if (!IsChainDependent(Gen, Node, 0, TII) &&
+ RegAdded.insert(CallResource).second)
LRegs.push_back(CallResource);
}
}
@@ -1373,7 +1379,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
Interferences.push_back(CurSU);
}
else {
- assert(CurSU->isPending && "Intereferences are pending");
+ assert(CurSU->isPending && "Interferences are pending");
// Update the interference with current live regs.
LRegsPair.first->second = LRegs;
}
@@ -1439,7 +1445,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
- EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
@@ -1930,8 +1936,8 @@ void RegReductionPQBase::dumpRegPressure() const {
unsigned Id = RC->getID();
unsigned RP = RegPressure[Id];
if (!RP) continue;
- DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
- << '\n');
+ DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
+ << RegLimit[Id] << '\n');
}
#endif
}
@@ -2754,7 +2760,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
if (!SUImpDefs && !SURegMask)
continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
@@ -2977,9 +2983,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
BURegReductionPriorityQueue *PQ =
new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
@@ -2991,9 +2997,9 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ =
new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
@@ -3005,10 +3011,10 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- const TargetLowering *TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
HybridBURRPriorityQueue *PQ =
new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
@@ -3021,10 +3027,10 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- const TargetLowering *TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
ILPBURRPriorityQueue *PQ =
new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
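
Several hunks in this file change `if (RegAdded.insert(X))` to `if (RegAdded.insert(X).second)`: the set's insert now returns an iterator/bool pair, and only the bool says whether the element was newly added. The same idiom with the standard library, as a self-contained example:

    #include <cstdio>
    #include <set>
    #include <vector>

    int main() {
      std::set<unsigned> RegAdded;
      std::vector<unsigned> LRegs;
      const unsigned Aliases[] = {3, 5, 3, 8, 5};

      for (unsigned Reg : Aliases)
        if (RegAdded.insert(Reg).second)   // true only on first insertion
          LRegs.push_back(Reg);

      for (unsigned Reg : LRegs)
        std::printf("%u ", Reg);           // prints: 3 5 8
      std::printf("\n");
      return 0;
    }

The behaviour is unchanged; only the return type of insert differs, so each interfering register is still recorded exactly once.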
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index de910b7..8b9f618 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -29,7 +29,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -38,17 +37,17 @@ using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
-// This allows latency based scheduler to notice high latency instructions
-// without a target itinerary. The choise if number here has more to do with
-// balancing scheduler heursitics than with the actual machine latency.
+// This allows the latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
static cl::opt<int> HighLatencyCycles(
"sched-high-latency-cycles", cl::Hidden, cl::init(10),
cl::desc("Roughly estimate the number of cycles that 'long latency'"
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
- InstrItins(mf.getTarget().getInstrItineraryData()) {}
+ : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
+ InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
@@ -120,15 +119,20 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
return;
unsigned ResNo = User->getOperand(2).getResNo();
- if (Def->isMachineOpcode()) {
+ if (Def->getOpcode() == ISD::CopyFromReg &&
+ cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
+ PhysReg = Reg;
+ } else if (Def->isMachineOpcode()) {
const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
if (ResNo >= II.getNumDefs() &&
- II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
PhysReg = Reg;
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
- Cost = RC->getCopyCost();
- }
+ }
+
+ if (PhysReg != 0) {
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
+ Cost = RC->getCopyCost();
}
}
@@ -136,7 +140,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
SmallVectorImpl<EVT> &VTs,
SDValue ExtraOper = SDValue()) {
- SmallVector<SDValue, 4> Ops;
+ SmallVector<SDValue, 8> Ops;
for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
Ops.push_back(N->getOperand(I));
@@ -226,7 +230,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
I != E && UseCount < 100; ++I, ++UseCount) {
SDNode *User = *I;
- if (User == Node || !Visited.insert(User))
+ if (User == Node || !Visited.insert(User).second)
continue;
int64_t Offset1, Offset2;
if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
@@ -339,7 +343,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// Add all operands to the worklist unless they've already been added.
for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
- if (Visited.insert(NI->getOperand(i).getNode()))
+ if (Visited.insert(NI->getOperand(i).getNode()).second)
Worklist.push_back(NI->getOperand(i).getNode());
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
@@ -425,7 +429,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
}
void ScheduleDAGSDNodes::AddSchedEdges() {
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = forceUnitLatencies();
@@ -733,7 +737,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
SmallSet<unsigned, 8> &Seen) {
unsigned Order = N->getIROrder();
- if (!Order || !Seen.insert(Order)) {
+ if (!Order || !Seen.insert(Order).second) {
// Process any valid SDDbgValues even if node does not have any order
// assigned.
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 39ebadf..2cd1f4b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SCHEDULEDAGSDNODES_H
-#define SCHEDULEDAGSDNODES_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAG.h"
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 4589b0c..418b58e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -71,10 +72,8 @@ public:
AliasAnalysis *aa,
SchedulingPriorityQueue *availqueue)
: ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
-
- const TargetMachine &tm = mf.getTarget();
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
- tm.getSubtargetImpl(), this);
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
~ScheduleDAGVLIW() {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index daff1f2..7961e66 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -46,6 +46,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
@@ -95,7 +96,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
/// BUILD_VECTOR where all of the elements are ~0 or undef.
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BITCAST)
+ while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -143,7 +144,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
/// BUILD_VECTOR where all of the elements are 0 or undef.
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BITCAST)
+ while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -686,6 +687,15 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
DeallocateNode(N);
}
+void SDDbgInfo::erase(const SDNode *Node) {
+ DbgValMapType::iterator I = DbgValMap.find(Node);
+ if (I == DbgValMap.end())
+ return;
+ for (auto &Val: I->second)
+ Val->setIsInvalidated();
+ DbgValMap.erase(I);
+}
+
void SelectionDAG::DeallocateNode(SDNode *N) {
if (N->OperandsNeedDelete)
delete[] N->OperandList;
@@ -696,10 +706,60 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
NodeAllocator.Deallocate(AllNodes.remove(N));
- // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
- ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
- for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
- DbgVals[i]->setIsInvalidated();
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate
+ // them and forget about that node.
+ DbgInfo->erase(N);
+}
+
+#ifndef NDEBUG
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
+ break;
+ }
+ }
+}
+#endif // NDEBUG
+
+/// \brief Insert a newly allocated node into the DAG.
+///
+/// Handles insertion into the all nodes list and CSE map, as well as
+/// verification and other common operations when a new node is allocated.
+void SelectionDAG::InsertNode(SDNode *N) {
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ VerifySDNode(N);
+#endif
}
/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
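
The new InsertNode helper above funnels every node creation through one place, so the assert-only verification can no longer be skipped at individual call sites. A rough stand-alone sketch of that refactoring pattern (Graph, makeLeaf, makeBinary are invented names, not the SelectionDAG API):

    #include <cassert>
    #include <vector>

    struct Node { int NumOperands; };

    class Graph {
      std::vector<Node *> AllNodes;

    #ifndef NDEBUG
      static void verify(const Node *N) {
        assert(N->NumOperands >= 0 && "Malformed node!");
      }
    #endif

      // Every factory calls this, so list insertion and debug-only
      // verification happen on every creation path.
      void insertNode(Node *N) {
        AllNodes.push_back(N);
    #ifndef NDEBUG
        verify(N);
    #endif
      }

    public:
      Node *makeLeaf()   { Node *N = new Node{0}; insertNode(N); return N; }
      Node *makeBinary() { Node *N = new Node{2}; insertNode(N); return N; }
      ~Graph() { for (Node *N : AllNodes) delete N; }
    };

    int main() {
      Graph G;
      G.makeLeaf();
      G.makeBinary();   // both creation paths hit the same verification hook
      return 0;
    }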
@@ -839,83 +899,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
return Node;
}
-#ifndef NDEBUG
-/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
-static void VerifyNodeCommon(SDNode *N) {
- switch (N->getOpcode()) {
- default:
- break;
- case ISD::BUILD_PAIR: {
- EVT VT = N->getValueType(0);
- assert(N->getNumValues() == 1 && "Too many results!");
- assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
- "Wrong return type!");
- assert(N->getNumOperands() == 2 && "Wrong number of operands!");
- assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
- "Mismatched operand types!");
- assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
- "Wrong operand type!");
- assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
- "Wrong return type size");
- break;
- }
- case ISD::BUILD_VECTOR: {
- assert(N->getNumValues() == 1 && "Too many results!");
- assert(N->getValueType(0).isVector() && "Wrong return type!");
- assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
- "Wrong number of operands!");
- EVT EltVT = N->getValueType(0).getVectorElementType();
- for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
- assert((I->getValueType() == EltVT ||
- (EltVT.isInteger() && I->getValueType().isInteger() &&
- EltVT.bitsLE(I->getValueType()))) &&
- "Wrong operand type!");
- assert(I->getValueType() == N->getOperand(0).getValueType() &&
- "Operands must all have the same type");
- }
- break;
- }
- }
-}
-
-/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
-static void VerifySDNode(SDNode *N) {
- // The SDNode allocators cannot be used to allocate nodes with fields that are
- // not present in an SDNode!
- assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
- assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
- assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
- assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
- assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
- assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
- assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
- assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
- assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
- assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
- assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
- assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
- assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
- assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
- assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
- assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
- assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
- assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
- assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
-
- VerifyNodeCommon(N);
-}
-
-/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
-/// invalid.
-static void VerifyMachineNode(SDNode *N) {
- // The MachineNode allocators cannot be used to allocate nodes with fields
- // that are not present in a MachineNode!
- // Currently there are no such nodes.
-
- VerifyNodeCommon(N);
-}
-#endif // NDEBUG
-
/// getEVTAlignment - Compute the default alignment value for the
/// given type.
///
@@ -924,22 +907,23 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
- return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty);
+ return TLI->getDataLayout()->getABITypeAlignment(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL),
- EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
- UpdateListeners(nullptr) {
+ : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL),
+ EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
+ Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
+ UpdateListeners(nullptr) {
AllNodes.push_back(&EntryNode);
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) {
+void SelectionDAG::init(MachineFunction &mf) {
MF = &mf;
- TLI = tli;
+ TLI = getSubtarget().getTargetLowering();
+ TSI = getSubtarget().getSelectionDAGInfo();
Context = &mf.getFunction()->getContext();
}
@@ -1108,8 +1092,6 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
EVT EltVT = VT.getScalarType();
const ConstantInt *Elt = &Val;
- const TargetLowering *TLI = TM.getTargetLowering();
-
// In some cases the vector type is legal but the element type is illegal and
// needs to be promoted, for example v8i8 on ARM. In this case, promote the
// inserted value (the type does not need to match the vector element type).
@@ -1185,7 +1167,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
if (!N) {
N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
}
SDValue Result(N, 0);
@@ -1198,7 +1180,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
}
SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) {
- return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget);
+ return getConstant(Val, TLI->getPointerTy(), isTarget);
}
@@ -1227,7 +1209,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
if (!N) {
N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
}
SDValue Result(N, 0);
@@ -1263,7 +1245,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
- const TargetLowering *TLI = TM.getTargetLowering();
// Truncate (with sign-extension) the offset value to the pointer size.
unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType());
@@ -1290,7 +1271,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
DL.getDebugLoc(), GV, VT,
Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1305,7 +1286,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1325,7 +1306,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1336,8 +1317,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment =
- TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType());
+ Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
@@ -1352,7 +1332,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
Alignment, TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1364,8 +1344,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment =
- TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType());
+ Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
@@ -1380,7 +1359,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
Alignment, TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1398,7 +1377,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1412,7 +1391,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1426,7 +1405,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
if (N) return SDValue(N, 0);
N = new (NodeAllocator) VTSDNode(VT);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1434,7 +1413,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
SDNode *&N = ExternalSymbols[Sym];
if (N) return SDValue(N, 0);
N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1445,7 +1424,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
TargetFlags)];
if (N) return SDValue(N, 0);
N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1456,7 +1435,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
if (!CondCodeNodes[Cond]) {
CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
CondCodeNodes[Cond] = N;
- AllNodes.push_back(N);
+ InsertNode(N);
}
return SDValue(CondCodeNodes[Cond], 0);
@@ -1594,10 +1573,31 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
dl.getDebugLoc(), N1, N2,
MaskAlloc);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
+SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
+ MVT VT = SV.getSimpleValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = SV.getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElems)
+ Idx += NumElems;
+ else
+ Idx -= NumElems;
+ }
+ MaskVec.push_back(Idx);
+ }
+
+ SDValue Op0 = SV.getOperand(0);
+ SDValue Op1 = SV.getOperand(1);
+ return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]);
+}
+
SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
SDValue Val, SDValue DTy,
SDValue STy, SDValue Rnd, SDValue Sat,
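
The new getCommutedVectorShuffle swaps the two source vectors and moves every in-range mask element across the NumElems boundary. The mask arithmetic in isolation, runnable on plain integers:

    #include <cstdio>
    #include <vector>

    // Rewrite a shuffle mask for swapped operands: elements that referred to
    // the first vector now refer to the second, and vice versa; -1 (undef)
    // stays untouched.
    static std::vector<int> commuteMask(const std::vector<int> &Mask,
                                        int NumElems) {
      std::vector<int> Out;
      for (int Idx : Mask) {
        if (Idx >= 0)
          Idx = (Idx < NumElems) ? Idx + NumElems : Idx - NumElems;
        Out.push_back(Idx);
      }
      return Out;
    }

    int main() {
      // shuffle <4 x T> A, B with mask <0, 5, -1, 2>
      std::vector<int> Mask = {0, 5, -1, 2};
      for (int Idx : commuteMask(Mask, 4))
        std::printf("%d ", Idx);             // prints: 4 1 -1 6
      std::printf("\n");
      return 0;
    }

Combined with swapping Op0 and Op1, the rewritten mask selects exactly the same elements as the original shuffle.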
@@ -1619,7 +1619,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
dl.getDebugLoc(),
Ops, Code);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1633,7 +1633,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1647,7 +1647,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1663,7 +1663,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(),
dl.getDebugLoc(), Root, Label);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1686,7 +1686,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
TargetFlags);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1704,7 +1704,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1720,7 +1720,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1741,7 +1741,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
dl.getDebugLoc(),
VT, Ptr, SrcAS, DestAS);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -1749,7 +1749,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy);
+ EVT ShTy = TLI->getShiftAmountTy(LHSTy);
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -1762,7 +1762,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
- const TargetLowering *TLI = TM.getTargetLowering();
unsigned StackAlign =
std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign);
@@ -1777,7 +1776,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
VT2.getStoreSizeInBits())/8;
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
- const TargetLowering *TLI = TM.getTargetLowering();
const DataLayout *TD = TLI->getDataLayout();
unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
TD->getPrefTypeAlignment(Ty2));
@@ -1796,7 +1794,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
case ISD::SETFALSE2: return getConstant(0, VT);
case ISD::SETTRUE:
case ISD::SETTRUE2: {
- const TargetLowering *TLI = TM.getTargetLowering();
TargetLowering::BooleanContent Cnt =
TLI->getBooleanContents(N1->getValueType(0));
return getConstant(
@@ -1885,7 +1882,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
// Ensure that the constant occurs on the RHS.
ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
MVT CompVT = N1.getValueType().getSimpleVT();
- if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT))
+ if (!TLI->isCondCodeLegal(SwappedCond, CompVT))
return SDValue();
return getSetCC(dl, VT, N2, N1, SwappedCond);
@@ -1921,7 +1918,6 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
/// them in the KnownZero/KnownOne bitsets.
void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
APInt &KnownOne, unsigned Depth) const {
- const TargetLowering *TLI = TM.getTargetLowering();
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
@@ -2357,7 +2353,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
/// information. For example, immediately after an "SRA X, 2", we know that
/// the top 3 bits are all equal to each other, so we return 3.
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
- const TargetLowering *TLI = TM.getTargetLowering();
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarType().getSizeInBits();
@@ -2655,10 +2650,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
DL.getDebugLoc(), getVTList(VT));
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -2753,7 +2745,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
case ISD::FP_TO_UINT: {
integerPart x[2];
bool ignored;
- assert(integerPartWidth >= 64);
+ static_assert(integerPartWidth >= 64, "APFloat parts too small!");
// FIXME need to be more flexible about rounding mode.
APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
Opcode==ISD::FP_TO_SINT,
@@ -2772,6 +2764,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
}
}
+ // Constant fold unary operations with a vector integer operand.
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
+ if (BV->isConstant()) {
+ switch (Opcode) {
+ default:
+ // FIXME: Entirely reasonable to perform folding of other unary
+ // operations here as the need arises.
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ SmallVector<SDValue, 8> Ops;
+ for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ SDValue OpN = BV->getOperand(i);
+ // Let the above scalar folding handle the conversion of each
+ // element.
+ OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(),
+ OpN);
+ Ops.push_back(OpN);
+ }
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ }
+ }
+ }
+ }
+
unsigned OpOpcode = Operand.getNode()->getOpcode();
switch (Opcode) {
case ISD::TokenFactor:
@@ -2931,10 +2948,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
DL.getDebugLoc(), VTs, Operand);
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -3375,6 +3389,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Constant fold FP operations.
+ bool HasFPExceptions = TLI->hasFloatingPointExceptions();
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
if (N1CFP) {
@@ -3388,28 +3403,32 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
switch (Opcode) {
case ISD::FADD:
s = V1.add(V2, APFloat::rmNearestTiesToEven);
- if (s != APFloat::opInvalidOp)
+ if (!HasFPExceptions || s != APFloat::opInvalidOp)
return getConstantFP(V1, VT);
break;
case ISD::FSUB:
s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
- if (s!=APFloat::opInvalidOp)
+ if (!HasFPExceptions || s!=APFloat::opInvalidOp)
return getConstantFP(V1, VT);
break;
case ISD::FMUL:
s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
- if (s!=APFloat::opInvalidOp)
+ if (!HasFPExceptions || s!=APFloat::opInvalidOp)
return getConstantFP(V1, VT);
break;
case ISD::FDIV:
s = V1.divide(V2, APFloat::rmNearestTiesToEven);
- if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
+ s!=APFloat::opDivByZero)) {
return getConstantFP(V1, VT);
+ }
break;
case ISD::FREM :
s = V1.mod(V2, APFloat::rmNearestTiesToEven);
- if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+ if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
+ s!=APFloat::opDivByZero)) {
return getConstantFP(V1, VT);
+ }
break;
case ISD::FCOPYSIGN:
V1.copySign(V2);
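
The HasFPExceptions guard added above means a floating-point constant fold is only suppressed when the target honours FP exceptions and the operation would raise an invalid one; otherwise folding proceeds as before. A rough analogue using the C fenv interface instead of APFloat (tryFoldDiv is an invented helper, and real code would also need FENV_ACCESS handling):

    #include <cfenv>
    #include <cstdio>

    static bool tryFoldDiv(double A, double B, bool HasFPExceptions,
                           double &Out) {
      std::feclearexcept(FE_ALL_EXCEPT);
      double R = A / B;
      bool Trapped = std::fetestexcept(FE_INVALID | FE_DIVBYZERO) != 0;
      if (HasFPExceptions && Trapped)
        return false;          // keep the operation; it may trap at run time
      Out = R;                 // safe to evaluate at compile time
      return true;
    }

    int main() {
      double R;
      std::printf("fold 1/0, exceptions off: %d\n",
                  (int)tryFoldDiv(1, 0, false, R));
      std::printf("fold 1/0, exceptions on:  %d\n",
                  (int)tryFoldDiv(1, 0, true, R));
      return 0;
    }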
@@ -3526,10 +3545,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -3633,10 +3649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
DL.getDebugLoc(), VTs, N1, N2, N3);
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -3802,7 +3815,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
if (VT == MVT::Other) {
unsigned AS = 0;
if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
- TLI.allowsUnalignedMemoryAccesses(VT, AS)) {
+ TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) {
VT = TLI.getPointerTy();
} else {
switch (DstAlign & 7) {
@@ -3862,7 +3875,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned AS = 0;
if (NumMemOps && AllowOverlap &&
VTSize >= 8 && NewVTSize < Size &&
- TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast)
+ TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast)
VTSize = Size;
else {
VT = NewVT;
@@ -3926,7 +3939,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
// Don't promote to an alignment that would require dynamic stack
// realignment.
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
@@ -3982,7 +3995,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, dl, DAG),
SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
- MinAlign(SrcAlign, SrcOff));
+ false, MinAlign(SrcAlign, SrcOff));
Store = DAG.getTruncStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, dl, DAG),
DstPtrInfo.getWithOffset(DstOff), VT, isVol,
@@ -4202,9 +4215,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
SDValue Result =
- TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
- isVol, AlwaysInline,
- DstPtrInfo, SrcPtrInfo);
+ TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -4223,8 +4235,6 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
// beyond the given memory regions. But fixing this isn't easy, and most
// people don't care.
- const TargetLowering *TLI = TM.getTargetLowering();
-
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -4270,17 +4280,14 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result =
- TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstPtrInfo, SrcPtrInfo);
+ SDValue Result = TSI->EmitTargetCodeForMemmove(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
- const TargetLowering *TLI = TM.getTargetLowering();
-
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -4325,31 +4332,22 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
- SDValue Result =
- TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstPtrInfo);
+ SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src,
+ Size, Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
// Emit a library call.
- const TargetLowering *TLI = TM.getTargetLowering();
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
Args.push_back(Entry);
- // Extend or truncate the argument to be an i32 value for the call.
- if (Src.getValueType().bitsGT(MVT::i32))
- Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
- else
- Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
Entry.Node = Src;
- Entry.Ty = Type::getInt32Ty(*getContext());
- Entry.isSExt = true;
+ Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
Args.push_back(Entry);
Entry.Node = Size;
Entry.Ty = IntPtrTy;
- Entry.isSExt = false;
Args.push_back(Entry);
// FIXME: pass in SDLoc
@@ -4396,7 +4394,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SuccessOrdering, FailureOrdering,
SynchScope);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4541,7 +4539,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
- bool ReadMem, bool WriteMem) {
+ bool ReadMem, bool WriteMem, unsigned Size) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
@@ -4553,8 +4551,10 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
Flags |= MachineMemOperand::MOLoad;
if (Vol)
Flags |= MachineMemOperand::MOVolatile;
+ if (!Size)
+ Size = MemVT.getStoreSize();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
@@ -4593,7 +4593,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
dl.getDebugLoc(), VTList, Ops,
MemVT, MMO);
}
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4637,7 +4637,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal, bool isInvariant,
- unsigned Alignment, const MDNode *TBAAInfo,
+ unsigned Alignment, const AAMDNodes &AAInfo,
const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
@@ -4660,7 +4660,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- TBAAInfo, Ranges);
+ AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
@@ -4709,7 +4709,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
dl.getDebugLoc(), VTs, AM, ExtType,
MemVT, MMO);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4718,12 +4718,12 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
bool isInvariant, unsigned Alignment,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
- TBAAInfo, Ranges);
+ AAInfo, Ranges);
}
SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
@@ -4738,11 +4738,12 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isInvariant, unsigned Alignment,
+ const AAMDNodes &AAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
- TBAAInfo);
+ PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant,
+ Alignment, AAInfo);
}
@@ -4769,7 +4770,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base,
SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ unsigned Alignment, const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4788,7 +4789,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags,
Val.getValueType().getStoreSize(), Alignment,
- TBAAInfo);
+ AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
@@ -4816,7 +4817,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
dl.getDebugLoc(), VTs,
ISD::UNINDEXED, false, VT, MMO);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4824,7 +4825,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
EVT SVT,bool isVolatile, bool isNonTemporal,
unsigned Alignment,
- const MDNode *TBAAInfo) {
+ const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4842,7 +4843,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
- TBAAInfo);
+ AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -4885,7 +4886,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
dl.getDebugLoc(), VTs,
ISD::UNINDEXED, true, SVT, MMO);
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4912,7 +4913,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
ST->getMemoryVT(),
ST->getMemOperand());
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -4991,10 +4992,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
VTs, Ops);
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
@@ -5074,15 +5072,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
VTList, Ops);
}
}
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifySDNode(N);
-#endif
+ InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) {
- return getNode(Opcode, DL, VTList, ArrayRef<SDValue>());
+ return getNode(Opcode, DL, VTList, None);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
@@ -5464,6 +5459,10 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {
/// node, and because it doesn't require CSE recalculation for any of
/// the node's users.
///
+/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG.
+/// As a consequence it isn't appropriate to use from within the DAG combiner or
+/// the legalizer which maintain worklists that would need to be updated when
+/// deleting things.
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
SDVTList VTs, ArrayRef<SDValue> Ops) {
unsigned NumOps = Ops.size();
@@ -5530,10 +5529,9 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
// new operands.
if (!DeadNodeSet.empty()) {
SmallVector<SDNode *, 16> DeadNodes;
- for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
- E = DeadNodeSet.end(); I != E; ++I)
- if ((*I)->use_empty())
- DeadNodes.push_back(*I);
+ for (SDNode *N : DeadNodeSet)
+ if (N->use_empty())
+ DeadNodes.push_back(N);
RemoveDeadNodes(DeadNodes);
}
@@ -5702,10 +5700,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
if (DoCSE)
CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
-#ifndef NDEBUG
- VerifyMachineNode(N);
-#endif
+ InsertNode(N);
return N;
}
@@ -5751,26 +5746,24 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
/// getDbgValue - Creates a SDDbgValue node.
///
/// SDNode
-SDDbgValue *
-SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R,
- bool IsIndirect, uint64_t Off,
- DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O);
+SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
+ unsigned R, bool IsIndirect, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
}
/// Constant
-SDDbgValue *
-SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C,
- uint64_t Off,
- DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
+ const Value *C, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O);
}
/// FrameIndex
-SDDbgValue *
-SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
- DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
+ unsigned FI, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O);
}
namespace {
@@ -6159,9 +6152,11 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
- DbgInfo->add(DB, SD, isParameter);
- if (SD)
+ if (SD) {
+ assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
SD->setHasDebugValue(true);
+ }
+ DbgInfo->add(DB, SD, isParameter);
}
/// TransferDbgValues - Transfer SDDbgValues.
@@ -6176,10 +6171,10 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
I != E; ++I) {
SDDbgValue *Dbg = *I;
if (Dbg->getKind() == SDDbgValue::SDNODE) {
- SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
- Dbg->isIndirect(),
- Dbg->getOffset(), Dbg->getDebugLoc(),
- Dbg->getOrder());
+ SDDbgValue *Clone =
+ getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
+ To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
+ Dbg->getDebugLoc(), Dbg->getOrder());
ClonedDVs.push_back(Clone);
}
}
@@ -6217,7 +6212,10 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(isNonTemporal() == MMO->isNonTemporal() &&
"Non-temporal encoding error!");
- assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+ // We check here that the size of the memory operand fits within the size of
+ // the MMO. This is because the MMO might indicate only a possible address
+ // range instead of specifying the affected memory addresses precisely.
+ assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
}
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
@@ -6227,7 +6225,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
- assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
+ assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
}
/// Profile - Gather unique data for the node.
@@ -6371,7 +6369,7 @@ bool SDNode::hasPredecessor(const SDNode *N) const {
bool
SDNode::hasPredecessorHelper(const SDNode *N,
- SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallPtrSetImpl<const SDNode *> &Visited,
SmallVectorImpl<const SDNode *> &Worklist) const {
if (Visited.empty()) {
Worklist.push_back(this);
@@ -6387,7 +6385,7 @@ SDNode::hasPredecessorHelper(const SDNode *N,
const SDNode *M = Worklist.pop_back_val();
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
SDNode *Op = M->getOperand(i).getNode();
- if (Visited.insert(Op))
+ if (Visited.insert(Op).second)
Worklist.push_back(Op);
if (Op == N)
return true;
@@ -6427,7 +6425,6 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
// A vector operand; extract a single element.
- const TargetLowering *TLI = TM.getTargetLowering();
EVT OperandEltVT = OperandVT.getVectorElementType();
Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
OperandEltVT,
@@ -6507,7 +6504,6 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
- const TargetLowering *TLI = TM.getTargetLowering();
bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1);
bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
if (isGA1 && isGA2 && GV1 == GV2)
@@ -6522,7 +6518,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
const GlobalValue *GV;
int64_t GVOffset = 0;
- const TargetLowering *TLI = TM.getTargetLowering();
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
@@ -6749,8 +6744,8 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
- SmallPtrSet<const SDNode*, 32> &Visited,
- SmallPtrSet<const SDNode*, 32> &Checked,
+ SmallPtrSetImpl<const SDNode*> &Visited,
+ SmallPtrSetImpl<const SDNode*> &Checked,
const llvm::SelectionDAG *DAG) {
// If this node has already been checked, don't check it again.
if (Checked.count(N))
@@ -6758,7 +6753,7 @@ static void checkForCyclesHelper(const SDNode *N,
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
- if (!Visited.insert(N)) {
+ if (!Visited.insert(N).second) {
errs() << "Detected cycle in SelectionDAG\n";
dbgs() << "Offending node:\n";
N->dumprFull(DAG); dbgs() << "\n";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 28d8e98..8f582f1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -58,6 +58,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -645,8 +646,10 @@ namespace {
/// specified value into the registers specified by this object. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
- void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
- SDValue &Chain, SDValue *Flag, const Value *V) const;
+ void
+ getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
+ SDValue *Flag, const Value *V,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
@@ -761,9 +764,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
- SDValue &Chain, SDValue *Flag,
- const Value *V) const {
+ SDValue &Chain, SDValue *Flag, const Value *V,
+ ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ISD::NodeType ExtendKind = PreferredExtendType;
// Get the list of the values's legal parts.
unsigned NumRegs = Regs.size();
@@ -772,8 +776,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
EVT ValueVT = ValueVTs[Value];
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
MVT RegisterVT = RegVTs[Value];
- ISD::NodeType ExtendKind =
- TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
+
+ if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
+ ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT, V, ExtendKind);
@@ -860,7 +865,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- DL = DAG.getTarget().getDataLayout();
+ DL = DAG.getSubtarget().getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -988,15 +993,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DebugLoc dl = DDI.getdl();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
MDNode *Variable = DI->getVariable();
+ MDNode *Expr = DI->getExpression();
uint64_t Offset = DI->getOffset();
// A dbg.value for an alloca is always indirect.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) {
- SDV = DAG.getDbgValue(Variable, Val.getNode(),
- Val.getResNo(), IsIndirect,
- Offset, dl, DbgSDNodeOrder);
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect,
+ Val)) {
+ SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
+ IsIndirect, Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1018,8 +1024,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
if (It != FuncInfo.ValueMap.end()) {
unsigned InReg = It->second;
- RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(),
- InReg, V->getType());
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
+ V->getType());
SDValue Chain = DAG.getEntryNode();
N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
resolveDanglingDebugInfo(V, N);
@@ -1050,10 +1056,10 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const Constant *C = dyn_cast<Constant>(V)) {
- EVT VT = TLI->getValueType(V->getType(), true);
+ EVT VT = TLI.getValueType(V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return DAG.getConstant(*CI, VT);
@@ -1063,7 +1069,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
- return DAG.getConstant(0, TLI->getPointerTy(AS));
+ return DAG.getConstant(0, TLI.getPointerTy(AS));
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
@@ -1117,7 +1123,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
"Unknown struct or array constant!");
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, C->getType(), ValueVTs);
+ ComputeValueVTs(TLI, C->getType(), ValueVTs);
unsigned NumElts = ValueVTs.size();
if (NumElts == 0)
return SDValue(); // empty struct
@@ -1149,7 +1155,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Ops.push_back(getValue(CV->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
- EVT EltVT = TLI->getValueType(VecTy->getElementType());
+ EVT EltVT = TLI.getValueType(VecTy->getElementType());
SDValue Op;
if (EltVT.isFloatingPoint())
@@ -1169,13 +1175,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
- return DAG.getFrameIndex(SI->second, TLI->getPointerTy());
+ return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
- RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType());
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -1184,7 +1190,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<SDValue, 8> OutVals;
@@ -1197,7 +1203,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
- ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()),
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
PtrValueVTs);
SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
@@ -1205,7 +1211,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
@@ -1224,7 +1230,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs);
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
@@ -1242,10 +1248,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ExtendKind = ISD::ZERO_EXTEND;
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
- VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind);
+ VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
- unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT);
- MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT);
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
@@ -1275,9 +1281,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction()->getCallingConv();
- Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg,
- Outs, OutVals, getCurSDLoc(),
- DAG);
+ Chain = DAG.getTargetLoweringInfo().LowerReturn(
+ Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
// Verify that the target's LowerReturn behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
@@ -1601,10 +1606,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (!TM.getTargetLowering()->isJumpExpensive() &&
- BOp->hasOneUse() &&
- (BOp->getOpcode() == Instruction::And ||
- BOp->getOpcode() == Instruction::Or)) {
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() &&
+ BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And ||
+ BOp->getOpcode() == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
getEdgeWeight(BrMBB, Succ1MBB));
@@ -1724,7 +1728,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
- EVT PTy = TM.getTargetLowering()->getPointerTy();
+ EVT PTy = DAG.getTargetLoweringInfo().getPointerTy();
SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
@@ -1752,10 +1756,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// can be used as an index into the jump table in a subsequent basic block.
// This value may be smaller or larger than the target's pointer type, and
// therefore require extension or truncation.
- const TargetLowering *TLI = TM.getTargetLowering();
- SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy());
- unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy());
+ unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
@@ -1763,12 +1767,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the largest
// case in the switch.
- SDValue CMP = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub,
- DAG.getConstant(JTH.Last - JTH.First,VT),
- ISD::SETUGT);
+ SDValue CMP =
+ DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1799,8 +1801,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB) {
// First create the loads to the guard/stack slot for the comparison.
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT PtrTy = TLI->getPointerTy();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy();
MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI->getStackProtectorIndex();
@@ -1810,10 +1812,22 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
unsigned Align =
- TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
- SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
- GuardPtr, MachinePointerInfo(IRGuard, 0),
- true, false, false, Align);
+ TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType());
+
+ SDValue Guard;
+
+ // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the
+ // guard value from the virtual register holding the value. Otherwise, emit a
+ // volatile load to retrieve the stack guard value.
+ unsigned GuardReg = SPD.getGuardReg();
+
+ if (GuardReg && TLI.useLoadStackGuardNode())
+ Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg,
+ PtrTy);
+ else
+ Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
+ GuardPtr, MachinePointerInfo(IRGuard, 0),
+ true, false, false, Align);
SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(),
StackSlotPtr,
@@ -1824,11 +1838,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
EVT VT = Guard.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot);
- SDValue Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(0, VT),
- ISD::SETNE);
+ SDValue Cmp =
+ DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(0, VT), ISD::SETNE);
// If the sub is not 0, then we know the guard/stackslot do not equal, so
// branch to failure MBB.
@@ -1853,10 +1866,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
- const TargetLowering *TLI = TM.getTargetLowering();
- SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL,
- MVT::isVoid, nullptr, 0, false,
- getCurSDLoc(), false, false).second;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Chain =
+ TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
+ nullptr, 0, false, getCurSDLoc(), false, false).second;
DAG.setRoot(Chain);
}
@@ -1871,16 +1884,15 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
DAG.getConstant(B.First, VT));
// Check range
- const TargetLowering *TLI = TM.getTargetLowering();
- SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(),
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue RangeCmp =
+ DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(),
Sub.getValueType()),
- Sub, DAG.getConstant(B.Range, VT),
- ISD::SETUGT);
+ Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT);
// Determine the type of the test operands.
bool UsePtrType = false;
- if (!TLI->isTypeLegal(VT))
+ if (!TLI.isTypeLegal(VT))
UsePtrType = true;
else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
@@ -1892,7 +1904,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
}
}
if (UsePtrType) {
- VT = TLI->getPointerTy();
+ VT = TLI.getPointerTy();
Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT);
}
@@ -1936,22 +1948,18 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
Reg, VT);
SDValue Cmp;
unsigned PopCount = CountPopulation_64(B.Mask);
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
- Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(), VT),
- ShiftOp,
- DAG.getConstant(countTrailingZeros(B.Mask), VT),
- ISD::SETEQ);
+ Cmp = DAG.getSetCC(
+ getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
+ DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
- Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(), VT),
- ShiftOp,
- DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
- ISD::SETNE);
+ Cmp = DAG.getSetCC(
+ getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp,
+ DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT,
@@ -1961,9 +1969,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(),
VT, SwitchVal, DAG.getConstant(B.Mask, VT));
Cmp = DAG.getSetCC(getCurSDLoc(),
- TLI->getSetCCResultType(*DAG.getContext(), VT),
- AndOp, DAG.getConstant(0, VT),
- ISD::SETNE);
+ TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp,
+ DAG.getConstant(0, VT), ISD::SETNE);
}
// The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
@@ -2001,8 +2008,17 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
if (isa<InlineAsm>(Callee))
visitInlineAsm(&I);
else if (Fn && Fn->isIntrinsic()) {
- assert(Fn->getIntrinsicID() == Intrinsic::donothing);
- // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ switch (Fn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Cannot invoke this intrinsic");
+ case Intrinsic::donothing:
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ visitPatchpoint(&I, LandingPad);
+ break;
+ }
} else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
@@ -2034,26 +2050,26 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
- const TargetLowering *TLI = TM.getTargetLowering();
- if (TLI->getExceptionPointerRegister() == 0 &&
- TLI->getExceptionSelectorRegister() == 0)
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
return;
SmallVector<EVT, 2> ValueVTs;
- ComputeValueVTs(*TLI, LP.getType(), ValueVTs);
+ ComputeValueVTs(TLI, LP.getType(), ValueVTs);
assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
Ops[0] = DAG.getZExtOrTrunc(
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()),
- getCurSDLoc(), ValueVTs[0]);
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
+ getCurSDLoc(), ValueVTs[0]);
Ops[1] = DAG.getZExtOrTrunc(
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
- FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()),
- getCurSDLoc(), ValueVTs[1]);
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()),
+ getCurSDLoc(), ValueVTs[1]);
// Merge into one.
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
@@ -2218,9 +2234,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return TLI.supportJumpTables() &&
- (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+ return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}
static APInt ComputeRange(const APInt &First, const APInt &Last) {
@@ -2245,8 +2260,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
TSize += I->size();
- const TargetLowering *TLI = TM.getTargetLowering();
- if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries()))
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
return false;
APInt Range = ComputeRange(First, Last);
@@ -2327,7 +2342,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
}
// Create a jump table index for this jump table.
- unsigned JTEncoding = TLI->getJumpTableEncoding();
+ unsigned JTEncoding = TLI.getJumpTableEncoding();
unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
->createJumpTableIndex(DestBBs);
@@ -2347,7 +2362,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
- MachineBasicBlock* Default,
MachineBasicBlock* SwitchBB) {
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
@@ -2413,8 +2427,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
RSize -= J->size();
}
- const TargetLowering *TLI = TM.getTargetLowering();
- if (areJTsAllowed(*TLI)) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (areJTsAllowed(TLI)) {
// If our case is dense we *really* should handle it earlier!
assert((FMetric > 0) && "Should handle dense range earlier!");
} else {
@@ -2484,8 +2498,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
const Value* SV,
MachineBasicBlock* Default,
MachineBasicBlock* SwitchBB) {
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT PTy = TLI->getPointerTy();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PTy = TLI.getPointerTy();
unsigned IntPtrBits = PTy.getSizeInBits();
Case& FrontCase = *CR.Range.first;
@@ -2496,7 +2510,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
MachineFunction *CurMF = FuncInfo.MF;
// If target does not have legal shift left, do not emit bit tests at all.
- if (!TLI->isOperationLegal(ISD::SHL, PTy))
+ if (!TLI.isOperationLegal(ISD::SHL, PTy))
return false;
size_t numCmps = 0;
@@ -2601,21 +2615,19 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
BitTestBlock BTB(lowBound, cmpRange, SV,
-1U, MVT::Other, (CR.CaseBB == SwitchBB),
- CR.CaseBB, Default, BTC);
+ CR.CaseBB, Default, std::move(BTC));
if (CR.CaseBB == SwitchBB)
visitBitTestHeader(BTB, SwitchBB);
- BitTestCases.push_back(BTB);
+ BitTestCases.push_back(std::move(BTB));
return true;
}
/// Clusterify - Transform simple list of Cases into list of CaseRange's
-size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
- const SwitchInst& SI) {
- size_t numCmps = 0;
-
+void SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+ const SwitchInst& SI) {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
@@ -2653,13 +2665,15 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
}
}
- for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
- if (I->Low != I->High)
- // A range counts double, since it requires two compares.
- ++numCmps;
- }
+ DEBUG({
+ size_t numCmps = 0;
+ for (auto &I : Cases)
+ // A range counts double, since it requires two compares.
+ numCmps += I.Low != I.High ? 2 : 1;
- return numCmps;
+ dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total compares: " << numCmps << '\n';
+ });
}
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
@@ -2701,10 +2715,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// representing each one, and sort the vector so that we can efficiently
// create a binary search tree from them.
CaseVector Cases;
- size_t numCmps = Clusterify(Cases, SI);
- DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << '\n');
- (void)numCmps;
+ Clusterify(Cases, SI);
// Get the Value to be switched on and default basic blocks, which will be
// inserted into CaseBlock records, representing basic blocks in the binary
@@ -2738,7 +2749,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Emit binary tree. We need to pick a pivot, and push left and right ranges
// onto the worklist. Leaves are handled via a handleSmallSwitchRange() call.
- handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
+ handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB);
}
}
@@ -2749,7 +2760,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
SmallSet<BasicBlock*, 32> Done;
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
BasicBlock *BB = I.getSuccessor(i);
- bool Inserted = Done.insert(BB);
+ bool Inserted = Done.insert(BB).second;
if (!Inserted)
continue;
@@ -2806,7 +2817,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType());
+ EVT ShiftTy =
+ DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -2861,8 +2873,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) {
if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
!isa<ConstantSDNode>(Op1) &&
isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
- setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2,
- getCurSDLoc(), DAG));
+ setValue(&I, DAG.getTargetLoweringInfo()
+ .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG));
else
setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(),
Op1, Op2));
@@ -2878,7 +2890,7 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
@@ -2893,13 +2905,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
ISD::CondCode Condition = getFCmpCondCode(predicate);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
@@ -2926,7 +2938,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
}
@@ -2934,7 +2946,7 @@ void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for the same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
}
@@ -2942,52 +2954,51 @@ void SelectionDAGBuilder::visitSExt(const User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// SExt also can't be a cast to bool for the same reason. So, nothing much to do
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT DestVT = TLI->getValueType(I.getType());
- setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(),
- DestVT, N,
- DAG.getTargetConstant(0, TLI->getPointerTy())));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
// FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToSI(const User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitUIToFP(const User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
@@ -2995,7 +3006,7 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
}
@@ -3003,13 +3014,13 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType());
// BitCast assures us that source and destination are the same size so this is
// either a BITCAST or a no-op.
@@ -3031,7 +3042,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
SDValue N = getValue(SV);
- EVT DestVT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
@@ -3049,8 +3060,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) {
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)),
getCurSDLoc(), TLI.getVectorIdxTy());
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
- TM.getTargetLowering()->getValueType(I.getType()),
- InVec, InVal, InIdx));
+ TLI.getValueType(I.getType()), InVec, InVal, InIdx));
}
void SelectionDAGBuilder::visitExtractElement(const User &I) {
@@ -3059,8 +3069,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)),
getCurSDLoc(), TLI.getVectorIdxTy());
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
- TM.getTargetLowering()->getValueType(I.getType()),
- InVec, InIdx));
+ TLI.getValueType(I.getType()), InVec, InIdx));
}
// Utility for visitShuffleVector - Return true if every element in Mask,
@@ -3082,8 +3091,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
unsigned MaskNumElts = Mask.size();
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT VT = TLI->getValueType(I.getType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -3202,9 +3211,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
else
- Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT,
- Src, DAG.getConstant(StartIdx[Input],
- TLI->getVectorIdxTy()));
+ Src = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src,
+ DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy()));
}
// Calculate new mask.
@@ -3230,7 +3239,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// replacing the shuffle with extract and build vector.
// to insert and build vector.
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = TLI->getVectorIdxTy();
+ EVT IdxVT = TLI.getVectorIdxTy();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
@@ -3262,16 +3271,22 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
- ComputeValueVTs(*TLI, AggTy, AggValueVTs);
+ ComputeValueVTs(TLI, AggTy, AggValueVTs);
SmallVector<EVT, 4> ValValueVTs;
- ComputeValueVTs(*TLI, ValTy, ValValueVTs);
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
unsigned NumAggValues = AggValueVTs.size();
unsigned NumValValues = ValValueVTs.size();
SmallVector<SDValue, 4> Values(NumAggValues);
+ // Ignore an insertvalue that produces an empty object
+ if (!NumAggValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
SDValue Agg = getValue(Op0);
unsigned i = 0;
// Copy the beginning value(s) from the original aggregate.
@@ -3302,9 +3317,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
- ComputeValueVTs(*TLI, ValTy, ValValueVTs);
+ ComputeValueVTs(TLI, ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
@@ -3353,13 +3368,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = cast<SequentialType>(Ty)->getElementType();
// If this is a constant subscript, handle it quickly.
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
uint64_t Offs =
DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
- EVT PTy = TLI->getPointerTy(AS);
+ EVT PTy = TLI.getPointerTy(AS);
unsigned PtrBits = PTy.getSizeInBits();
if (PtrBits < 64)
OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
@@ -3373,8 +3388,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
// N = N + Idx * ElementSize;
- APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
- DL->getTypeAllocSize(Ty));
+ APInt ElementSize =
+ APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -3411,15 +3426,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
return; // getValue will auto-populate this.
Type *Ty = I.getAllocatedType();
- const TargetLowering *TLI = TM.getTargetLowering();
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
unsigned Align =
- std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
- I.getAlignment());
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+ I.getAlignment());
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI->getPointerTy();
+ EVT IntPtr = TLI.getPointerTy();
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr);
@@ -3430,7 +3445,8 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ DAG.getSubtarget().getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
@@ -3464,15 +3480,18 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata("nontemporal") != nullptr;
- bool isInvariant = I.getMetadata("invariant.load") != nullptr;
+ bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
+ bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
unsigned Alignment = I.getAlignment();
- const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3483,7 +3502,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (AA->pointsToConstantMemory(
- AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -3492,9 +3511,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Root = DAG.getRoot();
}
- const TargetLowering *TLI = TM.getTargetLowering();
if (isVolatile)
- Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+ Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
@@ -3520,7 +3538,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
- isNonTemporal, isInvariant, Alignment, TBAAInfo,
+ isNonTemporal, isInvariant, Alignment, AAInfo,
Ranges);
Values[i] = L;
@@ -3549,7 +3567,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(),
+ ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3565,9 +3584,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
NumValues));
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata("nontemporal") != nullptr;
+ bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
unsigned Alignment = I.getAlignment();
- const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
@@ -3583,7 +3604,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue St = DAG.getStore(Root, getCurSDLoc(),
SDValue(Src.getNode(), Src.getResNo() + i),
Add, MachinePointerInfo(PtrV, Offsets[i]),
- isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ isVolatile, isNonTemporal, Alignment, AAInfo);
Chains[ChainI] = St;
}
@@ -3592,30 +3613,6 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
DAG.setRoot(StoreNode);
}
-static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
- SynchronizationScope Scope,
- bool Before, SDLoc dl,
- SelectionDAG &DAG,
- const TargetLowering &TLI) {
- // Fence, if necessary
- if (Before) {
- if (Order == AcquireRelease || Order == SequentiallyConsistent)
- Order = Release;
- else if (Order == Acquire || Order == Monotonic || Order == Unordered)
- return Chain;
- } else {
- if (Order == AcquireRelease)
- Order = Acquire;
- else if (Order == Release || Order == Monotonic || Order == Unordered)
- return Chain;
- }
- SDValue Ops[3];
- Ops[0] = Chain;
- Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
- Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
- return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
-}
-
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -3624,27 +3621,16 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDValue InChain = getRoot();
- const TargetLowering *TLI = TM.getTargetLowering();
- if (TLI->getInsertFencesForAtomic())
- InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
- DAG, *TLI);
-
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
SDValue L = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
- 0 /* Alignment */,
- TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
- TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope);
+ /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope);
SDValue OutChain = L.getValue(2);
- if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
- DAG, *TLI);
-
setValue(&I, L);
DAG.setRoot(OutChain);
}
@@ -3671,38 +3657,28 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDValue InChain = getRoot();
- const TargetLowering *TLI = TM.getTargetLowering();
- if (TLI->getInsertFencesForAtomic())
- InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
- DAG, *TLI);
-
SDValue L =
DAG.getAtomic(NT, dl,
getValue(I.getValOperand()).getSimpleValueType(),
InChain,
getValue(I.getPointerOperand()),
getValue(I.getValOperand()),
- I.getPointerOperand(), 0 /* Alignment */,
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
- Scope);
+ I.getPointerOperand(),
+ /* Alignment=*/ 0, Order, Scope);
SDValue OutChain = L.getValue(1);
- if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
- DAG, *TLI);
-
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
SDLoc dl = getCurSDLoc();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
- Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy());
- Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy());
+ Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+ Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
@@ -3713,8 +3689,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDValue InChain = getRoot();
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT VT = TLI->getValueType(I.getType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getType());
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
@@ -3728,19 +3704,14 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
I.getAlignment() ? I.getAlignment() :
DAG.getEVTAlignment(VT));
- InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
+ InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
getValue(I.getPointerOperand()), MMO,
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
- Scope);
+ Order, Scope);
SDValue OutChain = L.getValue(1);
- if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
- DAG, *TLI);
-
setValue(&I, L);
DAG.setRoot(OutChain);
}
@@ -3753,28 +3724,19 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue InChain = getRoot();
- const TargetLowering *TLI = TM.getTargetLowering();
- EVT VT = TLI->getValueType(I.getValueOperand()->getType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(I.getValueOperand()->getType());
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
- if (TLI->getInsertFencesForAtomic())
- InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
- DAG, *TLI);
-
SDValue OutChain =
DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
InChain,
getValue(I.getPointerOperand()),
getValue(I.getValueOperand()),
I.getPointerOperand(), I.getAlignment(),
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
- Scope);
-
- if (TLI->getInsertFencesForAtomic())
- OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
- DAG, *TLI);
+ Order, Scope);
DAG.setRoot(OutChain);
}
@@ -3799,13 +3761,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
  // Info is set by getTgtMemIntrinsic
TargetLowering::IntrinsicInfo Info;
- const TargetLowering *TLI = TM.getTargetLowering();
- bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy()));
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
@@ -3814,7 +3776,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, I.getType(), ValueVTs);
+ ComputeValueVTs(TLI, I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
@@ -3829,7 +3791,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.vol,
- Info.readMem, Info.writeMem);
+ Info.readMem, Info.writeMem, Info.size);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -3848,7 +3810,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (!I.getType()->isVoidTy()) {
if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
- EVT VT = TLI->getValueType(PTy);
+ EVT VT = TLI.getValueType(PTy);
Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
}
@@ -4555,16 +4517,17 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
-bool
-SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
- int64_t Offset, bool IsIndirect,
- const SDValue &N) {
+bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V,
+ MDNode *Variable,
+ MDNode *Expr, int64_t Offset,
+ bool IsIndirect,
+ const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
- const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Ignore inlined function arguments here.
DIVariable DV(Variable);
@@ -4610,14 +4573,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
return false;
if (Op->isReg())
- FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(),
- TII->get(TargetOpcode::DBG_VALUE),
- IsIndirect,
- Op->getReg(), Offset, Variable));
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE),
+ IsIndirect, Op->getReg(), Offset, Variable, Expr));
else
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op).addImm(Offset).addMetadata(Variable));
+ BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE))
+ .addOperand(*Op)
+ .addImm(Offset)
+ .addMetadata(Variable)
+ .addMetadata(Expr));
return true;
}
@@ -4635,7 +4600,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
/// otherwise lower it and return null.
const char *
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
SDValue Res;
@@ -4649,17 +4614,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::vaend: visitVAEnd(I); return nullptr;
case Intrinsic::vacopy: visitVACopy(I); return nullptr;
case Intrinsic::returnaddress:
- setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(),
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(),
getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::frameaddress:
- setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(),
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(),
getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
- EVT VT = TM.getTargetLowering()->getValueType(I.getType());
+ EVT VT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName));
return nullptr;
}
@@ -4673,9 +4638,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::setjmp:
- return &"_setjmp"[!TLI->usesUnderscoreSetJmp()];
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
case Intrinsic::longjmp:
- return &"_longjmp"[!TLI->usesUnderscoreLongJmp()];
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
// Assert for address < 256 since we support only user defined address
// spaces.
@@ -4736,6 +4701,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
MDNode *Variable = DI.getVariable();
+ MDNode *Expression = DI.getExpression();
const Value *Address = DI.getAddress();
DIVariable DIVar(Variable);
assert((!DIVar || DIVar.isVariable()) &&
@@ -4771,16 +4737,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (FINode)
// Byval parameter. We have a frame index at this point.
- SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(),
- 0, dl, SDNodeOrder);
+ SDV = DAG.getFrameIndexDbgValue(
+ Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder);
else {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, 0, false, N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N);
return nullptr;
}
} else if (AI)
- SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, 0, dl, SDNodeOrder);
else {
// Can't do anything with other non-AI cases yet.
@@ -4793,7 +4759,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) {
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false,
+ N)) {
        // If the variable is pinned by an alloca in a dominating bb then
// use StaticAllocaMap.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
@@ -4801,7 +4768,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- SDV = DAG.getFrameIndexDbgValue(Variable, SI->second,
+ SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second,
0, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
return nullptr;
@@ -4822,6 +4789,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
MDNode *Variable = DI.getVariable();
+ MDNode *Expression = DI.getExpression();
uint64_t Offset = DI.getOffset();
const Value *V = DI.getValue();
if (!V)
@@ -4829,7 +4797,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDDbgValue *SDV;
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
- SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl,
+ SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
} else {
// Do not use getValue() in here; we don't want to generate code at
@@ -4841,10 +4810,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (N.getNode()) {
// A dbg.value for an alloca is always indirect.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
- if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) {
- SDV = DAG.getDbgValue(Variable, N.getNode(),
- N.getResNo(), IsIndirect,
- Offset, dl, SDNodeOrder);
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset,
+ IsIndirect, N)) {
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
+ IsIndirect, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
} else if (!V->use_empty() ) {
@@ -4878,7 +4847,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
- GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
+ GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, MVT::i32);
setValue(&I, Res);
@@ -4899,15 +4868,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
case Intrinsic::eh_dwarf_cfa: {
SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
- TLI->getPointerTy());
+ TLI.getPointerTy());
SDValue Offset = DAG.getNode(ISD::ADD, sdl,
CfaArg.getValueType(),
DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
CfaArg.getValueType()),
CfaArg);
- SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl,
- TLI->getPointerTy(),
- DAG.getConstant(0, TLI->getPointerTy()));
+ SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(),
+ DAG.getConstant(0, TLI.getPointerTy()));
setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
FA, Offset));
return nullptr;
@@ -4997,7 +4965,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
- EVT DestVT = TLI->getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
DAG.getConstant(NewIntrinsic, MVT::i32),
@@ -5009,14 +4977,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vinsertf128_ps_256:
case Intrinsic::x86_avx_vinsertf128_si_256:
case Intrinsic::x86_avx2_vinserti128: {
- EVT DestVT = TLI->getValueType(I.getType());
- EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType());
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
ElVT.getVectorNumElements();
- Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- DAG.getConstant(Idx, TLI->getVectorIdxTy()));
+ Res =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
+ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
+ DAG.getConstant(Idx, TLI.getVectorIdxTy()));
setValue(&I, Res);
return nullptr;
}
@@ -5024,12 +4992,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vextractf128_ps_256:
case Intrinsic::x86_avx_vextractf128_si_256:
case Intrinsic::x86_avx2_vextracti128: {
- EVT DestVT = TLI->getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
DestVT.getVectorNumElements();
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
getValue(I.getArgOperand(0)),
- DAG.getConstant(Idx, TLI->getVectorIdxTy()));
+ DAG.getConstant(Idx, TLI.getVectorIdxTy()));
setValue(&I, Res);
return nullptr;
}
@@ -5055,7 +5023,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertus: Code = ISD::CVT_US; break;
case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
}
- EVT DestVT = TLI->getValueType(I.getType());
+ EVT DestVT = TLI.getValueType(I.getType());
const Value *Op1 = I.getArgOperand(0);
Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1),
DAG.getValueType(DestVT),
@@ -5071,23 +5039,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)), DAG));
return nullptr;
case Intrinsic::log:
- setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::log2:
- setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::log10:
- setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::exp:
- setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::exp2:
- setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
+ setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
return nullptr;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)), DAG, *TLI));
+ getValue(I.getArgOperand(1)), DAG, TLI));
return nullptr;
case Intrinsic::sqrt:
case Intrinsic::fabs:
@@ -5119,6 +5087,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return nullptr;
}
+ case Intrinsic::minnum:
+ setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::maxnum:
+ setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5133,9 +5113,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(2))));
return nullptr;
case Intrinsic::fmuladd: {
- EVT VT = TLI->getValueType(I.getType());
+ EVT VT = TLI.getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI->isFMAFasterThanFMulAndFAdd(VT)) {
+ TLI.isFMAFasterThanFMulAndFAdd(VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
@@ -5155,12 +5135,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::convert_to_fp16:
- setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl,
- MVT::i16, getValue(I.getArgOperand(0))));
+ setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
+ DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant(0, MVT::i32))));
return nullptr;
case Intrinsic::convert_from_fp16:
- setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl,
- MVT::f32, getValue(I.getArgOperand(0))));
+ setValue(&I,
+ DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()),
+ DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)))));
return nullptr;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
@@ -5205,7 +5189,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::STACKSAVE, sdl,
- DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op);
+ DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return nullptr;
@@ -5219,9 +5203,44 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- EVT PtrTy = TLI->getPointerTy();
+ EVT PtrTy = TLI.getPointerTy();
+ SDValue Src, Chain = getRoot();
+ const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand();
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
+
+ // See if Ptr is a bitcast. If it is, look through it and see if we can get
+ // global variable __stack_chk_guard.
+ if (!GV)
+ if (const Operator *BC = dyn_cast<Operator>(Ptr))
+ if (BC->getOpcode() == Instruction::BitCast)
+ GV = dyn_cast<GlobalVariable>(BC->getOperand(0));
+
+ if (GV && TLI.useLoadStackGuardNode()) {
+ // Emit a LOAD_STACK_GUARD node.
+ MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD,
+ sdl, PtrTy, Chain);
+ MachinePointerInfo MPInfo(GV);
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
+ unsigned Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOInvariant;
+ *MemRefs = MF.getMachineMemOperand(MPInfo, Flags,
+ PtrTy.getSizeInBits() / 8,
+ DAG.getEVTAlignment(PtrTy));
+ Node->setMemRefs(MemRefs, MemRefs + 1);
+
+ // Copy the guard value to a virtual register so that it can be
+ // retrieved in the epilogue.
+ Src = SDValue(Node, 0);
+ const TargetRegisterClass *RC =
+ TLI.getRegClassFor(Src.getSimpleValueType());
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
+
+ SPDescriptor.setGuardReg(Reg);
+ Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src);
+ } else {
+ Src = getValue(I.getArgOperand(0)); // The guard's value.
+ }
- SDValue Src = getValue(I.getArgOperand(0)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
@@ -5230,7 +5249,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(getRoot(), sdl, Src, FIN,
+ Res = DAG.getStore(Chain, sdl, Src, FIN,
MachinePointerInfo::getFixedStack(FI),
true, false, 0);
setValue(&I, Res);
@@ -5259,8 +5278,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return nullptr;
+ case Intrinsic::assume:
case Intrinsic::var_annotation:
- // Discard annotate attributes
+ // Discard annotate attributes and assumptions
return nullptr;
case Intrinsic::init_trampoline: {
@@ -5281,7 +5301,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::adjust_trampoline: {
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
- TLI->getPointerTy(),
+ TLI.getPointerTy(),
getValue(I.getArgOperand(0))));
return nullptr;
}
@@ -5321,10 +5341,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot())
.setCallee(CallingConv::C, I.getType(),
- DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()),
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
std::move(Args), 0);
- std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return nullptr;
}
@@ -5388,11 +5408,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!LifetimeObject)
continue;
- int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
+    // First check that the alloca is static; otherwise it won't have a
+ // valid frame index.
+ auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return nullptr;
+
+ int FI = SI->second;
SDValue Ops[2];
Ops[0] = getRoot();
- Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true);
+ Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
@@ -5402,7 +5428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::invariant_start:
// Discard region information.
- setValue(&I, DAG.getUNDEF(TLI->getPointerTy()));
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
return nullptr;
case Intrinsic::invariant_end:
// Discard region information.
@@ -5420,7 +5446,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::clear_cache:
- return TLI->getClearCacheBuiltinName();
+ return TLI.getClearCacheBuiltinName();
case Intrinsic::donothing:
// ignore
return nullptr;
@@ -5430,42 +5456,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64: {
- visitPatchpoint(I);
+ visitPatchpoint(&I);
return nullptr;
}
}
}
-void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
- bool isTailCall,
- MachineBasicBlock *LandingPad) {
- const TargetLowering *TLI = TM.getTargetLowering();
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- Type *RetTy = FTy->getReturnType();
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ MachineBasicBlock *LandingPad) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
MCSymbol *BeginLabel = nullptr;
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Args.reserve(CS.arg_size());
-
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
- const Value *V = *i;
-
- // Skip empty types
- if (V->getType()->isEmptyTy())
- continue;
-
- SDValue ArgNode = getValue(V);
- Entry.Node = ArgNode; Entry.Ty = V->getType();
-
- // Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
- Args.push_back(Entry);
- }
-
if (LandingPad) {
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
@@ -5486,24 +5488,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// this call might not return.
(void)getRoot();
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));
- }
- // Check if target-independent constraints permit a tail call here.
- // Target-dependent constraints are checked within TLI->LowerCallTo.
- if (isTailCall && !isInTailCallPosition(CS, DAG))
- isTailCall = false;
+ CLI.setChain(getRoot());
+ }
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall);
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
- std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI);
- assert((isTailCall || Result.second.getNode()) &&
+ assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
"Null value expected with tail call!");
- if (Result.first.getNode())
- setValue(CS.getInstruction(), Result.first);
if (!Result.second.getNode()) {
// As a special case, a null chain means that a tail call has been emitted
@@ -5526,6 +5521,50 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Inform MachineModuleInfo of range.
MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
}
+
+ return Result;
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+ bool isTailCall,
+ MachineBasicBlock *LandingPad) {
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ Type *RetTy = FTy->getReturnType();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Args.push_back(Entry);
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI->LowerCallTo.
+ if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
+ isTailCall = false;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
+ .setTailCall(isTailCall);
+ std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad);
+
+ if (Result.first.getNode())
+ setValue(CS.getInstruction(), Result.first);
}
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@@ -5591,7 +5630,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
- EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true);
+ EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true);
if (IsSigned)
Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
else
@@ -5616,7 +5655,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
- EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true);
+ EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true);
setValue(&I, DAG.getConstant(0, CallVT));
return true;
}
@@ -5673,15 +5712,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// Require that we can find a legal MVT, and only do this if the target
// supports unaligned loads of that type. Expanding into byte loads would
// bloat the code.
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (ActuallyDoIt && CSize->getZExtValue() > 4) {
unsigned DstAS = LHS->getType()->getPointerAddressSpace();
unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
- if (!TLI->isTypeLegal(LoadVT) ||
- !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) ||
- !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS))
+ // TODO: Check alignment of src and dest ptrs.
+ if (!TLI.isTypeLegal(LoadVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS))
ActuallyDoIt = false;
}
@@ -5859,6 +5899,26 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
return true;
}
+/// visitBinaryFloatCall - If a call instruction is a binary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a binary floating-point call.
+ if (I.getNumArgOperands() != 2 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ I.getType() != I.getArgOperand(1)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp0 = getValue(I.getArgOperand(0));
+ SDValue Tmp1 = getValue(I.getArgOperand(1));
+ EVT VT = Tmp0.getValueType();
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
+ return true;
+}
+
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
if (isa<InlineAsm>(I.getCalledValue())) {
@@ -5915,6 +5975,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
+ case LibFunc::fmin:
+ case LibFunc::fminf:
+ case LibFunc::fminl:
+ if (visitBinaryFloatCall(I, ISD::FMINNUM))
+ return;
+ break;
+ case LibFunc::fmax:
+ case LibFunc::fmaxf:
+ case LibFunc::fmaxl:
+ if (visitBinaryFloatCall(I, ISD::FMAXNUM))
+ return;
+ break;
case LibFunc::sin:
case LibFunc::sinf:
case LibFunc::sinl:
@@ -6021,7 +6093,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Callee = getValue(I.getCalledValue());
else
Callee = DAG.getExternalSymbol(RenameFn,
- TM.getTargetLowering()->getPointerTy());
+ DAG.getTargetLoweringInfo().getPointerTy());
  // Check if we can potentially perform a tail call. More detailed checking will
// be done within LowerCallTo, after more information about the call is known.
@@ -6216,9 +6288,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
/// ConstraintOperands - Information about all of the constraints.
SDISelAsmOperandInfoVector ConstraintOperands;
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(CS);
+ TargetConstraints = TLI.ParseConstraints(CS);
bool hasMemory = false;
@@ -6243,10 +6315,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo));
+ OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpVT = TLI->getSimpleValueType(CS.getType());
+ OpVT = TLI.getSimpleValueType(CS.getType());
}
++ResNo;
break;
@@ -6267,8 +6339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL).
- getSimpleVT();
+ OpVT =
+ OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT();
}
OpInfo.ConstraintVT = OpVT;
@@ -6279,7 +6351,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
else {
for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
TargetLowering::ConstraintType
- CType = TLI->getConstraintType(OpInfo.Codes[j]);
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
if (CType == TargetLowering::C_Memory) {
hasMemory = true;
break;
@@ -6311,10 +6383,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI->getRegForInlineAsmConstraint(Input.ConstraintCode,
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
@@ -6328,7 +6400,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.Type == InlineAsm::isClobber)
@@ -6356,16 +6428,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
- TLI->getPointerTy());
+ TLI.getPointerTy());
} else {
// Otherwise, create a stack slot and emit a store to it before the
// asm.
Type *Ty = OpVal->getType();
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
- unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty);
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy());
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurSDLoc(),
OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
@@ -6383,7 +6455,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo);
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
}
// Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -6394,7 +6466,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo);
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6402,7 +6474,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(
DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
@@ -6425,7 +6497,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
// Ideally, we would only check against memory constraints. However, the
    // meaning of an 'other' constraint can be target-specific and we can't easily
@@ -6443,7 +6515,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -6465,7 +6537,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
}
@@ -6545,7 +6617,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
i != e; ++i) {
- if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT))
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
else {
LLVMContext &Ctx = *DAG.getContext();
@@ -6572,7 +6644,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
@@ -6584,7 +6656,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
- TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
LLVMContext &Ctx = *DAG.getContext();
@@ -6598,20 +6670,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
unsigned ResOpType =
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
break;
}
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
- assert(InOperandVal.getValueType() == TLI->getPointerTy() &&
+ assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
"Memory operands expect pointer values");
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
- TLI->getPointerTy()));
+ TLI.getPointerTy()));
AsmNodeOperands.push_back(InOperandVal);
break;
}
@@ -6674,7 +6746,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
- EVT ResultType = TLI->getValueType(CS.getType());
+ EVT ResultType = TLI.getValueType(CS.getType());
// If any of the results of the inline asm is a vector, it may have the
// wrong width/num elts. This can happen for register classes that can
@@ -6739,9 +6811,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
- const TargetLowering *TLI = TM.getTargetLowering();
- const DataLayout &DL = *TLI->getDataLayout();
- SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(),
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = *TLI.getDataLayout();
+ SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)),
DL.getABITypeAlignment(I.getType()));
@@ -6773,18 +6845,18 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
std::pair<SDValue, SDValue>
-SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
+SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
- bool useVoidTy) {
+ bool UseVoidTy,
+ MachineBasicBlock *LandingPad) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
- ImmutableCallSite CS(&CI);
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
ArgI != ArgE; ++ArgI) {
- const Value *V = CI.getOperand(ArgI);
+ const Value *V = CS->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
@@ -6795,14 +6867,13 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
Args.push_back(Entry);
}
- Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
+ Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
- .setDiscardResult(!CI.use_empty());
+ .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
+ .setDiscardResult(CS->use_empty());
- const TargetLowering *TLI = TM.getTargetLowering();
- return TLI->LowerCallTo(CLI);
+ return lowerInvokable(CLI, LandingPad);
}
/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
@@ -6822,11 +6893,11 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
-static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
+static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
- for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) {
- SDValue OpVal = Builder.getValue(CI.getArgOperand(i));
+ for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(CS.getArgument(i));
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
Ops.push_back(
Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
@@ -6877,7 +6948,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
// Push live variables for the stack map.
- addStackMapLiveVars(CI, 2, Ops, *this);
+ addStackMapLiveVars(&CI, 2, Ops, *this);
// We are not pushing any register mask info here on the operands list,
// because the stackmap doesn't clobber anything.
@@ -6904,7 +6975,8 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
}
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
-void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
+void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
+ MachineBasicBlock *LandingPad) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
@@ -6912,32 +6984,29 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// [Args...],
// [live variables...])
- CallingConv::ID CC = CI.getCallingConv();
- bool isAnyRegCC = CC == CallingConv::AnyReg;
- bool hasDef = !CI.getType()->isVoidTy();
- SDValue Callee = getValue(CI.getOperand(2)); // <target>
+ CallingConv::ID CC = CS.getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !CS->getType()->isVoidTy();
+ SDValue Callee = getValue(CS->getOperand(2)); // <target>
// Get the real number of arguments participating in the call <numArgs>
- SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos));
+ SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// Intrinsics include all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
- assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
- unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
std::pair<SDValue, SDValue> Result =
- LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC);
-
- // Set the root to the target-lowered call chain.
- SDValue Chain = Result.second;
- DAG.setRoot(Chain);
+ lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC,
+ LandingPad);
- SDNode *CallEnd = Chain.getNode();
- if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+ SDNode *CallEnd = Result.second.getNode();
+ if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
CallEnd = CallEnd->getOperand(0).getNode();
/// Get a call instruction from the call sequence chain.
@@ -6945,16 +7014,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
"Expected a callseq node.");
SDNode *Call = CallEnd->getOperand(0).getNode();
- bool hasGlue = Call->getGluedNode();
+ bool HasGlue = Call->getGluedNode();
// Replace the target specific call node with the patchable intrinsic.
SmallVector<SDValue, 8> Ops;
// Add the <id> and <numBytes> constants.
- SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
- SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
@@ -6967,8 +7036,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// Adjust <numArgs> to account for any arguments that have been passed on the
// stack instead.
// Call Node: Chain, Target, {Args}, RegMask, [Glue]
- unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3);
- NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs;
+ unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
+ NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
// Add the calling convention
@@ -6976,20 +7045,20 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.
- if (isAnyRegCC)
+ if (IsAnyRegCC)
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
- Ops.push_back(getValue(CI.getArgOperand(i)));
+ Ops.push_back(getValue(CS.getArgument(i)));
// Push the arguments from the call instruction up to the register mask.
- SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
+ SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
Ops.push_back(*i);
// Push live variables for the stack map.
- addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this);
+ addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this);
// Push the register mask info.
- if (hasGlue)
+ if (HasGlue)
Ops.push_back(*(Call->op_end()-2));
else
Ops.push_back(*(Call->op_end()-1));
@@ -6999,15 +7068,15 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
Ops.push_back(*(Call->op_begin()));
// Push the glue flag (last operand).
- if (hasGlue)
+ if (HasGlue)
Ops.push_back(*(Call->op_end()-1));
SDVTList NodeTys;
- if (isAnyRegCC && hasDef) {
+ if (IsAnyRegCC && HasDef) {
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
- ComputeValueVTs(TLI, CI.getType(), ValueVTs);
+ ComputeValueVTs(TLI, CS->getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
@@ -7022,18 +7091,18 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
getCurSDLoc(), NodeTys, Ops);
// Update the NodeMap.
- if (hasDef) {
- if (isAnyRegCC)
- setValue(&CI, SDValue(MN, 0));
+ if (HasDef) {
+ if (IsAnyRegCC)
+ setValue(CS.getInstruction(), SDValue(MN, 0));
else
- setValue(&CI, Result.first);
+ setValue(CS.getInstruction(), Result.first);
}
// Fixup the consumers of the intrinsic. The chain and glue may be used in the
// call sequence. Furthermore the location of the chain and glue can change
// when the AnyReg calling convention is used and the intrinsic returns a
// value.
- if (isAnyRegCC && hasDef) {
+ if (IsAnyRegCC && HasDef) {
SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
@@ -7182,8 +7251,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
if (Args[i].isNest)
Flags.setNest();
- if (NeedsRegBlock)
+ if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
+ if (Value == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7229,10 +7301,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
- // Only mark the end at the last register of the last value.
- if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1)
- MyFlags.Flags.setInConsecutiveRegsLast();
-
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
@@ -7345,10 +7413,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
"Copy from a reg to the same reg!");
assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
- const TargetLowering *TLI = TM.getTargetLowering();
- RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V);
+
+ ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
+ FuncInfo.PreferredExtendType.end())
+ ? ISD::ANY_EXTEND
+ : FuncInfo.PreferredExtendType[V];
+ RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
@@ -7374,15 +7447,13 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
- const TargetLowering *TLI = getTargetLowering();
const DataLayout *DL = TLI->getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
- ComputeValueVTs(*getTargetLowering(),
- PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
// or one register.
@@ -7447,8 +7518,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
- if (NeedsRegBlock)
+ if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
+ if (Value == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7461,11 +7535,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
      // if it isn't the first piece, alignment must be 1
else if (i > 0)
MyFlags.Flags.setOrigAlign(1);
-
- // Only mark the end at the last register of the last value.
- if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
- MyFlags.Flags.setInConsecutiveRegsLast();
-
Ins.push_back(MyFlags);
}
PartBase += VT.getStoreSize();
@@ -7474,9 +7543,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Call the target to set up the argument values.
SmallVector<SDValue, 8> InVals;
- SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
- F.isVarArg(), Ins,
- dl, DAG, InVals);
+ SDValue NewRoot = TLI->LowerFormalArguments(
+ DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
// Verify that the target's LowerFormalArguments behaved as expected.
assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
@@ -7513,8 +7581,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MachineRegisterInfo& RegInfo = MF.getRegInfo();
unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
- NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(),
- SRetReg, ArgValue);
+ NewRoot =
+ SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
DAG.setRoot(NewRoot);
// i indexes lowered arguments. Bump it past the hidden sret argument.
@@ -7629,7 +7697,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
- if (!SuccsHandled.insert(SuccMBB)) continue;
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
@@ -7672,11 +7741,11 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Remember that this register needs to be added to the machine PHI node as
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
- const TargetLowering *TLI = TM.getTargetLowering();
- ComputeValueVTs(*TLI, PN->getType(), ValueVTs);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ComputeValueVTs(TLI, PN->getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
- unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT);
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
Reg += NumRegisters;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 84679f9..f74e652 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SELECTIONDAGBUILDER_H
-#define SELECTIONDAGBUILDER_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
@@ -21,6 +21,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
#include <vector>
namespace llvm {
@@ -200,7 +201,7 @@ private:
}
};
- size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+ void Clusterify(CaseVector &Cases, const SwitchInst &SI);
/// CaseBlock - This structure is used to communicate between
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -276,9 +277,9 @@ private:
BitTestBlock(APInt F, APInt R, const Value* SV,
unsigned Rg, MVT RgVT, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
- const BitTestInfo& C):
+ BitTestInfo C):
First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
- Parent(P), Default(D), Cases(C) { }
+ Parent(P), Default(D), Cases(std::move(C)) { }
APInt First;
APInt Range;
const Value *SValue;
@@ -397,7 +398,8 @@ private:
class StackProtectorDescriptor {
public:
StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
- FailureMBB(nullptr), Guard(nullptr) { }
+ FailureMBB(nullptr), Guard(nullptr),
+ GuardReg(0) { }
~StackProtectorDescriptor() { }
/// Returns true if all fields of the stack protector descriptor are
@@ -455,6 +457,9 @@ private:
MachineBasicBlock *getFailureMBB() { return FailureMBB; }
const Value *getGuard() { return Guard; }
+ unsigned getGuardReg() const { return GuardReg; }
+ void setGuardReg(unsigned R) { GuardReg = R; }
+
private:
/// The basic block for which we are generating the stack protector.
///
@@ -477,6 +482,9 @@ private:
/// stack protector stack slot.
const Value *Guard;
+ /// The virtual register holding the stack guard value.
+ unsigned GuardReg;
+
/// Add a successor machine basic block to ParentMBB. If the successor mbb
/// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
/// block will be created.
@@ -626,17 +634,23 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
MachineBasicBlock *LandingPad = nullptr);
- std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI,
- unsigned ArgIdx,
- unsigned NumArgs,
- SDValue Callee,
- bool useVoidTy = false);
+ std::pair<SDValue, SDValue> lowerCallOperands(
+ ImmutableCallSite CS,
+ unsigned ArgIdx,
+ unsigned NumArgs,
+ SDValue Callee,
+ bool UseVoidTy = false,
+ MachineBasicBlock *LandingPad = nullptr);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
private:
+ std::pair<SDValue, SDValue> lowerInvokable(
+ TargetLowering::CallLoweringInfo &CLI,
+ MachineBasicBlock *LandingPad);
+
// Terminator instructions.
void visitRet(const ReturnInst &I);
void visitBr(const BranchInst &I);
@@ -658,7 +672,6 @@ private:
bool handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
const Value* SV,
- MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
bool handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
@@ -755,6 +768,7 @@ private:
bool visitStrLenCall(const CallInst &I);
bool visitStrNLenCall(const CallInst &I);
bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+ bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
@@ -767,7 +781,8 @@ private:
void visitVAEnd(const CallInst &I);
void visitVACopy(const CallInst &I);
void visitStackmap(const CallInst &I);
- void visitPatchpoint(const CallInst &I);
+ void visitPatchpoint(ImmutableCallSite CS,
+ MachineBasicBlock *LandingPad = nullptr);
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
@@ -784,7 +799,7 @@ private:
/// EmitFuncArgumentDbgValue - If V is a function argument then create
/// a corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted into the entry BB.
- bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
int64_t Offset, bool IsIndirect,
const SDValue &N);
};
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index b3a452f..c9f6cff 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
std::string SDNode::getOperationName(const SelectionDAG *G) const {
@@ -36,7 +37,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "<<Unknown DAG Node>>";
if (isMachineOpcode()) {
if (G)
- if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
if (getMachineOpcode() < TII->getNumOpcodes())
return TII->getName(getMachineOpcode());
return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
@@ -140,6 +141,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Unary operators
case ISD::FABS: return "fabs";
+ case ISD::FMINNUM: return "fminnum";
+ case ISD::FMAXNUM: return "fmaxnum";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
@@ -236,8 +239,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
- case ISD::FP16_TO_FP32: return "fp16_to_fp32";
- case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+ case ISD::FP16_TO_FP: return "fp16_to_fp";
+ case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::CONVERT_RNDSAT: {
switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
@@ -433,7 +436,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :nullptr);
+ OS << ' ' << PrintReg(R->getReg(),
+ G ? G->getSubtarget().getRegisterInfo() : nullptr);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
@@ -565,7 +569,7 @@ void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
- if (!once.insert(N)) // If we've been here before, return now.
+ if (!once.insert(N).second) // If we've been here before, return now.
return;
// Dump the current SDNode, but don't end the line yet.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 57e22e2..79109b7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -284,8 +284,8 @@ namespace llvm {
/// for the target.
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
- const TargetLowering *TLI = IS->getTargetLowering();
- const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
+ const TargetLowering *TLI = IS->TLI;
+ const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() ||
TLI->getSchedulingPreference() == Sched::Source)
@@ -336,7 +336,7 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm),
- FuncInfo(new FunctionLoweringInfo(TM)),
+ FuncInfo(new FunctionLoweringInfo()),
CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
@@ -411,32 +411,32 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
"-fast-isel-abort requires -fast-isel");
const Function &Fn = *mf.getFunction();
- const TargetInstrInfo &TII = *TM.getInstrInfo();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- const TargetLowering *TLI = TM.getTargetLowering();
-
MF = &mf;
- RegInfo = &MF->getRegInfo();
- AA = &getAnalysis<AliasAnalysis>();
- LibInfo = &getAnalysis<TargetLibraryInfo>();
- GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
-
- TargetSubtargetInfo &ST =
- const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
- ST.resetSubtargetFeatures(MF);
- TM.resetTargetOptions(MF);
+ // Reset the target options before resetting the optimization
+ // level below.
+ // FIXME: This is a horrible hack and should be processed via
+ // codegen looking at the optimization level explicitly when
+ // it wants to look at it.
+ TM.resetTargetOptions(Fn);
// Reset OptLevel to None for optnone functions.
CodeGenOpt::Level NewOptLevel = OptLevel;
if (Fn.hasFnAttribute(Attribute::OptimizeNone))
NewOptLevel = CodeGenOpt::None;
OptLevelChanger OLC(*this, NewOptLevel);
+ TII = MF->getSubtarget().getInstrInfo();
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
- CurDAG->init(*MF, TLI);
+ CurDAG->init(*MF);
FuncInfo->set(Fn, *MF, CurDAG);
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -454,7 +454,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
MachineBasicBlock *EntryMBB = MF->begin();
- RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
@@ -489,15 +490,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
"- add if needed");
MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
MachineBasicBlock::iterator InsertPos = Def;
- const MDNode *Variable =
- MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ const MDNode *Variable = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
bool IsIndirect = MI->isIndirectDebugValue();
unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
// Def is never a terminator here, so it is ok to increment InsertPos.
BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
- TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect,
- LDI->second, Offset, Variable);
+ TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset,
+ Variable, Expr);
// If this vreg is directly copied into an exported register then
// that COPY instructions also need DBG_VALUE, if it is the only
@@ -516,11 +516,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
if (CopyUseMI) {
MachineInstr *NewMI =
- BuildMI(*MF, CopyUseMI->getDebugLoc(),
- TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect,
- CopyUseMI->getOperand(0).getReg(),
- Offset, Variable);
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
MachineBasicBlock::iterator Pos = CopyUseMI;
EntryMBB->insertAfter(Pos, NewMI);
}
@@ -534,7 +532,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
break;
for (const auto &MI : MBB) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode());
+ const MCInstrDesc &MCID = TII->get(MI.getOpcode());
if ((MCID.isCall() && !MCID.isReturn()) ||
MI.isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
@@ -617,7 +615,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SDNode *N = Worklist.pop_back_val();
// If we've already seen this node, ignore it.
- if (!VisitedNodes.insert(N))
+ if (!VisitedNodes.insert(N).second)
continue;
// Otherwise, add all chain operands to the worklist.
@@ -901,12 +899,11 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// Assign the call site to the landing pad's begin label.
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
- const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
// Mark exception register as live in.
- const TargetLowering *TLI = getTargetLowering();
const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
if (unsigned Reg = TLI->getExceptionPointerRegister())
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
@@ -1042,7 +1039,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
if (TM.Options.EnableFastISel)
- FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo);
+ FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
// Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
@@ -1096,7 +1093,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
++NumEntryBlocks;
// Lower any arguments needed in this block if this is the entry block.
- if (!FastIS->LowerArguments()) {
+ if (!FastIS->lowerArguments()) {
// Fast isel failed to lower these arguments
++NumFastIselFailLowerArguments;
if (EnableFastISelAbortArgs)
@@ -1134,7 +1131,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
// Try to select the instruction with FastISel.
- if (FastIS->SelectInstruction(Inst)) {
+ if (FastIS->selectInstruction(Inst)) {
--NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
@@ -1729,7 +1726,7 @@ static SDNode *findGlueUse(SDNode *N) {
/// This function recursively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
- SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+ SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
bool IgnoreChains) {
// NodeIDs are assigned such that a node's ID is guaranteed to be greater
// than the IDs of all of its (recursive) operands. If we scan to a point where
@@ -1744,7 +1741,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// Don't revisit a node we already scanned without failing; we know we
// won't fail if we scan it again.
- if (!Visited.insert(Use))
+ if (!Visited.insert(Use).second)
return false;
for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
@@ -1861,8 +1858,8 @@ SDNode
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg = getTargetLowering()->getRegisterByName(
- RegStr->getString().data(), Op->getValueType(0));
+ unsigned Reg =
+ TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0));
SDValue New = CurDAG->getCopyFromReg(
CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
@@ -1874,8 +1871,8 @@ SDNode
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg = getTargetLowering()->getRegisterByName(
- RegStr->getString().data(), Op->getOperand(2).getValueType());
+ unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
+ Op->getOperand(2).getValueType());
SDValue New = CurDAG->getCopyToReg(
CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
@@ -2375,7 +2372,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Result = !::CheckOpcode(Table, Index, N.getNode());
return Index;
case SelectionDAGISel::OPC_CheckType:
- Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering());
+ Result = !::CheckType(Table, Index, N, SDISel.TLI);
return Index;
case SelectionDAGISel::OPC_CheckChild0Type:
case SelectionDAGISel::OPC_CheckChild1Type:
@@ -2385,14 +2382,15 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckChild5Type:
case SelectionDAGISel::OPC_CheckChild6Type:
case SelectionDAGISel::OPC_CheckChild7Type:
- Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(),
- Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+ Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+ Table[Index - 1] -
+ SelectionDAGISel::OPC_CheckChild0Type);
return Index;
case SelectionDAGISel::OPC_CheckCondCode:
Result = !::CheckCondCode(Table, Index, N);
return Index;
case SelectionDAGISel::OPC_CheckValueType:
- Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering());
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
return Index;
case SelectionDAGISel::OPC_CheckInteger:
Result = !::CheckInteger(Table, Index, N);
@@ -2436,6 +2434,42 @@ struct MatchScope {
bool HasChainNodesMatched, HasGlueResultNodesMatched;
};
+/// \brief A DAG update listener to keep the matching state
+/// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to
+/// change the DAG while matching. The X86 addressing-mode matcher is an
+/// example of this.
+class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
+{
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes;
+ SmallVectorImpl<MatchScope> &MatchScopes;
+public:
+ MatchStateUpdater(SelectionDAG &DAG,
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN,
+ SmallVectorImpl<MatchScope> &MS) :
+ SelectionDAG::DAGUpdateListener(DAG),
+ RecordedNodes(RN), MatchScopes(MS) { }
+
+ void NodeDeleted(SDNode *N, SDNode *E) {
+ // Some early-returns here to avoid the search if we deleted the node or
+ // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
+ // do, so it's unnecessary to update matching state at that point).
+ // Neither of these can occur currently because we only install this
+ // update listener while matching a complex pattern.
+ if (!E || E->isMachineOpcode())
+ return;
+ // Performing linear search here does not matter because we almost never
+ // run this code. You'd have to have a CSE during complex pattern
+ // matching.
+ for (auto &I : RecordedNodes)
+ if (I.first.getNode() == N)
+ I.first.setNode(E);
+
+ for (auto &I : MatchScopes)
+ for (auto &J : I.NodeStack)
+ if (J.getNode() == N)
+ J.setNode(E);
+ }
+};
}
SDNode *SelectionDAGISel::
@@ -2449,8 +2483,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::BasicBlock:
case ISD::Register:
case ISD::RegisterMask:
- //case ISD::VALUETYPE:
- //case ISD::CONDCODE:
case ISD::HANDLENODE:
case ISD::MDNODE_SDNODE:
case ISD::TargetConstant:
@@ -2692,6 +2724,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+
+ // If the target can modify the DAG during matching, keep the matching
+ // state consistent.
+ std::unique_ptr<MatchStateUpdater> MSU;
+ if (ComplexPatternFuncMutatesDAG())
+ MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
+ MatchScopes));
+
if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
RecordedNodes[RecNo].first, CPNum,
RecordedNodes))
@@ -2703,7 +2743,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
continue;
case OPC_CheckType:
- if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering()))
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI))
break;
continue;
@@ -2751,7 +2791,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (CaseVT == MVT::iPTR)
- CaseVT = getTargetLowering()->getPointerTy();
+ CaseVT = TLI->getPointerTy();
// If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT)
@@ -2773,7 +2813,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckChild2Type: case OPC_CheckChild3Type:
case OPC_CheckChild4Type: case OPC_CheckChild5Type:
case OPC_CheckChild6Type: case OPC_CheckChild7Type:
- if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(),
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
Opcode-OPC_CheckChild0Type))
break;
continue;
@@ -2781,7 +2821,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
continue;
case OPC_CheckValueType:
- if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering()))
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI))
break;
continue;
case OPC_CheckInteger:
@@ -2980,7 +3020,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
for (unsigned i = 0; i != NumVTs; ++i) {
MVT::SimpleValueType VT =
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
- if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy;
+ if (VT == MVT::iPTR)
+ VT = TLI->getPointerTy().SimpleTy;
VTs.push_back(VT);
}
@@ -3076,7 +3117,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (EmitNodeInfo & OPFL_MemRefs) {
// Only attach load or store memory operands if the generated
// instruction may load or store.
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+ const MCInstrDesc &MCID = TII->get(TargetOpc);
bool mayLoad = MCID.mayLoad();
bool mayStore = MCID.mayStore();
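
The MatchStateUpdater added above is built on SelectionDAG::DAGUpdateListener: while a listener object is alive, the DAG reports every node it deletes or CSEs, and the listener can retarget cached SDValues to the replacement node. Below is a minimal sketch of that pattern; CachedValueUpdater and RecordedOps are illustrative names and not part of this patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"

namespace {
// Keeps a caller-owned cache of SDValues valid while the DAG mutates.
class CachedValueUpdater : public llvm::SelectionDAG::DAGUpdateListener {
  llvm::SmallVectorImpl<llvm::SDValue> &RecordedOps;

public:
  CachedValueUpdater(llvm::SelectionDAG &DAG,
                     llvm::SmallVectorImpl<llvm::SDValue> &Ops)
      : llvm::SelectionDAG::DAGUpdateListener(DAG), RecordedOps(Ops) {}

  void NodeDeleted(llvm::SDNode *N, llvm::SDNode *E) override {
    if (!E) // Plain deletion with no replacement: nothing to retarget.
      return;
    for (llvm::SDValue &V : RecordedOps)
      if (V.getNode() == N)
        V.setNode(E); // Point the cached value at the replacement node.
  }
};
} // end anonymous namespace

Installing it amounts to constructing it for the duration of the call that may mutate the DAG (on the stack, or via unique_ptr as the patch does); destruction unregisters it.
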
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 42372a2..9aef5ed 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -31,13 +31,13 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;
-/// NOTE: The constructor takes ownership of TLOF.
-TargetLowering::TargetLowering(const TargetMachine &tm,
- const TargetLoweringObjectFile *tlof)
- : TargetLoweringBase(tm, tlof) {}
+/// NOTE: The TargetMachine owns TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm)
+ : TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
@@ -2177,7 +2177,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
- const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *RI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
@@ -2239,14 +2240,11 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
- InlineAsm::ConstraintInfoVector
- ConstraintInfos = IA->ParseConstraints();
-
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
- ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+ ConstraintOperands.emplace_back(std::move(CI));
AsmOperandInfo &OpInfo = ConstraintOperands.back();
// Update multiple alternative constraint count.
@@ -2325,7 +2323,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
}
// If we have multiple alternative constraints, select the best alternative.
- if (ConstraintInfos.size()) {
+ if (ConstraintOperands.size()) {
if (maCount) {
unsigned bestMAIndex = 0;
int bestWeight = -1;
@@ -2641,11 +2639,13 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
/// \brief Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG, bool IsAfterLegalization,
std::vector<SDNode *> *Created) const {
+ assert(Created && "No vector to hold sdiv ops.");
+
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -2673,38 +2673,36 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
// If d > 0 and m < 0, add the numerator
if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
- if (Created)
- Created->push_back(Q.getNode());
+ Created->push_back(Q.getNode());
}
// If d < 0 and m > 0, subtract the numerator.
if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
- if (Created)
- Created->push_back(Q.getNode());
+ Created->push_back(Q.getNode());
}
// Shift right algebraic if shift value is nonzero
if (magics.s > 0) {
Q = DAG.getNode(ISD::SRA, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
- if (Created)
- Created->push_back(Q.getNode());
+ Created->push_back(Q.getNode());
}
// Extract the sign bit and add it to the quotient
SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(VT.getScalarSizeInBits() - 1,
getShiftAmountTy(Q.getValueType())));
- if (Created)
- Created->push_back(T.getNode());
+ Created->push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
/// \brief Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG, bool IsAfterLegalization,
std::vector<SDNode *> *Created) const {
+ assert(Created && "No vector to hold udiv ops.");
+
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -2725,8 +2723,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
unsigned Shift = Divisor.countTrailingZeros();
Q = DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
- if (Created)
- Created->push_back(Q.getNode());
+ Created->push_back(Q.getNode());
// Get magic number for the shifted divisor.
magics = Divisor.lshr(Shift).magicu(Shift);
@@ -2744,8 +2741,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
DAG.getConstant(magics.m, VT)).getNode(), 1);
else
return SDValue(); // No mulhu or equivalent
- if (Created)
- Created->push_back(Q.getNode());
+
+ Created->push_back(Q.getNode());
if (magics.a == 0) {
assert(magics.s < Divisor.getBitWidth() &&
@@ -2754,15 +2751,12 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
- if (Created)
- Created->push_back(NPQ.getNode());
+ Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
- if (Created)
- Created->push_back(NPQ.getNode());
+ Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
- if (Created)
- Created->push_back(NPQ.getNode());
+ Created->push_back(NPQ.getNode());
return DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
}
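
BuildSDIV and BuildUDIV replace a divide by a constant with a multiply by a precomputed "magic" reciprocal plus fix-up adds and shifts, the Hacker's Delight scheme referenced above. As a standalone illustration of the unsigned case only, the sketch below hard-codes the constants for dividing by 7, a divisor where magics.a != 0 and the extra subtract/shift/add fix-up is required; the constants apply to this example alone.

#include <cassert>
#include <cstdint>

// Unsigned divide-by-7 via multiply-high, mirroring the node sequence that
// BuildUDIV emits when magics.a != 0 (MULHU, SUB, SRL, ADD, SRL).
static uint32_t DivideBy7(uint32_t N) {
  uint32_t Q = uint32_t((uint64_t(N) * 0x24924925u) >> 32); // mulhu by magic
  uint32_t NPQ = ((N - Q) >> 1) + Q;                        // fix-up add
  return NPQ >> 2;                                          // magics.s - 1 == 2
}

int main() {
  for (uint32_t N : {0u, 1u, 6u, 7u, 100u, 0xFFFFFFFFu})
    assert(DivideBy7(N) == N / 7);
  return 0;
}
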
@@ -2785,7 +2779,7 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
SelectionDAG &DAG, SDValue LL, SDValue LH,
- SDValue RL, SDValue RH) const {
+ SDValue RL, SDValue RH) const {
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -2818,8 +2812,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
// The inputs are both zero-extended.
if (HasUMUL_LOHI) {
// We can emit a umul_lohi.
- Lo = DAG.getNode(ISD::UMUL_LOHI, dl,
- DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
+ RL);
Hi = SDValue(Lo.getNode(), 1);
return true;
}
@@ -2834,8 +2828,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
// The input values are both sign-extended.
if (HasSMUL_LOHI) {
// We can emit a smul_lohi.
- Lo = DAG.getNode(ISD::SMUL_LOHI, dl,
- DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
+ RL);
Hi = SDValue(Lo.getNode(), 1);
return true;
}
@@ -2885,3 +2879,65 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
}
return false;
}
+
+bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ SDLoc dl(SDValue(Node, 0));
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (VT != MVT::f32 || NVT != MVT::i64)
+ return false;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits());
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, IntVT);
+ SDValue Bias = DAG.getConstant(127, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()),
+ IntVT);
+ SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT);
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+
+ SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT)));
+ SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT)));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+
+ SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, IntVT));
+
+ R = DAG.getZExtOrTrunc(R, dl, NVT);
+
+ R = DAG.getSelectCC(dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, getShiftAmountTy(IntVT))),
+ DAG.getNode(ISD::SRL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, getShiftAmountTy(IntVT))),
+ ISD::SETGT);
+
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
+ Sign);
+
+ Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT),
+ DAG.getConstant(0, NVT), Ret, ISD::SETLT);
+ return true;
+}
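
The expandFP_TO_SINT body above builds the fixsfdi-style bit manipulation out of DAG nodes, which obscures the flow. A scalar restatement of the same algorithm, as an illustration rather than code from the patch, assuming the input is in range for i64 just as ISD::FP_TO_SINT does:

#include <cassert>
#include <cstdint>
#include <cstring>

// f32 -> i64 by hand: extract exponent and mantissa, shift the mantissa into
// place, then conditionally negate using the sign bit.
static int64_t FloatToInt64(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));

  int32_t Exponent = int32_t((Bits & 0x7F800000u) >> 23) - 127;
  if (Exponent < 0)                          // the final select: |F| < 1 gives 0
    return 0;

  uint64_t R = (Bits & 0x007FFFFFu) | 0x00800000u;  // mantissa + implicit 1
  R = Exponent > 23 ? R << (Exponent - 23) : R >> (23 - Exponent);

  uint64_t Sign = (Bits & 0x80000000u) ? ~uint64_t(0) : 0; // SRA of the sign bit
  return int64_t((R ^ Sign) - Sign);         // negate iff the input was negative
}

int main() {
  assert(FloatToInt64(0.0f) == 0);
  assert(FloatToInt64(5.75f) == 5);
  assert(FloatToInt64(-123456.0f) == -123456);
  return 0;
}
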
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index f7c64da..0be00f0 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -144,7 +144,7 @@ namespace {
LLVMContext &C = F.getContext();
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
Type *ExnTy = StructType::get(Type::getInt8PtrTy(C),
- Type::getInt32Ty(C), NULL);
+ Type::getInt32Ty(C), nullptr);
Constant *PersFn =
F.getParent()->
getOrInsertFunction("__gcc_personality_v0",
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index d2f3955..7fd8107 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -98,7 +99,7 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
VoidPtrTy, // __personality
VoidPtrTy, // __lsda
ArrayType::get(VoidPtrTy, 5), // __jbuf
- NULL);
+ nullptr);
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
PointerType::getUnqual(FunctionContextTy), (Type *)nullptr);
@@ -138,8 +139,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
/// we reach blocks we've already seen.
static void MarkBlocksLiveIn(BasicBlock *BB,
- SmallPtrSet<BasicBlock *, 64> &LiveBBs) {
- if (!LiveBBs.insert(BB))
+ SmallPtrSetImpl<BasicBlock *> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second)
return; // already been here.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
@@ -190,7 +191,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
unsigned Align =
TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
@@ -249,34 +250,16 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
++AI) {
Type *Ty = AI->getType();
- // Aggregate types can't be cast, but are legal argument types, so we have
- // to handle them differently. We use an extract/insert pair as a
- // lightweight method to achieve the same goal.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
- Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt);
- Instruction *NI = InsertValueInst::Create(AI, EI, 0);
- NI->insertAfter(EI);
- AI->replaceAllUsesWith(NI);
-
- // Set the operand of the instructions back to the AllocaInst.
- EI->setOperand(0, AI);
- NI->setOperand(0, AI);
- } else {
- // This is always a no-op cast because we're casting AI to AI->getType()
- // so src and destination types are identical. BitCast is the only
- // possibility.
- CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp",
- AfterAllocaInsPt);
- AI->replaceAllUsesWith(NC);
-
- // Set the operand of the cast instruction back to the AllocaInst.
- // Normally it's forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However, we're
- // replacing it here with the same value it was constructed with. We do
- // this because the above replaceAllUsesWith() clobbered the operand, but
- // we want this one to remain.
- NC->setOperand(0, AI);
- }
+ // Use 'select i1 true, %arg, undef' to simulate a 'no-op' instruction.
+ Value *TrueValue = ConstantInt::getTrue(F.getContext());
+ Value *UndefValue = UndefValue::get(Ty);
+ Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue,
+ AI->getName() + ".tmp",
+ AfterAllocaInsPt);
+ AI->replaceAllUsesWith(SI);
+
+ // Reset the operand, because it was clobbered by the RAUW above.
+ SI->setOperand(1, AI);
}
}
@@ -368,10 +351,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
continue;
// Demote the PHIs to the stack.
- for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(),
- E = PHIsToDemote.end();
- I != E; ++I)
- DemotePHIToStack(*I);
+ for (PHINode *PN : PHIsToDemote)
+ DemotePHIToStack(PN);
// Move the landingpad instruction back to the top of the landing pad block.
LPI->moveBefore(UnwindBlock->begin());
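
For reference, the new lowerIncomingArguments path produces IR of the form "%a.tmp = select i1 true, <ty> %a, <ty> undef" for each argument %a, a no-op that works uniformly for scalar and aggregate types. A sketch of the same trick applied to a single argument follows; MakeArgPlaceholder is an illustrative helper, not part of the patch.

#include "llvm/ADT/Twine.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

// Funnel all uses of an argument through a no-op select so later rewriting
// (e.g. demotion to the stack) has a single instruction to work with.
static llvm::Instruction *MakeArgPlaceholder(llvm::Argument *A,
                                             llvm::Instruction *InsertPt) {
  llvm::Value *True = llvm::ConstantInt::getTrue(A->getContext());
  llvm::Value *Undef = llvm::UndefValue::get(A->getType());
  llvm::Instruction *Sel = llvm::SelectInst::Create(
      True, A, Undef, A->getName() + ".tmp", InsertPt);
  A->replaceAllUsesWith(Sel);
  // RAUW also rewrote the select's own true operand; point it back at A.
  Sel->setOperand(1, A);
  return Sel;
}
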
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 24e94d1..97a5424 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -60,27 +61,6 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-namespace {
-static BlockFrequency Threshold;
-}
-
-/// Decision threshold. A node gets the output value 0 if the weighted sum of
-/// its inputs falls in the open interval (-Threshold;Threshold).
-static BlockFrequency getThreshold() { return Threshold; }
-
-/// \brief Set the threshold for a given entry frequency.
-///
-/// Set the threshold relative to \c Entry. Since the threshold is used as a
-/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
-/// threshold.
-static void setThreshold(const BlockFrequency &Entry) {
- // Apparently 2 is a good threshold when Entry==2^14, but we need to scale
- // it. Divide by 2^13, rounding as appropriate.
- uint64_t Freq = Entry.getFrequency();
- uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
- Threshold = std::max(UINT64_C(1), Scaled);
-}
-
/// Node - Each edge bundle corresponds to a Hopfield node.
///
/// The node contains precomputed frequency data that only depends on the CFG,
@@ -126,9 +106,9 @@ struct SpillPlacement::Node {
/// clear - Reset per-query data, but preserve frequencies that only depend on
// the CFG.
- void clear() {
+ void clear(const BlockFrequency &Threshold) {
BiasN = BiasP = Value = 0;
- SumLinkWeights = getThreshold();
+ SumLinkWeights = Threshold;
Links.clear();
}
@@ -166,7 +146,7 @@ struct SpillPlacement::Node {
/// update - Recompute Value from Bias and Links. Return true when node
/// preference changes.
- bool update(const Node nodes[]) {
+ bool update(const Node nodes[], const BlockFrequency &Threshold) {
// Compute the weighted sum of inputs.
BlockFrequency SumN = BiasN;
BlockFrequency SumP = BiasP;
@@ -186,9 +166,9 @@ struct SpillPlacement::Node {
// 2. It helps tame rounding errors when the links nominally sum to 0.
//
bool Before = preferReg();
- if (SumN >= SumP + getThreshold())
+ if (SumN >= SumP + Threshold)
Value = -1;
- else if (SumP >= SumN + getThreshold())
+ else if (SumP >= SumN + Threshold)
Value = 1;
else
Value = 0;
@@ -227,7 +207,7 @@ void SpillPlacement::activate(unsigned n) {
if (ActiveNodes->test(n))
return;
ActiveNodes->set(n);
- nodes[n].clear();
+ nodes[n].clear(Threshold);
// Very large bundles usually come from big switches, indirect branches,
// landing pads, or loops with many 'continue' statements. It is difficult to
@@ -244,6 +224,18 @@ void SpillPlacement::activate(unsigned n) {
}
}
+/// \brief Set the threshold for a given entry frequency.
+///
+/// Set the threshold relative to \c Entry. Since the threshold is used as a
+/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
+/// threshold.
+void SpillPlacement::setThreshold(const BlockFrequency &Entry) {
+ // Apparently 2 is a good threshold when Entry==2^14, but we need to scale
+ // it. Divide by 2^13, rounding as appropriate.
+ uint64_t Freq = Entry.getFrequency();
+ uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
+ Threshold = std::max(UINT64_C(1), Scaled);
+}
/// addConstraints - Compute node biases and weights from a set of constraints.
/// Set a bit in NodeMask for each active node.
@@ -310,7 +302,7 @@ bool SpillPlacement::scanActiveBundles() {
Linked.clear();
RecentPositive.clear();
for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
- nodes[n].update(nodes);
+ nodes[n].update(nodes, Threshold);
// A node that must spill, or a node without any links is not going to
// change its value ever again, so exclude it from iterations.
if (nodes[n].mustSpill())
@@ -330,7 +322,7 @@ void SpillPlacement::iterate() {
// First update the recently positive nodes. They have likely received new
// negative bias that will turn them off.
while (!RecentPositive.empty())
- nodes[RecentPositive.pop_back_val()].update(nodes);
+ nodes[RecentPositive.pop_back_val()].update(nodes, Threshold);
if (Linked.empty())
return;
@@ -349,7 +341,7 @@ void SpillPlacement::iterate() {
iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()),
E = Linked.rend(); I != E; ++I) {
unsigned n = *I;
- if (nodes[n].update(nodes)) {
+ if (nodes[n].update(nodes, Threshold)) {
Changed = true;
if (nodes[n].preferReg())
RecentPositive.push_back(n);
@@ -363,7 +355,7 @@ void SpillPlacement::iterate() {
for (SmallVectorImpl<unsigned>::const_iterator I =
std::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
unsigned n = *I;
- if (nodes[n].update(nodes)) {
+ if (nodes[n].update(nodes, Threshold)) {
Changed = true;
if (nodes[n].preferReg())
RecentPositive.push_back(n);
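
The threshold that now travels with the pass is derived from the entry block frequency by dividing by 2^13 with round-to-nearest and clamping to at least 1, so the documented reference point Entry == 2^14 still yields a threshold of 2. A small standalone check of that arithmetic, for illustration only:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Same scaling as SpillPlacement::setThreshold: divide by 2^13, round to
// nearest using bit 12, and never go below 1.
static uint64_t ScaleThreshold(uint64_t Freq) {
  uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
  return std::max(UINT64_C(1), Scaled);
}

int main() {
  assert(ScaleThreshold(1 << 14) == 2); // Entry == 2^14 -> threshold 2
  assert(ScaleThreshold(0) == 1);       // clamped to the minimum
  assert(ScaleThreshold(12288) == 2);   // 1.5 * 2^13 rounds up
  return 0;
}
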
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 43fc7f5..622361e 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -24,8 +24,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
-#define LLVM_CODEGEN_SPILLPLACEMENT_H
+#ifndef LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
@@ -40,7 +40,7 @@ class MachineBasicBlock;
class MachineLoopInfo;
class MachineBlockFrequencyInfo;
-class SpillPlacement : public MachineFunctionPass {
+class SpillPlacement : public MachineFunctionPass {
struct Node;
const MachineFunction *MF;
const EdgeBundles *bundles;
@@ -60,7 +60,11 @@ class SpillPlacement : public MachineFunctionPass {
SmallVector<unsigned, 8> RecentPositive;
// Block frequencies are computed once. Indexed by block number.
- SmallVector<BlockFrequency, 4> BlockFrequencies;
+ SmallVector<BlockFrequency, 8> BlockFrequencies;
+
+ /// Decision threshold. A node gets the output value 0 if the weighted sum of
+ /// its inputs falls in the open interval (-Threshold;Threshold).
+ BlockFrequency Threshold;
public:
static char ID; // Pass identification, replacement for typeid.
@@ -152,6 +156,7 @@ private:
void releaseMemory() override;
void activate(unsigned);
+ void setThreshold(const BlockFrequency &Entry);
};
} // end namespace llvm
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
deleted file mode 100644
index 0649448..0000000
--- a/lib/CodeGen/Spiller.cpp
+++ /dev/null
@@ -1,184 +0,0 @@
-//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Spiller.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "spiller"
-
-namespace {
- enum SpillerName { trivial, inline_ };
-}
-
-static cl::opt<SpillerName>
-spillerOpt("spiller",
- cl::desc("Spiller to use: (default: standard)"),
- cl::Prefix,
- cl::values(clEnumVal(trivial, "trivial spiller"),
- clEnumValN(inline_, "inline", "inline spiller"),
- clEnumValEnd),
- cl::init(trivial));
-
-// Spiller virtual destructor implementation.
-Spiller::~Spiller() {}
-
-namespace {
-
-/// Utility class for spillers.
-class SpillerBase : public Spiller {
-protected:
- MachineFunctionPass *pass;
- MachineFunction *mf;
- VirtRegMap *vrm;
- LiveIntervals *lis;
- MachineFrameInfo *mfi;
- MachineRegisterInfo *mri;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
-
- /// Construct a spiller base.
- SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
- : pass(&pass), mf(&mf), vrm(&vrm)
- {
- lis = &pass.getAnalysis<LiveIntervals>();
- mfi = mf.getFrameInfo();
- mri = &mf.getRegInfo();
- tii = mf.getTarget().getInstrInfo();
- tri = mf.getTarget().getRegisterInfo();
- }
-
- /// Add spill ranges for every use/def of the live interval, inserting loads
- /// immediately before each use, and stores after each def. No folding or
- /// remat is attempted.
- void trivialSpillEverywhere(LiveRangeEdit& LRE) {
- LiveInterval* li = &LRE.getParent();
-
- DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
-
- assert(li->weight != llvm::huge_valf &&
- "Attempting to spill already spilled value.");
-
- assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
- "Trying to spill a stack slot.");
-
- DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
-
- const TargetRegisterClass *trc = mri->getRegClass(li->reg);
- unsigned ss = vrm->assignVirt2StackSlot(li->reg);
-
- // Iterate over reg uses/defs.
- for (MachineRegisterInfo::reg_instr_iterator
- regItr = mri->reg_instr_begin(li->reg);
- regItr != mri->reg_instr_end();) {
-
- // Grab the use/def instr.
- MachineInstr *mi = &*regItr;
-
- DEBUG(dbgs() << " Processing " << *mi);
-
- // Step regItr to the next use/def instr.
- ++regItr;
-
- // Collect uses & defs for this instr.
- SmallVector<unsigned, 2> indices;
- bool hasUse = false;
- bool hasDef = false;
- for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
- MachineOperand &op = mi->getOperand(i);
- if (!op.isReg() || op.getReg() != li->reg)
- continue;
- hasUse |= mi->getOperand(i).isUse();
- hasDef |= mi->getOperand(i).isDef();
- indices.push_back(i);
- }
-
- // Create a new virtual register for the load and/or store.
- unsigned NewVReg = LRE.create();
-
- // Update the reg operands & kill flags.
- for (unsigned i = 0; i < indices.size(); ++i) {
- unsigned mopIdx = indices[i];
- MachineOperand &mop = mi->getOperand(mopIdx);
- mop.setReg(NewVReg);
- if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
- mop.setIsKill(true);
- }
- }
- assert(hasUse || hasDef);
-
- // Insert reload if necessary.
- MachineBasicBlock::iterator miItr(mi);
- if (hasUse) {
- MachineInstrSpan MIS(miItr);
-
- tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc,
- tri);
- lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr);
- }
-
- // Insert store if necessary.
- if (hasDef) {
- MachineInstrSpan MIS(miItr);
-
- tii->storeRegToStackSlot(*mi->getParent(), std::next(miItr), NewVReg,
- true, ss, trc, tri);
- lis->InsertMachineInstrRangeInMaps(std::next(miItr), MIS.end());
- }
- }
- }
-};
-
-} // end anonymous namespace
-
-namespace {
-
-/// Spills any live range using the spill-everywhere method with no attempt at
-/// folding.
-class TrivialSpiller : public SpillerBase {
-public:
-
- TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : SpillerBase(pass, mf, vrm) {}
-
- void spill(LiveRangeEdit &LRE) override {
- // Ignore spillIs - we don't use it.
- trivialSpillEverywhere(LRE);
- }
-};
-
-} // end anonymous namespace
-
-void Spiller::anchor() { }
-
-llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm) {
- switch (spillerOpt) {
- case trivial: return new TrivialSpiller(pass, mf, vrm);
- case inline_: return createInlineSpiller(pass, mf, vrm);
- }
- llvm_unreachable("Invalid spiller optimization");
-}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index b7d5bea..08f99ec 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SPILLER_H
-#define LLVM_CODEGEN_SPILLER_H
+#ifndef LLVM_LIB_CODEGEN_SPILLER_H
+#define LLVM_LIB_CODEGEN_SPILLER_H
namespace llvm {
@@ -31,11 +31,6 @@ namespace llvm {
};
- /// Create and return a spiller object, as specified on the command line.
- Spiller* createSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm);
-
/// Create and return a spiller that will insert spill code directly instead
/// of deferring through VirtRegMap.
Spiller *createInlineSpiller(MachineFunctionPass &pass,
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 7d4f568..ea7b914 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -40,16 +40,11 @@ STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
// Split Analysis
//===----------------------------------------------------------------------===//
-SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
- const LiveIntervals &lis,
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
const MachineLoopInfo &mli)
- : MF(vrm.getMachineFunction()),
- VRM(vrm),
- LIS(lis),
- Loops(mli),
- TII(*MF.getTarget().getInstrInfo()),
- CurLI(nullptr),
- LastSplitPoint(MF.getNumBlockIDs()) {}
+ : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
+ TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
+ LastSplitPoint(MF.getNumBlockIDs()) {}
void SplitAnalysis::clear() {
UseSlots.clear();
@@ -321,22 +316,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa,
- LiveIntervals &lis,
- VirtRegMap &vrm,
+SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
MachineDominatorTree &mdt,
MachineBlockFrequencyInfo &mbfi)
- : SA(sa), LIS(lis), VRM(vrm),
- MRI(vrm.getMachineFunction().getRegInfo()),
- MDT(mdt),
- TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
- TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
- MBFI(mbfi),
- Edit(nullptr),
- OpenIdx(0),
- SpillMode(SM_Partition),
- RegAssign(Allocator)
-{}
+ : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt), TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()),
+ MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition),
+ RegAssign(Allocator) {}
void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
Edit = &LRE;
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 7048ee3..2e60c14 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SPLITKIT_H
-#define LLVM_CODEGEN_SPLITKIT_H
+#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H
+#define LLVM_LIB_CODEGEN_SPLITKIT_H
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 370430c..dcf1b44 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -228,7 +228,7 @@ void StackColoring::dump() const {
unsigned StackColoring::collectMarkers(unsigned NumSlot) {
unsigned MarkersFound = 0;
// Scan the function to find all lifetime markers.
- // NOTE: We use the a reverse-post-order iteration to ensure that we obtain a
+ // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
// deterministic numbering, and because we'll need a post-order iteration
// later for solving the liveness dataflow problem.
for (MachineBasicBlock *MBB : depth_first(MF)) {
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 3ba502f..c2ee87a 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -67,7 +67,7 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
<< _MF.getName() << " **********\n");
MF = &_MF;
- TRI = MF->getTarget().getRegisterInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
++NumStackMapFuncVisited;
// Skip this function if there are no patchpoints to process.
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 1473fc1..d3791c3 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <iterator>
using namespace llvm;
@@ -83,7 +84,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
switch (MOI->getImm()) {
default: llvm_unreachable("Unrecognized operand type.");
case StackMaps::DirectMemRefOp: {
- unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits();
+ unsigned Size =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
unsigned Reg = (++MOI)->getReg();
@@ -122,7 +124,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
const TargetRegisterClass *RC =
- AP.TM.getRegisterInfo()->getMinimalPhysRegClass(MOI->getReg());
+ AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass(
+ MOI->getReg());
assert(!MOI->getSubReg() && "Physical subreg still around.");
Locs.push_back(
Location(Location::Register, RC->getSize(), MOI->getReg(), 0));
@@ -158,7 +161,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
StackMaps::LiveOutVec
StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
assert(Mask && "No register mask specified");
- const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
LiveOutVec LiveOuts;
// Create a LiveOutReg for each bit that is set in the register mask.
@@ -217,9 +220,18 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
I != E; ++I) {
// Constants are encoded as sign-extended integers.
// -1 is directly encoded as .long 0xFFFFFFFF with no constant pool.
- if (I->LocType == Location::Constant &&
- ((I->Offset + (int64_t(1)<<31)) >> 32) != 0) {
+ if (I->LocType == Location::Constant && !isInt<32>(I->Offset)) {
I->LocType = Location::ConstantIndex;
+ // ConstPool is intentionally a MapVector of 'uint64_t's (as
+ // opposed to 'int64_t's). We should never be in a situation
+ // where we have to insert either the tombstone or the empty
+ // keys into a map, and for a DenseMap<uint64_t, T> these are
+ // (uint64_t)0 and (uint64_t)-1. They can be and are
+ // represented using 32 bit integers.
+
+ assert((uint64_t)I->Offset != DenseMapInfo<uint64_t>::getEmptyKey() &&
+ (uint64_t)I->Offset != DenseMapInfo<uint64_t>::getTombstoneKey() &&
+ "empty and tombstone keys should fit in 32 bits!");
auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset));
I->Offset = Result.first - ConstPool.begin();
}
@@ -232,12 +244,16 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext),
OutContext);
- CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, Locations, LiveOuts));
+ CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
+ std::move(LiveOuts));
// Record the stack size of the current function.
const MachineFrameInfo *MFI = AP.MF->getFrameInfo();
+ const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo();
+ const bool DynamicFrameSize = MFI->hasVarSizedObjects() ||
+ RegInfo->needsStackRealignment(*(AP.MF));
FnStackSize[AP.CurrentFnSym] =
- MFI->hasVarSizedObjects() ? UINT64_MAX : MFI->getStackSize();
+ DynamicFrameSize ? UINT64_MAX : MFI->getStackSize();
}
void StackMaps::recordStackMap(const MachineInstr &MI) {
@@ -485,7 +501,7 @@ void StackMaps::serializeToStackMapSection() {
MCContext &OutContext = AP.OutStreamer.getContext();
MCStreamer &OS = AP.OutStreamer;
- const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
// Create the section.
const MCSection *StackMapSection =
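Editor's note: two details stand out in the StackMaps changes above — the open-coded range test is replaced by an isInt<32> check, and the new assertion documents that the constant pool's reserved DenseMap keys must never be inserted. A standalone sketch of the 32-bit check; fitsInInt32 is a local stand-in mirroring the intent of llvm::isInt<32>, not the real MathExtras.h template.

    #include <cassert>
    #include <cstdint>

    // Does the signed 64-bit value fit in 32 bits (sign-extended)?
    static bool fitsInInt32(int64_t V) {
      return V >= INT32_MIN && V <= INT32_MAX;
    }

    int main() {
      // -1 fits, so it is emitted inline as .long 0xFFFFFFFF.
      assert(fitsInInt32(-1));
      // A 33-bit constant does not fit and is routed to the constant pool.
      assert(!fitsInInt32(int64_t(1) << 33));
      // Per the comment in recordStackMapOpers, the reserved DenseMap keys are
      // representable in 32 bits, so they can never show up as pool keys here.
      assert(fitsInInt32(0) && fitsInInt32(-1));
      return 0;
    }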
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index accfe7b..45f97ac 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cstdlib>
using namespace llvm;
@@ -85,7 +86,7 @@ bool StackProtector::runOnFunction(Function &Fn) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- TLI = TM->getTargetLowering();
+ TLI = TM->getSubtargetImpl()->getTargetLowering();
Attribute Attr = Fn.getAttributes().getAttribute(
AttributeSet::FunctionIndex, "stack-protector-buffer-size");
@@ -168,7 +169,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
} else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
// Keep track of what PHI nodes we have already visited to ensure
// they are only visited once.
- if (VisitedPHIs.insert(PN))
+ if (VisitedPHIs.insert(PN).second)
if (HasAddressTaken(PN))
return true;
} else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
@@ -479,12 +480,12 @@ BasicBlock *StackProtector::CreateFailBB() {
if (Trip.getOS() == llvm::Triple::OpenBSD) {
Constant *StackChkFail = M->getOrInsertFunction(
"__stack_smash_handler", Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context), NULL);
+ Type::getInt8PtrTy(Context), nullptr);
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
Constant *StackChkFail = M->getOrInsertFunction(
- "__stack_chk_fail", Type::getVoidTy(Context), NULL);
+ "__stack_chk_fail", Type::getVoidTy(Context), nullptr);
B.CreateCall(StackChkFail);
}
B.CreateUnreachable();
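Editor's note: the VisitedPHIs change above reflects another recurring update in this commit — the insert() members of the small set containers now return an (iterator, inserted) pair, so callers test .second. A standalone sketch with std::set standing in for SmallPtrSet:

    #include <iostream>
    #include <set>

    int main() {
      std::set<const void *> Visited;  // stand-in for SmallPtrSet<const PHINode *, 16>
      int A = 0, B = 1;

      // insert() yields {iterator, bool}; .second is true only for a new element,
      // which is exactly the "visit each PHI node once" guard in HasAddressTaken.
      bool FirstVisitA  = Visited.insert(&A).second;  // true
      bool SecondVisitA = Visited.insert(&A).second;  // false, already present
      bool FirstVisitB  = Visited.insert(&B).second;  // true

      std::cout << FirstVisitA << SecondVisitA << FirstVisitB << "\n";  // prints 101
      return 0;
    }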
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 791168f..cc72e5e 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -28,7 +28,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <vector>
using namespace llvm;
@@ -422,7 +422,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
});
MFI = MF.getFrameInfo();
- TII = MF.getTarget().getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
LS = &getAnalysis<LiveStacks>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 723a629..4377236 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "tailduplication"
@@ -135,8 +136,8 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
if (skipOptnoneFunction(*MF.getFunction()))
return false;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
@@ -798,11 +799,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
RS->enterBasicBlock(PredBB);
if (!PredBB->empty())
RS->forward(std::prev(PredBB->end()));
- BitVector RegsLiveAtExit(TRI->getNumRegs());
- RS->getRegsUsed(RegsLiveAtExit, false);
for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
E = TailBB->livein_end(); I != E; ++I) {
- if (!RegsLiveAtExit[*I])
+ if (!RS->isRegUsed(*I, false))
// If a register is previously livein to the tail but it's not live
// at the end of predecessor BB, then it should be added to its
// livein list.
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 883e9d1..1557d10 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -14,8 +14,8 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cstdlib>
using namespace llvm;
@@ -26,7 +26,7 @@ TargetFrameLowering::~TargetFrameLowering() {
/// the stack frame of the specified index. This is the default implementation
/// which is overridden for some targets.
int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
+ int FI) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getObjectOffset(FI) + MFI->getStackSize() -
getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
@@ -34,7 +34,7 @@ int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg) const {
- const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
// By default, assume all frame indices are referenced via whatever
// getFrameRegister() says. The target can override this if it's doing
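Editor's note: for reference, the default getFrameIndexOffset shown in context above combines four quantities: the object offset, the stack size, the local-area offset, and the offset adjustment. A worked example with made-up numbers (purely illustrative; no real target is assumed to use these values):

    #include <iostream>

    int main() {
      // Hypothetical values for a single frame index on an imaginary target.
      int ObjectOffset      = -24;  // MFI->getObjectOffset(FI)
      int StackSize         = 64;   // MFI->getStackSize()
      int OffsetOfLocalArea = 0;    // TFI->getOffsetOfLocalArea()
      int OffsetAdjustment  = 8;    // MFI->getOffsetAdjustment()

      // Default TargetFrameLowering::getFrameIndexOffset() computation.
      int FIOffset = ObjectOffset + StackSize - OffsetOfLocalArea + OffsetAdjustment;
      std::cout << "frame index offset: " << FIOffset << "\n";  // prints 48
      return 0;
    }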
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 83966bd0..ab45f89 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -290,13 +290,15 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
Offset = 0;
return true;
}
- unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx);
+ unsigned BitSize =
+ TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx);
// Convert bit size to byte size to be consistent with
// MCRegisterClass::getSize().
if (BitSize % 8)
return false;
- int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
+ int BitOffset =
+ TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
if (BitOffset < 0 || BitOffset % 8)
return false;
@@ -305,7 +307,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
- if (!TM->getDataLayout()->isLittleEndian()) {
+ if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) {
Offset = RC->getSize() - (Offset + Size);
}
return true;
@@ -370,6 +372,10 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
return nullptr;
}
+void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ llvm_unreachable("Not a MachO target");
+}
+
bool TargetInstrInfo::
canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const {
@@ -498,7 +504,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const MachineOperand &MO = MI->getOperand(1-Ops[0]);
MachineBasicBlock::iterator Pos = MI;
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (Flags == MachineMemOperand::MOStore)
storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
@@ -562,8 +568,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
AliasAnalysis *AA) const {
const MachineFunction &MF = *MI->getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetMachine &TM = MF.getTarget();
- const TargetInstrInfo &TII = *TM.getInstrInfo();
// Remat clients assume operand 0 is the defined register.
if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
@@ -582,7 +586,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
// redundant with subsequent checks, but it's target-independent,
// simple, and a common case.
int FrameIdx = 0;
- if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ if (isLoadFromStackSlot(MI, FrameIdx) &&
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
@@ -655,8 +659,8 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// saves compile time, because it doesn't require every single
// stack slot reference to depend on the instruction that does the
// modification.
- const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
return true;
@@ -746,14 +750,14 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
}
/// Return the default expected latency for a def based on its opcode.
-unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel,
const MachineInstr *DefMI) const {
if (DefMI->isTransient())
return 0;
if (DefMI->mayLoad())
- return SchedModel->LoadLatency;
+ return SchedModel.LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
- return SchedModel->HighLatency;
+ return SchedModel.HighLatency;
return 1;
}
@@ -852,3 +856,77 @@ computeOperandLatency(const InstrItineraryData *ItinData,
defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
+
+bool TargetInstrInfo::getRegSequenceInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
+ assert((MI.isRegSequence() ||
+ MI.isRegSequenceLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isRegSequence())
+ return getRegSequenceLikeInputs(MI, DefIdx, InputRegs);
+
+ // We are looking at:
+ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+ assert(DefIdx == 0 && "REG_SEQUENCE only has one def");
+ for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
+ OpIdx += 2) {
+ const MachineOperand &MOReg = MI.getOperand(OpIdx);
+ const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindices of the reg_sequence is not an immediate");
+ // Record Reg:SubReg, SubIdx.
+ InputRegs.push_back(RegSubRegPairAndIdx(MOReg.getReg(), MOReg.getSubReg(),
+ (unsigned)MOSubIdx.getImm()));
+ }
+ return true;
+}
+
+bool TargetInstrInfo::getExtractSubregInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const {
+ assert((MI.isExtractSubreg() ||
+ MI.isExtractSubregLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isExtractSubreg())
+ return getExtractSubregLikeInputs(MI, DefIdx, InputReg);
+
+ // We are looking at:
+ // Def = EXTRACT_SUBREG v0.sub1, sub0.
+ assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def");
+ const MachineOperand &MOReg = MI.getOperand(1);
+ const MachineOperand &MOSubIdx = MI.getOperand(2);
+ assert(MOSubIdx.isImm() &&
+ "The subindex of the extract_subreg is not an immediate");
+
+ InputReg.Reg = MOReg.getReg();
+ InputReg.SubReg = MOReg.getSubReg();
+ InputReg.SubIdx = (unsigned)MOSubIdx.getImm();
+ return true;
+}
+
+bool TargetInstrInfo::getInsertSubregInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const {
+ assert((MI.isInsertSubreg() ||
+ MI.isInsertSubregLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isInsertSubreg())
+ return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg);
+
+ // We are looking at:
+ // Def = INSERT_SUBREG v0, v1, sub0.
+ assert(DefIdx == 0 && "INSERT_SUBREG only has one def");
+ const MachineOperand &MOBaseReg = MI.getOperand(1);
+ const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ const MachineOperand &MOSubIdx = MI.getOperand(3);
+ assert(MOSubIdx.isImm() &&
+ "The subindex of the insert_subreg is not an immediate");
+ BaseReg.Reg = MOBaseReg.getReg();
+ BaseReg.SubReg = MOBaseReg.getSubReg();
+
+ InsertedReg.Reg = MOInsertedReg.getReg();
+ InsertedReg.SubReg = MOInsertedReg.getSubReg();
+ InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
+ return true;
+}
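Editor's note: the new getRegSequenceInputs walks a Def = REG_SEQUENCE v0, sub0, v1, sub1, ... operand list pairwise, starting at operand 1. Below is a standalone sketch of that pairing; the plain structs stand in for MachineOperand and RegSubRegPairAndIdx, and the register numbers are hypothetical.

    #include <cstdio>
    #include <vector>

    // Simplified stand-ins for the types used by getRegSequenceInputs.
    struct Operand { unsigned Reg; unsigned SubReg; long Imm; };
    struct RegSubRegPairAndIdx { unsigned Reg, SubReg, SubIdx; };

    int main() {
      // Operand 0 is the def, followed by (register, subregister-index) pairs,
      // e.g. Def = REG_SEQUENCE v0, sub0, v1, sub1.
      std::vector<Operand> Ops = {
          {100, 0, 0},            // def
          {101, 0, 0}, {0, 0, 1}, // v0, sub0 (= 1)
          {102, 0, 0}, {0, 0, 2}, // v1, sub1 (= 2)
      };

      std::vector<RegSubRegPairAndIdx> Inputs;
      for (size_t OpIdx = 1, EndOpIdx = Ops.size(); OpIdx != EndOpIdx; OpIdx += 2)
        Inputs.push_back({Ops[OpIdx].Reg, Ops[OpIdx].SubReg,
                          (unsigned)Ops[OpIdx + 1].Imm});

      for (const auto &I : Inputs)
        std::printf("input reg %u:%u at subidx %u\n", I.Reg, I.SubReg, I.SubIdx);
      return 0;
    }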
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index c574fd4..e833fd3 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -34,6 +34,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;
@@ -205,6 +206,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FLOOR_F80] = "floorl";
Names[RTLIB::FLOOR_F128] = "floorl";
Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::FMIN_F32] = "fminf";
+ Names[RTLIB::FMIN_F64] = "fmin";
+ Names[RTLIB::FMIN_F80] = "fminl";
+ Names[RTLIB::FMIN_F128] = "fminl";
+ Names[RTLIB::FMIN_PPCF128] = "fminl";
+ Names[RTLIB::FMAX_F32] = "fmaxf";
+ Names[RTLIB::FMAX_F64] = "fmax";
+ Names[RTLIB::FMAX_F80] = "fmaxl";
+ Names[RTLIB::FMAX_F128] = "fmaxl";
+ Names[RTLIB::FMAX_PPCF128] = "fmaxl";
Names[RTLIB::ROUND_F32] = "roundf";
Names[RTLIB::ROUND_F64] = "round";
Names[RTLIB::ROUND_F80] = "roundl";
@@ -220,6 +231,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2";
+ Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2";
+ Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2";
+ Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2";
Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
@@ -418,7 +433,10 @@ static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::f32)
+ return FPEXT_F16_F32;
+ } else if (OpVT == MVT::f32) {
if (RetVT == MVT::f64)
return FPEXT_F32_F64;
if (RetVT == MVT::f128)
@@ -434,7 +452,18 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
- if (RetVT == MVT::f32) {
+ if (RetVT == MVT::f16) {
+ if (OpVT == MVT::f32)
+ return FPROUND_F32_F16;
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F16;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F16;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F16;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F16;
+ } else if (RetVT == MVT::f32) {
if (OpVT == MVT::f64)
return FPROUND_F64_F32;
if (OpVT == MVT::f80)
@@ -665,10 +694,9 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
CCs[RTLIB::O_F128] = ISD::SETEQ;
}
-/// NOTE: The constructor takes ownership of TLOF.
-TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
- const TargetLoweringObjectFile *tlof)
- : TM(tm), DL(TM.getDataLayout()), TLOF(*tlof) {
+/// NOTE: The TargetMachine owns TLOF.
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
+ : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) {
initActions();
// Perform these initializations only once.
@@ -682,10 +710,11 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
IntDivIsCheap = false;
- Pow2DivIsCheap = false;
+ Pow2SDivIsCheap = false;
JumpIsExpensive = false;
PredictableSelectIsExpensive = false;
MaskAndBranchFoldingIsLegal = false;
+ HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
@@ -700,7 +729,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
PrefLoopAlignment = 0;
MinStackArgumentAlignment = 1;
InsertFencesForAtomic = false;
- SupportJumpTables = true;
MinimumJumpTableEntries = 4;
InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple()));
@@ -708,10 +736,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
InitLibcallCallingConvs(LibcallCallingConvs);
}
-TargetLoweringBase::~TargetLoweringBase() {
- delete &TLOF;
-}
-
void TargetLoweringBase::initActions() {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
@@ -738,6 +762,8 @@ void TargetLoweringBase::initActions() {
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMINNUM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMAXNUM, (MVT::SimpleValueType)VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand);
@@ -774,6 +800,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , MVT::f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
setOperationAction(ISD::FCEIL, MVT::f16, Expand);
setOperationAction(ISD::FRINT, MVT::f16, Expand);
@@ -785,6 +813,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , MVT::f32, Expand);
setOperationAction(ISD::FEXP2, MVT::f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
setOperationAction(ISD::FCEIL, MVT::f32, Expand);
setOperationAction(ISD::FRINT, MVT::f32, Expand);
@@ -796,6 +826,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FRINT, MVT::f64, Expand);
@@ -807,6 +839,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , MVT::f128, Expand);
setOperationAction(ISD::FEXP2, MVT::f128, Expand);
setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f128, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
setOperationAction(ISD::FCEIL, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
@@ -958,11 +992,10 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
// Add a new memory operand for this FI.
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- TM.getDataLayout()->getPointerSize(),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSize(),
+ MFI.getObjectAlignment(FI));
MIB->addMemOperand(MF, MMO);
// Replace the instruction and update the operand index.
@@ -978,7 +1011,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
/// of the register class for the specified type and its associated "cost".
std::pair<const TargetRegisterClass*, uint8_t>
TargetLoweringBase::findRepresentativeClass(MVT VT) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
@@ -1005,8 +1039,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const {
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties() {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
+ static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
// Everything defaults to needing one register.
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
@@ -1089,6 +1123,13 @@ void TargetLoweringBase::computeRegisterProperties() {
}
}
+ if (!isTypeLegal(MVT::f16)) {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
+ TransformToType[MVT::f16] = MVT::i16;
+ ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
+ }
+
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1154,8 +1195,12 @@ void TargetLoweringBase::computeRegisterProperties() {
TransformToType[i] = MVT::Other;
if (PreferredAction == TypeScalarizeVector)
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
- else
+ else if (PreferredAction == TypeSplitVector)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+ else
+ // Set type action according to the number of elements.
+ ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector
+ : TypeSplitVector);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
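Editor's note: among the TargetLoweringBase changes, getFPROUND now recognizes half-precision results and picks the matching truncation libcall. A standalone sketch of that dispatch; the enum is a local stand-in for the MVT/RTLIB enums, and the libcall names are the ones registered in InitLibcallNames above.

    #include <cstdio>

    // Local stand-in for the relevant MVT floating-point types.
    enum FPType { F16, F32, F64, F80, F128, PPCF128 };

    static const char *fproundLibcall(FPType OpVT, FPType RetVT) {
      if (RetVT == F16) {
        if (OpVT == F32)     return "__gnu_f2h_ieee"; // FPROUND_F32_F16
        if (OpVT == F64)     return "__truncdfhf2";   // FPROUND_F64_F16
        if (OpVT == F80)     return "__truncxfhf2";   // FPROUND_F80_F16
        if (OpVT == F128)    return "__trunctfhf2";   // FPROUND_F128_F16
        if (OpVT == PPCF128) return "__trunctfhf2";   // FPROUND_PPCF128_F16
      }
      return "UNKNOWN_LIBCALL";
    }

    int main() {
      std::printf("f64 -> f16: %s\n", fproundLibcall(F64, F16)); // __truncdfhf2
      std::printf("f32 -> f16: %s\n", fproundLibcall(F32, F16)); // __gnu_f2h_ieee
      return 0;
    }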
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 03f4a51..efd15e1 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
using namespace dwarf;
@@ -72,9 +73,10 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
- unsigned Size = TM.getDataLayout()->getPointerSize();
+ unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(
+ TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
@@ -287,7 +289,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
unsigned Align =
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
+ TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
+ cast<GlobalVariable>(GV));
const char *SizeSpec = ".rodata.str1.";
if (Kind.isMergeable2ByteCString())
@@ -338,8 +341,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
/// getSectionForConstant - Given a mergeable constant with the
/// specified size and relocation information, return a section that it
/// should be placed in.
-const MCSection *TargetLoweringObjectFileELF::
-getSectionForConstant(SectionKind Kind) const {
+const MCSection *
+TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind,
+ const Constant *C) const {
if (Kind.isMergeableConst4() && MergeableConst4Section)
return MergeableConst4Section;
if (Kind.isMergeableConst8() && MergeableConst8Section)
@@ -354,44 +358,59 @@ getSectionForConstant(SectionKind Kind) const {
return DataRelROSection;
}
-const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
- unsigned Priority, const MCSymbol *KeySym) const {
- // The default scheme is .ctor / .dtor, so we have to invert the priority
- // numbering.
- if (Priority == 65535)
- return StaticCtorSection;
+static const MCSectionELF *getStaticStructorSection(MCContext &Ctx,
+ bool UseInitArray,
+ bool IsCtor,
+ unsigned Priority,
+ const MCSymbol *KeySym) {
+ std::string Name;
+ unsigned Type;
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
+ SectionKind Kind = SectionKind::getDataRel();
+ StringRef COMDAT = KeySym ? KeySym->getName() : "";
+
+ if (KeySym)
+ Flags |= ELF::SHF_GROUP;
if (UseInitArray) {
- std::string Name = std::string(".init_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ if (IsCtor) {
+ Type = ELF::SHT_INIT_ARRAY;
+ Name = ".init_array";
+ } else {
+ Type = ELF::SHT_FINI_ARRAY;
+ Name = ".fini_array";
+ }
+ if (Priority != 65535) {
+ Name += '.';
+ Name += utostr(Priority);
+ }
} else {
- std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (IsCtor)
+ Name = ".ctors";
+ else
+ Name = ".dtors";
+ if (Priority != 65535) {
+ Name += '.';
+ Name += utostr(65535 - Priority);
+ }
+ Type = ELF::SHT_PROGBITS;
}
+
+ return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT);
}
-const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
+const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- // The default scheme is .ctor / .dtor, so we have to invert the priority
- // numbering.
- if (Priority == 65535)
- return StaticDtorSection;
+ return getStaticStructorSection(getContext(), UseInitArray, true, Priority,
+ KeySym);
+}
- if (UseInitArray) {
- std::string Name = std::string(".fini_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
- } else {
- std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
- }
+const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getStaticStructorSection(getContext(), UseInitArray, false, Priority,
+ KeySym);
}
void
@@ -565,10 +584,29 @@ bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols(
if (SMO.getKind().isMergeable1ByteCString())
return false;
+ if (SMO.getSegmentName() == "__TEXT" &&
+ SMO.getSectionName() == "__objc_classname" &&
+ SMO.getType() == MachO::S_CSTRING_LITERALS)
+ return false;
+
+ if (SMO.getSegmentName() == "__TEXT" &&
+ SMO.getSectionName() == "__objc_methname" &&
+ SMO.getType() == MachO::S_CSTRING_LITERALS)
+ return false;
+
+ if (SMO.getSegmentName() == "__TEXT" &&
+ SMO.getSectionName() == "__objc_methtype" &&
+ SMO.getType() == MachO::S_CSTRING_LITERALS)
+ return false;
+
if (SMO.getSegmentName() == "__DATA" &&
SMO.getSectionName() == "__cfstring")
return false;
+ // no_dead_strip sections are not atomized in practice.
+ if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
+ return false;
+
switch (SMO.getType()) {
default:
return true;
@@ -610,17 +648,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
return UStringSection;
- if (Kind.isMergeableConst()) {
+ // With MachO only variables whose corresponding symbol starts with 'l' or
+ // 'L' can be merged, so we only try merging GVs with private linkage.
+ if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) {
if (Kind.isMergeableConst4())
return FourByteConstantSection;
if (Kind.isMergeableConst8())
@@ -654,7 +696,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
}
const MCSection *
-TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind,
+ const Constant *C) const {
// If this constant requires a relocation, we have to put it in the data
// segment, not in the text segment.
if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
@@ -737,7 +780,7 @@ getCOFFSectionFlags(SectionKind K) {
COFF::IMAGE_SCN_MEM_EXECUTE |
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_CNT_CODE;
- else if (K.isBSS ())
+ else if (K.isBSS())
Flags |=
COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
@@ -747,7 +790,7 @@ getCOFFSectionFlags(SectionKind K) {
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE;
- else if (K.isReadOnly())
+ else if (K.isReadOnly() || K.isReadOnlyWithRel())
Flags |=
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ;
@@ -772,7 +815,7 @@ static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) {
if (ComdatGV->getComdat() != C)
report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
- "' is not a key for it's COMDAT.");
+ "' is not a key for its COMDAT.");
return ComdatGV;
}
@@ -841,9 +884,9 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
return ".bss";
if (Kind.isThreadLocal())
return ".tls$";
- if (Kind.isWriteable())
- return ".data";
- return ".rdata";
+ if (Kind.isReadOnly() || Kind.isReadOnlyWithRel())
+ return ".rdata";
+ return ".data";
}
@@ -891,7 +934,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isThreadLocal())
return TLSDataSection;
- if (Kind.isReadOnly())
+ if (Kind.isReadOnly() || Kind.isReadOnlyWithRel())
return ReadOnlySection;
// Note: we claim that common symbols are put in BSSSection, but they are
@@ -958,29 +1001,14 @@ emitModuleFlags(MCStreamer &Streamer,
}
}
-static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
- const MCSection *Sec,
- const MCSymbol *KeySym) {
- // Return the normal section if we don't have to be associative.
- if (!KeySym)
- return Sec;
-
- // Make an associative section with the same name and kind as the normal
- // section.
- const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec);
- unsigned Characteristics =
- SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT;
- return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics,
- SecCOFF->getKind(), KeySym->getName(),
- COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
-}
-
const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym);
+ return getContext().getAssociativeCOFFSection(
+ cast<MCSectionCOFF>(StaticCtorSection), KeySym);
}
const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym);
+ return getContext().getAssociativeCOFFSection(
+ cast<MCSectionCOFF>(StaticDtorSection), KeySym);
}
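Editor's note: the new getStaticStructorSection derives the ELF section name and type from three inputs — whether .init_array is used, whether the list holds constructors or destructors, and the priority, which the legacy .ctors/.dtors scheme inverts. A standalone sketch of just the naming logic:

    #include <iostream>
    #include <string>

    static std::string structorSectionName(bool UseInitArray, bool IsCtor,
                                           unsigned Priority) {
      std::string Name;
      if (UseInitArray) {
        Name = IsCtor ? ".init_array" : ".fini_array";  // SHT_INIT/FINI_ARRAY
        if (Priority != 65535)
          Name += '.' + std::to_string(Priority);
      } else {
        Name = IsCtor ? ".ctors" : ".dtors";            // SHT_PROGBITS
        // The default .ctors/.dtors scheme inverts the priority numbering.
        if (Priority != 65535)
          Name += '.' + std::to_string(65535 - Priority);
      }
      return Name;
    }

    int main() {
      std::cout << structorSectionName(true, true, 65535) << "\n"; // .init_array
      std::cout << structorSectionName(true, false, 101) << "\n";  // .fini_array.101
      std::cout << structorSectionName(false, true, 101) << "\n";  // .ctors.65434
      return 0;
    }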
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 3ca2017..618d903 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -51,3 +51,10 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const {
StringRef TargetOptions::getTrapFunctionName() const {
return TrapFuncName;
}
+
+/// getCFIFuncName - If this returns a non-empty string, then it is the name of
+/// the function that gets called on CFI violations in CFI non-enforcing mode
+/// (!TargetOptions::CFIEnforcing).
+StringRef TargetOptions::getCFIFuncName() const {
+ return CFIFuncName;
+}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index a3a4fb3..61a66b6 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -108,7 +109,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
/// register of the given type, picking the most sub register class of
/// the right type that contains this physreg.
const TargetRegisterClass *
-TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
assert(isPhysicalRegister(reg) && "reg must be a physical register");
// Pick the most sub register class of the right type that contains
@@ -293,3 +294,11 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// All clear, tell the register allocator to prefer this register.
Hints.push_back(Phys);
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void
+TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
+ const TargetRegisterInfo *TRI) {
+ dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n";
+}
+#endif
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index b0f2ca6..ef2dab1 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -16,7 +16,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -157,7 +156,7 @@ unsigned TargetSchedModel::computeOperandLatency(
const MachineInstr *UseMI, unsigned UseOperIdx) const {
if (!hasInstrSchedModel() && !hasInstrItineraries())
- return TII->defaultDefLatency(&SchedModel, DefMI);
+ return TII->defaultDefLatency(SchedModel, DefMI);
if (hasInstrItineraries()) {
int OperLatency = 0;
@@ -181,7 +180,7 @@ unsigned TargetSchedModel::computeOperandLatency(
// applicable to the InstrItins model. InstrSchedModel should model all
// special cases without TII hooks.
InstrLatency = std::max(InstrLatency,
- TII->defaultDefLatency(&SchedModel, DefMI));
+ TII->defaultDefLatency(SchedModel, DefMI));
return InstrLatency;
}
// hasInstrSchedModel()
@@ -222,7 +221,29 @@ unsigned TargetSchedModel::computeOperandLatency(
// FIXME: Automatically giving all implicit defs defaultDefLatency is
// undesirable. We should only do it for defs that are known to the MC
// desc like flags. Truly implicit defs should get 1 cycle latency.
- return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, DefMI);
+}
+
+unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
+ assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
+
+ unsigned SCIdx = TII->get(Opcode).getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);
+ unsigned Latency = 0;
+
+ if (SCDesc->isValid() && !SCDesc->isVariant()) {
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, capLatency(WLEntry->Cycles));
+ }
+ return Latency;
+ }
+
+ assert(Latency && "No MI sched latency");
+ return 0;
}
unsigned
@@ -248,7 +269,7 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
return Latency;
}
}
- return TII->defaultDefLatency(&SchedModel, MI);
+ return TII->defaultDefLatency(SchedModel, MI);
}
unsigned TargetSchedModel::
@@ -268,7 +289,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
// for predicated defs.
unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
const MachineFunction &MF = *DefMI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
return computeInstrLatency(DefMI);
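Editor's note: the new computeInstrLatency(unsigned Opcode) overload reduces the scheduling class's write-latency entries with a max. A standalone sketch of that reduction, with plain integers standing in for MCWriteLatencyEntry::Cycles and the capLatency() clamp omitted:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      // Hypothetical per-def write latencies for one scheduling class.
      std::vector<unsigned> WriteLatencyCycles = {3, 1, 5};

      unsigned Latency = 0;
      for (unsigned Cycles : WriteLatencyCycles)
        Latency = std::max(Latency, Cycles);

      std::printf("scheduling-model latency: %u\n", Latency);  // prints 5
      return 0;
    }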
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index f42d47b..e218a83 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -48,6 +48,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "twoaddrinstr"
@@ -544,10 +545,21 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
if (ToRegA) {
unsigned FromRegB = getMappedReg(regB, SrcRegMap);
unsigned FromRegC = getMappedReg(regC, SrcRegMap);
- bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI);
- bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI);
- if (BComp != CComp)
- return !BComp && CComp;
+ bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI);
+ bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI);
+
+ // Commute if any of the following are true:
+ // -RegB is not tied to a register and RegC is compatible with RegA.
+ // -RegB is tied to the wrong physical register, but RegC is.
+ // -RegB is tied to the wrong physical register, and RegC isn't tied.
+ if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC)))
+ return true;
+ // Don't commute if any of the following are true:
+ // -RegC is not tied to a register and RegB is compatible with RegA.
+ // -RegC is tied to the wrong physical register, but RegB is.
+ // -RegC is tied to the wrong physical register, and RegB isn't tied.
+ if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB)))
+ return false;
}
// If there is a use of regC between its last def (could be livein) and this
@@ -666,7 +678,7 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) {
unsigned Reg = DstReg;
while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
NewReg, IsDstPhys)) {
- if (IsCopy && !Processed.insert(UseMI))
+ if (IsCopy && !Processed.insert(UseMI).second)
break;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
@@ -1503,9 +1515,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const TargetMachine &TM = MF->getTarget();
MRI = &MF->getRegInfo();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
- InstrItins = TM.getInstrItineraryData();
+ TII = TM.getSubtargetImpl()->getInstrInfo();
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ InstrItins = TM.getSubtargetImpl()->getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
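Editor's note: the rewritten isProfitableToCommute test earlier in this file distinguishes an operand that is not tied at all from one tied to an incompatible physical register. A standalone sketch of the two decision expressions; the booleans stand in for the SrcRegMap/DstRegMap lookups and the regsAreCompatible checks.

    #include <cstdio>

    // FromRegB/FromRegC: is the operand tied to some physical register at all?
    // CompB/CompC: is that physical register compatible with RegA's target?
    static const char *commuteDecision(bool FromRegB, bool CompB,
                                       bool FromRegC, bool CompC) {
      if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC)))
        return "commute";       // RegC lines up better than RegB
      if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB)))
        return "keep order";    // RegB lines up better than RegC
      return "no preference";
    }

    int main() {
      // RegB untied, RegC compatible: prefer commuting.
      std::printf("%s\n", commuteDecision(false, false, true, true));
      // RegC untied, RegB compatible: keep the current operand order.
      std::printf("%s\n", commuteDecision(true, true, false, false));
      return 0;
    }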
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 2e22082..7824f92 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -64,9 +64,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
SmallPtrSet<BasicBlock*, 8> Reachable;
// Mark all reachable blocks.
- for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I =
- df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I)
- /* Mark all reachable blocks */;
+ for (BasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
// Loop over all dead blocks, remembering them and deleting all instructions
// in them.
@@ -125,10 +124,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
// Mark all reachable blocks.
- for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
- I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
- I != E; ++I)
- /* Mark all reachable blocks */;
+ for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
// Loop over all dead blocks, remembering them and deleting all instructions
// in them.
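Editor's note: both loops above exist only for their side effect — the depth-first walk fills the external Reachable set, and the loop body is deliberately empty. A standalone sketch of the same idea on a toy CFG, with a recursive DFS standing in for depth_first_ext:

    #include <cstdio>
    #include <set>
    #include <vector>

    // Mark every block reachable from BB in the external set.
    static void markReachable(unsigned BB,
                              const std::vector<std::vector<unsigned>> &Succs,
                              std::set<unsigned> &Reachable) {
      if (!Reachable.insert(BB).second)
        return;  // already visited
      for (unsigned S : Succs[BB])
        markReachable(S, Succs, Reachable);
    }

    int main() {
      // Toy CFG: block 3 is never reached from the entry block 0.
      std::vector<std::vector<unsigned>> Succs = {{1, 2}, {2}, {}, {2}};
      std::set<unsigned> Reachable;
      markReachable(0, Succs, Reachable);

      for (unsigned BB = 0; BB < Succs.size(); ++BB)
        if (!Reachable.count(BB))
          std::printf("block %u is unreachable and can be removed\n", BB);
      return 0;
    }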
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 704736f..0d17d43 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -36,6 +36,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -54,8 +55,8 @@ INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
- TII = mf.getTarget().getInstrInfo();
- TRI = mf.getTarget().getRegisterInfo();
+ TII = mf.getSubtarget().getInstrInfo();
+ TRI = mf.getSubtarget().getRegisterInfo();
MF = &mf;
Virt2PhysMap.clear();
@@ -123,7 +124,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
OS << '[' << PrintReg(Reg, TRI) << " -> "
<< PrintReg(Virt2PhysMap[Reg], TRI) << "] "
- << MRI->getRegClass(Reg)->getName() << "\n";
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
}
}
@@ -131,7 +132,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
- << "] " << MRI->getRegClass(Reg)->getName() << "\n";
+ << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
}
}
OS << '\n';
@@ -205,8 +206,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
TM = &MF->getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
MRI = &MF->getRegInfo();
Indexes = &getAnalysis<SlotIndexes>();
LIS = &getAnalysis<LiveIntervals>();
diff --git a/lib/DebugInfo/Android.mk b/lib/DebugInfo/Android.mk
index 12dfb3b..e777e9c 100644
--- a/lib/DebugInfo/Android.mk
+++ b/lib/DebugInfo/Android.mk
@@ -3,6 +3,7 @@ LOCAL_PATH:= $(call my-dir)
debuginfo_SRC_FILES := \
DIContext.cpp \
DWARFAbbreviationDeclaration.cpp \
+ DWARFAcceleratorTable.cpp \
DWARFCompileUnit.cpp \
DWARFContext.cpp \
DWARFDebugAbbrev.cpp \
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
index 61a3fb0..81fc84d 100644
--- a/lib/DebugInfo/CMakeLists.txt
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMDebugInfo
DIContext.cpp
DWARFAbbreviationDeclaration.cpp
+ DWARFAcceleratorTable.cpp
DWARFCompileUnit.cpp
DWARFContext.cpp
DWARFDebugAbbrev.cpp
diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp
index 49a4409..01aecf8 100644
--- a/lib/DebugInfo/DIContext.cpp
+++ b/lib/DebugInfo/DIContext.cpp
@@ -13,6 +13,6 @@ using namespace llvm;
DIContext::~DIContext() {}
-DIContext *DIContext::getDWARFContext(object::ObjectFile *Obj) {
+DIContext *DIContext::getDWARFContext(const object::ObjectFile &Obj) {
return new DWARFContextInMemory(Obj);
}
diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/lib/DebugInfo/DWARFAbbreviationDeclaration.h
index b86b9ec..bb05c30 100644
--- a/lib/DebugInfo/DWARFAbbreviationDeclaration.h
+++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
-#define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
+#define LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataExtractor.h"
diff --git a/lib/DebugInfo/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARFAcceleratorTable.cpp
new file mode 100644
index 0000000..703274d
--- /dev/null
+++ b/lib/DebugInfo/DWARFAcceleratorTable.cpp
@@ -0,0 +1,133 @@
+//===--- DWARFAcceleratorTable.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFAcceleratorTable.h"
+
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+bool DWARFAcceleratorTable::extract() {
+ uint32_t Offset = 0;
+
+ // Check that we can at least read the header.
+ if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength)+4))
+ return false;
+
+ Hdr.Magic = AccelSection.getU32(&Offset);
+ Hdr.Version = AccelSection.getU16(&Offset);
+ Hdr.HashFunction = AccelSection.getU16(&Offset);
+ Hdr.NumBuckets = AccelSection.getU32(&Offset);
+ Hdr.NumHashes = AccelSection.getU32(&Offset);
+ Hdr.HeaderDataLength = AccelSection.getU32(&Offset);
+
+ // Check that we can read all the hashes and offsets from the
+ // section (see SourceLevelDebugging.rst for the structure of the index).
+ if (!AccelSection.isValidOffset(sizeof(Hdr) + Hdr.HeaderDataLength +
+ Hdr.NumBuckets*4 + Hdr.NumHashes*8))
+ return false;
+
+ HdrData.DIEOffsetBase = AccelSection.getU32(&Offset);
+ uint32_t NumAtoms = AccelSection.getU32(&Offset);
+
+ for (unsigned i = 0; i < NumAtoms; ++i) {
+ uint16_t AtomType = AccelSection.getU16(&Offset);
+ uint16_t AtomForm = AccelSection.getU16(&Offset);
+ HdrData.Atoms.push_back(std::make_pair(AtomType, AtomForm));
+ }
+
+ return true;
+}
+
+void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
+ // Dump the header.
+ OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n'
+ << "Version = " << format("0x%04x", Hdr.Version) << '\n'
+ << "Hash function = " << format("0x%08x", Hdr.HashFunction) << '\n'
+ << "Bucket count = " << Hdr.NumBuckets << '\n'
+ << "Hashes count = " << Hdr.NumHashes << '\n'
+ << "HeaderData length = " << Hdr.HeaderDataLength << '\n'
+ << "DIE offset base = " << HdrData.DIEOffsetBase << '\n'
+ << "Number of atoms = " << HdrData.Atoms.size() << '\n';
+
+ unsigned i = 0;
+ SmallVector<DWARFFormValue, 3> AtomForms;
+ for (const auto &Atom: HdrData.Atoms) {
+ OS << format("Atom[%d] Type: ", i++);
+ if (const char *TypeString = dwarf::AtomTypeString(Atom.first))
+ OS << TypeString;
+ else
+ OS << format("DW_ATOM_Unknown_0x%x", Atom.first);
+ OS << " Form: ";
+ if (const char *FormString = dwarf::FormEncodingString(Atom.second))
+ OS << FormString;
+ else
+ OS << format("DW_FORM_Unknown_0x%x", Atom.second);
+ OS << '\n';
+ AtomForms.push_back(DWARFFormValue(Atom.second));
+ }
+
+ // Now go through the actual tables and dump them.
+ uint32_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength;
+ unsigned HashesBase = Offset + Hdr.NumBuckets * 4;
+ unsigned OffsetsBase = HashesBase + Hdr.NumHashes * 4;
+
+ for (unsigned Bucket = 0; Bucket < Hdr.NumBuckets; ++Bucket) {
+ unsigned Index = AccelSection.getU32(&Offset);
+
+ OS << format("Bucket[%d]\n", Bucket);
+ if (Index == UINT32_MAX) {
+ OS << " EMPTY\n";
+ continue;
+ }
+
+ for (unsigned HashIdx = Index; HashIdx < Hdr.NumHashes; ++HashIdx) {
+ unsigned HashOffset = HashesBase + HashIdx*4;
+ unsigned OffsetsOffset = OffsetsBase + HashIdx*4;
+ uint32_t Hash = AccelSection.getU32(&HashOffset);
+
+ if (Hash % Hdr.NumBuckets != Bucket)
+ break;
+
+ unsigned DataOffset = AccelSection.getU32(&OffsetsOffset);
+ OS << format(" Hash = 0x%08x Offset = 0x%08x\n", Hash, DataOffset);
+ if (!AccelSection.isValidOffset(DataOffset)) {
+ OS << " Invalid section offset\n";
+ continue;
+ }
+ while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) {
+ unsigned StringOffset = AccelSection.getU32(&DataOffset);
+ RelocAddrMap::const_iterator Reloc = Relocs.find(DataOffset-4);
+ if (Reloc != Relocs.end())
+ StringOffset += Reloc->second.second;
+ if (!StringOffset)
+ break;
+ OS << format(" Name: %08x \"%s\"\n", StringOffset,
+ StringSection.getCStr(&StringOffset));
+ unsigned NumData = AccelSection.getU32(&DataOffset);
+ for (unsigned Data = 0; Data < NumData; ++Data) {
+ OS << format(" Data[%d] => ", Data);
+ unsigned i = 0;
+ for (auto &Atom : AtomForms) {
+ OS << format("{Atom[%d]: ", i++);
+ if (Atom.extractValue(AccelSection, &DataOffset, nullptr))
+ Atom.dump(OS, nullptr);
+ else
+ OS << "Error extracting the value";
+ OS << "} ";
+ }
+ OS << '\n';
+ }
+ }
+ }
+ }
+}
+}
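Editor's note: the dump() above relies on the table's on-disk order — fixed header, header data (atom descriptions), NumBuckets bucket indices, NumHashes hash values, then NumHashes data offsets, all 4-byte fields past the header. A standalone sketch of the offset arithmetic with hypothetical sizes; the 20-byte header and single atom are assumptions, not values read from a real table.

    #include <cstdio>

    int main() {
      // Hypothetical header contents as DWARFAcceleratorTable::extract() reads them.
      unsigned HeaderSize       = 20;  // Magic, Version, HashFunction, NumBuckets, NumHashes, HeaderDataLength
      unsigned HeaderDataLength = 12;  // DIEOffsetBase + atom count + one (type, form) atom
      unsigned NumBuckets       = 8;
      unsigned NumHashes        = 16;

      // Same layout arithmetic as dump(): buckets, then hashes, then data offsets.
      unsigned BucketsBase = HeaderSize + HeaderDataLength;
      unsigned HashesBase  = BucketsBase + NumBuckets * 4;
      unsigned OffsetsBase = HashesBase + NumHashes * 4;

      unsigned HashIdx = 5;  // hypothetical index read out of a bucket
      std::printf("hash slot at 0x%x, data-offset slot at 0x%x\n",
                  HashesBase + HashIdx * 4, OffsetsBase + HashIdx * 4);
      return 0;
    }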
diff --git a/lib/DebugInfo/DWARFAcceleratorTable.h b/lib/DebugInfo/DWARFAcceleratorTable.h
new file mode 100644
index 0000000..7dc9591
--- /dev/null
+++ b/lib/DebugInfo/DWARFAcceleratorTable.h
@@ -0,0 +1,51 @@
+//===--- DWARFAcceleratorTable.h --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFRelocMap.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
+
+#include <cstdint>
+
+namespace llvm {
+
+class DWARFAcceleratorTable {
+
+ struct Header {
+ uint32_t Magic;
+ uint16_t Version;
+ uint16_t HashFunction;
+ uint32_t NumBuckets;
+ uint32_t NumHashes;
+ uint32_t HeaderDataLength;
+ };
+
+ struct HeaderData {
+ typedef uint16_t AtomType;
+ typedef uint16_t Form;
+ uint32_t DIEOffsetBase;
+ SmallVector<std::pair<AtomType, Form>, 3> Atoms;
+ };
+
+ struct Header Hdr;
+ struct HeaderData HdrData;
+ DataExtractor AccelSection;
+ DataExtractor StringSection;
+ const RelocAddrMap& Relocs;
+public:
+ DWARFAcceleratorTable(DataExtractor AccelSection, DataExtractor StringSection,
+ const RelocAddrMap &Relocs)
+ : AccelSection(AccelSection), StringSection(StringSection), Relocs(Relocs) {}
+
+ bool extract();
+ void dump(raw_ostream &OS) const;
+};
+
+}
diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h
index 2ed188e..b3190b18 100644
--- a/lib/DebugInfo/DWARFCompileUnit.h
+++ b/lib/DebugInfo/DWARFCompileUnit.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
-#define LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFCOMPILEUNIT_H
+#define LLVM_LIB_DEBUGINFO_DWARFCOMPILEUNIT_H
#include "DWARFUnit.h"
@@ -16,10 +16,11 @@ namespace llvm {
class DWARFCompileUnit : public DWARFUnit {
public:
- DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS,
- StringRef SS, StringRef SOS, StringRef AOS,
- const RelocAddrMap *M, bool LE)
- : DWARFUnit(DA, IS, RS, SS, SOS, AOS, M, LE) {}
+ DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section,
+ const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
+ StringRef SOS, StringRef AOS, bool LE,
+ const DWARFUnitSectionBase &UnitSection)
+ : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {}
void dump(raw_ostream &OS);
// VTable anchor.
~DWARFCompileUnit() override;
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index 3961905..9a2c7cc 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -9,6 +9,7 @@
#include "DWARFContext.h"
#include "DWARFDebugArangeSet.h"
+#include "DWARFAcceleratorTable.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
@@ -59,6 +60,18 @@ static void dumpPubSection(raw_ostream &OS, StringRef Name, StringRef Data,
}
}
+static void dumpAccelSection(raw_ostream &OS, StringRef Name,
+ const DWARFSection& Section, StringRef StringSection,
+ bool LittleEndian) {
+ DataExtractor AccelSection(Section.Data, LittleEndian, 0);
+ DataExtractor StrData(StringSection, LittleEndian, 0);
+ OS << "\n." << Name << " contents:\n";
+ DWARFAcceleratorTable Accel(AccelSection, StrData, Section.Relocs);
+ if (!Accel.extract())
+ return;
+ Accel.dump(OS);
+}
+
void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) {
OS << ".debug_abbrev contents:\n";
@@ -86,15 +99,17 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
if ((DumpType == DIDT_All || DumpType == DIDT_Types) && getNumTypeUnits()) {
OS << "\n.debug_types contents:\n";
- for (const auto &TU : type_units())
- TU->dump(OS);
+ for (const auto &TUS : type_unit_sections())
+ for (const auto &TU : TUS)
+ TU->dump(OS);
}
if ((DumpType == DIDT_All || DumpType == DIDT_TypesDwo) &&
getNumDWOTypeUnits()) {
OS << "\n.debug_types.dwo contents:\n";
- for (const auto &DWOTU : dwo_type_units())
- DWOTU->dump(OS);
+ for (const auto &DWOTUS : dwo_type_unit_sections())
+ for (const auto &DWOTU : DWOTUS)
+ DWOTU->dump(OS);
}
if (DumpType == DIDT_All || DumpType == DIDT_Loc) {
@@ -216,6 +231,22 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
}
}
+
+ if (DumpType == DIDT_All || DumpType == DIDT_AppleNames)
+ dumpAccelSection(OS, "apple_names", getAppleNamesSection(),
+ getStringSection(), isLittleEndian());
+
+ if (DumpType == DIDT_All || DumpType == DIDT_AppleTypes)
+ dumpAccelSection(OS, "apple_types", getAppleTypesSection(),
+ getStringSection(), isLittleEndian());
+
+ if (DumpType == DIDT_All || DumpType == DIDT_AppleNamespaces)
+ dumpAccelSection(OS, "apple_namespaces", getAppleNamespacesSection(),
+ getStringSection(), isLittleEndian());
+
+ if (DumpType == DIDT_All || DumpType == DIDT_AppleObjC)
+ dumpAccelSection(OS, "apple_objc", getAppleObjCSection(),
+ getStringSection(), isLittleEndian());
}
const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
@@ -291,7 +322,7 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
}
const DWARFLineTable *
-DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
+DWARFContext::getLineTableForUnit(DWARFUnit *cu) {
if (!Line)
Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
@@ -312,110 +343,34 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
}
void DWARFContext::parseCompileUnits() {
- if (!CUs.empty())
- return;
- uint32_t offset = 0;
- const DataExtractor &DIData = DataExtractor(getInfoSection().Data,
- isLittleEndian(), 0);
- while (DIData.isValidOffset(offset)) {
- std::unique_ptr<DWARFCompileUnit> CU(new DWARFCompileUnit(
- getDebugAbbrev(), getInfoSection().Data, getRangeSection(),
- getStringSection(), StringRef(), getAddrSection(),
- &getInfoSection().Relocs, isLittleEndian()));
- if (!CU->extract(DIData, &offset)) {
- break;
- }
- CUs.push_back(std::move(CU));
- offset = CUs.back()->getNextUnitOffset();
- }
+ CUs.parse(*this, getInfoSection());
}
void DWARFContext::parseTypeUnits() {
if (!TUs.empty())
return;
for (const auto &I : getTypesSections()) {
- uint32_t offset = 0;
- const DataExtractor &DIData =
- DataExtractor(I.second.Data, isLittleEndian(), 0);
- while (DIData.isValidOffset(offset)) {
- std::unique_ptr<DWARFTypeUnit> TU(
- new DWARFTypeUnit(getDebugAbbrev(), I.second.Data, getRangeSection(),
- getStringSection(), StringRef(), getAddrSection(),
- &I.second.Relocs, isLittleEndian()));
- if (!TU->extract(DIData, &offset))
- break;
- TUs.push_back(std::move(TU));
- offset = TUs.back()->getNextUnitOffset();
- }
+ TUs.push_back(DWARFUnitSection<DWARFTypeUnit>());
+ TUs.back().parse(*this, I.second);
}
}
void DWARFContext::parseDWOCompileUnits() {
- if (!DWOCUs.empty())
- return;
- uint32_t offset = 0;
- const DataExtractor &DIData =
- DataExtractor(getInfoDWOSection().Data, isLittleEndian(), 0);
- while (DIData.isValidOffset(offset)) {
- std::unique_ptr<DWARFCompileUnit> DWOCU(new DWARFCompileUnit(
- getDebugAbbrevDWO(), getInfoDWOSection().Data, getRangeDWOSection(),
- getStringDWOSection(), getStringOffsetDWOSection(), getAddrSection(),
- &getInfoDWOSection().Relocs, isLittleEndian()));
- if (!DWOCU->extract(DIData, &offset)) {
- break;
- }
- DWOCUs.push_back(std::move(DWOCU));
- offset = DWOCUs.back()->getNextUnitOffset();
- }
+ DWOCUs.parseDWO(*this, getInfoDWOSection());
}
void DWARFContext::parseDWOTypeUnits() {
if (!DWOTUs.empty())
return;
for (const auto &I : getTypesDWOSections()) {
- uint32_t offset = 0;
- const DataExtractor &DIData =
- DataExtractor(I.second.Data, isLittleEndian(), 0);
- while (DIData.isValidOffset(offset)) {
- std::unique_ptr<DWARFTypeUnit> TU(new DWARFTypeUnit(
- getDebugAbbrevDWO(), I.second.Data, getRangeDWOSection(),
- getStringDWOSection(), getStringOffsetDWOSection(), getAddrSection(),
- &I.second.Relocs, isLittleEndian()));
- if (!TU->extract(DIData, &offset))
- break;
- DWOTUs.push_back(std::move(TU));
- offset = DWOTUs.back()->getNextUnitOffset();
- }
+ DWOTUs.push_back(DWARFUnitSection<DWARFTypeUnit>());
+ DWOTUs.back().parseDWO(*this, I.second);
}
}
-namespace {
- struct OffsetComparator {
-
- bool operator()(const std::unique_ptr<DWARFCompileUnit> &LHS,
- const std::unique_ptr<DWARFCompileUnit> &RHS) const {
- return LHS->getOffset() < RHS->getOffset();
- }
- bool operator()(const std::unique_ptr<DWARFCompileUnit> &LHS,
- uint32_t RHS) const {
- return LHS->getOffset() < RHS;
- }
- bool operator()(uint32_t LHS,
- const std::unique_ptr<DWARFCompileUnit> &RHS) const {
- return LHS < RHS->getOffset();
- }
- };
-}
-
DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
parseCompileUnits();
-
- std::unique_ptr<DWARFCompileUnit> *CU =
- std::lower_bound(CUs.begin(), CUs.end(), Offset, OffsetComparator());
- if (CU != CUs.end()) {
- return CU->get();
- }
- return nullptr;
+ return CUs.getUnitForOffset(Offset);
}
DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
@@ -425,47 +380,6 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
return getCompileUnitForOffset(CUOffset);
}
-static bool getFileNameForCompileUnit(DWARFCompileUnit *CU,
- const DWARFLineTable *LineTable,
- uint64_t FileIndex, FileLineInfoKind Kind,
- std::string &FileName) {
- if (!CU || !LineTable || Kind == FileLineInfoKind::None ||
- !LineTable->getFileNameByIndex(FileIndex, Kind, FileName))
- return false;
- if (Kind == FileLineInfoKind::AbsoluteFilePath &&
- sys::path::is_relative(FileName)) {
- // We may still need to append compilation directory of compile unit.
- SmallString<16> AbsolutePath;
- if (const char *CompilationDir = CU->getCompilationDir()) {
- sys::path::append(AbsolutePath, CompilationDir);
- }
- sys::path::append(AbsolutePath, FileName);
- FileName = AbsolutePath.str();
- }
- return true;
-}
-
-static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU,
- const DWARFLineTable *LineTable,
- uint64_t Address,
- FileLineInfoKind Kind,
- DILineInfo &Result) {
- if (!CU || !LineTable)
- return false;
- // Get the index of row we're looking for in the line table.
- uint32_t RowIndex = LineTable->lookupAddress(Address);
- if (RowIndex == -1U)
- return false;
- // Take file number and line/column from the row.
- const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex];
- if (!getFileNameForCompileUnit(CU, LineTable, Row.File, Kind,
- Result.FileName))
- return false;
- Result.Line = Row.Line;
- Result.Column = Row.Column;
- return true;
-}
-
static bool getFunctionNameForAddress(DWARFCompileUnit *CU, uint64_t Address,
FunctionNameKind Kind,
std::string &FunctionName) {
@@ -496,8 +410,9 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
return Result;
getFunctionNameForAddress(CU, Address, Spec.FNKind, Result.FunctionName);
if (Spec.FLIKind != FileLineInfoKind::None) {
- const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
- getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind, Result);
+ if (const DWARFLineTable *LineTable = getLineTableForUnit(CU))
+ LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
+ Spec.FLIKind, Result);
}
return Result;
}
@@ -522,7 +437,7 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
return Lines;
}
- const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
+ const DWARFLineTable *LineTable = getLineTableForUnit(CU);
// Get the index of row we're looking for in the line table.
std::vector<uint32_t> RowVector;
@@ -533,8 +448,8 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
// Take file number and line/column from the row.
const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex];
DILineInfo Result;
- getFileNameForCompileUnit(CU, LineTable, Row.File, Spec.FLIKind,
- Result.FileName);
+ LineTable->getFileNameByIndex(Row.File, CU->getCompilationDir(),
+ Spec.FLIKind, Result.FileName);
Result.FunctionName = FunctionName;
Result.Line = Row.Line;
Result.Column = Row.Column;
@@ -561,11 +476,11 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
// try to at least get file/line info from symbol table.
if (Spec.FLIKind != FileLineInfoKind::None) {
DILineInfo Frame;
- LineTable = getLineTableForCompileUnit(CU);
- if (getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind,
- Frame)) {
+ LineTable = getLineTableForUnit(CU);
+ if (LineTable &&
+ LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
+ Spec.FLIKind, Frame))
InliningInfo.addFrame(Frame);
- }
}
return InliningInfo;
}
@@ -582,15 +497,17 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
if (i == 0) {
// For the topmost frame, initialize the line table of this
// compile unit and fetch file/line info from it.
- LineTable = getLineTableForCompileUnit(CU);
+ LineTable = getLineTableForUnit(CU);
// For the topmost routine, get file/line info from line table.
- getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind,
- Frame);
+ if (LineTable)
+ LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
+ Spec.FLIKind, Frame);
} else {
// Otherwise, use call file, call line and call column from
// previous DIE in inlined chain.
- getFileNameForCompileUnit(CU, LineTable, CallFile, Spec.FLIKind,
- Frame.FileName);
+ if (LineTable)
+ LineTable->getFileNameByIndex(CallFile, CU->getCompilationDir(),
+ Spec.FLIKind, Frame.FileName);
Frame.Line = CallLine;
Frame.Column = CallColumn;
}
@@ -621,12 +538,19 @@ static bool consumeCompressedDebugSectionHeader(StringRef &data,
return true;
}
-DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
- : IsLittleEndian(Obj->isLittleEndian()),
- AddressSize(Obj->getBytesInAddress()) {
- for (const SectionRef &Section : Obj->sections()) {
+DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj)
+ : IsLittleEndian(Obj.isLittleEndian()),
+ AddressSize(Obj.getBytesInAddress()) {
+ for (const SectionRef &Section : Obj.sections()) {
StringRef name;
Section.getName(name);
+ // Skip BSS and virtual sections; they aren't interesting.
+ bool IsBSS = Section.isBSS();
+ if (IsBSS)
+ continue;
+ bool IsVirtual = Section.isVirtual();
+ if (IsVirtual)
+ continue;
StringRef data;
Section.getContents(data);
@@ -670,6 +594,11 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
.Case("debug_str.dwo", &StringDWOSection)
.Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
.Case("debug_addr", &AddrSection)
+ .Case("apple_names", &AppleNamesSection.Data)
+ .Case("apple_types", &AppleTypesSection.Data)
+ .Case("apple_namespaces", &AppleNamespacesSection.Data)
+ .Case("apple_namespac", &AppleNamespacesSection.Data)
+ .Case("apple_objc", &AppleObjCSection.Data)
// Any more debug info sections go here.
.Default(nullptr);
if (SectionData) {
@@ -687,7 +616,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
}
section_iterator RelocatedSection = Section.getRelocatedSection();
- if (RelocatedSection == Obj->section_end())
+ if (RelocatedSection == Obj.section_end())
continue;
StringRef RelSecName;
@@ -702,6 +631,11 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
.Case("debug_loc", &LocSection.Relocs)
.Case("debug_info.dwo", &InfoDWOSection.Relocs)
.Case("debug_line", &LineSection.Relocs)
+ .Case("apple_names", &AppleNamesSection.Relocs)
+ .Case("apple_types", &AppleTypesSection.Relocs)
+ .Case("apple_namespaces", &AppleNamespacesSection.Relocs)
+ .Case("apple_namespac", &AppleNamespacesSection.Relocs)
+ .Case("apple_objc", &AppleObjCSection.Relocs)
.Default(nullptr);
if (!Map) {
// Find debug_types relocs by section rather than name as there are
@@ -715,23 +649,19 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
}
if (Section.relocation_begin() != Section.relocation_end()) {
- uint64_t SectionSize;
- RelocatedSection->getSize(SectionSize);
+ uint64_t SectionSize = RelocatedSection->getSize();
for (const RelocationRef &Reloc : Section.relocations()) {
uint64_t Address;
Reloc.getOffset(Address);
uint64_t Type;
Reloc.getType(Type);
uint64_t SymAddr = 0;
- // ELF relocations may need the symbol address
- if (Obj->isELF()) {
- object::symbol_iterator Sym = Reloc.getSymbol();
+ object::symbol_iterator Sym = Reloc.getSymbol();
+ if (Sym != Obj.symbol_end())
Sym->getAddress(SymAddr);
- }
- object::RelocVisitor V(Obj->getFileFormatName());
- // The section address is always 0 for debug sections.
- object::RelocToApply R(V.visit(Type, Reloc, 0, SymAddr));
+ object::RelocVisitor V(Obj);
+ object::RelocToApply R(V.visit(Type, Reloc, SymAddr));
if (V.error()) {
SmallString<32> Name;
std::error_code ec(Reloc.getTypeName(Name));
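A usage sketch of the reworked in-memory context (not part of the patch; Obj is assumed to be a valid object::ObjectFile loaded elsewhere):

#include "DWARFContext.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void dumpAccelTables(const object::ObjectFile &Obj) {
  DWARFContextInMemory DICtx(Obj);       // now takes a reference, not a pointer
  DICtx.dump(outs(), DIDT_AppleNames);   // one of the new accelerator dump types
  DICtx.dump(outs(), DIDT_AppleTypes);
}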
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index 6d1ae92..dd3fcc7 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===/
-#ifndef LLVM_DEBUGINFO_DWARFCONTEXT_H
-#define LLVM_DEBUGINFO_DWARFCONTEXT_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFCONTEXT_H
+#define LLVM_LIB_DEBUGINFO_DWARFCONTEXT_H
#include "DWARFCompileUnit.h"
#include "DWARFDebugAranges.h"
@@ -16,10 +16,12 @@
#include "DWARFDebugLine.h"
#include "DWARFDebugLoc.h"
#include "DWARFDebugRangeList.h"
+#include "DWARFSection.h"
#include "DWARFTypeUnit.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DIContext.h"
+#include <vector>
namespace llvm {
@@ -28,19 +30,17 @@ namespace llvm {
/// information parsing. The actual data is supplied through pure virtual
/// methods that a concrete implementation provides.
class DWARFContext : public DIContext {
- typedef SmallVector<std::unique_ptr<DWARFCompileUnit>, 1> CUVector;
- typedef SmallVector<std::unique_ptr<DWARFTypeUnit>, 1> TUVector;
- CUVector CUs;
- TUVector TUs;
+ DWARFUnitSection<DWARFCompileUnit> CUs;
+ std::vector<DWARFUnitSection<DWARFTypeUnit>> TUs;
std::unique_ptr<DWARFDebugAbbrev> Abbrev;
std::unique_ptr<DWARFDebugLoc> Loc;
std::unique_ptr<DWARFDebugAranges> Aranges;
std::unique_ptr<DWARFDebugLine> Line;
std::unique_ptr<DWARFDebugFrame> DebugFrame;
- CUVector DWOCUs;
- TUVector DWOTUs;
+ DWARFUnitSection<DWARFCompileUnit> DWOCUs;
+ std::vector<DWARFUnitSection<DWARFTypeUnit>> DWOTUs;
std::unique_ptr<DWARFDebugAbbrev> AbbrevDWO;
std::unique_ptr<DWARFDebugLocDWO> LocDWO;
@@ -64,11 +64,6 @@ class DWARFContext : public DIContext {
void parseDWOTypeUnits();
public:
- struct Section {
- StringRef Data;
- RelocAddrMap Relocs;
- };
-
DWARFContext() : DIContext(CK_DWARF) {}
static bool classof(const DIContext *DICtx) {
@@ -77,8 +72,9 @@ public:
void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All) override;
- typedef iterator_range<CUVector::iterator> cu_iterator_range;
- typedef iterator_range<TUVector::iterator> tu_iterator_range;
+ typedef DWARFUnitSection<DWARFCompileUnit>::iterator_range cu_iterator_range;
+ typedef DWARFUnitSection<DWARFTypeUnit>::iterator_range tu_iterator_range;
+ typedef iterator_range<std::vector<DWARFUnitSection<DWARFTypeUnit>>::iterator> tu_section_iterator_range;
/// Get compile units in this context.
cu_iterator_range compile_units() {
@@ -87,9 +83,9 @@ public:
}
/// Get type units in this context.
- tu_iterator_range type_units() {
+ tu_section_iterator_range type_unit_sections() {
parseTypeUnits();
- return tu_iterator_range(TUs.begin(), TUs.end());
+ return tu_section_iterator_range(TUs.begin(), TUs.end());
}
/// Get compile units in the DWO context.
@@ -99,9 +95,9 @@ public:
}
/// Get type units in the DWO context.
- tu_iterator_range dwo_type_units() {
+ tu_section_iterator_range dwo_type_unit_sections() {
parseDWOTypeUnits();
- return tu_iterator_range(DWOTUs.begin(), DWOTUs.end());
+ return tu_section_iterator_range(DWOTUs.begin(), DWOTUs.end());
}
/// Get the number of compile units in this context.
@@ -159,8 +155,7 @@ public:
const DWARFDebugFrame *getDebugFrame();
/// Get a pointer to a parsed line table corresponding to a compile unit.
- const DWARFDebugLine::LineTable *
- getLineTableForCompileUnit(DWARFCompileUnit *cu);
+ const DWARFDebugLine::LineTable *getLineTableForUnit(DWARFUnit *cu);
DILineInfo getLineInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
@@ -171,17 +166,15 @@ public:
virtual bool isLittleEndian() const = 0;
virtual uint8_t getAddressSize() const = 0;
- virtual const Section &getInfoSection() = 0;
- typedef MapVector<object::SectionRef, Section,
- std::map<object::SectionRef, unsigned> > TypeSectionMap;
+ virtual const DWARFSection &getInfoSection() = 0;
+ typedef MapVector<object::SectionRef, DWARFSection,
+ std::map<object::SectionRef, unsigned>> TypeSectionMap;
virtual const TypeSectionMap &getTypesSections() = 0;
virtual StringRef getAbbrevSection() = 0;
- virtual const Section &getLocSection() = 0;
- virtual const Section &getLocDWOSection() = 0;
+ virtual const DWARFSection &getLocSection() = 0;
virtual StringRef getARangeSection() = 0;
virtual StringRef getDebugFrameSection() = 0;
- virtual const Section &getLineSection() = 0;
- virtual const Section &getLineDWOSection() = 0;
+ virtual const DWARFSection &getLineSection() = 0;
virtual StringRef getStringSection() = 0;
virtual StringRef getRangeSection() = 0;
virtual StringRef getPubNamesSection() = 0;
@@ -190,13 +183,19 @@ public:
virtual StringRef getGnuPubTypesSection() = 0;
// Sections for DWARF5 split dwarf proposal.
- virtual const Section &getInfoDWOSection() = 0;
+ virtual const DWARFSection &getInfoDWOSection() = 0;
virtual const TypeSectionMap &getTypesDWOSections() = 0;
virtual StringRef getAbbrevDWOSection() = 0;
+ virtual const DWARFSection &getLineDWOSection() = 0;
+ virtual const DWARFSection &getLocDWOSection() = 0;
virtual StringRef getStringDWOSection() = 0;
virtual StringRef getStringOffsetDWOSection() = 0;
virtual StringRef getRangeDWOSection() = 0;
virtual StringRef getAddrSection() = 0;
+ virtual const DWARFSection& getAppleNamesSection() = 0;
+ virtual const DWARFSection& getAppleTypesSection() = 0;
+ virtual const DWARFSection& getAppleNamespacesSection() = 0;
+ virtual const DWARFSection& getAppleObjCSection() = 0;
static bool isSupportedVersion(unsigned version) {
return version == 2 || version == 3 || version == 4;
@@ -217,15 +216,13 @@ class DWARFContextInMemory : public DWARFContext {
virtual void anchor();
bool IsLittleEndian;
uint8_t AddressSize;
- Section InfoSection;
+ DWARFSection InfoSection;
TypeSectionMap TypesSections;
StringRef AbbrevSection;
- Section LocSection;
- Section LocDWOSection;
+ DWARFSection LocSection;
StringRef ARangeSection;
StringRef DebugFrameSection;
- Section LineSection;
- Section LineDWOSection;
+ DWARFSection LineSection;
StringRef StringSection;
StringRef RangeSection;
StringRef PubNamesSection;
@@ -234,42 +231,52 @@ class DWARFContextInMemory : public DWARFContext {
StringRef GnuPubTypesSection;
// Sections for DWARF5 split dwarf proposal.
- Section InfoDWOSection;
+ DWARFSection InfoDWOSection;
TypeSectionMap TypesDWOSections;
StringRef AbbrevDWOSection;
+ DWARFSection LineDWOSection;
+ DWARFSection LocDWOSection;
StringRef StringDWOSection;
StringRef StringOffsetDWOSection;
StringRef RangeDWOSection;
StringRef AddrSection;
+ DWARFSection AppleNamesSection;
+ DWARFSection AppleTypesSection;
+ DWARFSection AppleNamespacesSection;
+ DWARFSection AppleObjCSection;
SmallVector<SmallString<32>, 4> UncompressedSections;
public:
- DWARFContextInMemory(object::ObjectFile *);
+ DWARFContextInMemory(const object::ObjectFile &Obj);
bool isLittleEndian() const override { return IsLittleEndian; }
uint8_t getAddressSize() const override { return AddressSize; }
- const Section &getInfoSection() override { return InfoSection; }
+ const DWARFSection &getInfoSection() override { return InfoSection; }
const TypeSectionMap &getTypesSections() override { return TypesSections; }
StringRef getAbbrevSection() override { return AbbrevSection; }
- const Section &getLocSection() override { return LocSection; }
- const Section &getLocDWOSection() override { return LocDWOSection; }
+ const DWARFSection &getLocSection() override { return LocSection; }
StringRef getARangeSection() override { return ARangeSection; }
StringRef getDebugFrameSection() override { return DebugFrameSection; }
- const Section &getLineSection() override { return LineSection; }
- const Section &getLineDWOSection() override { return LineDWOSection; }
+ const DWARFSection &getLineSection() override { return LineSection; }
StringRef getStringSection() override { return StringSection; }
StringRef getRangeSection() override { return RangeSection; }
StringRef getPubNamesSection() override { return PubNamesSection; }
StringRef getPubTypesSection() override { return PubTypesSection; }
StringRef getGnuPubNamesSection() override { return GnuPubNamesSection; }
StringRef getGnuPubTypesSection() override { return GnuPubTypesSection; }
+ const DWARFSection& getAppleNamesSection() override { return AppleNamesSection; }
+ const DWARFSection& getAppleTypesSection() override { return AppleTypesSection; }
+ const DWARFSection& getAppleNamespacesSection() override { return AppleNamespacesSection; }
+ const DWARFSection& getAppleObjCSection() override { return AppleObjCSection; }
// Sections for DWARF5 split dwarf proposal.
- const Section &getInfoDWOSection() override { return InfoDWOSection; }
+ const DWARFSection &getInfoDWOSection() override { return InfoDWOSection; }
const TypeSectionMap &getTypesDWOSections() override {
return TypesDWOSections;
}
StringRef getAbbrevDWOSection() override { return AbbrevDWOSection; }
+ const DWARFSection &getLineDWOSection() override { return LineDWOSection; }
+ const DWARFSection &getLocDWOSection() override { return LocDWOSection; }
StringRef getStringDWOSection() override { return StringDWOSection; }
StringRef getStringOffsetDWOSection() override {
return StringOffsetDWOSection;
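Because each .debug_types contribution now gets its own DWARFUnitSection, type units are walked with a nested loop; a sketch, assuming using namespace llvm and an existing DWARFContext named DICtx:

for (const auto &TUS : DICtx.type_unit_sections()) // one entry per .debug_types section
  for (const auto &TU : TUS)                       // units within that section
    TU->dump(outs());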
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARFDebugAbbrev.cpp
index 8426bf9..c1a088e 100644
--- a/lib/DebugInfo/DWARFDebugAbbrev.cpp
+++ b/lib/DebugInfo/DWARFDebugAbbrev.cpp
@@ -30,7 +30,6 @@ bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data,
DWARFAbbreviationDeclaration AbbrDecl;
uint32_t PrevAbbrCode = 0;
while (AbbrDecl.extract(Data, OffsetPtr)) {
- Decls.push_back(AbbrDecl);
if (FirstAbbrCode == 0) {
FirstAbbrCode = AbbrDecl.getCode();
} else {
@@ -40,6 +39,7 @@ bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data,
}
}
PrevAbbrCode = AbbrDecl.getCode();
+ Decls.push_back(std::move(AbbrDecl));
}
return BeginOffset != *OffsetPtr;
}
@@ -82,7 +82,7 @@ void DWARFDebugAbbrev::extract(DataExtractor Data) {
uint32_t CUAbbrOffset = Offset;
if (!AbbrDecls.extract(Data, &Offset))
break;
- AbbrDeclSets[CUAbbrOffset] = AbbrDecls;
+ AbbrDeclSets[CUAbbrOffset] = std::move(AbbrDecls);
}
}
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.h b/lib/DebugInfo/DWARFDebugAbbrev.h
index 3a9adba..4b3b814 100644
--- a/lib/DebugInfo/DWARFDebugAbbrev.h
+++ b/lib/DebugInfo/DWARFDebugAbbrev.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGABBREV_H
-#define LLVM_DEBUGINFO_DWARFDEBUGABBREV_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGABBREV_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGABBREV_H
#include "DWARFAbbreviationDeclaration.h"
#include <list>
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h
index d6c2d8b..837a8e6 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.h
+++ b/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H
-#define LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGARANGESET_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGARANGESET_H
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/DataExtractor.h"
diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h
index a9f37fe..791f010 100644
--- a/lib/DebugInfo/DWARFDebugAranges.h
+++ b/lib/DebugInfo/DWARFDebugAranges.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
-#define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGARANGES_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGARANGES_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/DataExtractor.h"
diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp
index a33548e..dfa7e82 100644
--- a/lib/DebugInfo/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARFDebugFrame.cpp
@@ -202,7 +202,8 @@ public:
SmallString<8> Augmentation, uint64_t CodeAlignmentFactor,
int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister)
: FrameEntry(FK_CIE, Offset, Length), Version(Version),
- Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor),
+ Augmentation(std::move(Augmentation)),
+ CodeAlignmentFactor(CodeAlignmentFactor),
DataAlignmentFactor(DataAlignmentFactor),
ReturnAddressRegister(ReturnAddressRegister) {}
diff --git a/lib/DebugInfo/DWARFDebugFrame.h b/lib/DebugInfo/DWARFDebugFrame.h
index bd4ef45..be925cb 100644
--- a/lib/DebugInfo/DWARFDebugFrame.h
+++ b/lib/DebugInfo/DWARFDebugFrame.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGFRAME_H
-#define LLVM_DEBUGINFO_DWARFDEBUGFRAME_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGFRAME_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGFRAME_H
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index 2e7a54a..583e700 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -12,15 +12,26 @@
#include "DWARFContext.h"
#include "DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARFFormValue.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace dwarf;
-typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind;
-void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, const DWARFUnit *u,
+// Small helper to extract a DIE pointed to by a reference
+// attribute. It looks up the Unit containing the DIE and calls
+// DIE.extractFast with the right unit. Returns the new unit on success,
+// nullptr otherwise.
+static const DWARFUnit *findUnitAndExtractFast(DWARFDebugInfoEntryMinimal &DIE,
+ const DWARFUnit *Unit,
+ uint32_t *Offset) {
+ Unit = Unit->getUnitSection().getUnitForOffset(*Offset);
+ return (Unit && DIE.extractFast(Unit, Offset)) ? Unit : nullptr;
+}
+
+void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, DWARFUnit *u,
unsigned recurseDepth,
unsigned indent) const {
DataExtractor debug_info_data = u->getDebugInfoExtractor();
@@ -62,12 +73,42 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, const DWARFUnit *u,
}
}
+static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) {
+ OS << " (";
+ do {
+ uint64_t Bit = 1ULL << countTrailingZeros(Val);
+ if (const char *PropName = ApplePropertyString(Bit))
+ OS << PropName;
+ else
+ OS << format("DW_APPLE_PROPERTY_0x%" PRIx64, Bit);
+ if (!(Val ^= Bit))
+ break;
+ OS << ", ";
+ } while (true);
+ OS << ")";
+}
+
+static void dumpRanges(raw_ostream &OS, const DWARFAddressRangesVector& Ranges,
+ unsigned AddressSize, unsigned Indent) {
+ if (Ranges.empty())
+ return;
+
+ for (const auto &Range: Ranges) {
+ OS << '\n';
+ OS.indent(Indent);
+ OS << format("[0x%0*" PRIx64 " - 0x%0*" PRIx64 ")",
+ AddressSize*2, Range.first,
+ AddressSize*2, Range.second);
+ }
+}
+
void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
- const DWARFUnit *u,
+ DWARFUnit *u,
uint32_t *offset_ptr,
uint16_t attr, uint16_t form,
unsigned indent) const {
- OS << " ";
+ const char BaseIndent[] = " ";
+ OS << BaseIndent;
OS.indent(indent+2);
const char *attrString = AttributeString(attr);
if (attrString)
@@ -86,7 +127,48 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
return;
OS << "\t(";
- formValue.dump(OS, u);
+
+ const char *Name = nullptr;
+ std::string File;
+ if (attr == DW_AT_decl_file || attr == DW_AT_call_file) {
+ if (const auto *LT = u->getContext().getLineTableForUnit(u))
+ if (LT->getFileNameByIndex(
+ formValue.getAsUnsignedConstant().getValue(),
+ u->getCompilationDir(),
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) {
+ File = '"' + File + '"';
+ Name = File.c_str();
+ }
+ } else if (Optional<uint64_t> Val = formValue.getAsUnsignedConstant())
+ Name = AttributeValueString(attr, *Val);
+
+ if (Name) {
+ OS << Name;
+ } else if (attr == DW_AT_decl_line || attr == DW_AT_call_line) {
+ OS << *formValue.getAsUnsignedConstant();
+ } else {
+ formValue.dump(OS, u);
+ }
+
+ // We have dumped the attribute raw value. For some attributes
+ // having both the raw value and the pretty-printed value is
+ // interesting. These attributes are handled below.
+ if ((attr == DW_AT_specification || attr == DW_AT_abstract_origin) &&
+ // The signature references aren't handled.
+ formValue.getForm() != DW_FORM_ref_sig8) {
+ uint32_t Ref = formValue.getAsReference(u).getValue();
+ DWARFDebugInfoEntryMinimal DIE;
+ if (const DWARFUnit *RefU = findUnitAndExtractFast(DIE, u, &Ref))
+ if (const char *Ref = DIE.getName(RefU, DINameKind::LinkageName))
+ OS << " \"" << Ref << '\"';
+ } else if (attr == DW_AT_APPLE_property_attribute) {
+ if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant())
+ dumpApplePropertyAttribute(OS, *OptVal);
+ } else if (attr == DW_AT_ranges) {
+ dumpRanges(OS, getAddressRanges(u), u->getAddressByteSize(),
+ sizeof(BaseIndent)+indent+4);
+ }
+
OS << ")\n";
}
@@ -284,11 +366,19 @@ bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress(
const char *
DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U,
- FunctionNameKind Kind) const {
- if (!isSubroutineDIE() || Kind == FunctionNameKind::None)
+ DINameKind Kind) const {
+ if (!isSubroutineDIE())
+ return nullptr;
+ return getName(U, Kind);
+}
+
+const char *
+DWARFDebugInfoEntryMinimal::getName(const DWARFUnit *U,
+ DINameKind Kind) const {
+ if (Kind == DINameKind::None)
return nullptr;
// Try to get mangled name only if it was asked for.
- if (Kind == FunctionNameKind::LinkageName) {
+ if (Kind == DINameKind::LinkageName) {
if (const char *name =
getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, nullptr))
return name;
@@ -303,8 +393,8 @@ DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U,
getAttributeValueAsReference(U, DW_AT_specification, -1U);
if (spec_ref != -1U) {
DWARFDebugInfoEntryMinimal spec_die;
- if (spec_die.extractFast(U, &spec_ref)) {
- if (const char *name = spec_die.getSubroutineName(U, Kind))
+ if (const DWARFUnit *RefU = findUnitAndExtractFast(spec_die, U, &spec_ref)) {
+ if (const char *name = spec_die.getName(RefU, Kind))
return name;
}
}
@@ -313,8 +403,9 @@ DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U,
getAttributeValueAsReference(U, DW_AT_abstract_origin, -1U);
if (abs_origin_ref != -1U) {
DWARFDebugInfoEntryMinimal abs_origin_die;
- if (abs_origin_die.extractFast(U, &abs_origin_ref)) {
- if (const char *name = abs_origin_die.getSubroutineName(U, Kind))
+ if (const DWARFUnit *RefU = findUnitAndExtractFast(abs_origin_die, U,
+ &abs_origin_ref)) {
+ if (const char *name = abs_origin_die.getName(RefU, Kind))
return name;
}
}
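A sketch of the new getName helper, which follows DW_AT_specification and DW_AT_abstract_origin across units (assuming using namespace llvm; DIE and U are a previously extracted DWARFDebugInfoEntryMinimal and the DWARFUnit it came from):

if (const char *N = DIE.getName(U, DINameKind::LinkageName))
  errs() << "mangled (or short) name: " << N << "\n";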
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
index cc58eb6..7e7efb9 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
-#define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGINFOENTRY_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGINFOENTRY_H
#include "DWARFAbbreviationDeclaration.h"
#include "DWARFDebugRangeList.h"
@@ -38,9 +38,9 @@ public:
DWARFDebugInfoEntryMinimal()
: Offset(0), SiblingIdx(0), AbbrevDecl(nullptr) {}
- void dump(raw_ostream &OS, const DWARFUnit *u, unsigned recurseDepth,
+ void dump(raw_ostream &OS, DWARFUnit *u, unsigned recurseDepth,
unsigned indent = 0) const;
- void dumpAttribute(raw_ostream &OS, const DWARFUnit *u, uint32_t *offset_ptr,
+ void dumpAttribute(raw_ostream &OS, DWARFUnit *u, uint32_t *offset_ptr,
uint16_t attr, uint16_t form, unsigned indent = 0) const;
/// Extracts a debug info entry, which is a child of a given unit,
@@ -125,9 +125,12 @@ public:
/// returns its mangled name (or short name, if mangled is missing).
/// This name may be fetched from specification or abstract origin
/// for this subprogram. Returns null if no name is found.
- const char *
- getSubroutineName(const DWARFUnit *U,
- DILineInfoSpecifier::FunctionNameKind Kind) const;
+ const char *getSubroutineName(const DWARFUnit *U, DINameKind Kind) const;
+
+ /// Return the DIE name, resolving DW_AT_specification or
+ /// DW_AT_abstract_origin references if necessary.
+ /// Returns null if no name is found.
+ const char *getName(const DWARFUnit *U, DINameKind Kind) const;
/// Retrieves values of DW_AT_call_file, DW_AT_call_line and
/// DW_AT_call_column from DIE (or zeroes if they are missing).
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index ce87635..a6ee461 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -644,6 +644,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
bool
DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
+ const char *CompDir,
FileLineInfoKind Kind,
std::string &Result) const {
if (FileIndex == 0 || FileIndex > Prologue.FileNames.size() ||
@@ -656,15 +657,42 @@ DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
Result = FileName;
return true;
}
+
SmallString<16> FilePath;
uint64_t IncludeDirIndex = Entry.DirIdx;
+ const char *IncludeDir = "";
// Be defensive about the contents of Entry.
if (IncludeDirIndex > 0 &&
- IncludeDirIndex <= Prologue.IncludeDirectories.size()) {
- const char *IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1];
- sys::path::append(FilePath, IncludeDir);
- }
- sys::path::append(FilePath, FileName);
+ IncludeDirIndex <= Prologue.IncludeDirectories.size())
+ IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1];
+
+ // We may still need to append the compilation directory of the compile unit.
+ // We know that FileName is not absolute; the only way to have an
+ // absolute path at this point would be if IncludeDir is absolute.
+ if (CompDir && Kind == FileLineInfoKind::AbsoluteFilePath &&
+ sys::path::is_relative(IncludeDir))
+ sys::path::append(FilePath, CompDir);
+
+ // sys::path::append skips empty strings.
+ sys::path::append(FilePath, IncludeDir, FileName);
Result = FilePath.str();
return true;
}
+
+bool
+DWARFDebugLine::LineTable::getFileLineInfoForAddress(uint64_t Address,
+ const char *CompDir,
+ FileLineInfoKind Kind,
+ DILineInfo &Result) const {
+ // Get the index of row we're looking for in the line table.
+ uint32_t RowIndex = lookupAddress(Address);
+ if (RowIndex == -1U)
+ return false;
+ // Take file number and line/column from the row.
+ const auto &Row = Rows[RowIndex];
+ if (!getFileNameByIndex(Row.File, CompDir, Kind, Result.FileName))
+ return false;
+ Result.Line = Row.Line;
+ Result.Column = Row.Column;
+ return true;
+}
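A sketch of the new line-table helper as the context code above now uses it (assuming using namespace llvm; Ctx, CU and Address are assumed to come from an existing DWARFContext lookup):

const DWARFDebugLine::LineTable *LT = Ctx.getLineTableForUnit(CU);
DILineInfo Info;
if (LT && LT->getFileLineInfoForAddress(
              Address, CU->getCompilationDir(),
              DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info))
  errs() << Info.FileName << ":" << Info.Line << "\n";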
diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h
index c7b7ec2..7a6f1bd 100644
--- a/lib/DebugInfo/DWARFDebugLine.h
+++ b/lib/DebugInfo/DWARFDebugLine.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H
-#define LLVM_DEBUGINFO_DWARFDEBUGLINE_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGLINE_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGLINE_H
#include "DWARFRelocMap.h"
#include "llvm/DebugInfo/DIContext.h"
@@ -179,10 +179,16 @@ public:
// Extracts filename by its index in filename table in prologue.
// Returns true on success.
- bool getFileNameByIndex(uint64_t FileIndex,
+ bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
DILineInfoSpecifier::FileLineInfoKind Kind,
std::string &Result) const;
+ // Fills the Result argument with the file and line information
+ // corresponding to Address. Returns true on success.
+ bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir,
+ DILineInfoSpecifier::FileLineInfoKind Kind,
+ DILineInfo &Result) const;
+
void dump(raw_ostream &OS) const;
void clear();
diff --git a/lib/DebugInfo/DWARFDebugLoc.h b/lib/DebugInfo/DWARFDebugLoc.h
index 663acbb4..50110b3 100644
--- a/lib/DebugInfo/DWARFDebugLoc.h
+++ b/lib/DebugInfo/DWARFDebugLoc.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGLOC_H
-#define LLVM_DEBUGINFO_DWARFDEBUGLOC_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGLOC_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGLOC_H
#include "DWARFRelocMap.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/lib/DebugInfo/DWARFDebugRangeList.h b/lib/DebugInfo/DWARFDebugRangeList.h
index 587b550..4ee3bda 100644
--- a/lib/DebugInfo/DWARFDebugRangeList.h
+++ b/lib/DebugInfo/DWARFDebugRangeList.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
-#define LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGRANGELIST_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGRANGELIST_H
#include "llvm/Support/DataExtractor.h"
#include <vector>
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index 8d0f966..69b9771 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -139,6 +139,8 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
switch (Form) {
case DW_FORM_addr:
case DW_FORM_ref_addr: {
+ if (!cu)
+ return false;
uint16_t AddrSize =
(Form == DW_FORM_addr)
? cu->getAddressByteSize()
@@ -179,8 +181,10 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
break;
case DW_FORM_data4:
case DW_FORM_ref4: {
- RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr);
Value.uval = data.getU32(offset_ptr);
+ if (!cu)
+ break;
+ RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4);
if (AI != cu->getRelocMap()->end())
Value.uval += AI->second.second;
break;
@@ -193,13 +197,12 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
Value.sval = data.getSLEB128(offset_ptr);
break;
case DW_FORM_strp: {
- RelocAddrMap::const_iterator AI
- = cu->getRelocMap()->find(*offset_ptr);
- if (AI != cu->getRelocMap()->end()) {
- const std::pair<uint8_t, int64_t> &R = AI->second;
- Value.uval = data.getU32(offset_ptr) + R.second;
- } else
- Value.uval = data.getU32(offset_ptr);
+ Value.uval = data.getU32(offset_ptr);
+ if (!cu)
+ break;
+ RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4);
+ if (AI != cu->getRelocMap()->end())
+ Value.uval += AI->second.second;
break;
}
case DW_FORM_udata:
@@ -215,13 +218,12 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
break;
case DW_FORM_sec_offset: {
// FIXME: This is 64-bit for DWARF64.
- RelocAddrMap::const_iterator AI
- = cu->getRelocMap()->find(*offset_ptr);
- if (AI != cu->getRelocMap()->end()) {
- const std::pair<uint8_t, int64_t> &R = AI->second;
- Value.uval = data.getU32(offset_ptr) + R.second;
- } else
- Value.uval = data.getU32(offset_ptr);
+ Value.uval = data.getU32(offset_ptr);
+ if (!cu)
+ break;
+ RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4);
+ if (AI != cu->getRelocMap()->end())
+ Value.uval += AI->second.second;
break;
}
case DW_FORM_flag_present:
@@ -360,8 +362,6 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
void
DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
- DataExtractor debug_str_data(cu->getStringSection(), true, 0);
- DataExtractor debug_str_offset_data(cu->getStringOffsetSection(), true, 0);
uint64_t uvalue = Value.uval;
bool cu_relative_offset = false;
@@ -543,7 +543,15 @@ Optional<uint64_t> DWARFFormValue::getAsSectionOffset() const {
}
Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
- if (!isFormClass(FC_Constant) || Form == DW_FORM_sdata)
+ if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag))
+ || Form == DW_FORM_sdata)
return None;
return Value.uval;
}
+
+Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
+ if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc))
+ return None;
+ return ArrayRef<uint8_t>(Value.data, Value.uval);
+}
+
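A sketch of the new block accessor (assuming using namespace llvm; FV is a DWARFFormValue already extracted from, e.g., a DW_FORM_exprloc attribute):

if (Optional<ArrayRef<uint8_t>> Expr = FV.getAsBlock())
  errs() << "DWARF expression block of " << Expr->size() << " bytes\n";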
diff --git a/lib/DebugInfo/DWARFRelocMap.h b/lib/DebugInfo/DWARFRelocMap.h
index 6929e36..d7fe303 100644
--- a/lib/DebugInfo/DWARFRelocMap.h
+++ b/lib/DebugInfo/DWARFRelocMap.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFRELOCMAP_H
-#define LLVM_DEBUGINFO_DWARFRELOCMAP_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFRELOCMAP_H
+#define LLVM_LIB_DEBUGINFO_DWARFRELOCMAP_H
#include "llvm/ADT/DenseMap.h"
@@ -18,5 +18,5 @@ typedef DenseMap<uint64_t, std::pair<uint8_t, int64_t> > RelocAddrMap;
} // namespace llvm
-#endif // LLVM_DEBUGINFO_DWARFRELOCMAP_H
+#endif
diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/DebugInfo/DWARFSection.h
index 81b4978..3aaf0ff 100644
--- a/lib/CodeGen/MachineCodeEmitter.cpp
+++ b/lib/DebugInfo/DWARFSection.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===//
+//===-- DWARFSection.h ------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineCodeEmitter.h"
+#ifndef LLVM_LIB_DEBUGINFO_DWARFSECTION_H
+#define LLVM_LIB_DEBUGINFO_DWARFSECTION_H
-using namespace llvm;
+#include "DWARFRelocMap.h"
-void MachineCodeEmitter::anchor() { }
+namespace llvm {
+
+struct DWARFSection {
+ StringRef Data;
+ RelocAddrMap Relocs;
+};
+
+}
+
+#endif
diff --git a/lib/DebugInfo/DWARFTypeUnit.h b/lib/DebugInfo/DWARFTypeUnit.h
index cf773b8..7471b5a 100644
--- a/lib/DebugInfo/DWARFTypeUnit.h
+++ b/lib/DebugInfo/DWARFTypeUnit.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFTYPEUNIT_H
-#define LLVM_DEBUGINFO_DWARFTYPEUNIT_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFTYPEUNIT_H
+#define LLVM_LIB_DEBUGINFO_DWARFTYPEUNIT_H
#include "DWARFUnit.h"
@@ -19,10 +19,11 @@ private:
uint64_t TypeHash;
uint32_t TypeOffset;
public:
- DWARFTypeUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS,
- StringRef SS, StringRef SOS, StringRef AOS,
- const RelocAddrMap *M, bool LE)
- : DWARFUnit(DA, IS, RS, SS, SOS, AOS, M, LE) {}
+ DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section,
+ const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
+ StringRef SOS, StringRef AOS, bool LE,
+ const DWARFUnitSectionBase &UnitSection)
+ : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {}
uint32_t getHeaderSize() const override {
return DWARFUnit::getHeaderSize() + 12;
}
diff --git a/lib/DebugInfo/DWARFUnit.cpp b/lib/DebugInfo/DWARFUnit.cpp
index 39d0a0f..82c4529 100644
--- a/lib/DebugInfo/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARFUnit.cpp
@@ -17,12 +17,26 @@
using namespace llvm;
using namespace dwarf;
-DWARFUnit::DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS,
- StringRef SS, StringRef SOS, StringRef AOS,
- const RelocAddrMap *M, bool LE)
- : Abbrev(DA), InfoSection(IS), RangeSection(RS), StringSection(SS),
- StringOffsetSection(SOS), AddrOffsetSection(AOS), RelocMap(M),
- isLittleEndian(LE) {
+void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
+ parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(),
+ C.getStringSection(), StringRef(), C.getAddrSection(),
+ C.isLittleEndian());
+}
+
+void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
+ const DWARFSection &DWOSection) {
+ parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(),
+ C.getStringDWOSection(), C.getStringOffsetDWOSection(),
+ C.getAddrSection(), C.isLittleEndian());
+}
+
+DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
+ const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
+ StringRef SOS, StringRef AOS, bool LE,
+ const DWARFUnitSectionBase &UnitSection)
+ : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
+ StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
+ isLittleEndian(LE), UnitSection(UnitSection) {
clear();
}
@@ -235,10 +249,14 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
return DieArray.size();
}
-DWARFUnit::DWOHolder::DWOHolder(object::ObjectFile *DWOFile)
- : DWOFile(DWOFile),
- DWOContext(cast<DWARFContext>(DIContext::getDWARFContext(DWOFile))),
- DWOU(nullptr) {
+DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath)
+ : DWOFile(), DWOContext(), DWOU(nullptr) {
+ auto Obj = object::ObjectFile::createObjectFile(DWOPath);
+ if (!Obj)
+ return;
+ DWOFile = std::move(Obj.get());
+ DWOContext.reset(
+ cast<DWARFContext>(DIContext::getDWARFContext(*DWOFile.getBinary())));
if (DWOContext->getNumDWOCompileUnits() > 0)
DWOU = DWOContext->getDWOCompileUnitAtIndex(0);
}
@@ -260,12 +278,7 @@ bool DWARFUnit::parseDWO() {
sys::path::append(AbsolutePath, CompilationDir);
}
sys::path::append(AbsolutePath, DWOFileName);
- ErrorOr<object::ObjectFile *> DWOFile =
- object::ObjectFile::createObjectFile(AbsolutePath);
- if (!DWOFile)
- return false;
- // Reset DWOHolder.
- DWO.reset(new DWOHolder(DWOFile.get()));
+ DWO = llvm::make_unique<DWOHolder>(AbsolutePath);
DWARFUnit *DWOCU = DWO->getUnit();
// Verify that compile unit in .dwo file is valid.
if (!DWOCU || DWOCU->getDWOId() != getDWOId()) {
diff --git a/lib/DebugInfo/DWARFUnit.h b/lib/DebugInfo/DWARFUnit.h
index 471da36..786f00f 100644
--- a/lib/DebugInfo/DWARFUnit.h
+++ b/lib/DebugInfo/DWARFUnit.h
@@ -7,13 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_DWARFUNIT_H
-#define LLVM_DEBUGINFO_DWARFUNIT_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFUNIT_H
+#define LLVM_LIB_DEBUGINFO_DWARFUNIT_H
#include "DWARFDebugAbbrev.h"
#include "DWARFDebugInfoEntry.h"
#include "DWARFDebugRangeList.h"
#include "DWARFRelocMap.h"
+#include "DWARFSection.h"
#include <vector>
namespace llvm {
@@ -22,21 +23,96 @@ namespace object {
class ObjectFile;
}
+class DWARFContext;
class DWARFDebugAbbrev;
+class DWARFUnit;
class StringRef;
class raw_ostream;
+/// Base class for all DWARFUnitSection classes. This provides the
+/// functionality common to all unit types.
+class DWARFUnitSectionBase {
+public:
+ /// Returns the Unit that contains the given section offset in the
+ /// same section this Unit originated from.
+ virtual DWARFUnit *getUnitForOffset(uint32_t Offset) const = 0;
+
+ void parse(DWARFContext &C, const DWARFSection &Section);
+ void parseDWO(DWARFContext &C, const DWARFSection &DWOSection);
+
+protected:
+ virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section,
+ const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
+ StringRef SOS, StringRef AOS, bool isLittleEndian) = 0;
+
+ ~DWARFUnitSectionBase() {}
+};
+
+/// Concrete instance of DWARFUnitSection, specialized for one Unit type.
+template<typename UnitType>
+class DWARFUnitSection final : public SmallVector<std::unique_ptr<UnitType>, 1>,
+ public DWARFUnitSectionBase {
+
+ struct UnitOffsetComparator {
+ bool operator()(uint32_t LHS,
+ const std::unique_ptr<UnitType> &RHS) const {
+ return LHS < RHS->getNextUnitOffset();
+ }
+ };
+
+ bool Parsed;
+
+public:
+ DWARFUnitSection() : Parsed(false) {}
+ DWARFUnitSection(DWARFUnitSection &&DUS) :
+ SmallVector<std::unique_ptr<UnitType>, 1>(std::move(DUS)), Parsed(DUS.Parsed) {}
+
+ typedef llvm::SmallVectorImpl<std::unique_ptr<UnitType>> UnitVector;
+ typedef typename UnitVector::iterator iterator;
+ typedef llvm::iterator_range<typename UnitVector::iterator> iterator_range;
+
+ UnitType *getUnitForOffset(uint32_t Offset) const override {
+ auto *CU = std::upper_bound(this->begin(), this->end(), Offset,
+ UnitOffsetComparator());
+ if (CU != this->end())
+ return CU->get();
+ return nullptr;
+ }
+
+private:
+ void parseImpl(DWARFContext &Context, const DWARFSection &Section,
+ const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
+ StringRef SOS, StringRef AOS, bool LE) override {
+ if (Parsed)
+ return;
+ DataExtractor Data(Section.Data, LE, 0);
+ uint32_t Offset = 0;
+ while (Data.isValidOffset(Offset)) {
+ auto U = llvm::make_unique<UnitType>(Context, Section, DA, RS, SS, SOS,
+ AOS, LE, *this);
+ if (!U->extract(Data, &Offset))
+ break;
+ this->push_back(std::move(U));
+ Offset = this->back()->getNextUnitOffset();
+ }
+ Parsed = true;
+ }
+};
+
class DWARFUnit {
+ DWARFContext &Context;
+ // Section containing this DWARFUnit.
+ const DWARFSection &InfoSection;
+
const DWARFDebugAbbrev *Abbrev;
- StringRef InfoSection;
StringRef RangeSection;
uint32_t RangeSectionBase;
StringRef StringSection;
StringRef StringOffsetSection;
StringRef AddrOffsetSection;
uint32_t AddrOffsetSectionBase;
- const RelocAddrMap *RelocMap;
bool isLittleEndian;
+ const DWARFUnitSectionBase &UnitSection;
uint32_t Offset;
uint32_t Length;
@@ -48,11 +124,11 @@ class DWARFUnit {
std::vector<DWARFDebugInfoEntryMinimal> DieArray;
class DWOHolder {
- std::unique_ptr<object::ObjectFile> DWOFile;
+ object::OwningBinary<object::ObjectFile> DWOFile;
std::unique_ptr<DWARFContext> DWOContext;
DWARFUnit *DWOU;
public:
- DWOHolder(object::ObjectFile *DWOFile);
+ DWOHolder(StringRef DWOPath);
DWARFUnit *getUnit() const { return DWOU; }
};
std::unique_ptr<DWOHolder> DWO;
@@ -63,12 +139,15 @@ protected:
virtual uint32_t getHeaderSize() const { return 11; }
public:
- DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS,
- StringRef SS, StringRef SOS, StringRef AOS, const RelocAddrMap *M,
- bool LE);
+ DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
+ const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
+ StringRef SOS, StringRef AOS, bool LE,
+ const DWARFUnitSectionBase &UnitSection);
virtual ~DWARFUnit();
+ DWARFContext& getContext() const { return Context; }
+
StringRef getStringSection() const { return StringSection; }
StringRef getStringOffsetSection() const { return StringOffsetSection; }
void setAddrOffsetSection(StringRef AOS, uint32_t Base) {
@@ -85,13 +164,13 @@ public:
bool getStringOffsetSectionItem(uint32_t Index, uint32_t &Result) const;
DataExtractor getDebugInfoExtractor() const {
- return DataExtractor(InfoSection, isLittleEndian, AddrSize);
+ return DataExtractor(InfoSection.Data, isLittleEndian, AddrSize);
}
DataExtractor getStringExtractor() const {
return DataExtractor(StringSection, false, 0);
}
- const RelocAddrMap *getRelocMap() const { return RelocMap; }
+ const RelocAddrMap *getRelocMap() const { return &InfoSection.Relocs; }
bool extract(DataExtractor debug_info, uint32_t* offset_ptr);
@@ -131,6 +210,9 @@ public:
/// chain is valid as long as parsed compile unit DIEs are not cleared.
DWARFDebugInfoEntryInlinedChain getInlinedChainForAddress(uint64_t Address);
+ /// getUnitSection - Return the DWARFUnitSection containing this unit.
+ const DWARFUnitSectionBase &getUnitSection() const { return UnitSection; }
+
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const { return Length + 4 - getHeaderSize(); }
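A sketch of the new owning unit container used on its own, mirroring what DWARFContext::parseCompileUnits now does (assuming using namespace llvm; Ctx is an existing DWARFContext and the 0x2a offset is hypothetical):

DWARFUnitSection<DWARFCompileUnit> CUs;
CUs.parse(Ctx, Ctx.getInfoSection());                // extract all compile units once
if (DWARFCompileUnit *CU = CUs.getUnitForOffset(0x2a))
  CU->dump(outs());                                  // unit containing .debug_info offset 0x2a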
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index 3102c7b..fae5bb9 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -3,12 +3,12 @@
add_llvm_library(LLVMExecutionEngine
ExecutionEngine.cpp
ExecutionEngineBindings.cpp
+ JITEventListener.cpp
RTDyldMemoryManager.cpp
TargetSelect.cpp
)
add_subdirectory(Interpreter)
-add_subdirectory(JIT)
add_subdirectory(MCJIT)
add_subdirectory(RuntimeDyld)
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index b0e985d..5a6d656 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -24,6 +24,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -47,22 +48,13 @@ void ObjectCache::anchor() {}
void ObjectBuffer::anchor() {}
void ObjectBufferStream::anchor() {}
-ExecutionEngine *(*ExecutionEngine::JITCtor)(
- Module *M,
- std::string *ErrorStr,
- JITMemoryManager *JMM,
- bool GVsWithCode,
- TargetMachine *TM) = nullptr;
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
- Module *M,
- std::string *ErrorStr,
- RTDyldMemoryManager *MCJMM,
- bool GVsWithCode,
- TargetMachine *TM) = nullptr;
-ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
+ std::unique_ptr<Module> M, std::string *ErrorStr,
+ RTDyldMemoryManager *MCJMM, std::unique_ptr<TargetMachine> TM) = nullptr;
+ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M,
std::string *ErrorStr) =nullptr;
-ExecutionEngine::ExecutionEngine(Module *M)
+ExecutionEngine::ExecutionEngine(std::unique_ptr<Module> M)
: EEState(*this),
LazyFunctionCreator(nullptr) {
CompilingLazily = false;
@@ -77,14 +69,12 @@ ExecutionEngine::ExecutionEngine(Module *M)
VerifyModules = false;
#endif
- Modules.push_back(M);
assert(M && "Module is null?");
+ Modules.push_back(std::move(M));
}
ExecutionEngine::~ExecutionEngine() {
clearAllGlobalMappings();
- for (unsigned i = 0, e = Modules.size(); i != e; ++i)
- delete Modules[i];
}
namespace {
@@ -101,8 +91,8 @@ public:
Type *ElTy = GV->getType()->getElementType();
size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
void *RawMemory = ::operator new(
- DataLayout::RoundUpAlignment(sizeof(GVMemoryBlock),
- TD.getPreferredAlignment(GV))
+ RoundUpToAlignment(sizeof(GVMemoryBlock),
+ TD.getPreferredAlignment(GV))
+ GVSize);
new(RawMemory) GVMemoryBlock(GV);
return static_cast<char*>(RawMemory) + sizeof(GVMemoryBlock);
@@ -126,11 +116,20 @@ void ExecutionEngine::addObjectFile(std::unique_ptr<object::ObjectFile> O) {
llvm_unreachable("ExecutionEngine subclass doesn't implement addObjectFile.");
}
+void
+ExecutionEngine::addObjectFile(object::OwningBinary<object::ObjectFile> O) {
+ llvm_unreachable("ExecutionEngine subclass doesn't implement addObjectFile.");
+}
+
+void ExecutionEngine::addArchive(object::OwningBinary<object::Archive> A) {
+ llvm_unreachable("ExecutionEngine subclass doesn't implement addArchive.");
+}
+
bool ExecutionEngine::removeModule(Module *M) {
- for(SmallVectorImpl<Module *>::iterator I = Modules.begin(),
- E = Modules.end(); I != E; ++I) {
- Module *Found = *I;
+ for (auto I = Modules.begin(), E = Modules.end(); I != E; ++I) {
+ Module *Found = I->get();
if (Found == M) {
+ I->release();
Modules.erase(I);
clearGlobalMappingsFromModule(M);
return true;
@@ -254,19 +253,9 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
namespace {
class ArgvArray {
- char *Array;
- std::vector<char*> Values;
+ std::unique_ptr<char[]> Array;
+ std::vector<std::unique_ptr<char[]>> Values;
public:
- ArgvArray() : Array(nullptr) {}
- ~ArgvArray() { clear(); }
- void clear() {
- delete[] Array;
- Array = nullptr;
- for (size_t I = 0, E = Values.size(); I != E; ++I) {
- delete[] Values[I];
- }
- Values.clear();
- }
/// Turn a vector of strings into a nice argv style array of pointers to null
/// terminated strings.
void *reset(LLVMContext &C, ExecutionEngine *EE,
@@ -275,38 +264,39 @@ public:
} // anonymous namespace
void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
const std::vector<std::string> &InputArgv) {
- clear(); // Free the old contents.
+ Values.clear(); // Free the old contents.
+ Values.reserve(InputArgv.size());
unsigned PtrSize = EE->getDataLayout()->getPointerSize();
- Array = new char[(InputArgv.size()+1)*PtrSize];
+ Array = make_unique<char[]>((InputArgv.size()+1)*PtrSize);
- DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
+ DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array.get() << "\n");
Type *SBytePtr = Type::getInt8PtrTy(C);
for (unsigned i = 0; i != InputArgv.size(); ++i) {
unsigned Size = InputArgv[i].size()+1;
- char *Dest = new char[Size];
- Values.push_back(Dest);
- DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n");
+ auto Dest = make_unique<char[]>(Size);
+ DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest.get() << "\n");
- std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest);
+ std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest.get());
Dest[Size-1] = 0;
// Endian safe: Array[i] = (PointerTy)Dest;
- EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Array+i*PtrSize),
- SBytePtr);
+ EE->StoreValueToMemory(PTOGV(Dest.get()),
+ (GenericValue*)(&Array[i*PtrSize]), SBytePtr);
+ Values.push_back(std::move(Dest));
}
// Null terminate it
EE->StoreValueToMemory(PTOGV(nullptr),
- (GenericValue*)(Array+InputArgv.size()*PtrSize),
+ (GenericValue*)(&Array[InputArgv.size()*PtrSize]),
SBytePtr);
- return Array;
+ return Array.get();
}
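A plain-C++ sketch of the same ownership pattern outside the JIT (the version above additionally writes the pointer array into target memory with StoreValueToMemory so the guest program can read it); the names here are illustrative only.

#include <cstring>
#include <memory>
#include <string>
#include <vector>

struct OwnedArgvSketch {
  std::vector<std::unique_ptr<char[]>> Storage; // owns each C string
  std::vector<char *> Argv;                     // argv-style view, null-terminated

  explicit OwnedArgvSketch(const std::vector<std::string> &Args) {
    for (const std::string &A : Args) {
      std::unique_ptr<char[]> Buf(new char[A.size() + 1]);
      std::memcpy(Buf.get(), A.c_str(), A.size() + 1);
      Argv.push_back(Buf.get());
      Storage.push_back(std::move(Buf));
    }
    Argv.push_back(nullptr);                    // terminate like main()'s argv
  }
};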
-void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
+void ExecutionEngine::runStaticConstructorsDestructors(Module &module,
bool isDtors) {
const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
- GlobalVariable *GV = module->getNamedGlobal(Name);
+ GlobalVariable *GV = module.getNamedGlobal(Name);
// If this global has internal linkage, or if it has a use, then it must be
// an old-style (llvmgcc3) static ctor with __main linked in and in use. If
@@ -344,8 +334,8 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
// Execute global ctors/dtors for each module in the program.
- for (unsigned i = 0, e = Modules.size(); i != e; ++i)
- runStaticConstructorsDestructors(Modules[i], isDtors);
+ for (std::unique_ptr<Module> &M : Modules)
+ runStaticConstructorsDestructors(*M, isDtors);
}
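Sketch of the usual call sequence around a jitted main(), not taken from the patch; the bool overload above fans out to the per-module Module& overload.

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/IR/Function.h"
#include <string>
#include <vector>

int runProgramSketch(llvm::ExecutionEngine &EE, llvm::Function *MainFn,
                     const std::vector<std::string> &Args,
                     const char *const *Envp) {
  EE.runStaticConstructorsDestructors(false);        // llvm.global_ctors
  int RC = EE.runFunctionAsMain(MainFn, Args, Envp);
  EE.runStaticConstructorsDestructors(true);         // llvm.global_dtors
  return RC;
}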
#ifndef NDEBUG
@@ -406,68 +396,14 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
return runFunction(Fn, GVArgs).IntVal.getZExtValue();
}
-ExecutionEngine *ExecutionEngine::create(Module *M,
- bool ForceInterpreter,
- std::string *ErrorStr,
- CodeGenOpt::Level OptLevel,
- bool GVsWithCode) {
-
- EngineBuilder EB =
- EngineBuilder(M)
- .setEngineKind(ForceInterpreter ? EngineKind::Interpreter
- : EngineKind::Either)
- .setErrorStr(ErrorStr)
- .setOptLevel(OptLevel)
- .setAllocateGVsWithCode(GVsWithCode);
-
- return EB.create();
-}
-
-/// createJIT - This is the factory method for creating a JIT for the current
-/// machine, it does not fall back to the interpreter. This takes ownership
-/// of the module.
-ExecutionEngine *ExecutionEngine::createJIT(Module *M,
- std::string *ErrorStr,
- JITMemoryManager *JMM,
- CodeGenOpt::Level OL,
- bool GVsWithCode,
- Reloc::Model RM,
- CodeModel::Model CMM) {
- if (!ExecutionEngine::JITCtor) {
- if (ErrorStr)
- *ErrorStr = "JIT has not been linked in.";
- return nullptr;
- }
-
- // Use the defaults for extra parameters. Users can use EngineBuilder to
- // set them.
- EngineBuilder EB(M);
- EB.setEngineKind(EngineKind::JIT);
- EB.setErrorStr(ErrorStr);
- EB.setRelocationModel(RM);
- EB.setCodeModel(CMM);
- EB.setAllocateGVsWithCode(GVsWithCode);
- EB.setOptLevel(OL);
- EB.setJITMemoryManager(JMM);
-
- // TODO: permit custom TargetOptions here
- TargetMachine *TM = EB.selectTarget();
- if (!TM || (ErrorStr && ErrorStr->length() > 0)) return nullptr;
-
- return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
-}
-
void EngineBuilder::InitEngine() {
WhichEngine = EngineKind::Either;
ErrorStr = nullptr;
OptLevel = CodeGenOpt::Default;
MCJMM = nullptr;
- JMM = nullptr;
Options = TargetOptions();
- AllocateGVsWithCode = false;
RelocModel = Reloc::Default;
CMModel = CodeModel::JITDefault;
- UseMCJIT = false;
// IR module verification is enabled by default in debug builds, and disabled
// by default in release builds.
@@ -485,13 +421,11 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, ErrorStr))
return nullptr;
-
- assert(!(JMM && MCJMM));
// If the user specified a memory manager but didn't specify which engine to
// create, we assume they only want the JIT, and we fail if they only want
// the interpreter.
- if (JMM || MCJMM) {
+ if (MCJMM) {
if (WhichEngine & EngineKind::JIT)
WhichEngine = EngineKind::JIT;
else {
@@ -500,14 +434,6 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
return nullptr;
}
}
-
- if (MCJMM && ! UseMCJIT) {
- if (ErrorStr)
- *ErrorStr =
- "Cannot create a legacy JIT with a runtime dyld memory "
- "manager.";
- return nullptr;
- }
// Unless the interpreter was explicitly selected or the JIT is not linked,
// try making a JIT.
@@ -520,13 +446,9 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
}
ExecutionEngine *EE = nullptr;
- if (UseMCJIT && ExecutionEngine::MCJITCtor)
- EE = ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? MCJMM : JMM,
- AllocateGVsWithCode, TheTM.release());
- else if (ExecutionEngine::JITCtor)
- EE = ExecutionEngine::JITCtor(M, ErrorStr, JMM,
- AllocateGVsWithCode, TheTM.release());
-
+ if (ExecutionEngine::MCJITCtor)
+ EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, MCJMM,
+ std::move(TheTM));
if (EE) {
EE->setVerifyModules(VerifyModules);
return EE;
@@ -537,14 +459,13 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
// an interpreter instead.
if (WhichEngine & EngineKind::Interpreter) {
if (ExecutionEngine::InterpCtor)
- return ExecutionEngine::InterpCtor(M, ErrorStr);
+ return ExecutionEngine::InterpCtor(std::move(M), ErrorStr);
if (ErrorStr)
*ErrorStr = "Interpreter has not been linked in.";
return nullptr;
}
- if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::JITCtor &&
- !ExecutionEngine::MCJITCtor) {
+ if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::MCJITCtor) {
if (ErrorStr)
*ErrorStr = "JIT has not been linked in.";
}
@@ -890,9 +811,6 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
Result = PTOGV(getPointerToFunctionOrStub(const_cast<Function*>(F)));
else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV)));
- else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
- Result = PTOGV(getPointerToBasicBlock(const_cast<BasicBlock*>(
- BA->getBasicBlock())));
else
llvm_unreachable("Unknown constant pointer type!");
break;
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 6ff1e7a..58271df 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -27,14 +27,6 @@ using namespace llvm;
// Wrapping the C bindings types.
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef)
-inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
- return reinterpret_cast<TargetLibraryInfo*>(P);
-}
-
-inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
- TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
- return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
-}
inline LLVMTargetMachineRef wrap(const TargetMachine *P) {
return
@@ -110,7 +102,7 @@ LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE,
LLVMModuleRef M,
char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(M));
+ EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
builder.setEngineKind(EngineKind::Either)
.setErrorStr(&Error);
if (ExecutionEngine *EE = builder.create()){
@@ -125,7 +117,7 @@ LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp,
LLVMModuleRef M,
char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(M));
+ EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
builder.setEngineKind(EngineKind::Interpreter)
.setErrorStr(&Error);
if (ExecutionEngine *Interp = builder.create()) {
@@ -141,7 +133,7 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
unsigned OptLevel,
char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(M));
+ EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
builder.setEngineKind(EngineKind::JIT)
.setErrorStr(&Error)
.setOptLevel((CodeGenOpt::Level)OptLevel);
@@ -189,10 +181,9 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
targetOptions.EnableFastISel = options.EnableFastISel;
std::string Error;
- EngineBuilder builder(unwrap(M));
+ EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
builder.setEngineKind(EngineKind::JIT)
.setErrorStr(&Error)
- .setUseMCJIT(true)
.setOptLevel((CodeGenOpt::Level)options.OptLevel)
.setCodeModel(unwrap(options.CodeModel))
.setTargetOptions(targetOptions);
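The C-API contract is unchanged by the unique_ptr plumbing: the engine still takes ownership of the module it is given. A short sketch of that usage, not taken from the patch (a real JIT additionally needs LLVMLinkInMCJIT() and native-target initialization):

#include "llvm-c/Core.h"
#include "llvm-c/ExecutionEngine.h"

void cApiOwnershipSketch() {
  LLVMModuleRef Mod = LLVMModuleCreateWithName("demo");
  LLVMExecutionEngineRef Engine;
  char *Error = nullptr;
  if (LLVMCreateExecutionEngineForModule(&Engine, Mod, &Error)) {
    LLVMDisposeMessage(Error);          // creation failed; Mod may already be destroyed
  } else {
    LLVMDisposeExecutionEngine(Engine); // the engine owns Mod; do not dispose it separately
  }
}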
@@ -275,11 +266,10 @@ LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
}
void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) {
- unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F));
}
void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){
- unwrap(EE)->addModule(unwrap(M));
+ unwrap(EE)->addModule(std::unique_ptr<Module>(unwrap(M)));
}
void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
@@ -314,7 +304,7 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE,
LLVMValueRef Fn) {
- return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn));
+ return nullptr;
}
LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 4e22a8b..b23ca88 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -57,29 +57,11 @@ public:
~IntelJITEventListener() {
}
- virtual void NotifyFunctionEmitted(const Function &F,
- void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details);
-
- virtual void NotifyFreeingMachineCode(void *OldPtr);
-
virtual void NotifyObjectEmitted(const ObjectImage &Obj);
virtual void NotifyFreeingObject(const ObjectImage &Obj);
};
-static LineNumberInfo LineStartToIntelJITFormat(
- uintptr_t StartAddress,
- uintptr_t Address,
- DebugLoc Loc) {
- LineNumberInfo Result;
-
- Result.Offset = Address - StartAddress;
- Result.LineNumber = Loc.getLine();
-
- return Result;
-}
-
static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress,
uintptr_t Address,
DILineInfo Line) {
@@ -113,84 +95,10 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat(
return Result;
}
-// Adds the just-emitted function to the symbol table.
-void IntelJITEventListener::NotifyFunctionEmitted(
- const Function &F, void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details) {
- iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
- F.getName().data(),
- reinterpret_cast<uint64_t>(FnStart),
- FnSize);
-
- std::vector<LineNumberInfo> LineInfo;
-
- if (!Details.LineStarts.empty()) {
- // Now convert the line number information from the address/DebugLoc
- // format in Details to the offset/lineno in Intel JIT API format.
-
- LineInfo.reserve(Details.LineStarts.size() + 1);
-
- DebugLoc FirstLoc = Details.LineStarts[0].Loc;
- assert(!FirstLoc.isUnknown()
- && "LineStarts should not contain unknown DebugLocs");
-
- MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
- DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
- if (FunctionDI.Verify()) {
- FunctionMessage.source_file_name = const_cast<char*>(
- Filenames.getFullPath(FirstLocScope));
-
- LineNumberInfo FirstLine;
- FirstLine.Offset = 0;
- FirstLine.LineNumber = FunctionDI.getLineNumber();
- LineInfo.push_back(FirstLine);
- }
-
- for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I =
- Details.LineStarts.begin(), E = Details.LineStarts.end();
- I != E; ++I) {
- // This implementation ignores the DebugLoc filename because the Intel
- // JIT API does not support multiple source files associated with a single
- // JIT function
- LineInfo.push_back(LineStartToIntelJITFormat(
- reinterpret_cast<uintptr_t>(FnStart),
- I->Address,
- I->Loc));
-
- // If we have no file name yet for the function, use the filename from
- // the first instruction that has one
- if (FunctionMessage.source_file_name == 0) {
- MDNode *scope = I->Loc.getScope(
- Details.MF->getFunction()->getContext());
- FunctionMessage.source_file_name = const_cast<char*>(
- Filenames.getFullPath(scope));
- }
- }
-
- FunctionMessage.line_number_size = LineInfo.size();
- FunctionMessage.line_number_table = &*LineInfo.begin();
- } else {
- FunctionMessage.line_number_size = 0;
- FunctionMessage.line_number_table = 0;
- }
-
- Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
- &FunctionMessage);
- MethodIDs[FnStart] = FunctionMessage.method_id;
-}
-
-void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
- MethodIDMap::iterator I = MethodIDs.find(FnStart);
- if (I != MethodIDs.end()) {
- Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
- MethodIDs.erase(I);
- }
-}
-
void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
// Get the address of the object image for use as a unique identifier
const void* ObjData = Obj.getData().data();
- DIContext* Context = DIContext::getDWARFContext(Obj.getObjectFile());
+ DIContext* Context = DIContext::getDWARFContext(*Obj.getObjectFile());
MethodAddressVector Functions;
// Use symbol info to iterate functions in the object.
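With the legacy per-function notifications gone, clients interact only with the object-level hooks. A sketch of how such a listener is attached (createIntelJITEventListener() returns null when LLVM was built without Intel JIT events support); this is illustrative, not from the patch.

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITEventListener.h"

void attachIntelListenerSketch(llvm::ExecutionEngine &EE) {
  if (llvm::JITEventListener *L =
          llvm::JITEventListener::createIntelJITEventListener())
    EE.RegisterJITEventListener(L);   // NotifyObjectEmitted fires per emitted object
}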
diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
index 9c06fda..e36493e 100644
--- a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
+++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
@@ -21,3 +21,4 @@
type = OptionalLibrary
name = IntelJITEvents
parent = ExecutionEngine
+required_libraries = Core DebugInfo Support
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
index 74df8f0..1aac3ac 100644
--- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -13,7 +13,7 @@ add_llvm_library(LLVMInterpreter
)
if( LLVM_ENABLE_FFI )
- target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} )
+ target_link_libraries( LLVMInterpreter ${cmake_2_8_12_PRIVATE} ${FFI_LIBRARY_PATH} )
endif()
add_dependencies(LLVMInterpreter intrinsics_gen)
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 671bbee..b022101 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/UniqueLock.h"
#include <cmath>
#include <csignal>
#include <cstdio>
@@ -51,7 +52,7 @@ static ManagedStatic<sys::Mutex> FunctionsLock;
typedef GenericValue (*ExFunc)(FunctionType *,
const std::vector<GenericValue> &);
static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions;
-static std::map<std::string, ExFunc> FuncNames;
+static ManagedStatic<std::map<std::string, ExFunc> > FuncNames;
#ifdef USE_LIBFFI
typedef void (*RawFunc)();
@@ -97,9 +98,9 @@ static ExFunc lookupFunction(const Function *F) {
ExtName += "_" + F->getName().str();
sys::ScopedLock Writer(*FunctionsLock);
- ExFunc FnPtr = FuncNames[ExtName];
+ ExFunc FnPtr = (*FuncNames)[ExtName];
if (!FnPtr)
- FnPtr = FuncNames["lle_X_" + F->getName().str()];
+ FnPtr = (*FuncNames)["lle_X_" + F->getName().str()];
if (!FnPtr) // Try calling a generic function... if it exists...
FnPtr = (ExFunc)(intptr_t)
sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" +
@@ -248,14 +249,14 @@ GenericValue Interpreter::callExternalFunction(Function *F,
const std::vector<GenericValue> &ArgVals) {
TheInterpreter = this;
- FunctionsLock->acquire();
+ unique_lock<sys::Mutex> Guard(*FunctionsLock);
// Do a lookup to see if the function is in our cache... this should just be a
// deferred annotation!
std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F);
if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F)
: FI->second) {
- FunctionsLock->release();
+ Guard.unlock();
return Fn(F->getFunctionType(), ArgVals);
}
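Replacing manual acquire()/release() with a guard object is the standard early-unlock pattern: the destructor releases the mutex on every exit path, while unlock() still lets the fast path drop it before doing slow work. A self-contained analogue with std::unique_lock (llvm::unique_lock in Support/UniqueLock.h plays the same role for sys::Mutex); not taken from the patch.

#include <map>
#include <mutex>
#include <string>

std::mutex CacheLock;
std::map<std::string, int> Cache;

int computeSlowly(const std::string &Key) { return (int)Key.size(); } // stand-in

int lookupSketch(const std::string &Key) {
  std::unique_lock<std::mutex> Guard(CacheLock);
  auto It = Cache.find(Key);
  if (It != Cache.end()) {
    int Cached = It->second;
    Guard.unlock();           // release early, like Guard.unlock() above
    return Cached;
  }
  Guard.unlock();             // the slow path must not hold the lock
  return computeSlowly(Key);
}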
@@ -273,7 +274,7 @@ GenericValue Interpreter::callExternalFunction(Function *F,
RawFn = RF->second;
}
- FunctionsLock->release();
+ Guard.unlock();
GenericValue Result;
if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getDataLayout(), Result))
@@ -497,15 +498,15 @@ static GenericValue lle_X_memcpy(FunctionType *FT,
void Interpreter::initializeExternalFunctions() {
sys::ScopedLock Writer(*FunctionsLock);
- FuncNames["lle_X_atexit"] = lle_X_atexit;
- FuncNames["lle_X_exit"] = lle_X_exit;
- FuncNames["lle_X_abort"] = lle_X_abort;
-
- FuncNames["lle_X_printf"] = lle_X_printf;
- FuncNames["lle_X_sprintf"] = lle_X_sprintf;
- FuncNames["lle_X_sscanf"] = lle_X_sscanf;
- FuncNames["lle_X_scanf"] = lle_X_scanf;
- FuncNames["lle_X_fprintf"] = lle_X_fprintf;
- FuncNames["lle_X_memset"] = lle_X_memset;
- FuncNames["lle_X_memcpy"] = lle_X_memcpy;
+ (*FuncNames)["lle_X_atexit"] = lle_X_atexit;
+ (*FuncNames)["lle_X_exit"] = lle_X_exit;
+ (*FuncNames)["lle_X_abort"] = lle_X_abort;
+
+ (*FuncNames)["lle_X_printf"] = lle_X_printf;
+ (*FuncNames)["lle_X_sprintf"] = lle_X_sprintf;
+ (*FuncNames)["lle_X_sscanf"] = lle_X_sscanf;
+ (*FuncNames)["lle_X_scanf"] = lle_X_scanf;
+ (*FuncNames)["lle_X_fprintf"] = lle_X_fprintf;
+ (*FuncNames)["lle_X_memset"] = lle_X_memset;
+ (*FuncNames)["lle_X_memcpy"] = lle_X_memcpy;
}
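Turning FuncNames into a ManagedStatic removes a global constructor: the map is now built lazily on first dereference and torn down by llvm_shutdown(). A plain-C++ approximation of the lazy half (this models the behaviour only, not LLVM's actual ManagedStatic machinery):

#include <map>
#include <string>

// Function-local static: constructed on first call, not at program start-up.
static std::map<std::string, int> &funcNamesSketch() {
  static std::map<std::string, int> Names;
  return Names;
}

void registerBuiltinSketch() {
  funcNamesSketch()["lle_X_printf"] = 1;   // illustrative value, mirrors the table above
}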
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index 814efcc..8562981 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -30,9 +30,10 @@ static struct RegisterInterp {
extern "C" void LLVMLinkInInterpreter() { }
-/// create - Create a new interpreter object. This can never fail.
+/// Create a new interpreter object.
///
-ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) {
+ExecutionEngine *Interpreter::create(std::unique_ptr<Module> M,
+ std::string *ErrStr) {
// Tell this Module to materialize everything and release the GVMaterializer.
if (std::error_code EC = M->materializeAllPermanently()) {
if (ErrStr)
@@ -41,15 +42,15 @@ ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) {
return nullptr;
}
- return new Interpreter(M);
+ return new Interpreter(std::move(M));
}
//===----------------------------------------------------------------------===//
// Interpreter ctor - Initialize stuff
//
-Interpreter::Interpreter(Module *M)
- : ExecutionEngine(M), TD(M) {
-
+Interpreter::Interpreter(std::unique_ptr<Module> M)
+ : ExecutionEngine(std::move(M)), TD(Modules.back().get()) {
+
memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
setDataLayout(&TD);
// Initialize the "backend"
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 2145cde..2be9c59 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLI_INTERPRETER_H
-#define LLI_INTERPRETER_H
+#ifndef LLVM_LIB_EXECUTIONENGINE_INTERPRETER_INTERPRETER_H
+#define LLVM_LIB_EXECUTIONENGINE_INTERPRETER_INTERPRETER_H
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
@@ -37,29 +37,24 @@ typedef generic_gep_type_iterator<User::const_op_iterator> gep_type_iterator;
// stack, which causes the dtor to be run, which frees all the alloca'd memory.
//
class AllocaHolder {
- friend class AllocaHolderHandle;
- std::vector<void*> Allocations;
- unsigned RefCnt;
+ std::vector<void *> Allocations;
+
public:
- AllocaHolder() : RefCnt(0) {}
- void add(void *mem) { Allocations.push_back(mem); }
- ~AllocaHolder() {
- for (unsigned i = 0; i < Allocations.size(); ++i)
- free(Allocations[i]);
+ AllocaHolder() {}
+
+ // Make this type move-only. Define explicit move special members for MSVC.
+ AllocaHolder(AllocaHolder &&RHS) : Allocations(std::move(RHS.Allocations)) {}
+ AllocaHolder &operator=(AllocaHolder &&RHS) {
+ Allocations = std::move(RHS.Allocations);
+ return *this;
}
-};
-// AllocaHolderHandle gives AllocaHolder value semantics so we can stick it into
-// a vector...
-//
-class AllocaHolderHandle {
- AllocaHolder *H;
-public:
- AllocaHolderHandle() : H(new AllocaHolder()) { H->RefCnt++; }
- AllocaHolderHandle(const AllocaHolderHandle &AH) : H(AH.H) { H->RefCnt++; }
- ~AllocaHolderHandle() { if (--H->RefCnt == 0) delete H; }
+ ~AllocaHolder() {
+ for (void *Allocation : Allocations)
+ free(Allocation);
+ }
- void add(void *mem) { H->add(mem); }
+ void add(void *Mem) { Allocations.push_back(Mem); }
};
typedef std::vector<GenericValue> ValuePlaneTy;
@@ -71,11 +66,29 @@ struct ExecutionContext {
Function *CurFunction;// The currently executing function
BasicBlock *CurBB; // The currently executing BB
BasicBlock::iterator CurInst; // The next instruction to execute
- std::map<Value *, GenericValue> Values; // LLVM values used in this invocation
- std::vector<GenericValue> VarArgs; // Values passed through an ellipsis
CallSite Caller; // Holds the call that called subframes.
// NULL if main func or debugger invoked fn
- AllocaHolderHandle Allocas; // Track memory allocated by alloca
+ std::map<Value *, GenericValue> Values; // LLVM values used in this invocation
+ std::vector<GenericValue> VarArgs; // Values passed through an ellipsis
+ AllocaHolder Allocas; // Track memory allocated by alloca
+
+ ExecutionContext() : CurFunction(nullptr), CurBB(nullptr), CurInst(nullptr) {}
+
+ ExecutionContext(ExecutionContext &&O)
+ : CurFunction(O.CurFunction), CurBB(O.CurBB), CurInst(O.CurInst),
+ Caller(O.Caller), Values(std::move(O.Values)),
+ VarArgs(std::move(O.VarArgs)), Allocas(std::move(O.Allocas)) {}
+
+ ExecutionContext &operator=(ExecutionContext &&O) {
+ CurFunction = O.CurFunction;
+ CurBB = O.CurBB;
+ CurInst = O.CurInst;
+ Caller = O.Caller;
+ Values = std::move(O.Values);
+ VarArgs = std::move(O.VarArgs);
+ Allocas = std::move(O.Allocas);
+ return *this;
+ }
};
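A generic sketch of the same design outside the interpreter (the explicit move members matter because the MSVC versions of that era did not generate them implicitly): each stack frame owns its allocations through a move-only holder, so popping the frame frees them exactly once.

#include <cstdlib>
#include <utility>
#include <vector>

class AllocaHolderSketch {
  std::vector<void *> Allocations;
public:
  AllocaHolderSketch() {}
  AllocaHolderSketch(AllocaHolderSketch &&RHS)
      : Allocations(std::move(RHS.Allocations)) {}
  AllocaHolderSketch &operator=(AllocaHolderSketch &&RHS) {
    Allocations = std::move(RHS.Allocations);
    return *this;
  }
  ~AllocaHolderSketch() {
    for (void *P : Allocations)
      std::free(P);
  }
  void add(void *Mem) { Allocations.push_back(Mem); }
};

struct FrameSketch {
  AllocaHolderSketch Allocas;   // moves with the frame when the stack reallocates
};

void demoSketch() {
  std::vector<FrameSketch> Stack;
  Stack.emplace_back();
  Stack.back().Allocas.add(std::malloc(16));
  Stack.pop_back();             // the holder's destructor frees the block exactly once
}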
// Interpreter - This class represents the entirety of the interpreter.
@@ -94,7 +107,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
std::vector<Function*> AtExitHandlers;
public:
- explicit Interpreter(Module *M);
+ explicit Interpreter(std::unique_ptr<Module> M);
~Interpreter();
/// runAtExitHandlers - Run any functions registered by the program's calls to
@@ -105,33 +118,23 @@ public:
static void Register() {
InterpCtor = create;
}
-
- /// create - Create an interpreter ExecutionEngine. This can never fail.
+
+ /// Create an interpreter ExecutionEngine.
///
- static ExecutionEngine *create(Module *M, std::string *ErrorStr = nullptr);
+ static ExecutionEngine *create(std::unique_ptr<Module> M,
+ std::string *ErrorStr = nullptr);
/// run - Start execution with the specified function and arguments.
///
GenericValue runFunction(Function *F,
const std::vector<GenericValue> &ArgValues) override;
- void *getPointerToNamedFunction(const std::string &Name,
+ void *getPointerToNamedFunction(StringRef Name,
bool AbortOnFailure = true) override {
// FIXME: not implemented.
return nullptr;
}
- /// recompileAndRelinkFunction - For the interpreter, functions are always
- /// up-to-date.
- ///
- void *recompileAndRelinkFunction(Function *F) override {
- return getPointerToFunction(F);
- }
-
- /// freeMachineCodeForFunction - The interpreter does not generate any code.
- ///
- void freeMachineCodeForFunction(Function *F) override { }
-
// Methods used to execute code:
// Place a call on the stack
void callFunction(Function *F, const std::vector<GenericValue> &ArgVals);
@@ -213,7 +216,6 @@ private: // Helper functions
void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF);
void *getPointerToFunction(Function *F) override { return (void*)F; }
- void *getPointerToBasicBlock(BasicBlock *BB) override { return (void*)BB; }
void initializeExecutionEngine() { }
void initializeExternalFunctions();
diff --git a/lib/ExecutionEngine/JIT/Android.mk b/lib/ExecutionEngine/JIT/Android.mk
deleted file mode 100644
index 0466ba0..0000000
--- a/lib/ExecutionEngine/JIT/Android.mk
+++ /dev/null
@@ -1,17 +0,0 @@
-LOCAL_PATH:= $(call my-dir)
-
-# For the host
-# =====================================================
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
- JIT.cpp \
- JITEmitter.cpp \
- JITMemoryManager.cpp
-
-LOCAL_MODULE:= libLLVMJIT
-
-LOCAL_MODULE_TAGS := optional
-
-include $(LLVM_HOST_BUILD_MK)
-include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
deleted file mode 100644
index e16baed..0000000
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# TODO: Support other architectures. See Makefile.
-add_definitions(-DENABLE_X86_JIT)
-
-add_llvm_library(LLVMJIT
- JIT.cpp
- JITEmitter.cpp
- JITMemoryManager.cpp
- )
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
deleted file mode 100644
index 83ec978..0000000
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ /dev/null
@@ -1,695 +0,0 @@
-//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This tool implements a just-in-time compiler for LLVM, allowing direct
-// execution of LLVM bitcode in an efficient manner.
-//
-//===----------------------------------------------------------------------===//
-
-#include "JIT.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineCodeInfo.h"
-#include "llvm/Config/config.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MutexGuard.h"
-#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-#ifdef __APPLE__
-// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead
-// of atexit). It passes the address of linker generated symbol __dso_handle
-// to the function.
-// This configuration change happened at version 5330.
-# include <AvailabilityMacros.h>
-# if defined(MAC_OS_X_VERSION_10_4) && \
- ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \
- (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \
- __APPLE_CC__ >= 5330))
-# ifndef HAVE___DSO_HANDLE
-# define HAVE___DSO_HANDLE 1
-# endif
-# endif
-#endif
-
-#if HAVE___DSO_HANDLE
-extern void *__dso_handle __attribute__ ((__visibility__ ("hidden")));
-#endif
-
-namespace {
-
-static struct RegisterJIT {
- RegisterJIT() { JIT::Register(); }
-} JITRegistrator;
-
-}
-
-extern "C" void LLVMLinkInJIT() {
-}
-
-/// createJIT - This is the factory method for creating a JIT for the current
-/// machine, it does not fall back to the interpreter. This takes ownership
-/// of the module.
-ExecutionEngine *JIT::createJIT(Module *M,
- std::string *ErrorStr,
- JITMemoryManager *JMM,
- bool GVsWithCode,
- TargetMachine *TM) {
- // Try to register the program as a source of symbols to resolve against.
- //
- // FIXME: Don't do this here.
- sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr);
-
- // If the target supports JIT code generation, create the JIT.
- if (TargetJITInfo *TJ = TM->getJITInfo()) {
- return new JIT(M, *TM, *TJ, JMM, GVsWithCode);
- } else {
- if (ErrorStr)
- *ErrorStr = "target does not support JIT code generation";
- return nullptr;
- }
-}
-
-namespace {
-/// This class supports the global getPointerToNamedFunction(), which allows
-/// bugpoint or gdb users to search for a function by name without any context.
-class JitPool {
- SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT.
- mutable sys::Mutex Lock;
-public:
- void Add(JIT *jit) {
- MutexGuard guard(Lock);
- JITs.insert(jit);
- }
- void Remove(JIT *jit) {
- MutexGuard guard(Lock);
- JITs.erase(jit);
- }
- void *getPointerToNamedFunction(const char *Name) const {
- MutexGuard guard(Lock);
- assert(JITs.size() != 0 && "No Jit registered");
- //search function in every instance of JIT
- for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(),
- end = JITs.end();
- Jit != end; ++Jit) {
- if (Function *F = (*Jit)->FindFunctionNamed(Name))
- return (*Jit)->getPointerToFunction(F);
- }
- // The function is not available: fall back on the first created JIT (it will
- // search the symbols of the current program/library).
- return (*JITs.begin())->getPointerToNamedFunction(Name);
- }
-};
-ManagedStatic<JitPool> AllJits;
-}
-extern "C" {
- // getPointerToNamedFunction - This function is used as a global wrapper to
- // JIT::getPointerToNamedFunction for the purpose of resolving symbols when
- // bugpoint is debugging the JIT. In that scenario, we are loading an .so and
- // need to resolve function(s) that are being mis-codegenerated, so we need to
- // resolve their addresses at runtime, and this is the way to do it.
- void *getPointerToNamedFunction(const char *Name) {
- return AllJits->getPointerToNamedFunction(Name);
- }
-}
-
-JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *jmm, bool GVsWithCode)
- : ExecutionEngine(M), TM(tm), TJI(tji),
- JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()),
- AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) {
- setDataLayout(TM.getDataLayout());
-
- jitstate = new JITState(M);
-
- // Initialize JCE
- JCE = createEmitter(*this, JMM, TM);
-
- // Register in global list of all JITs.
- AllJits->Add(this);
-
- // Add target data
- MutexGuard locked(lock);
- FunctionPassManager &PM = jitstate->getPM();
- M->setDataLayout(TM.getDataLayout());
- PM.add(new DataLayoutPass(M));
-
- // Turn the machine code intermediate representation into bytes in memory that
- // may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) {
- report_fatal_error("Target does not support machine code emission!");
- }
-
- // Initialize passes.
- PM.doInitialization();
-}
-
-JIT::~JIT() {
- // Cleanup.
- AllJits->Remove(this);
- delete jitstate;
- delete JCE;
- // JMM is owned by JCE, so we do not need to delete JMM here.
- delete &TM;
-}
-
-/// addModule - Add a new Module to the JIT. If we previously removed the last
-/// Module, we need re-initialize jitstate with a valid Module.
-void JIT::addModule(Module *M) {
- MutexGuard locked(lock);
-
- if (Modules.empty()) {
- assert(!jitstate && "jitstate should be NULL if Modules vector is empty!");
-
- jitstate = new JITState(M);
-
- FunctionPassManager &PM = jitstate->getPM();
- M->setDataLayout(TM.getDataLayout());
- PM.add(new DataLayoutPass(M));
-
- // Turn the machine code intermediate representation into bytes in memory
- // that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) {
- report_fatal_error("Target does not support machine code emission!");
- }
-
- // Initialize passes.
- PM.doInitialization();
- }
-
- ExecutionEngine::addModule(M);
-}
-
-/// removeModule - If we are removing the last Module, invalidate the jitstate
-/// since the PassManager it contains references a released Module.
-bool JIT::removeModule(Module *M) {
- bool result = ExecutionEngine::removeModule(M);
-
- MutexGuard locked(lock);
-
- if (jitstate && jitstate->getModule() == M) {
- delete jitstate;
- jitstate = nullptr;
- }
-
- if (!jitstate && !Modules.empty()) {
- jitstate = new JITState(Modules[0]);
-
- FunctionPassManager &PM = jitstate->getPM();
- M->setDataLayout(TM.getDataLayout());
- PM.add(new DataLayoutPass(M));
-
- // Turn the machine code intermediate representation into bytes in memory
- // that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) {
- report_fatal_error("Target does not support machine code emission!");
- }
-
- // Initialize passes.
- PM.doInitialization();
- }
- return result;
-}
-
-/// run - Start execution with the specified function and arguments.
-///
-GenericValue JIT::runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) {
- assert(F && "Function *F was null at entry to run()");
-
- void *FPtr = getPointerToFunction(F);
- assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
- FunctionType *FTy = F->getFunctionType();
- Type *RetTy = FTy->getReturnType();
-
- assert((FTy->getNumParams() == ArgValues.size() ||
- (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
- "Wrong number of arguments passed into function!");
- assert(FTy->getNumParams() == ArgValues.size() &&
- "This doesn't support passing arguments through varargs (yet)!");
-
- // Handle some common cases first. These cases correspond to common `main'
- // prototypes.
- if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
- switch (ArgValues.size()) {
- case 3:
- if (FTy->getParamType(0)->isIntegerTy(32) &&
- FTy->getParamType(1)->isPointerTy() &&
- FTy->getParamType(2)->isPointerTy()) {
- int (*PF)(int, char **, const char **) =
- (int(*)(int, char **, const char **))(intptr_t)FPtr;
-
- // Call the function.
- GenericValue rv;
- rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
- (char **)GVTOP(ArgValues[1]),
- (const char **)GVTOP(ArgValues[2])));
- return rv;
- }
- break;
- case 2:
- if (FTy->getParamType(0)->isIntegerTy(32) &&
- FTy->getParamType(1)->isPointerTy()) {
- int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
-
- // Call the function.
- GenericValue rv;
- rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
- (char **)GVTOP(ArgValues[1])));
- return rv;
- }
- break;
- case 1:
- if (FTy->getParamType(0)->isIntegerTy(32)) {
- GenericValue rv;
- int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
- rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
- return rv;
- }
- if (FTy->getParamType(0)->isPointerTy()) {
- GenericValue rv;
- int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr;
- rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0])));
- return rv;
- }
- break;
- }
- }
-
- // Handle cases where no arguments are passed first.
- if (ArgValues.empty()) {
- GenericValue rv;
- switch (RetTy->getTypeID()) {
- default: llvm_unreachable("Unknown return type for function call!");
- case Type::IntegerTyID: {
- unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
- if (BitWidth == 1)
- rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
- else if (BitWidth <= 8)
- rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
- else if (BitWidth <= 16)
- rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
- else if (BitWidth <= 32)
- rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
- else if (BitWidth <= 64)
- rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
- else
- llvm_unreachable("Integer types > 64 bits not supported");
- return rv;
- }
- case Type::VoidTyID:
- rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
- return rv;
- case Type::FloatTyID:
- rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
- return rv;
- case Type::DoubleTyID:
- rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
- return rv;
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- llvm_unreachable("long double not supported yet");
- case Type::PointerTyID:
- return PTOGV(((void*(*)())(intptr_t)FPtr)());
- }
- }
-
- // Okay, this is not one of our quick and easy cases. Because we don't have a
- // full FFI, we have to codegen a nullary stub function that just calls the
- // function we are interested in, passing in constants for all of the
- // arguments. Make this function and return.
-
- // First, create the function.
- FunctionType *STy=FunctionType::get(RetTy, false);
- Function *Stub = Function::Create(STy, Function::InternalLinkage, "",
- F->getParent());
-
- // Insert a basic block.
- BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub);
-
- // Convert all of the GenericValue arguments over to constants. Note that we
- // currently don't support varargs.
- SmallVector<Value*, 8> Args;
- for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
- Constant *C = nullptr;
- Type *ArgTy = FTy->getParamType(i);
- const GenericValue &AV = ArgValues[i];
- switch (ArgTy->getTypeID()) {
- default: llvm_unreachable("Unknown argument type for function call!");
- case Type::IntegerTyID:
- C = ConstantInt::get(F->getContext(), AV.IntVal);
- break;
- case Type::FloatTyID:
- C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal));
- break;
- case Type::DoubleTyID:
- C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal));
- break;
- case Type::PPC_FP128TyID:
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- C = ConstantFP::get(F->getContext(), APFloat(ArgTy->getFltSemantics(),
- AV.IntVal));
- break;
- case Type::PointerTyID:
- void *ArgPtr = GVTOP(AV);
- if (sizeof(void*) == 4)
- C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
- (int)(intptr_t)ArgPtr);
- else
- C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
- (intptr_t)ArgPtr);
- // Cast the integer to pointer
- C = ConstantExpr::getIntToPtr(C, ArgTy);
- break;
- }
- Args.push_back(C);
- }
-
- CallInst *TheCall = CallInst::Create(F, Args, "", StubBB);
- TheCall->setCallingConv(F->getCallingConv());
- TheCall->setTailCall();
- if (!TheCall->getType()->isVoidTy())
- // Return result of the call.
- ReturnInst::Create(F->getContext(), TheCall, StubBB);
- else
- ReturnInst::Create(F->getContext(), StubBB); // Just return void.
-
- // Finally, call our nullary stub function.
- GenericValue Result = runFunction(Stub, std::vector<GenericValue>());
- // Erase it, since no other function can have a reference to it.
- Stub->eraseFromParent();
- // And return the result.
- return Result;
-}
-
-void JIT::RegisterJITEventListener(JITEventListener *L) {
- if (!L)
- return;
- MutexGuard locked(lock);
- EventListeners.push_back(L);
-}
-void JIT::UnregisterJITEventListener(JITEventListener *L) {
- if (!L)
- return;
- MutexGuard locked(lock);
- std::vector<JITEventListener*>::reverse_iterator I=
- std::find(EventListeners.rbegin(), EventListeners.rend(), L);
- if (I != EventListeners.rend()) {
- std::swap(*I, EventListeners.back());
- EventListeners.pop_back();
- }
-}
-void JIT::NotifyFunctionEmitted(
- const Function &F,
- void *Code, size_t Size,
- const JITEvent_EmittedFunctionDetails &Details) {
- MutexGuard locked(lock);
- for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
- EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details);
- }
-}
-
-void JIT::NotifyFreeingMachineCode(void *OldPtr) {
- MutexGuard locked(lock);
- for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
- EventListeners[I]->NotifyFreeingMachineCode(OldPtr);
- }
-}
-
-/// runJITOnFunction - Run the FunctionPassManager full of
-/// just-in-time compilation passes on F, hopefully filling in
-/// GlobalAddress[F] with the address of F's machine code.
-///
-void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
- MutexGuard locked(lock);
-
- class MCIListener : public JITEventListener {
- MachineCodeInfo *const MCI;
- public:
- MCIListener(MachineCodeInfo *mci) : MCI(mci) {}
- void NotifyFunctionEmitted(const Function &, void *Code, size_t Size,
- const EmittedFunctionDetails &) override {
- MCI->setAddress(Code);
- MCI->setSize(Size);
- }
- };
- MCIListener MCIL(MCI);
- if (MCI)
- RegisterJITEventListener(&MCIL);
-
- runJITOnFunctionUnlocked(F);
-
- if (MCI)
- UnregisterJITEventListener(&MCIL);
-}
-
-void JIT::runJITOnFunctionUnlocked(Function *F) {
- assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
-
- jitTheFunctionUnlocked(F);
-
- // If the function referred to another function that had not yet been
- // read from bitcode, and we are jitting non-lazily, emit it now.
- while (!jitstate->getPendingFunctions().empty()) {
- Function *PF = jitstate->getPendingFunctions().back();
- jitstate->getPendingFunctions().pop_back();
-
- assert(!PF->hasAvailableExternallyLinkage() &&
- "Externally-defined function should not be in pending list.");
-
- jitTheFunctionUnlocked(PF);
-
- // Now that the function has been jitted, ask the JITEmitter to rewrite
- // the stub with real address of the function.
- updateFunctionStubUnlocked(PF);
- }
-}
-
-void JIT::jitTheFunctionUnlocked(Function *F) {
- isAlreadyCodeGenerating = true;
- jitstate->getPM().run(*F);
- isAlreadyCodeGenerating = false;
-
- // clear basic block addresses after this function is done
- getBasicBlockAddressMap().clear();
-}
-
-/// getPointerToFunction - This method is used to get the address of the
-/// specified function, compiling it if necessary.
-///
-void *JIT::getPointerToFunction(Function *F) {
-
- if (void *Addr = getPointerToGlobalIfAvailable(F))
- return Addr; // Check if function already code gen'd
-
- MutexGuard locked(lock);
-
- // Now that this thread owns the lock, make sure we read in the function if it
- // exists in this Module.
- std::string ErrorMsg;
- if (F->Materialize(&ErrorMsg)) {
- report_fatal_error("Error reading function '" + F->getName()+
- "' from bitcode file: " + ErrorMsg);
- }
-
- // ... and check if another thread has already code gen'd the function.
- if (void *Addr = getPointerToGlobalIfAvailable(F))
- return Addr;
-
- if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
- bool AbortOnFailure = !F->hasExternalWeakLinkage();
- void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
- addGlobalMapping(F, Addr);
- return Addr;
- }
-
- runJITOnFunctionUnlocked(F);
-
- void *Addr = getPointerToGlobalIfAvailable(F);
- assert(Addr && "Code generation didn't add function to GlobalAddress table!");
- return Addr;
-}
-
-void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
- MutexGuard locked(lock);
-
- BasicBlockAddressMapTy::iterator I =
- getBasicBlockAddressMap().find(BB);
- if (I == getBasicBlockAddressMap().end()) {
- getBasicBlockAddressMap()[BB] = Addr;
- } else {
- // ignore repeats: some BBs can be split into few MBBs?
- }
-}
-
-void JIT::clearPointerToBasicBlock(const BasicBlock *BB) {
- MutexGuard locked(lock);
- getBasicBlockAddressMap().erase(BB);
-}
-
-void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
- // make sure its function is compiled by the JIT
- (void)getPointerToFunction(BB->getParent());
-
- // resolve basic block address
- MutexGuard locked(lock);
-
- BasicBlockAddressMapTy::iterator I =
- getBasicBlockAddressMap().find(BB);
- if (I != getBasicBlockAddressMap().end()) {
- return I->second;
- } else {
- llvm_unreachable("JIT does not have BB address for address-of-label, was"
- " it eliminated by optimizer?");
- }
-}
-
-void *JIT::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure){
- if (!isSymbolSearchingDisabled()) {
- void *ptr = JMM->getPointerToNamedFunction(Name, false);
- if (ptr)
- return ptr;
- }
-
- /// If a LazyFunctionCreator is installed, use it to get/create the function.
- if (LazyFunctionCreator)
- if (void *RP = LazyFunctionCreator(Name))
- return RP;
-
- if (AbortOnFailure) {
- report_fatal_error("Program used external function '"+Name+
- "' which could not be resolved!");
- }
- return nullptr;
-}
-
-
-/// getOrEmitGlobalVariable - Return the address of the specified global
-/// variable, possibly emitting it to memory if needed. This is used by the
-/// Emitter.
-void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
- MutexGuard locked(lock);
-
- void *Ptr = getPointerToGlobalIfAvailable(GV);
- if (Ptr) return Ptr;
-
- // If the global is external, just remember the address.
- if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) {
-#if HAVE___DSO_HANDLE
- if (GV->getName() == "__dso_handle")
- return (void*)&__dso_handle;
-#endif
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
- if (!Ptr) {
- report_fatal_error("Could not resolve external global address: "
- +GV->getName());
- }
- addGlobalMapping(GV, Ptr);
- } else {
- // If the global hasn't been emitted to memory yet, allocate space and
- // emit it into memory.
- Ptr = getMemoryForGV(GV);
- addGlobalMapping(GV, Ptr);
- EmitGlobalVariable(GV); // Initialize the variable.
- }
- return Ptr;
-}
-
-/// recompileAndRelinkFunction - This method is used to force a function
-/// which has already been compiled, to be compiled again, possibly
-/// after it has been modified. Then the entry to the old copy is overwritten
-/// with a branch to the new copy. If there was no old copy, this acts
-/// just like JIT::getPointerToFunction().
-///
-void *JIT::recompileAndRelinkFunction(Function *F) {
- void *OldAddr = getPointerToGlobalIfAvailable(F);
-
- // If it's not already compiled there is no reason to patch it up.
- if (!OldAddr) return getPointerToFunction(F);
-
- // Delete the old function mapping.
- addGlobalMapping(F, nullptr);
-
- // Recodegen the function
- runJITOnFunction(F);
-
- // Update state, forward the old function to the new function.
- void *Addr = getPointerToGlobalIfAvailable(F);
- assert(Addr && "Code generation didn't add function to GlobalAddress table!");
- TJI.replaceMachineCodeForFunction(OldAddr, Addr);
- return Addr;
-}
-
-/// getMemoryForGV - This method abstracts memory allocation of global
-/// variable so that the JIT can allocate thread local variables depending
-/// on the target.
-///
-char* JIT::getMemoryForGV(const GlobalVariable* GV) {
- char *Ptr;
-
- // GlobalVariable's which are not "constant" will cause trouble in a server
- // situation. It's returned in the same block of memory as code which may
- // not be writable.
- if (isGVCompilationDisabled() && !GV->isConstant()) {
- report_fatal_error("Compilation of non-internal GlobalValue is disabled!");
- }
-
- // Some applications require globals and code to live together, so they may
- // be allocated into the same buffer, but in general globals are allocated
- // through the memory manager which puts them near the code but not in the
- // same buffer.
- Type *GlobalType = GV->getType()->getElementType();
- size_t S = getDataLayout()->getTypeAllocSize(GlobalType);
- size_t A = getDataLayout()->getPreferredAlignment(GV);
- if (GV->isThreadLocal()) {
- MutexGuard locked(lock);
- Ptr = TJI.allocateThreadLocalMemory(S);
- } else if (TJI.allocateSeparateGVMemory()) {
- if (A <= 8) {
- Ptr = (char*)malloc(S);
- } else {
- // Allocate S+A bytes of memory, then use an aligned pointer within that
- // space.
- Ptr = (char*)malloc(S+A);
- unsigned MisAligned = ((intptr_t)Ptr & (A-1));
- Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0);
- }
- } else if (AllocateGVsWithCode) {
- Ptr = (char*)JCE->allocateSpace(S, A);
- } else {
- Ptr = (char*)JCE->allocateGlobal(S, A);
- }
- return Ptr;
-}
-
-void JIT::addPendingFunction(Function *F) {
- MutexGuard locked(lock);
- jitstate->getPendingFunctions().push_back(F);
-}
-
-
-JITEventListener::~JITEventListener() {}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
deleted file mode 100644
index 69a7c36..0000000
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ /dev/null
@@ -1,229 +0,0 @@
-//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the top-level JIT data structure.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef JIT_H
-#define JIT_H
-
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/PassManager.h"
-
-namespace llvm {
-
-class Function;
-struct JITEvent_EmittedFunctionDetails;
-class MachineCodeEmitter;
-class MachineCodeInfo;
-class TargetJITInfo;
-class TargetMachine;
-
-class JITState {
-private:
- FunctionPassManager PM; // Passes to compile a function
- Module *M; // Module used to create the PM
-
- /// PendingFunctions - Functions which have not been code generated yet, but
- /// were called from a function being code generated.
- std::vector<AssertingVH<Function> > PendingFunctions;
-
-public:
- explicit JITState(Module *M) : PM(M), M(M) {}
-
- FunctionPassManager &getPM() {
- return PM;
- }
-
- Module *getModule() const { return M; }
- std::vector<AssertingVH<Function> > &getPendingFunctions() {
- return PendingFunctions;
- }
-};
-
-
-class JIT : public ExecutionEngine {
- /// types
- typedef ValueMap<const BasicBlock *, void *>
- BasicBlockAddressMapTy;
- /// data
- TargetMachine &TM; // The current target we are compiling to
- TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
- JITCodeEmitter *JCE; // JCE object
- JITMemoryManager *JMM;
- std::vector<JITEventListener*> EventListeners;
-
- /// AllocateGVsWithCode - Some applications require that global variables and
- /// code be allocated into the same region of memory, in which case this flag
- /// should be set to true. Doing so breaks freeMachineCodeForFunction.
- bool AllocateGVsWithCode;
-
- /// True while the JIT is generating code. Used to assert against recursive
- /// entry.
- bool isAlreadyCodeGenerating;
-
- JITState *jitstate;
-
- /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their
- /// actualized version, only filled for basic blocks that have their address
- /// taken.
- BasicBlockAddressMapTy BasicBlockAddressMap;
-
-
- JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, bool AllocateGVsWithCode);
-public:
- ~JIT();
-
- static void Register() {
- JITCtor = createJIT;
- }
-
- /// getJITInfo - Return the target JIT information structure.
- ///
- TargetJITInfo &getJITInfo() const { return TJI; }
-
- /// create - Create and return a new JIT compiler if there is one available
- /// for the current target. Otherwise, return null.
- ///
- static ExecutionEngine *create(Module *M,
- std::string *Err,
- JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel =
- CodeGenOpt::Default,
- bool GVsWithCode = true,
- Reloc::Model RM = Reloc::Default,
- CodeModel::Model CMM = CodeModel::JITDefault) {
- return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
- RM, CMM);
- }
-
- void addModule(Module *M) override;
-
- /// removeModule - Remove a Module from the list of modules. Returns true if
- /// M is found.
- bool removeModule(Module *M) override;
-
- /// runFunction - Start execution with the specified function and arguments.
- ///
- GenericValue runFunction(Function *F,
- const std::vector<GenericValue> &ArgValues) override;
-
- /// getPointerToNamedFunction - This method returns the address of the
- /// specified function by using the MemoryManager. As such it is only
- /// useful for resolving library symbols, not code generated symbols.
- ///
- /// If AbortOnFailure is false and no function with the given name is
- /// found, this function silently returns a null pointer. Otherwise,
- /// it prints a message to stderr and aborts.
- ///
- void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true) override;
-
- // CompilationCallback - Invoked the first time that a call site is found,
- // which causes lazy compilation of the target function.
- //
- static void CompilationCallback();
-
- /// getPointerToFunction - This returns the address of the specified function,
- /// compiling it if necessary.
- ///
- void *getPointerToFunction(Function *F) override;
-
- /// addPointerToBasicBlock - Adds address of the specific basic block.
- void addPointerToBasicBlock(const BasicBlock *BB, void *Addr);
-
- /// clearPointerToBasicBlock - Removes address of specific basic block.
- void clearPointerToBasicBlock(const BasicBlock *BB);
-
- /// getPointerToBasicBlock - This returns the address of the specified basic
- /// block, assuming function is compiled.
- void *getPointerToBasicBlock(BasicBlock *BB) override;
-
- /// getOrEmitGlobalVariable - Return the address of the specified global
- /// variable, possibly emitting it to memory if needed. This is used by the
- /// Emitter.
- void *getOrEmitGlobalVariable(const GlobalVariable *GV) override;
-
- /// getPointerToFunctionOrStub - If the specified function has been
- /// code-gen'd, return a pointer to the function. If not, compile it, or use
- /// a stub to implement lazy compilation if available.
- ///
- void *getPointerToFunctionOrStub(Function *F) override;
-
- /// recompileAndRelinkFunction - This method is used to force a function
- /// which has already been compiled, to be compiled again, possibly
- /// after it has been modified. Then the entry to the old copy is overwritten
- /// with a branch to the new copy. If there was no old copy, this acts
- /// just like JIT::getPointerToFunction().
- ///
- void *recompileAndRelinkFunction(Function *F) override;
-
- /// freeMachineCodeForFunction - deallocate memory used to code-generate this
- /// Function.
- ///
- void freeMachineCodeForFunction(Function *F) override;
-
- /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd
- /// function was encountered. Add it to a pending list to be processed after
- /// the current function.
- ///
- void addPendingFunction(Function *F);
-
- /// getCodeEmitter - Return the code emitter this JIT is emitting into.
- ///
- JITCodeEmitter *getCodeEmitter() const { return JCE; }
-
- static ExecutionEngine *createJIT(Module *M,
- std::string *ErrorStr,
- JITMemoryManager *JMM,
- bool GVsWithCode,
- TargetMachine *TM);
-
- // Run the JIT on F and return information about the generated code
- void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override;
-
- void RegisterJITEventListener(JITEventListener *L) override;
- void UnregisterJITEventListener(JITEventListener *L) override;
-
- TargetMachine *getTargetMachine() override { return &TM; }
-
- /// These functions correspond to the methods on JITEventListener. They
- /// iterate over the registered listeners and call the corresponding method on
- /// each.
- void NotifyFunctionEmitted(
- const Function &F, void *Code, size_t Size,
- const JITEvent_EmittedFunctionDetails &Details);
- void NotifyFreeingMachineCode(void *OldPtr);
-
- BasicBlockAddressMapTy &
- getBasicBlockAddressMap() {
- return BasicBlockAddressMap;
- }
-
-
-private:
- static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
- TargetMachine &tm);
- void runJITOnFunctionUnlocked(Function *F);
- void updateFunctionStubUnlocked(Function *F);
- void jitTheFunctionUnlocked(Function *F);
-
-protected:
-
- /// getMemoryforGV - Allocate memory for a global variable.
- char* getMemoryForGV(const GlobalVariable* GV) override;
-
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
deleted file mode 100644
index 50b8c10..0000000
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ /dev/null
@@ -1,1256 +0,0 @@
-//===-- JITEmitter.cpp - Write machine code to executable memory ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a MachineCodeEmitter object that is used by the JIT to
-// write machine code to memory and remember where relocatable values are.
-//
-//===----------------------------------------------------------------------===//
-
-#include "JIT.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineCodeInfo.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/ValueMap.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Disassembler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/MutexGuard.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include <algorithm>
-#ifndef NDEBUG
-#include <iomanip>
-#endif
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-STATISTIC(NumBytes, "Number of bytes of machine code compiled");
-STATISTIC(NumRelos, "Number of relocations applied");
-STATISTIC(NumRetries, "Number of retries with more memory");
-
-
-// A declaration may stop being a declaration once it's fully read from bitcode.
-// This function returns true if F is fully read and is still a declaration.
-static bool isNonGhostDeclaration(const Function *F) {
- return F->isDeclaration() && !F->isMaterializable();
-}
-
-//===----------------------------------------------------------------------===//
-// JIT lazy compilation code.
-//
-namespace {
- class JITEmitter;
- class JITResolverState;
-
- template<typename ValueTy>
- struct NoRAUWValueMapConfig : public ValueMapConfig<ValueTy> {
- typedef JITResolverState *ExtraData;
- static void onRAUW(JITResolverState *, Value *Old, Value *New) {
- llvm_unreachable("The JIT doesn't know how to handle a"
- " RAUW on a value it has emitted.");
- }
- };
-
- struct CallSiteValueMapConfig : public NoRAUWValueMapConfig<Function*> {
- typedef JITResolverState *ExtraData;
- static void onDelete(JITResolverState *JRS, Function *F);
- };
-
- class JITResolverState {
- public:
- typedef ValueMap<Function*, void*, NoRAUWValueMapConfig<Function*> >
- FunctionToLazyStubMapTy;
- typedef std::map<void*, AssertingVH<Function> > CallSiteToFunctionMapTy;
- typedef ValueMap<Function *, SmallPtrSet<void*, 1>,
- CallSiteValueMapConfig> FunctionToCallSitesMapTy;
- typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
- private:
- /// FunctionToLazyStubMap - Keep track of the lazy stub created for a
- /// particular function so that we can reuse them if necessary.
- FunctionToLazyStubMapTy FunctionToLazyStubMap;
-
- /// CallSiteToFunctionMap - Keep track of the function that each lazy call
- /// site corresponds to, and vice versa.
- CallSiteToFunctionMapTy CallSiteToFunctionMap;
- FunctionToCallSitesMapTy FunctionToCallSitesMap;
-
- /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a
- /// particular GlobalVariable so that we can reuse them if necessary.
- GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
-
-#ifndef NDEBUG
- /// Instance of the JIT this ResolverState serves.
- JIT *TheJIT;
-#endif
-
- public:
- JITResolverState(JIT *jit) : FunctionToLazyStubMap(this),
- FunctionToCallSitesMap(this) {
-#ifndef NDEBUG
- TheJIT = jit;
-#endif
- }
-
- FunctionToLazyStubMapTy& getFunctionToLazyStubMap() {
- return FunctionToLazyStubMap;
- }
-
- GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap() {
- return GlobalToIndirectSymMap;
- }
-
- std::pair<void *, Function *> LookupFunctionFromCallSite(
- void *CallSite) const {
- // The address given to us for the stub may not be exactly right; it
- // might be a little bit after the stub. As such, use upper_bound to
- // find it.
- CallSiteToFunctionMapTy::const_iterator I =
- CallSiteToFunctionMap.upper_bound(CallSite);
- assert(I != CallSiteToFunctionMap.begin() &&
- "This is not a known call site!");
- --I;
- return *I;
- }
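- // Illustrative sketch (assumed helper, not part of the original file): the
- // upper_bound trick above in isolation. With a map keyed by stub start
- // addresses, upper_bound returns the first entry strictly after the query,
- // so stepping back one entry gives the stub that contains the address.
- static Function *lookupByContainedAddress(
- const std::map<void*, Function*> &M, void *Addr) {
- std::map<void*, Function*>::const_iterator I = M.upper_bound(Addr);
- assert(I != M.begin() && "Address precedes every registered stub");
- --I;
- return I->second;
- }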
-
- void AddCallSite(void *CallSite, Function *F) {
- bool Inserted = CallSiteToFunctionMap.insert(
- std::make_pair(CallSite, F)).second;
- (void)Inserted;
- assert(Inserted && "Pair was already in CallSiteToFunctionMap");
- FunctionToCallSitesMap[F].insert(CallSite);
- }
-
- void EraseAllCallSitesForPrelocked(Function *F);
-
- // Erases _all_ call sites regardless of their function. This is used to
- // unregister the stub addresses from the StubToResolverMap in
- // ~JITResolver().
- void EraseAllCallSitesPrelocked();
- };
-
- /// JITResolver - Keep track of, and resolve, call sites for functions that
- /// have not yet been compiled.
- class JITResolver {
- typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy;
- typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy;
- typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy;
-
- /// LazyResolverFn - The target lazy resolver function that we actually
- /// rewrite instructions to use.
- TargetJITInfo::LazyResolverFn LazyResolverFn;
-
- JITResolverState state;
-
- /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap
- /// for external functions. TODO: Of course, external functions don't need
- /// a lazy stub. It's actually here to make it more likely that far calls
- /// succeed, but no single stub can guarantee that. I'll remove this in a
- /// subsequent checkin when I actually fix far calls.
- std::map<void*, void*> ExternalFnToStubMap;
-
- /// revGOTMap - map addresses to indexes in the GOT
- std::map<void*, unsigned> revGOTMap;
- unsigned nextGOTIndex;
-
- JITEmitter &JE;
-
- /// Instance of JIT corresponding to this Resolver.
- JIT *TheJIT;
-
- public:
- explicit JITResolver(JIT &jit, JITEmitter &je)
- : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) {
- LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
- }
-
- ~JITResolver();
-
- /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's
- /// lazy-compilation stub if it has already been created.
- void *getLazyFunctionStubIfAvailable(Function *F);
-
- /// getLazyFunctionStub - This returns a pointer to a function's
- /// lazy-compilation stub, creating one on demand as needed.
- void *getLazyFunctionStub(Function *F);
-
- /// getExternalFunctionStub - Return a stub for the function at the
- /// specified address, created lazily on demand.
- void *getExternalFunctionStub(void *FnAddr);
-
- /// getGlobalValueIndirectSym - Return an indirect symbol containing the
- /// specified GV address.
- void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
-
- /// getGOTIndexForAddr - Return a new or existing index in the GOT for
- /// an address. This function only manages slots; it does not manage the
- /// contents of the slots or the memory associated with the GOT.
- unsigned getGOTIndexForAddr(void *addr);
-
- /// JITCompilerFn - This function is called to resolve a stub to a compiled
- /// address. If the LLVM Function corresponding to the stub has not yet
- /// been compiled, this function compiles it first.
- static void *JITCompilerFn(void *Stub);
- };
-
- class StubToResolverMapTy {
- /// Map a stub address to a specific instance of a JITResolver so that
- /// lazily-compiled functions can find the right resolver to use.
- ///
- /// Guarded by Lock.
- std::map<void*, JITResolver*> Map;
-
- /// Guards Map from concurrent accesses.
- mutable sys::Mutex Lock;
-
- public:
- /// Registers a Stub to be resolved by Resolver.
- void RegisterStubResolver(void *Stub, JITResolver *Resolver) {
- MutexGuard guard(Lock);
- Map.insert(std::make_pair(Stub, Resolver));
- }
- /// Unregisters the Stub when it's invalidated.
- void UnregisterStubResolver(void *Stub) {
- MutexGuard guard(Lock);
- Map.erase(Stub);
- }
- /// Returns the JITResolver instance that owns the Stub.
- JITResolver *getResolverFromStub(void *Stub) const {
- MutexGuard guard(Lock);
- // The address given to us for the stub may not be exactly right; it might
- // be a little bit after the stub. As such, use upper_bound to find it.
- // This is the same trick as in LookupFunctionFromCallSite from
- // JITResolverState.
- std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub);
- assert(I != Map.begin() && "This is not a known stub!");
- --I;
- return I->second;
- }
- /// True if any stubs refer to the given resolver. Only used in an assert().
- /// O(N)
- bool ResolverHasStubs(JITResolver* Resolver) const {
- MutexGuard guard(Lock);
- for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(),
- E = Map.end(); I != E; ++I) {
- if (I->second == Resolver)
- return true;
- }
- return false;
- }
- };
- /// This needs to be static so that a lazy call stub can access it with no
- /// context except the address of the stub.
- ManagedStatic<StubToResolverMapTy> StubToResolverMap;
-
- /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
- /// used to output functions to memory for execution.
- class JITEmitter : public JITCodeEmitter {
- JITMemoryManager *MemMgr;
-
- // When outputting a function stub in the context of some other function, we
- // save BufferBegin/BufferEnd/CurBufferPtr here.
- uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
-
- // When reattempting to JIT a function after running out of space, we store
- // the estimated size of the function we're trying to JIT here, so we can
- // ask the memory manager for at least this much space. When we
- // successfully emit the function, we reset this back to zero.
- uintptr_t SizeEstimate;
-
- /// Relocations - These are the relocations that the function needs, as
- /// emitted.
- std::vector<MachineRelocation> Relocations;
-
- /// MBBLocations - This vector is a mapping from MBB ID's to their address.
- /// It is filled in by the StartMachineBasicBlock callback and queried by
- /// the getMachineBasicBlockAddress callback.
- std::vector<uintptr_t> MBBLocations;
-
- /// ConstantPool - The constant pool for the current function.
- ///
- MachineConstantPool *ConstantPool;
-
- /// ConstantPoolBase - A pointer to the first entry in the constant pool.
- ///
- void *ConstantPoolBase;
-
- /// ConstPoolAddresses - Addresses of individual constant pool entries.
- ///
- SmallVector<uintptr_t, 8> ConstPoolAddresses;
-
- /// JumpTable - The jump tables for the current function.
- ///
- MachineJumpTableInfo *JumpTable;
-
- /// JumpTableBase - A pointer to the first entry in the jump table.
- ///
- void *JumpTableBase;
-
- /// Resolver - This contains info about the currently resolved functions.
- JITResolver Resolver;
-
- /// LabelLocations - This vector is a mapping from Label ID's to their
- /// address.
- DenseMap<MCSymbol*, uintptr_t> LabelLocations;
-
- /// MMI - Machine module info for exception information
- MachineModuleInfo* MMI;
-
- // CurFn - The llvm function being emitted. Only valid during
- // finishFunction().
- const Function *CurFn;
-
- /// Information about emitted code, which is passed to the
- /// JITEventListeners. This is reset in startFunction and used in
- /// finishFunction.
- JITEvent_EmittedFunctionDetails EmissionDetails;
-
- struct EmittedCode {
- void *FunctionBody; // Beginning of the function's allocation.
- void *Code; // The address the function's code actually starts at.
- void *ExceptionTable;
- EmittedCode() : FunctionBody(nullptr), Code(nullptr),
- ExceptionTable(nullptr) {}
- };
- struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
- typedef JITEmitter *ExtraData;
- static void onDelete(JITEmitter *, const Function*);
- static void onRAUW(JITEmitter *, const Function*, const Function*);
- };
- ValueMap<const Function *, EmittedCode,
- EmittedFunctionConfig> EmittedFunctions;
-
- DebugLoc PrevDL;
-
- /// Instance of the JIT
- JIT *TheJIT;
-
- public:
- JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
- : SizeEstimate(0), Resolver(jit, *this), MMI(nullptr), CurFn(nullptr),
- EmittedFunctions(this), TheJIT(&jit) {
- MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
- if (jit.getJITInfo().needsGOT()) {
- MemMgr->AllocateGOT();
- DEBUG(dbgs() << "JIT is managing a GOT\n");
- }
-
- }
- ~JITEmitter() {
- delete MemMgr;
- }
-
- JITResolver &getJITResolver() { return Resolver; }
-
- void startFunction(MachineFunction &F) override;
- bool finishFunction(MachineFunction &F) override;
-
- void emitConstantPool(MachineConstantPool *MCP);
- void initJumpTableInfo(MachineJumpTableInfo *MJTI);
- void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
-
- void startGVStub(const GlobalValue* GV,
- unsigned StubSize, unsigned Alignment = 1);
- void startGVStub(void *Buffer, unsigned StubSize);
- void finishGVStub();
- void *allocIndirectGV(const GlobalValue *GV, const uint8_t *Buffer,
- size_t Size, unsigned Alignment) override;
-
- /// allocateSpace - Reserves space in the current block if there is one, or
- /// allocates a new block of the given size.
- void *allocateSpace(uintptr_t Size, unsigned Alignment) override;
-
- /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
- /// this method does not allocate memory in the current output buffer,
- /// because a global may live longer than the current function.
- void *allocateGlobal(uintptr_t Size, unsigned Alignment) override;
-
- void addRelocation(const MachineRelocation &MR) override {
- Relocations.push_back(MR);
- }
-
- void StartMachineBasicBlock(MachineBasicBlock *MBB) override {
- if (MBBLocations.size() <= (unsigned)MBB->getNumber())
- MBBLocations.resize((MBB->getNumber()+1)*2);
- MBBLocations[MBB->getNumber()] = getCurrentPCValue();
- if (MBB->hasAddressTaken())
- TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(),
- (void*)getCurrentPCValue());
- DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
- << (void*) getCurrentPCValue() << "]\n");
- }
-
- uintptr_t getConstantPoolEntryAddress(unsigned Entry) const override;
- uintptr_t getJumpTableEntryAddress(unsigned Entry) const override;
-
- uintptr_t
- getMachineBasicBlockAddress(MachineBasicBlock *MBB) const override {
- assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
- MBBLocations[MBB->getNumber()] && "MBB not emitted!");
- return MBBLocations[MBB->getNumber()];
- }
-
- /// retryWithMoreMemory - Log a retry and deallocate all memory for the
- /// given function. Increase the minimum allocation size so that we get
- /// more memory next time.
- void retryWithMoreMemory(MachineFunction &F);
-
- /// deallocateMemForFunction - Deallocate all memory for the specified
- /// function body.
- void deallocateMemForFunction(const Function *F);
-
- void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) override;
-
- void emitLabel(MCSymbol *Label) override {
- LabelLocations[Label] = getCurrentPCValue();
- }
-
- DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() override {
- return &LabelLocations;
- }
-
- uintptr_t getLabelAddress(MCSymbol *Label) const override {
- assert(LabelLocations.count(Label) && "Label not emitted!");
- return LabelLocations.find(Label)->second;
- }
-
- void setModuleInfo(MachineModuleInfo* Info) override {
- MMI = Info;
- }
-
- private:
- void *getPointerToGlobal(GlobalValue *GV, void *Reference,
- bool MayNeedFarStub);
- void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
- };
-}
-
-void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
- JRS->EraseAllCallSitesForPrelocked(F);
-}
-
-void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
- FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
- if (F2C == FunctionToCallSitesMap.end())
- return;
- StubToResolverMapTy &S2RMap = *StubToResolverMap;
- for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
- E = F2C->second.end(); I != E; ++I) {
- S2RMap.UnregisterStubResolver(*I);
- bool Erased = CallSiteToFunctionMap.erase(*I);
- (void)Erased;
- assert(Erased && "Missing call site->function mapping");
- }
- FunctionToCallSitesMap.erase(F2C);
-}
-
-void JITResolverState::EraseAllCallSitesPrelocked() {
- StubToResolverMapTy &S2RMap = *StubToResolverMap;
- for (CallSiteToFunctionMapTy::const_iterator
- I = CallSiteToFunctionMap.begin(),
- E = CallSiteToFunctionMap.end(); I != E; ++I) {
- S2RMap.UnregisterStubResolver(I->first);
- }
- CallSiteToFunctionMap.clear();
- FunctionToCallSitesMap.clear();
-}
-
-JITResolver::~JITResolver() {
- // No need to lock because we're in the destructor, and state isn't shared.
- state.EraseAllCallSitesPrelocked();
- assert(!StubToResolverMap->ResolverHasStubs(this) &&
- "Resolver destroyed with stubs still alive.");
-}
-
-/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub
-/// if it has already been created.
-void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) {
- MutexGuard locked(TheJIT->lock);
-
- // If we already have a stub for this function, recycle it.
- return state.getFunctionToLazyStubMap().lookup(F);
-}
-
-/// getLazyFunctionStub - This returns a pointer to a function's
-/// lazy-compilation stub, creating one on demand as needed.
-void *JITResolver::getLazyFunctionStub(Function *F) {
- MutexGuard locked(TheJIT->lock);
-
- // If we already have a lazy stub for this function, recycle it.
- void *&Stub = state.getFunctionToLazyStubMap()[F];
- if (Stub) return Stub;
-
- // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we
- // must resolve the symbol now.
- void *Actual = TheJIT->isCompilingLazily()
- ? (void *)(intptr_t)LazyResolverFn : (void *)nullptr;
-
- // If this is an external declaration, attempt to resolve the address now
- // to place in the stub.
- if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) {
- Actual = TheJIT->getPointerToFunction(F);
-
- // If we resolved the symbol to a null address (eg. a weak external)
- // don't emit a stub. Return a null pointer to the application.
- if (!Actual) return nullptr;
- }
-
- TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
- JE.startGVStub(F, SL.Size, SL.Alignment);
- // Codegen a new stub, calling the lazy resolver or the actual address of the
- // external function, if it was resolved.
- Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE);
- JE.finishGVStub();
-
- if (Actual != (void*)(intptr_t)LazyResolverFn) {
- // If we are getting the stub for an external function, we really want the
- // address of the stub in the GlobalAddressMap for the JIT, not the address
- // of the external function.
- TheJIT->updateGlobalMapping(F, Stub);
- }
-
- DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
- << F->getName() << "'\n");
-
- if (TheJIT->isCompilingLazily()) {
- // Register this JITResolver as the one corresponding to this call site so
- // JITCompilerFn will be able to find it.
- StubToResolverMap->RegisterStubResolver(Stub, this);
-
- // Finally, keep track of the stub-to-Function mapping so that the
- // JITCompilerFn knows which function to compile!
- state.AddCallSite(Stub, F);
- } else if (!Actual) {
- // If we are JIT'ing non-lazily but need to call a function that does not
- // exist yet, add it to the JIT's work list so that we can fill in the
- // stub address later.
- assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() &&
- "'Actual' should have been set above.");
- TheJIT->addPendingFunction(F);
- }
-
- return Stub;
-}
-
-/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified
-/// GV address.
-void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
- MutexGuard locked(TheJIT->lock);
-
- // If we already have a stub for this global variable, recycle it.
- void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV];
- if (IndirectSym) return IndirectSym;
-
- // Otherwise, codegen a new indirect symbol.
- IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
- JE);
-
- DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym
- << "] for GV '" << GV->getName() << "'\n");
-
- return IndirectSym;
-}
-
-/// getExternalFunctionStub - Return a stub for the function at the
-/// specified address, created lazily on demand.
-void *JITResolver::getExternalFunctionStub(void *FnAddr) {
- // If we already have a stub for this function, recycle it.
- void *&Stub = ExternalFnToStubMap[FnAddr];
- if (Stub) return Stub;
-
- TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
- JE.startGVStub(nullptr, SL.Size, SL.Alignment);
- Stub = TheJIT->getJITInfo().emitFunctionStub(nullptr, FnAddr, JE);
- JE.finishGVStub();
-
- DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub
- << "] for external function at '" << FnAddr << "'\n");
- return Stub;
-}
-
-unsigned JITResolver::getGOTIndexForAddr(void* addr) {
- unsigned idx = revGOTMap[addr];
- if (!idx) {
- idx = ++nextGOTIndex;
- revGOTMap[addr] = idx;
- DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr ["
- << addr << "]\n");
- }
- return idx;
-}
-
-/// JITCompilerFn - This function is called when a lazy compilation stub has
-/// been entered. It looks up which function this stub corresponds to, compiles
-/// it if necessary, then returns the resultant function pointer.
-void *JITResolver::JITCompilerFn(void *Stub) {
- JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub);
- assert(JR && "Unable to find the corresponding JITResolver to the call site");
-
- Function* F = nullptr;
- void* ActualPtr = nullptr;
-
- {
- // Only lock for getting the Function. The call getPointerToFunction made
- // in this function might trigger function materializing, which requires
- // JIT lock to be unlocked.
- MutexGuard locked(JR->TheJIT->lock);
-
- // The address given to us for the stub may not be exactly right; it might
- // be a little bit after the stub. As such, use upper_bound to find it.
- std::pair<void*, Function*> I =
- JR->state.LookupFunctionFromCallSite(Stub);
- F = I.second;
- ActualPtr = I.first;
- }
-
- // If we have already code-generated the function, just return the address.
- void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F);
-
- if (!Result) {
- // Otherwise we don't have it, do lazy compilation now.
-
- // If lazy compilation is disabled, emit a useful error message and abort.
- if (!JR->TheJIT->isCompilingLazily()) {
- report_fatal_error("LLVM JIT requested to do lazy compilation of"
- " function '"
- + F->getName() + "' when lazy compiles are disabled!");
- }
-
- DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName()
- << "' In stub ptr = " << Stub << " actual ptr = "
- << ActualPtr << "\n");
- (void)ActualPtr;
-
- Result = JR->TheJIT->getPointerToFunction(F);
- }
-
- // Reacquire the lock to update the GOT map.
- MutexGuard locked(JR->TheJIT->lock);
-
- // We might like to remove the call site from the CallSiteToFunction map, but
- // we can't do that! Multiple threads could be stuck, waiting to acquire the
- // lock above. As soon as the first thread finishes compiling the function,
- // the next one will be released, and needs to be able to find the function it
- // needs to call.
-
- // FIXME: We could rewrite all references to this stub if we knew them.
-
- // What we will do is set the compiled function address to map to the
- // same GOT entry as the stub so that later clients may update the GOT
- // if they see it still using the stub address.
- // Note: this is done so the Resolver doesn't have to manage GOT memory.
- // Do this without allocating map space if the target isn't using a GOT.
- if (JR->revGOTMap.find(Stub) != JR->revGOTMap.end())
- JR->revGOTMap[Result] = JR->revGOTMap[Stub];
-
- return Result;
-}
-
-//===----------------------------------------------------------------------===//
-// JITEmitter code.
-//
-
-static GlobalObject *getSimpleAliasee(Constant *C) {
- C = C->stripPointerCasts();
- return dyn_cast<GlobalObject>(C);
-}
-
-void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
- bool MayNeedFarStub) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return TheJIT->getOrEmitGlobalVariable(GV);
-
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- // We can only handle simple cases.
- if (GlobalValue *GV = getSimpleAliasee(GA->getAliasee()))
- return TheJIT->getPointerToGlobal(GV);
- return nullptr;
- }
-
- // If we have already compiled the function, return a pointer to its body.
- Function *F = cast<Function>(V);
-
- void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F);
- if (FnStub) {
- // Return the function stub if it's already created. We do this first so
- // that we're returning the same address for the function as any previous
- // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be
- // close enough to call.
- return FnStub;
- }
-
- // If we know the target can handle arbitrary-distance calls, try to
- // return a direct pointer.
- if (!MayNeedFarStub) {
- // If we have code, go ahead and return that.
- void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
- if (ResultPtr) return ResultPtr;
-
- // If this is an external function pointer, we can force the JIT to
- // 'compile' it, which really just adds it to the map.
- if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage())
- return TheJIT->getPointerToFunction(F);
- }
-
- // Otherwise, we may need to emit a stub, and, conservatively, we always do
- // so. Note that it's possible to return null from getLazyFunctionStub in the
- // case of a weak extern that fails to resolve.
- return Resolver.getLazyFunctionStub(F);
-}
-
-void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
- // Make sure GV is emitted first, and create a stub containing the fully
- // resolved address.
- void *GVAddress = getPointerToGlobal(V, Reference, false);
- void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
- return StubAddr;
-}
-
-void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
- if (DL.isUnknown()) return;
- if (!BeforePrintingInsn) return;
-
- const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
-
- if (DL.getScope(Context) != nullptr && PrevDL != DL) {
- JITEvent_EmittedFunctionDetails::LineStart NextLine;
- NextLine.Address = getCurrentPCValue();
- NextLine.Loc = DL;
- EmissionDetails.LineStarts.push_back(NextLine);
- }
-
- PrevDL = DL;
-}
-
-static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
- const DataLayout *TD) {
- const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
- if (Constants.empty()) return 0;
-
- unsigned Size = 0;
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- MachineConstantPoolEntry CPE = Constants[i];
- unsigned AlignMask = CPE.getAlignment() - 1;
- Size = (Size + AlignMask) & ~AlignMask;
- Type *Ty = CPE.getType();
- Size += TD->getTypeAllocSize(Ty);
- }
- return Size;
-}
-
-void JITEmitter::startFunction(MachineFunction &F) {
- DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
- << F.getName() << "\n");
-
- uintptr_t ActualSize = 0;
- // Set the memory writable, if it's not already
- MemMgr->setMemoryWritable();
-
- if (SizeEstimate > 0) {
- // SizeEstimate will be non-zero on reallocation attempts.
- ActualSize = SizeEstimate;
- }
-
- BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(),
- ActualSize);
- BufferEnd = BufferBegin+ActualSize;
- EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin;
-
- // Ensure the constant pool/jump table info is at least 16-byte aligned.
- emitAlignment(16);
-
- emitConstantPool(F.getConstantPool());
- if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
- initJumpTableInfo(MJTI);
-
- // About to start emitting the machine code for the function.
- emitAlignment(std::max(F.getFunction()->getAlignment(), 8U));
- TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr);
- EmittedFunctions[F.getFunction()].Code = CurBufferPtr;
-
- MBBLocations.clear();
-
- EmissionDetails.MF = &F;
- EmissionDetails.LineStarts.clear();
-}
-
-bool JITEmitter::finishFunction(MachineFunction &F) {
- if (CurBufferPtr == BufferEnd) {
- // We must call endFunctionBody before retrying, because
- // deallocateMemForFunction requires it.
- MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
- retryWithMoreMemory(F);
- return true;
- }
-
- if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
- emitJumpTableInfo(MJTI);
-
- // FnStart is the start of the text, not the start of the constant pool and
- // other per-function data.
- uint8_t *FnStart =
- (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction());
-
- // FnEnd is the end of the function's machine code.
- uint8_t *FnEnd = CurBufferPtr;
-
- if (!Relocations.empty()) {
- CurFn = F.getFunction();
- NumRelos += Relocations.size();
-
- // Resolve the relocations to concrete pointers.
- for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
- MachineRelocation &MR = Relocations[i];
- void *ResultPtr = nullptr;
- if (!MR.letTargetResolve()) {
- if (MR.isExternalSymbol()) {
- ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
- false);
- DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
- << ResultPtr << "]\n");
-
- // If the target REALLY wants a stub for this function, emit it now.
- if (MR.mayNeedFarStub()) {
- ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
- }
- } else if (MR.isGlobalValue()) {
- ResultPtr = getPointerToGlobal(MR.getGlobalValue(),
- BufferBegin+MR.getMachineCodeOffset(),
- MR.mayNeedFarStub());
- } else if (MR.isIndirectSymbol()) {
- ResultPtr = getPointerToGVIndirectSym(
- MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset());
- } else if (MR.isBasicBlock()) {
- ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock());
- } else if (MR.isConstantPoolIndex()) {
- ResultPtr =
- (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex());
- } else {
- assert(MR.isJumpTableIndex());
- ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex());
- }
-
- MR.setResultPointer(ResultPtr);
- }
-
- // if we are managing the GOT and the relocation wants an index,
- // give it one
- if (MR.isGOTRelative() && MemMgr->isManagingGOT()) {
- unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr);
- MR.setGOTIndex(idx);
- if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) {
- DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr
- << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
- << "\n");
- ((void**)MemMgr->getGOTBase())[idx] = ResultPtr;
- }
- }
- }
-
- CurFn = nullptr;
- TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0],
- Relocations.size(), MemMgr->getGOTBase());
- }
-
- // Update the GOT entry for F to point to the new code.
- if (MemMgr->isManagingGOT()) {
- unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin);
- if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) {
- DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin
- << " pointing at " << ((void**)MemMgr->getGOTBase())[idx]
- << "\n");
- ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin;
- }
- }
-
- // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for
- // global variables that were referenced in the relocations.
- MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr);
-
- if (CurBufferPtr == BufferEnd) {
- retryWithMoreMemory(F);
- return true;
- } else {
- // Now that we've succeeded in emitting the function, reset the
- // SizeEstimate back down to zero.
- SizeEstimate = 0;
- }
-
- BufferBegin = CurBufferPtr = nullptr;
- NumBytes += FnEnd-FnStart;
-
- // Invalidate the icache if necessary.
- sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart);
-
- TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
- EmissionDetails);
-
- // Reset the previous debug location.
- PrevDL = DebugLoc();
-
- DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart
- << "] Function: " << F.getName()
- << ": " << (FnEnd-FnStart) << " bytes of text, "
- << Relocations.size() << " relocations\n");
-
- Relocations.clear();
- ConstPoolAddresses.clear();
-
- // Mark code region readable and executable if it's not so already.
- MemMgr->setMemoryExecutable();
-
- DEBUG({
- if (sys::hasDisassembler()) {
- dbgs() << "JIT: Disassembled code:\n";
- dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart,
- (uintptr_t)FnStart);
- } else {
- dbgs() << "JIT: Binary code:\n";
- uint8_t* q = FnStart;
- for (int i = 0; q < FnEnd; q += 4, ++i) {
- if (i == 4)
- i = 0;
- if (i == 0)
- dbgs() << "JIT: " << (long)(q - FnStart) << ": ";
- bool Done = false;
- for (int j = 3; j >= 0; --j) {
- if (q + j >= FnEnd)
- Done = true;
- else
- dbgs() << (unsigned short)q[j];
- }
- if (Done)
- break;
- dbgs() << ' ';
- if (i == 3)
- dbgs() << '\n';
- }
- dbgs()<< '\n';
- }
- });
-
- if (MMI)
- MMI->EndFunction();
-
- return false;
-}
-
-void JITEmitter::retryWithMoreMemory(MachineFunction &F) {
- DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n");
- Relocations.clear(); // Clear the old relocations or we'll reapply them.
- ConstPoolAddresses.clear();
- ++NumRetries;
- deallocateMemForFunction(F.getFunction());
- // Try again with at least twice as much free space.
- SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin));
-
- for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){
- if (MBB->hasAddressTaken())
- TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock());
- }
-}
-
-/// deallocateMemForFunction - Deallocate all memory for the specified
-/// function body. Also drop any references the function has to stubs.
-/// May be called while the Function is being destroyed inside ~Value().
-void JITEmitter::deallocateMemForFunction(const Function *F) {
- ValueMap<const Function *, EmittedCode, EmittedFunctionConfig>::iterator
- Emitted = EmittedFunctions.find(F);
- if (Emitted != EmittedFunctions.end()) {
- MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody);
- TheJIT->NotifyFreeingMachineCode(Emitted->second.Code);
-
- EmittedFunctions.erase(Emitted);
- }
-}
-
-
-void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
- if (BufferBegin)
- return JITCodeEmitter::allocateSpace(Size, Alignment);
-
- // Create a new memory block if there is no active one. Care must be taken
- // so that BufferBegin is invalidated when a block is trimmed.
- BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment);
- BufferEnd = BufferBegin+Size;
- return CurBufferPtr;
-}
-
-void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
- // Delegate this call through the memory manager.
- return MemMgr->allocateGlobal(Size, Alignment);
-}
-
-void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
- if (TheJIT->getJITInfo().hasCustomConstantPool())
- return;
-
- const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
- if (Constants.empty()) return;
-
- unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getDataLayout());
- unsigned Align = MCP->getConstantPoolAlignment();
- ConstantPoolBase = allocateSpace(Size, Align);
- ConstantPool = MCP;
-
- if (!ConstantPoolBase) return; // Buffer overflow.
-
- DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
- << "] (size: " << Size << ", alignment: " << Align << ")\n");
-
- // Initialize the memory for all of the constant pool entries.
- unsigned Offset = 0;
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- MachineConstantPoolEntry CPE = Constants[i];
- unsigned AlignMask = CPE.getAlignment() - 1;
- Offset = (Offset + AlignMask) & ~AlignMask;
-
- uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset;
- ConstPoolAddresses.push_back(CAddr);
- if (CPE.isMachineConstantPoolEntry()) {
- // FIXME: add support to lower machine constant pool values into bytes!
- report_fatal_error("Initialize memory with machine specific constant pool"
- "entry has not been implemented!");
- }
- TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
- DEBUG(dbgs() << "JIT: CP" << i << " at [0x";
- dbgs().write_hex(CAddr) << "]\n");
-
- Type *Ty = CPE.Val.ConstVal->getType();
- Offset += TheJIT->getDataLayout()->getTypeAllocSize(Ty);
- }
-}
-
-void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
- if (TheJIT->getJITInfo().hasCustomJumpTables())
- return;
- if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline)
- return;
-
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty()) return;
-
- unsigned NumEntries = 0;
- for (unsigned i = 0, e = JT.size(); i != e; ++i)
- NumEntries += JT[i].MBBs.size();
-
- unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getDataLayout());
-
- // Just allocate space for all the jump tables now. We will fix up the actual
- // MBB entries in the tables after we emit the code for each block, since then
- // we will know the final locations of the MBBs in memory.
- JumpTable = MJTI;
- JumpTableBase = allocateSpace(NumEntries * EntrySize,
- MJTI->getEntryAlignment(*TheJIT->getDataLayout()));
-}
-
-void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
- if (TheJIT->getJITInfo().hasCustomJumpTables())
- return;
-
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty() || !JumpTableBase) return;
-
-
- switch (MJTI->getEntryKind()) {
- case MachineJumpTableInfo::EK_Inline:
- return;
- case MachineJumpTableInfo::EK_BlockAddress: {
- // EK_BlockAddress - Each entry is a plain address of block, e.g.:
- // .word LBB123
- assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == sizeof(void*) &&
- "Cross JIT'ing?");
-
- // For each jump table, map each target in the jump table to the address of
- // an emitted MachineBasicBlock.
- intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
-
- for (unsigned i = 0, e = JT.size(); i != e; ++i) {
- const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
- // Store the address of the basic block for this jump table slot in the
- // memory we allocated for the jump table in 'initJumpTableInfo'
- for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
- *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
- }
- break;
- }
-
- case MachineJumpTableInfo::EK_Custom32:
- case MachineJumpTableInfo::EK_GPRel32BlockAddress:
- case MachineJumpTableInfo::EK_LabelDifference32: {
- assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == 4 && "Cross JIT'ing?");
- // For each jump table, place the offset from the beginning of the table
- // to the target address.
- int *SlotPtr = (int*)JumpTableBase;
-
- for (unsigned i = 0, e = JT.size(); i != e; ++i) {
- const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
- // Store the offset of the basic block for this jump table slot in the
- // memory we allocated for the jump table in 'initJumpTableInfo'
- uintptr_t Base = (uintptr_t)SlotPtr;
- for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
- uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
- /// FIXME: Use EntryKind instead of magic "getPICJumpTableEntry" hook.
- *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
- }
- }
- break;
- }
- case MachineJumpTableInfo::EK_GPRel64BlockAddress:
- llvm_unreachable(
- "JT Info emission not implemented for GPRel64BlockAddress yet.");
- }
-}
-
-void JITEmitter::startGVStub(const GlobalValue* GV,
- unsigned StubSize, unsigned Alignment) {
- SavedBufferBegin = BufferBegin;
- SavedBufferEnd = BufferEnd;
- SavedCurBufferPtr = CurBufferPtr;
-
- BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
- BufferEnd = BufferBegin+StubSize+1;
-}
-
-void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) {
- SavedBufferBegin = BufferBegin;
- SavedBufferEnd = BufferEnd;
- SavedCurBufferPtr = CurBufferPtr;
-
- BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
- BufferEnd = BufferBegin+StubSize+1;
-}
-
-void JITEmitter::finishGVStub() {
- assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space.");
- NumBytes += getCurrentPCOffset();
- BufferBegin = SavedBufferBegin;
- BufferEnd = SavedBufferEnd;
- CurBufferPtr = SavedCurBufferPtr;
-}
-
-void *JITEmitter::allocIndirectGV(const GlobalValue *GV,
- const uint8_t *Buffer, size_t Size,
- unsigned Alignment) {
- uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment);
- memcpy(IndGV, Buffer, Size);
- return IndGV;
-}
-
-// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry
-// in the constant pool that was last emitted with the 'emitConstantPool'
-// method.
-//
-uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
- assert(ConstantNum < ConstantPool->getConstants().size() &&
- "Invalid ConstantPoolIndex!");
- return ConstPoolAddresses[ConstantNum];
-}
-
-// getJumpTableEntryAddress - Return the address of the entry with index
-// 'Index' in the jump table that was last initialized with 'initJumpTableInfo'.
-//
-uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
- const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
- assert(Index < JT.size() && "Invalid jump table index!");
-
- unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getDataLayout());
-
- unsigned Offset = 0;
- for (unsigned i = 0; i < Index; ++i)
- Offset += JT[i].MBBs.size();
-
- Offset *= EntrySize;
-
- return (uintptr_t)((char *)JumpTableBase + Offset);
-}
-
-void JITEmitter::EmittedFunctionConfig::onDelete(
- JITEmitter *Emitter, const Function *F) {
- Emitter->deallocateMemForFunction(F);
-}
-void JITEmitter::EmittedFunctionConfig::onRAUW(
- JITEmitter *, const Function*, const Function*) {
- llvm_unreachable("The JIT doesn't know how to handle a"
- " RAUW on a value it has emitted.");
-}
-
-
-//===----------------------------------------------------------------------===//
-// Public interface to this file
-//===----------------------------------------------------------------------===//
-
-JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM,
- TargetMachine &tm) {
- return new JITEmitter(jit, JMM, tm);
-}
-
-// getPointerToFunctionOrStub - If the specified function has been
-// code-gen'd, return a pointer to the function. If not, compile it, or use
-// a stub to implement lazy compilation if available.
-//
-void *JIT::getPointerToFunctionOrStub(Function *F) {
- // If we have already code-generated the function, just return the address.
- if (void *Addr = getPointerToGlobalIfAvailable(F))
- return Addr;
-
- // Get a stub if the target supports it.
- JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
- return JE->getJITResolver().getLazyFunctionStub(F);
-}
-
-void JIT::updateFunctionStubUnlocked(Function *F) {
- // Get the empty stub we generated earlier.
- JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
- void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
- void *Addr = getPointerToGlobalIfAvailable(F);
- assert(Addr != Stub && "Function must have non-stub address to be updated.");
-
- // Tell the target jit info to rewrite the stub at the specified address,
- // rather than creating a new one.
- TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout();
- JE->startGVStub(Stub, layout.Size);
- getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter());
- JE->finishGVStub();
-}
-
-/// freeMachineCodeForFunction - Release machine code memory for the given Function.
-///
-void JIT::freeMachineCodeForFunction(Function *F) {
- // Delete translation for this from the ExecutionEngine, so it will get
- // retranslated next time it is used.
- updateGlobalMapping(F, nullptr);
-
- // Free the actual memory for the function body and related stuff.
- static_cast<JITEmitter*>(JCE)->deallocateMemForFunction(F);
-}
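
GetConstantPoolSizeInBytes and emitConstantPool above both lay out entries with the mask-based align-up idiom (Offset + AlignMask) & ~AlignMask, where AlignMask is Alignment - 1 and Alignment is a power of two. A small self-contained sketch of that computation, with illustrative names only:

#include <assert.h>
#include <stdint.h>

// Round Value up to the next multiple of Alignment (a power of two); this is
// the (Offset + AlignMask) & ~AlignMask pattern used for constant pool layout.
static uintptr_t alignTo(uintptr_t Value, uintptr_t Alignment) {
  uintptr_t AlignMask = Alignment - 1;
  return (Value + AlignMask) & ~AlignMask;
}

int main() {
  assert(alignTo(10, 8) == 16); // 10 rounds up to the next multiple of 8
  assert(alignTo(16, 8) == 16); // already-aligned values are unchanged
  assert(alignTo(0, 4) == 0);
  return 0;
}
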
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
deleted file mode 100644
index 584b93f..0000000
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ /dev/null
@@ -1,904 +0,0 @@
-//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the DefaultJITMemoryManager class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Config/config.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <climits>
-#include <cstring>
-#include <vector>
-
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
-
-JITMemoryManager::~JITMemoryManager() {}
-
-//===----------------------------------------------------------------------===//
-// Memory Block Implementation.
-//===----------------------------------------------------------------------===//
-
-namespace {
- /// MemoryRangeHeader - For a range of memory, this is the header that we put
- /// on the block of memory. It is carefully crafted to be one word of memory.
- /// Allocated blocks have just this header; freed blocks have a FreeRangeHeader,
- /// which starts with this.
- struct FreeRangeHeader;
- struct MemoryRangeHeader {
- /// ThisAllocated - This is true if this block is currently allocated. If
- /// not, this can be converted to a FreeRangeHeader.
- unsigned ThisAllocated : 1;
-
- /// PrevAllocated - Keep track of whether the block immediately before us is
- /// allocated. If not, the word immediately before this header is the size
- /// of the previous block.
- unsigned PrevAllocated : 1;
-
- /// BlockSize - This is the size in bytes of this memory block,
- /// including this header.
- uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
-
-
- /// getBlockAfter - Return the memory block immediately after this one.
- ///
- MemoryRangeHeader &getBlockAfter() const {
- return *reinterpret_cast<MemoryRangeHeader *>(
- reinterpret_cast<char*>(
- const_cast<MemoryRangeHeader *>(this))+BlockSize);
- }
-
- /// getFreeBlockBefore - If the block before this one is free, return it,
- /// otherwise return null.
- FreeRangeHeader *getFreeBlockBefore() const {
- if (PrevAllocated) return nullptr;
- intptr_t PrevSize = reinterpret_cast<intptr_t *>(
- const_cast<MemoryRangeHeader *>(this))[-1];
- return reinterpret_cast<FreeRangeHeader *>(
- reinterpret_cast<char*>(
- const_cast<MemoryRangeHeader *>(this))-PrevSize);
- }
-
- /// FreeBlock - Turn an allocated block into a free block, adjusting
- /// bits in the object headers, and adding an end of region memory block.
- FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList);
-
- /// TrimAllocationToSize - If this allocated block is significantly larger
- /// than NewSize, split it into two pieces (where the former is NewSize
- /// bytes, including the header), and add the new block to the free list.
- FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
- uint64_t NewSize);
- };
-
- /// FreeRangeHeader - For a memory block that isn't already allocated, this
- /// keeps track of the current block and has a pointer to the next free block.
- /// Free blocks are kept on a circularly linked list.
- struct FreeRangeHeader : public MemoryRangeHeader {
- FreeRangeHeader *Prev;
- FreeRangeHeader *Next;
-
- /// getMinBlockSize - Get the minimum size for a memory block. Blocks
- /// smaller than this size cannot be created.
- static unsigned getMinBlockSize() {
- return sizeof(FreeRangeHeader)+sizeof(intptr_t);
- }
-
- /// SetEndOfBlockSizeMarker - The word at the end of every free block is
- /// known to be the size of the free block. Set it for this block.
- void SetEndOfBlockSizeMarker() {
- void *EndOfBlock = (char*)this + BlockSize;
- ((intptr_t *)EndOfBlock)[-1] = BlockSize;
- }
-
- FreeRangeHeader *RemoveFromFreeList() {
- assert(Next->Prev == this && Prev->Next == this && "Freelist broken!");
- Next->Prev = Prev;
- return Prev->Next = Next;
- }
-
- void AddToFreeList(FreeRangeHeader *FreeList) {
- Next = FreeList;
- Prev = FreeList->Prev;
- Prev->Next = this;
- Next->Prev = this;
- }
-
- /// GrowBlock - The block after this block just got deallocated. Merge it
- /// into the current block.
- void GrowBlock(uintptr_t NewSize);
-
- /// AllocateBlock - Mark this entire block allocated, updating freelists
- /// etc. This returns a pointer to the circular free-list.
- FreeRangeHeader *AllocateBlock();
- };
-}
-
-
-/// AllocateBlock - Mark this entire block allocated, updating freelists
-/// etc. This returns a pointer to the circular free-list.
-FreeRangeHeader *FreeRangeHeader::AllocateBlock() {
- assert(!ThisAllocated && !getBlockAfter().PrevAllocated &&
- "Cannot allocate an allocated block!");
- // Mark this block allocated.
- ThisAllocated = 1;
- getBlockAfter().PrevAllocated = 1;
-
- // Remove it from the free list.
- return RemoveFromFreeList();
-}
-
-/// FreeBlock - Turn an allocated block into a free block, adjusting
-/// bits in the object headers, and adding an end of region memory block.
-/// If possible, coalesce this block with neighboring blocks. Return the
-/// FreeRangeHeader to allocate from.
-FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
- MemoryRangeHeader *FollowingBlock = &getBlockAfter();
- assert(ThisAllocated && "This block is already free!");
- assert(FollowingBlock->PrevAllocated && "Flags out of sync!");
-
- FreeRangeHeader *FreeListToReturn = FreeList;
-
- // If the block after this one is free, merge it into this block.
- if (!FollowingBlock->ThisAllocated) {
- FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock;
- // "FreeList" always needs to be a valid free block. If we're about to
- // coalesce with it, update our notion of what the free list is.
- if (&FollowingFreeBlock == FreeList) {
- FreeList = FollowingFreeBlock.Next;
- FreeListToReturn = nullptr;
- assert(&FollowingFreeBlock != FreeList && "No tombstone block?");
- }
- FollowingFreeBlock.RemoveFromFreeList();
-
- // Include the following block into this one.
- BlockSize += FollowingFreeBlock.BlockSize;
- FollowingBlock = &FollowingFreeBlock.getBlockAfter();
-
- // Tell the block after the block we are coalescing that this block is
- // allocated.
- FollowingBlock->PrevAllocated = 1;
- }
-
- assert(FollowingBlock->ThisAllocated && "Missed coalescing?");
-
- if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) {
- PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize);
- return FreeListToReturn ? FreeListToReturn : PrevFreeBlock;
- }
-
- // Otherwise, mark this block free.
- FreeRangeHeader &FreeBlock = *(FreeRangeHeader*)this;
- FollowingBlock->PrevAllocated = 0;
- FreeBlock.ThisAllocated = 0;
-
- // Link this into the linked list of free blocks.
- FreeBlock.AddToFreeList(FreeList);
-
- // Add a marker at the end of the block, indicating the size of this free
- // block.
- FreeBlock.SetEndOfBlockSizeMarker();
- return FreeListToReturn ? FreeListToReturn : &FreeBlock;
-}
-
-/// GrowBlock - The block after this block just got deallocated. Merge it
-/// into the current block.
-void FreeRangeHeader::GrowBlock(uintptr_t NewSize) {
- assert(NewSize > BlockSize && "Not growing block?");
- BlockSize = NewSize;
- SetEndOfBlockSizeMarker();
- getBlockAfter().PrevAllocated = 0;
-}
-
-/// TrimAllocationToSize - If this allocated block is significantly larger
-/// than NewSize, split it into two pieces (where the former is NewSize
-/// bytes, including the header), and add the new block to the free list.
-FreeRangeHeader *MemoryRangeHeader::
-TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
- assert(ThisAllocated && getBlockAfter().PrevAllocated &&
- "Cannot deallocate part of an allocated block!");
-
- // Don't allow blocks to be trimmed below minimum required size
- NewSize = std::max<uint64_t>(FreeRangeHeader::getMinBlockSize(), NewSize);
-
- // Round up size for alignment of header.
- unsigned HeaderAlign = __alignof(FreeRangeHeader);
- NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1);
-
- // Size is now the size of the block we will remove from the start of the
- // current block.
- assert(NewSize <= BlockSize &&
- "Allocating more space from this block than exists!");
-
- // If splitting this block will cause the remainder to be too small, do not
- // split the block.
- if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize())
- return FreeList;
-
- // Otherwise, we splice the required number of bytes out of this block, form
- // a new block immediately after it, then mark this block allocated.
- MemoryRangeHeader &FormerNextBlock = getBlockAfter();
-
- // Change the size of this block.
- BlockSize = NewSize;
-
- // Get the new block we just sliced out and turn it into a free block.
- FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
- NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
- NewNextBlock.ThisAllocated = 0;
- NewNextBlock.PrevAllocated = 1;
- NewNextBlock.SetEndOfBlockSizeMarker();
- FormerNextBlock.PrevAllocated = 0;
- NewNextBlock.AddToFreeList(FreeList);
- return &NewNextBlock;
-}
-
-//===----------------------------------------------------------------------===//
-// Memory Block Implementation.
-//===----------------------------------------------------------------------===//
-
-namespace {
-
- class DefaultJITMemoryManager;
-
- class JITAllocator {
- DefaultJITMemoryManager &JMM;
- public:
- JITAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { }
- void *Allocate(size_t Size, size_t /*Alignment*/);
- void Deallocate(void *Slab, size_t Size);
- };
-
- /// DefaultJITMemoryManager - Manage memory for the JIT code generation.
- /// This splits a large block of MAP_NORESERVE'd memory into two
- /// sections, one for function stubs, one for the functions themselves. We
- /// have to do this because we may need to emit a function stub while in the
- /// middle of emitting a function, and we don't know how large the function we
- /// are emitting is.
- class DefaultJITMemoryManager : public JITMemoryManager {
- public:
- /// DefaultCodeSlabSize - When we have to map more memory, we allocate at
- /// least this much unless more is requested. Currently 512K per slab.
- static const size_t DefaultCodeSlabSize = 512 * 1024;
-
- /// DefaultSlabSize - Allocate globals and stubs into slabs of 64K (probably
- /// 16 pages) unless we get an allocation above SizeThreshold.
- static const size_t DefaultSlabSize = 64 * 1024;
-
- /// DefaultSizeThreshold - For any allocation larger than 16K (probably
- /// 4 pages), we should allocate a separate slab to avoid wasted space at
- /// the end of a normal slab.
- static const size_t DefaultSizeThreshold = 16 * 1024;
-
- private:
- // Whether to poison freed memory.
- bool PoisonMemory;
-
- /// LastSlab - This points to the last slab allocated and is used as the
- /// NearBlock parameter to AllocateRWX so that we can attempt to lay out all
- /// stubs, data, and code contiguously in memory. In general, however, this
- /// is not possible because the NearBlock parameter is ignored on Windows
- /// platforms and even on Unix it works on a best-effort basis.
- sys::MemoryBlock LastSlab;
-
- // Memory slabs allocated by the JIT. We refer to them as slabs so we don't
- // confuse them with the blocks of memory described above.
- std::vector<sys::MemoryBlock> CodeSlabs;
- BumpPtrAllocatorImpl<JITAllocator, DefaultSlabSize,
- DefaultSizeThreshold> StubAllocator;
- BumpPtrAllocatorImpl<JITAllocator, DefaultSlabSize,
- DefaultSizeThreshold> DataAllocator;
-
- // Circular list of free blocks.
- FreeRangeHeader *FreeMemoryList;
-
- // When emitting code into a memory block, this is the block.
- MemoryRangeHeader *CurBlock;
-
- uint8_t *GOTBase; // Target Specific reserved memory
- public:
- DefaultJITMemoryManager();
- ~DefaultJITMemoryManager();
-
- /// allocateNewSlab - Allocates a new MemoryBlock and remembers it as the
- /// last slab it allocated, so that subsequent allocations follow it.
- sys::MemoryBlock allocateNewSlab(size_t size);
-
- /// getPointerToNamedFunction - This method returns the address of the
- /// specified function by using the dlsym function call.
- void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true) override;
-
- void AllocateGOT() override;
-
- // Testing methods.
- bool CheckInvariants(std::string &ErrorStr) override;
- size_t GetDefaultCodeSlabSize() override { return DefaultCodeSlabSize; }
- size_t GetDefaultDataSlabSize() override { return DefaultSlabSize; }
- size_t GetDefaultStubSlabSize() override { return DefaultSlabSize; }
- unsigned GetNumCodeSlabs() override { return CodeSlabs.size(); }
- unsigned GetNumDataSlabs() override { return DataAllocator.GetNumSlabs(); }
- unsigned GetNumStubSlabs() override { return StubAllocator.GetNumSlabs(); }
-
- /// startFunctionBody - When a function starts, allocate a block of free
- /// executable memory, returning a pointer to it and its actual size.
- uint8_t *startFunctionBody(const Function *F,
- uintptr_t &ActualSize) override {
-
- FreeRangeHeader* candidateBlock = FreeMemoryList;
- FreeRangeHeader* head = FreeMemoryList;
- FreeRangeHeader* iter = head->Next;
-
- uintptr_t largest = candidateBlock->BlockSize;
-
- // Search for the largest free block
- while (iter != head) {
- if (iter->BlockSize > largest) {
- largest = iter->BlockSize;
- candidateBlock = iter;
- }
- iter = iter->Next;
- }
-
- largest = largest - sizeof(MemoryRangeHeader);
-
- // If this block isn't big enough for the allocation desired, allocate
- // another block of memory and add it to the free list.
- if (largest < ActualSize ||
- largest <= FreeRangeHeader::getMinBlockSize()) {
- DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
- candidateBlock = allocateNewCodeSlab((size_t)ActualSize);
- }
-
- // Select this candidate block for allocation
- CurBlock = candidateBlock;
-
- // Allocate the entire memory block.
- FreeMemoryList = candidateBlock->AllocateBlock();
- ActualSize = CurBlock->BlockSize - sizeof(MemoryRangeHeader);
- return (uint8_t *)(CurBlock + 1);
- }
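
The loop above walks the circular, doubly linked free list once and keeps the biggest block; the same scan reappears in allocateCodeSection further down. A minimal sketch of the idea, using a hypothetical Node type rather than the real FreeRangeHeader:

    #include <cstddef>

    struct Node {                 // hypothetical stand-in for FreeRangeHeader
      Node *Next;
      Node *Prev;
      size_t Size;                // usable bytes in this free block
    };

    // Return the largest block on a circular list entered at Head. The list is
    // never empty: the allocator always keeps at least one tiny block on it.
    static Node *largestFreeBlock(Node *Head) {
      Node *Best = Head;
      for (Node *N = Head->Next; N != Head; N = N->Next)
        if (N->Size > Best->Size)
          Best = N;
      return Best;
    }
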
-
- /// allocateNewCodeSlab - Helper method to allocate a new slab of code
- /// memory from the OS and add it to the free list. Returns the new
- /// FreeRangeHeader at the base of the slab.
- FreeRangeHeader *allocateNewCodeSlab(size_t MinSize) {
- // If the user needs at least MinSize free memory, then we account for
- // two MemoryRangeHeaders: the one in the user's block, and the one at the
- // end of the slab.
- size_t PaddedMin = MinSize + 2 * sizeof(MemoryRangeHeader);
- size_t SlabSize = std::max(DefaultCodeSlabSize, PaddedMin);
- sys::MemoryBlock B = allocateNewSlab(SlabSize);
- CodeSlabs.push_back(B);
- char *MemBase = (char*)(B.base());
-
- // Put a tiny allocated block at the end of the memory chunk, so when
- // FreeBlock calls getBlockAfter it doesn't fall off the end.
- MemoryRangeHeader *EndBlock =
- (MemoryRangeHeader*)(MemBase + B.size()) - 1;
- EndBlock->ThisAllocated = 1;
- EndBlock->PrevAllocated = 0;
- EndBlock->BlockSize = sizeof(MemoryRangeHeader);
-
- // Start out with a vast new block of free memory.
- FreeRangeHeader *NewBlock = (FreeRangeHeader*)MemBase;
- NewBlock->ThisAllocated = 0;
- // Make sure getFreeBlockBefore doesn't look into unmapped memory.
- NewBlock->PrevAllocated = 1;
- NewBlock->BlockSize = (uintptr_t)EndBlock - (uintptr_t)NewBlock;
- NewBlock->SetEndOfBlockSizeMarker();
- NewBlock->AddToFreeList(FreeMemoryList);
-
- assert(NewBlock->BlockSize - sizeof(MemoryRangeHeader) >= MinSize &&
- "The block was too small!");
- return NewBlock;
- }
-
- /// endFunctionBody - The function F is now allocated, and takes the memory
- /// in the range [FunctionStart,FunctionEnd).
- void endFunctionBody(const Function *F, uint8_t *FunctionStart,
- uint8_t *FunctionEnd) override {
- assert(FunctionEnd > FunctionStart);
- assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
- "Mismatched function start/end!");
-
- uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
-
- // Release the memory at the end of this block that isn't needed.
- FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
- }
-
- /// allocateSpace - Allocate a memory block of the given size. This method
- /// cannot be called between calls to startFunctionBody and endFunctionBody.
- uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) override {
- CurBlock = FreeMemoryList;
- FreeMemoryList = FreeMemoryList->AllocateBlock();
-
- uint8_t *result = (uint8_t *)(CurBlock + 1);
-
- if (Alignment == 0) Alignment = 1;
- result = (uint8_t*)(((intptr_t)result+Alignment-1) &
- ~(intptr_t)(Alignment-1));
-
- uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
- FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
-
- return result;
- }
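
Both allocateSpace and allocateCodeSection over-allocate by Alignment - 1 bytes and then round the start pointer up with the usual power-of-two mask. A self-contained illustration of that rounding; it only works when Align is a power of two:

    #include <cassert>
    #include <cstdint>

    // Round Addr up to the next multiple of Align (Align must be a power of two).
    static uintptr_t roundUp(uintptr_t Addr, uintptr_t Align) {
      assert(Align && (Align & (Align - 1)) == 0 && "Align must be a power of two");
      return (Addr + Align - 1) & ~(Align - 1);
    }

    // roundUp(0x1001, 16) == 0x1010, roundUp(0x1000, 16) == 0x1000.
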
-
- /// allocateStub - Allocate memory for a function stub.
- uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
- unsigned Alignment) override {
- return (uint8_t*)StubAllocator.Allocate(StubSize, Alignment);
- }
-
- /// allocateGlobal - Allocate memory for a global.
- uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) override {
- return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
- }
-
- /// allocateCodeSection - Allocate memory for a code section.
- uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID,
- StringRef SectionName) override {
- // Grow the required block size to account for the block header
- Size += sizeof(*CurBlock);
-
- // Alignment handling.
- if (!Alignment)
- Alignment = 16;
- Size += Alignment - 1;
-
- FreeRangeHeader* candidateBlock = FreeMemoryList;
- FreeRangeHeader* head = FreeMemoryList;
- FreeRangeHeader* iter = head->Next;
-
- uintptr_t largest = candidateBlock->BlockSize;
-
- // Search for the largest free block.
- while (iter != head) {
- if (iter->BlockSize > largest) {
- largest = iter->BlockSize;
- candidateBlock = iter;
- }
- iter = iter->Next;
- }
-
- largest = largest - sizeof(MemoryRangeHeader);
-
- // If this block isn't big enough for the allocation desired, allocate
- // another block of memory and add it to the free list.
- if (largest < Size || largest <= FreeRangeHeader::getMinBlockSize()) {
- DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
- candidateBlock = allocateNewCodeSlab((size_t)Size);
- }
-
- // Select this candidate block for allocation
- CurBlock = candidateBlock;
-
- // Allocate the entire memory block.
- FreeMemoryList = candidateBlock->AllocateBlock();
- // Release the memory at the end of this block that isn't needed.
- FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, Size);
- uintptr_t unalignedAddr = (uintptr_t)CurBlock + sizeof(*CurBlock);
- return (uint8_t*)RoundUpToAlignment((uint64_t)unalignedAddr, Alignment);
- }
-
- /// allocateDataSection - Allocate memory for a data section.
- uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName,
- bool IsReadOnly) override {
- return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
- }
-
- bool finalizeMemory(std::string *ErrMsg) override {
- return false;
- }
-
- uint8_t *getGOTBase() const override {
- return GOTBase;
- }
-
- void deallocateBlock(void *Block) {
- // Find the block that is allocated for this function.
- MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1;
- assert(MemRange->ThisAllocated && "Block isn't allocated!");
-
- // Fill the buffer with garbage!
- if (PoisonMemory) {
- memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
- }
-
- // Free the memory.
- FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
- }
-
- /// deallocateFunctionBody - Deallocate all memory for the specified
- /// function body.
- void deallocateFunctionBody(void *Body) override {
- if (Body) deallocateBlock(Body);
- }
-
- /// setMemoryWritable - When code generation is in progress,
- /// the code pages may need permissions changed.
- void setMemoryWritable() override {
- for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
- sys::Memory::setWritable(CodeSlabs[i]);
- }
- /// setMemoryExecutable - When code generation is done and we're ready to
- /// start execution, the code pages may need permissions changed.
- void setMemoryExecutable() override {
- for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
- sys::Memory::setExecutable(CodeSlabs[i]);
- }
-
- /// setPoisonMemory - Controls whether we write garbage over freed memory.
- ///
- void setPoisonMemory(bool poison) override {
- PoisonMemory = poison;
- }
- };
-}
-
-void *JITAllocator::Allocate(size_t Size, size_t /*Alignment*/) {
- sys::MemoryBlock B = JMM.allocateNewSlab(Size);
- return B.base();
-}
-
-void JITAllocator::Deallocate(void *Slab, size_t Size) {
- sys::MemoryBlock B(Slab, Size);
- sys::Memory::ReleaseRWX(B);
-}
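
JITAllocator exists only so that the stub and data bump allocators obtain their slabs from the JIT's RWX mappings rather than from malloc. The slab-versus-threshold behaviour they rely on can be shown without LLVM's BumpPtrAllocatorImpl; the class below is a simplified, malloc-backed sketch of that behaviour, not the real allocator:

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <vector>

    // Simplified bump-pointer allocator: carve small requests out of fixed-size
    // slabs; give oversized requests (>= Threshold) a dedicated slab of their own.
    class SimpleBumpAllocator {
      static const size_t SlabSize = 64 * 1024;   // mirrors DefaultSlabSize
      static const size_t Threshold = 16 * 1024;  // mirrors DefaultSizeThreshold
      char *Cur = nullptr, *End = nullptr;
      std::vector<void *> Slabs;                  // everything we ever allocated

    public:
      void *allocate(size_t Size, size_t Align) {
        if (Size >= Threshold) {                  // big request: dedicated slab
          void *P = std::malloc(Size);            // malloc's alignment assumed OK
          Slabs.push_back(P);
          return P;
        }
        uintptr_t Aligned = ((uintptr_t)Cur + Align - 1) & ~(uintptr_t)(Align - 1);
        if (!Cur || Aligned + Size > (uintptr_t)End) {  // need a fresh slab
          Cur = (char *)std::malloc(SlabSize);
          End = Cur + SlabSize;
          Slabs.push_back(Cur);
          Aligned = ((uintptr_t)Cur + Align - 1) & ~(uintptr_t)(Align - 1);
        }
        Cur = (char *)(Aligned + Size);           // bump past this allocation
        return (void *)Aligned;
      }

      ~SimpleBumpAllocator() {
        for (void *P : Slabs)
          std::free(P);
      }
    };
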
-
-DefaultJITMemoryManager::DefaultJITMemoryManager()
- :
-#ifdef NDEBUG
- PoisonMemory(false),
-#else
- PoisonMemory(true),
-#endif
- LastSlab(nullptr, 0), StubAllocator(*this), DataAllocator(*this) {
-
- // Allocate space for code.
- sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize);
- CodeSlabs.push_back(MemBlock);
- uint8_t *MemBase = (uint8_t*)MemBlock.base();
-
- // We set up the memory chunk with 4 mem regions, like this:
- // [ START
- // [ Free #0 ] -> Large space to allocate functions from.
- // [ Allocated #1 ] -> Tiny space to separate regions.
- // [ Free #2 ] -> Tiny space so there is always at least 1 free block.
- // [ Allocated #3 ] -> Tiny space to prevent looking past end of block.
- // END ]
- //
- // The last three blocks are never deallocated or touched.
-
- // Add MemoryRangeHeader to the end of the memory region, indicating that
- // the space after the block of memory is allocated. This is block #3.
- MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
- Mem3->ThisAllocated = 1;
- Mem3->PrevAllocated = 0;
- Mem3->BlockSize = sizeof(MemoryRangeHeader);
-
- /// Add a tiny free region so that the free list always has one entry.
- FreeRangeHeader *Mem2 =
- (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize());
- Mem2->ThisAllocated = 0;
- Mem2->PrevAllocated = 1;
- Mem2->BlockSize = FreeRangeHeader::getMinBlockSize();
- Mem2->SetEndOfBlockSizeMarker();
- Mem2->Prev = Mem2; // Mem2 *is* the free list for now.
- Mem2->Next = Mem2;
-
- /// Add a tiny allocated region so that Mem2 is never coalesced away.
- MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1;
- Mem1->ThisAllocated = 1;
- Mem1->PrevAllocated = 0;
- Mem1->BlockSize = sizeof(MemoryRangeHeader);
-
- // Add a FreeRangeHeader to the start of the function body region, indicating
- // that the space is free. Mark the previous block allocated so we never look
- // at it.
- FreeRangeHeader *Mem0 = (FreeRangeHeader*)MemBase;
- Mem0->ThisAllocated = 0;
- Mem0->PrevAllocated = 1;
- Mem0->BlockSize = (char*)Mem1-(char*)Mem0;
- Mem0->SetEndOfBlockSizeMarker();
- Mem0->AddToFreeList(Mem2);
-
- // Start out with the freelist pointing to Mem0.
- FreeMemoryList = Mem0;
-
- GOTBase = nullptr;
-}
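
The four-region setup above relies on boundary-tag headers: every range begins with a header holding its size plus two bits recording whether it and its predecessor are allocated, and every free range repeats its size in its last word so the following block can find the start of a free predecessor when coalescing. A stripped-down version of those headers (the real MemoryRangeHeader and FreeRangeHeader live earlier in this file):

    #include <cstdint>

    // Header that precedes every range in a code slab (simplified).
    struct RangeHeader {
      uintptr_t ThisAllocated : 1;   // is this range in use?
      uintptr_t PrevAllocated : 1;   // is the range just before us in use?
      uintptr_t BlockSize : sizeof(uintptr_t) * 8 - 2;  // size incl. header

      // The header of the range that follows us in the slab.
      RangeHeader &getBlockAfter() {
        return *reinterpret_cast<RangeHeader *>(
            reinterpret_cast<char *>(this) + BlockSize);
      }
    };

    // A free range additionally carries list links and duplicates BlockSize in
    // its last word, so a following block can walk backwards to coalesce.
    struct FreeHeader : RangeHeader {
      FreeHeader *Prev, *Next;       // circular free list

      void setEndOfBlockSizeMarker() {
        reinterpret_cast<uintptr_t *>(
            reinterpret_cast<char *>(this) + BlockSize)[-1] = BlockSize;
      }
    };
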
-
-void DefaultJITMemoryManager::AllocateGOT() {
- assert(!GOTBase && "Cannot allocate the GOT multiple times");
- GOTBase = new uint8_t[sizeof(void*) * 8192];
- HasGOT = true;
-}
-
-DefaultJITMemoryManager::~DefaultJITMemoryManager() {
- for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
- sys::Memory::ReleaseRWX(CodeSlabs[i]);
-
- delete[] GOTBase;
-}
-
-sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) {
- // Allocate a new block close to the last one.
- std::string ErrMsg;
- sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : nullptr;
- sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg);
- if (!B.base()) {
- report_fatal_error("Allocation failed when allocating new memory in the"
- " JIT\n" + Twine(ErrMsg));
- }
- LastSlab = B;
- ++NumSlabs;
- // Initialize the slab to garbage when debugging.
- if (PoisonMemory) {
- memset(B.base(), 0xCD, B.size());
- }
- return B;
-}
-
-/// CheckInvariants - For testing only. Return "" if all internal invariants
-/// are preserved, and a helpful error message otherwise. For free and
-/// allocated blocks, make sure that adding BlockSize gives a valid block.
-/// For free blocks, make sure they're in the free list and that their end of
-/// block size marker is correct. This function should return an error before
-/// accessing bad memory. This function is defined here instead of in
-/// JITMemoryManagerTest.cpp so that we don't have to expose all of the
-/// implementation details of DefaultJITMemoryManager.
-bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
- raw_string_ostream Err(ErrorStr);
-
- // Construct the set of FreeRangeHeader pointers so we can query it
- // efficiently.
- llvm::SmallPtrSet<MemoryRangeHeader*, 16> FreeHdrSet;
- FreeRangeHeader* FreeHead = FreeMemoryList;
- FreeRangeHeader* FreeRange = FreeHead;
-
- do {
- // Check that the free range pointer is in the blocks we've allocated.
- bool Found = false;
- for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
- E = CodeSlabs.end(); I != E && !Found; ++I) {
- char *Start = (char*)I->base();
- char *End = Start + I->size();
- Found = (Start <= (char*)FreeRange && (char*)FreeRange < End);
- }
- if (!Found) {
- Err << "Corrupt free list; points to " << FreeRange;
- return false;
- }
-
- if (FreeRange->Next->Prev != FreeRange) {
- Err << "Next and Prev pointers do not match.";
- return false;
- }
-
- // Otherwise, add it to the set.
- FreeHdrSet.insert(FreeRange);
- FreeRange = FreeRange->Next;
- } while (FreeRange != FreeHead);
-
- // Go over each block, and look at each MemoryRangeHeader.
- for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
- E = CodeSlabs.end(); I != E; ++I) {
- char *Start = (char*)I->base();
- char *End = Start + I->size();
-
- // Check each memory range.
- for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = nullptr;
- Start <= (char*)Hdr && (char*)Hdr < End;
- Hdr = &Hdr->getBlockAfter()) {
- if (Hdr->ThisAllocated == 0) {
- // Check that this range is in the free list.
- if (!FreeHdrSet.count(Hdr)) {
- Err << "Found free header at " << Hdr << " that is not in free list.";
- return false;
- }
-
- // Now make sure the size marker at the end of the block is correct.
- uintptr_t *Marker = ((uintptr_t*)&Hdr->getBlockAfter()) - 1;
- if (!(Start <= (char*)Marker && (char*)Marker < End)) {
- Err << "Block size in header points out of current MemoryBlock.";
- return false;
- }
- if (Hdr->BlockSize != *Marker) {
- Err << "End of block size marker (" << *Marker << ") "
- << "and BlockSize (" << Hdr->BlockSize << ") don't match.";
- return false;
- }
- }
-
- if (LastHdr && LastHdr->ThisAllocated != Hdr->PrevAllocated) {
- Err << "Hdr->PrevAllocated (" << Hdr->PrevAllocated << ") != "
- << "LastHdr->ThisAllocated (" << LastHdr->ThisAllocated << ")";
- return false;
- } else if (!LastHdr && !Hdr->PrevAllocated) {
- Err << "The first header should have PrevAllocated true.";
- return false;
- }
-
- // Remember the last header.
- LastHdr = Hdr;
- }
- }
-
- // All invariants are preserved.
- return true;
-}
-
-//===----------------------------------------------------------------------===//
-// getPointerToNamedFunction() implementation.
-//===----------------------------------------------------------------------===//
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
- while (!AtExitHandlers.empty()) {
- void (*Fn)() = AtExitHandlers.back();
- AtExitHandlers.pop_back();
- Fn();
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__) && defined(__GLIBC__)
-/* stat functions are redirecting to __xstat with a version number. On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
- StatSymbols() {
- sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
- sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
- sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
- sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
- sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
- sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
- sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
- sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
- sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
- }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
- runAtExitHandlers(); // Run atexit handlers...
- exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
- AtExitHandlers.push_back(Fn); // Take note of atexit handler...
- return 0; // Always successful
-}
-
-static int jit_noop() {
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface. As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
- // Check to see if this is one of the functions we want to intercept. Note,
- // we cast to intptr_t here to silence a -pedantic warning that complains
- // about casting a function pointer to a normal pointer.
- if (Name == "exit") return (void*)(intptr_t)&jit_exit;
- if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
- // We should not invoke the parent's ctors/dtors from generated main()!
- // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
- // callee's (e.g. tools/lli) __main, invoking the wrong, duplicated ctors
- // (and registering the wrong callee's dtors with atexit(3)).
- // We expect ExecutionEngine::runStaticConstructorsDestructors() to be
- // called before ExecutionEngine::runFunctionAsMain().
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
- const char *NameStr = Name.c_str();
- // If this is an asm specifier, skip the sentinel.
- if (NameStr[0] == 1) ++NameStr;
-
- // If it's an external function, look it up in the process image...
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // try again without the underscore.
- if (NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
- // are references to hidden visibility symbols that dlsym cannot resolve.
- // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
- if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
- memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
- // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
- // This mirrors logic in libSystemStubs.a.
- std::string Prefix = std::string(Name.begin(), Name.end()-9);
- if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
- return Ptr;
- if (void *Ptr = getPointerToNamedFunction(Prefix, false))
- return Ptr;
- }
-#endif
-
- if (AbortOnFailure) {
- report_fatal_error("Program used external function '"+Name+
- "' which could not be resolved!");
- }
- return nullptr;
-}
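
Apart from the intercepted names, resolution is a plain process-image lookup with one fallback: try the name as written, and if that fails and the name starts with '_', retry without the underscore, which is what makes C symbols resolve on targets that prepend an underscore. A minimal POSIX sketch of the same order, using dlsym directly rather than sys::DynamicLibrary:

    #ifndef _GNU_SOURCE
    #define _GNU_SOURCE            // for RTLD_DEFAULT on glibc
    #endif
    #include <dlfcn.h>
    #include <string>

    // Resolve Name against the running process: first as written, then, if it
    // starts with '_', without the leading underscore.
    static void *lookupInProcess(const std::string &Name) {
      if (void *P = dlsym(RTLD_DEFAULT, Name.c_str()))
        return P;
      if (!Name.empty() && Name[0] == '_')
        return dlsym(RTLD_DEFAULT, Name.c_str() + 1);
      return nullptr;
    }
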
-
-
-
-JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
- return new DefaultJITMemoryManager();
-}
-
-const size_t DefaultJITMemoryManager::DefaultCodeSlabSize;
-const size_t DefaultJITMemoryManager::DefaultSlabSize;
-const size_t DefaultJITMemoryManager::DefaultSizeThreshold;
diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile
deleted file mode 100644
index aafa3d9..0000000
--- a/lib/ExecutionEngine/JIT/Makefile
+++ /dev/null
@@ -1,38 +0,0 @@
-##===- lib/ExecutionEngine/JIT/Makefile --------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMJIT
-
-# Get the $(ARCH) setting
-include $(LEVEL)/Makefile.config
-
-# Enable the X86 JIT if compiling on X86
-ifeq ($(ARCH), x86)
- ENABLE_X86_JIT = 1
-endif
-
-# This flag can also be used on the command line to force inclusion
-# of the X86 JIT on non-X86 hosts
-ifdef ENABLE_X86_JIT
- CPPFLAGS += -DENABLE_X86_JIT
-endif
-
-# Enable the Sparc JIT if compiling on Sparc
-ifeq ($(ARCH), Sparc)
- ENABLE_SPARC_JIT = 1
-endif
-
-# This flag can also be used on the command line to force inclusion
-# of the Sparc JIT on non-Sparc hosts
-ifdef ENABLE_SPARC_JIT
- CPPFLAGS += -DENABLE_SPARC_JIT
-endif
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/TargetJITInfo.cpp b/lib/ExecutionEngine/JITEventListener.cpp
index aafedf8..2a6a007 100644
--- a/lib/Target/TargetJITInfo.cpp
+++ b/lib/ExecutionEngine/JITEventListener.cpp
@@ -1,4 +1,4 @@
-//===- Target/TargetJITInfo.h - Target Information for JIT ------*- C++ -*-===//
+//===-- JITEventListener.cpp ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
using namespace llvm;
-void TargetJITInfo::anchor() { }
+// Out-of-line definition of the virtual destructor as this is the key function.
+JITEventListener::~JITEventListener() {}
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index 6dc75af..ecae078 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT
+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT
[component_0]
type = Library
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index e9ba96a..da5f037 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -10,7 +10,6 @@
#include "MCJIT.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/ObjectBuffer.h"
#include "llvm/ExecutionEngine/ObjectImage.h"
@@ -28,6 +27,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -42,31 +42,23 @@ static struct RegisterJIT {
extern "C" void LLVMLinkInMCJIT() {
}
-ExecutionEngine *MCJIT::createJIT(Module *M,
+ExecutionEngine *MCJIT::createJIT(std::unique_ptr<Module> M,
std::string *ErrorStr,
RTDyldMemoryManager *MemMgr,
- bool GVsWithCode,
- TargetMachine *TM) {
+ std::unique_ptr<TargetMachine> TM) {
// Try to register the program as a source of symbols to resolve against.
//
// FIXME: Don't do this here.
sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr);
- return new MCJIT(M, TM, MemMgr ? MemMgr : new SectionMemoryManager(),
- GVsWithCode);
+ return new MCJIT(std::move(M), std::move(TM),
+ MemMgr ? MemMgr : new SectionMemoryManager());
}
-MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM,
- bool AllocateGVsWithCode)
- : ExecutionEngine(m), TM(tm), Ctx(nullptr), MemMgr(this, MM), Dyld(&MemMgr),
- ObjCache(nullptr) {
-
- OwnedModules.addModule(m);
- setDataLayout(TM->getDataLayout());
-}
-
-MCJIT::~MCJIT() {
- MutexGuard locked(lock);
+MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
+ RTDyldMemoryManager *MM)
+ : ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr),
+ MemMgr(this, MM), Dyld(&MemMgr), ObjCache(nullptr) {
// FIXME: We are managing our modules, so we do not want the base class
// ExecutionEngine to manage them as well. To avoid double destruction
// of the first (and only) module added in ExecutionEngine constructor
@@ -77,33 +69,28 @@ MCJIT::~MCJIT() {
// If so, additional functions: addModule, removeModule, FindFunctionNamed,
// runStaticConstructorsDestructors could be moved back to EE as well.
//
+ std::unique_ptr<Module> First = std::move(Modules[0]);
Modules.clear();
+
+ OwnedModules.addModule(std::move(First));
+ setDataLayout(TM->getSubtargetImpl()->getDataLayout());
+}
+
+MCJIT::~MCJIT() {
+ MutexGuard locked(lock);
+
Dyld.deregisterEHFrames();
- LoadedObjectList::iterator it, end;
- for (it = LoadedObjects.begin(), end = LoadedObjects.end(); it != end; ++it) {
- ObjectImage *Obj = *it;
- if (Obj) {
+ for (auto &Obj : LoadedObjects)
+ if (Obj)
NotifyFreeingObject(*Obj);
- delete Obj;
- }
- }
- LoadedObjects.clear();
-
- SmallVector<object::Archive *, 2>::iterator ArIt, ArEnd;
- for (ArIt = Archives.begin(), ArEnd = Archives.end(); ArIt != ArEnd; ++ArIt) {
- object::Archive *A = *ArIt;
- delete A;
- }
Archives.clear();
-
- delete TM;
}
-void MCJIT::addModule(Module *M) {
+void MCJIT::addModule(std::unique_ptr<Module> M) {
MutexGuard locked(lock);
- OwnedModules.addModule(M);
+ OwnedModules.addModule(std::move(M));
}
bool MCJIT::removeModule(Module *M) {
@@ -111,29 +98,34 @@ bool MCJIT::removeModule(Module *M) {
return OwnedModules.removeModule(M);
}
-
-
void MCJIT::addObjectFile(std::unique_ptr<object::ObjectFile> Obj) {
- ObjectImage *LoadedObject = Dyld.loadObject(std::move(Obj));
+ std::unique_ptr<ObjectImage> LoadedObject = Dyld.loadObject(std::move(Obj));
if (!LoadedObject || Dyld.hasError())
report_fatal_error(Dyld.getErrorString());
- LoadedObjects.push_back(LoadedObject);
-
NotifyObjectEmitted(*LoadedObject);
+
+ LoadedObjects.push_back(std::move(LoadedObject));
}
-void MCJIT::addArchive(object::Archive *A) {
- Archives.push_back(A);
+void MCJIT::addObjectFile(object::OwningBinary<object::ObjectFile> Obj) {
+ std::unique_ptr<object::ObjectFile> ObjFile;
+ std::unique_ptr<MemoryBuffer> MemBuf;
+ std::tie(ObjFile, MemBuf) = Obj.takeBinary();
+ addObjectFile(std::move(ObjFile));
+ Buffers.push_back(std::move(MemBuf));
}
+void MCJIT::addArchive(object::OwningBinary<object::Archive> A) {
+ Archives.push_back(std::move(A));
+}
void MCJIT::setObjectCache(ObjectCache* NewCache) {
MutexGuard locked(lock);
ObjCache = NewCache;
}
-ObjectBufferStream* MCJIT::emitObject(Module *M) {
+std::unique_ptr<ObjectBufferStream> MCJIT::emitObject(Module *M) {
MutexGuard locked(lock);
// This must be a module which has already been added but not loaded to this
@@ -142,8 +134,8 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) {
PassManager PM;
- M->setDataLayout(TM->getDataLayout());
- PM.add(new DataLayoutPass(M));
+ M->setDataLayout(TM->getSubtargetImpl()->getDataLayout());
+ PM.add(new DataLayoutPass());
// The RuntimeDyld will take ownership of this shortly
std::unique_ptr<ObjectBufferStream> CompiledObject(new ObjectBufferStream());
@@ -165,11 +157,11 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) {
if (ObjCache) {
// MemoryBuffer is a thin wrapper around the actual memory, so it's OK
// to create a temporary object here and delete it after the call.
- std::unique_ptr<MemoryBuffer> MB(CompiledObject->getMemBuffer());
- ObjCache->notifyObjectCompiled(M, MB.get());
+ MemoryBufferRef MB = CompiledObject->getMemBuffer();
+ ObjCache->notifyObjectCompiled(M, MB);
}
- return CompiledObject.release();
+ return CompiledObject;
}
void MCJIT::generateCodeForModule(Module *M) {
@@ -187,21 +179,22 @@ void MCJIT::generateCodeForModule(Module *M) {
std::unique_ptr<ObjectBuffer> ObjectToLoad;
// Try to load the pre-compiled object from cache if possible
if (ObjCache) {
- std::unique_ptr<MemoryBuffer> PreCompiledObject(ObjCache->getObject(M));
- if (PreCompiledObject.get())
- ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.release()));
+ if (std::unique_ptr<MemoryBuffer> PreCompiledObject =
+ ObjCache->getObject(M))
+ ObjectToLoad =
+ llvm::make_unique<ObjectBuffer>(std::move(PreCompiledObject));
}
// If the cache did not contain a suitable object, compile the object
if (!ObjectToLoad) {
- ObjectToLoad.reset(emitObject(M));
- assert(ObjectToLoad.get() && "Compilation did not produce an object.");
+ ObjectToLoad = emitObject(M);
+ assert(ObjectToLoad && "Compilation did not produce an object.");
}
// Load the object into the dynamic linker.
// MCJIT now owns the ObjectImage pointer (via its LoadedObjects list).
- ObjectImage *LoadedObject = Dyld.loadObject(ObjectToLoad.release());
- LoadedObjects.push_back(LoadedObject);
+ std::unique_ptr<ObjectImage> LoadedObject =
+ Dyld.loadObject(std::move(ObjectToLoad));
if (!LoadedObject)
report_fatal_error(Dyld.getErrorString());
@@ -210,6 +203,8 @@ void MCJIT::generateCodeForModule(Module *M) {
NotifyObjectEmitted(*LoadedObject);
+ LoadedObjects.push_back(std::move(LoadedObject));
+
OwnedModules.markModuleAsLoaded(M);
}
@@ -232,12 +227,14 @@ void MCJIT::finalizeLoadedModules() {
void MCJIT::finalizeObject() {
MutexGuard locked(lock);
- for (ModulePtrSet::iterator I = OwnedModules.begin_added(),
- E = OwnedModules.end_added();
- I != E; ++I) {
- Module *M = *I;
+ // generateCodeForModule is going to move modules out of the 'added' list,
+ // so copy the list out before iterating over it:
+ SmallVector<Module*, 16> ModsToAdd;
+ for (auto M : OwnedModules.added())
+ ModsToAdd.push_back(M);
+
+ for (auto M : ModsToAdd)
generateCodeForModule(M);
- }
finalizeLoadedModules();
}
@@ -255,12 +252,8 @@ void MCJIT::finalizeModule(Module *M) {
finalizeLoadedModules();
}
-void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
- report_fatal_error("not yet implemented");
-}
-
uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) {
- Mangler Mang(TM->getDataLayout());
+ Mangler Mang(TM->getSubtargetImpl()->getDataLayout());
SmallString<128> FullName;
Mang.getNameWithPrefix(FullName, Name);
return Dyld.getSymbolLoadAddress(FullName);
@@ -299,9 +292,8 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
if (Addr)
return Addr;
- SmallVector<object::Archive*, 2>::iterator I, E;
- for (I = Archives.begin(), E = Archives.end(); I != E; ++I) {
- object::Archive *A = *I;
+ for (object::OwningBinary<object::Archive> &OB : Archives) {
+ object::Archive *A = OB.getBinary();
// Look for our symbols in each Archive
object::Archive::child_iterator ChildIt = A->findSym(Name);
if (ChildIt != A->child_end()) {
@@ -310,7 +302,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
ChildIt->getAsBinary();
if (ChildBinOrErr.getError())
continue;
- std::unique_ptr<object::Binary> ChildBin = std::move(ChildBinOrErr.get());
+ std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
if (ChildBin->isObject()) {
std::unique_ptr<object::ObjectFile> OF(
static_cast<object::ObjectFile *>(ChildBin.release()));
@@ -326,13 +318,19 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
// If it hasn't already been generated, see if it's in one of our modules.
Module *M = findModuleForSymbol(Name, CheckFunctionsOnly);
- if (!M)
- return 0;
+ if (M) {
+ generateCodeForModule(M);
+
+ // Check the RuntimeDyld table again, it should be there now.
+ return getExistingSymbolAddress(Name);
+ }
- generateCodeForModule(M);
+ // If a LazyFunctionCreator is installed, use it to get/create the function.
+ // FIXME: Should we instead have a LazySymbolCreator callback?
+ if (LazyFunctionCreator)
+ Addr = (uint64_t)LazyFunctionCreator(Name);
- // Check the RuntimeDyld table again, it should be there now.
- return getExistingSymbolAddress(Name);
+ return Addr;
}
uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) {
@@ -355,10 +353,14 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) {
void *MCJIT::getPointerToFunction(Function *F) {
MutexGuard locked(lock);
+ Mangler Mang(TM->getSubtargetImpl()->getDataLayout());
+ SmallString<128> Name;
+ TM->getNameWithPrefix(Name, F, Mang);
+
if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
bool AbortOnFailure = !F->hasExternalWeakLinkage();
- void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
- addGlobalMapping(F, Addr);
+ void *Addr = getPointerToNamedFunction(Name, AbortOnFailure);
+ updateGlobalMapping(F, Addr);
return Addr;
}
@@ -368,32 +370,25 @@ void *MCJIT::getPointerToFunction(Function *F) {
// Make sure the relevant module has been compiled and loaded.
if (HasBeenAddedButNotLoaded)
generateCodeForModule(M);
- else if (!OwnedModules.hasModuleBeenLoaded(M))
+ else if (!OwnedModules.hasModuleBeenLoaded(M)) {
// If this function doesn't belong to one of our modules, we're done.
+ // FIXME: Asking for the pointer to a function that hasn't been registered,
+ // and isn't a declaration (which is handled above) should probably
+ // be an assertion.
return nullptr;
+ }
// FIXME: Should the Dyld be retaining module information? Probably not.
//
// This is the accessor for the target address, so make sure to check the
// load address of the symbol, not the local address.
- Mangler Mang(TM->getDataLayout());
- SmallString<128> Name;
- TM->getNameWithPrefix(Name, F, Mang);
return (void*)Dyld.getSymbolLoadAddress(Name);
}
-void *MCJIT::recompileAndRelinkFunction(Function *F) {
- report_fatal_error("not yet implemented");
-}
-
-void MCJIT::freeMachineCodeForFunction(Function *F) {
- report_fatal_error("not yet implemented");
-}
-
void MCJIT::runStaticConstructorsDestructorsInModulePtrSet(
bool isDtors, ModulePtrSet::iterator I, ModulePtrSet::iterator E) {
for (; I != E; ++I) {
- ExecutionEngine::runStaticConstructorsDestructors(*I, isDtors);
+ ExecutionEngine::runStaticConstructorsDestructors(**I, isDtors);
}
}
@@ -529,8 +524,7 @@ GenericValue MCJIT::runFunction(Function *F,
llvm_unreachable("Full-featured argument passing not supported yet!");
}
-void *MCJIT::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
+void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) {
if (!isSymbolSearchingDisabled()) {
void *ptr = MemMgr.getPointerToNamedFunction(Name, false);
if (ptr)
@@ -559,8 +553,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
if (!L)
return;
MutexGuard locked(lock);
- SmallVector<JITEventListener*, 2>::reverse_iterator I=
- std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+ auto I = std::find(EventListeners.rbegin(), EventListeners.rend(), L);
if (I != EventListeners.rend()) {
std::swap(*I, EventListeners.back());
EventListeners.pop_back();
@@ -575,9 +568,8 @@ void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) {
}
void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) {
MutexGuard locked(lock);
- for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
- EventListeners[I]->NotifyFreeingObject(Obj);
- }
+ for (JITEventListener *L : EventListeners)
+ L->NotifyFreeingObject(Obj);
}
uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) {
@@ -588,5 +580,7 @@ uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) {
Result = ParentEngine->getSymbolAddress(Name.substr(1), false);
if (Result)
return Result;
+ if (ParentEngine->isSymbolSearchingDisabled())
+ return 0;
return ClientMM->getSymbolAddress(Name);
}
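
For clients, the visible effect of the MCJIT changes in this commit is that the engine now owns the Module and the TargetMachine, so construction goes through EngineBuilder with a std::unique_ptr. A rough usage sketch against the post-change API; it assumes the target, MC, and asm-printer libraries have already been initialized and trims all error handling:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/MCJIT.h"
    #include "llvm/IR/Module.h"
    #include <memory>
    #include <string>

    using namespace llvm;

    ExecutionEngine *buildEngine(std::unique_ptr<Module> M, std::string &Err) {
      LLVMLinkInMCJIT();                    // make sure MCJIT is linked in
      return EngineBuilder(std::move(M))    // the engine owns the module from here
          .setErrorStr(&Err)
          .setEngineKind(EngineKind::JIT)
          .create();                        // creates and owns a TargetMachine too
    }
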
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 100e9a2..bc943b9 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
-#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -101,8 +101,8 @@ private:
// called.
class MCJIT : public ExecutionEngine {
- MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr,
- bool AllocateGVsWithCode);
+ MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
+ RTDyldMemoryManager *MemMgr);
typedef llvm::SmallPtrSet<Module *, 4> ModulePtrSet;
@@ -118,6 +118,9 @@ class MCJIT : public ExecutionEngine {
ModulePtrSet::iterator begin_added() { return AddedModules.begin(); }
ModulePtrSet::iterator end_added() { return AddedModules.end(); }
+ iterator_range<ModulePtrSet::iterator> added() {
+ return iterator_range<ModulePtrSet::iterator>(begin_added(), end_added());
+ }
ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); }
ModulePtrSet::iterator end_loaded() { return LoadedModules.end(); }
@@ -125,8 +128,8 @@ class MCJIT : public ExecutionEngine {
ModulePtrSet::iterator begin_finalized() { return FinalizedModules.begin(); }
ModulePtrSet::iterator end_finalized() { return FinalizedModules.end(); }
- void addModule(Module *M) {
- AddedModules.insert(M);
+ void addModule(std::unique_ptr<Module> M) {
+ AddedModules.insert(M.release());
}
bool removeModule(Module *M) {
@@ -208,18 +211,18 @@ class MCJIT : public ExecutionEngine {
}
};
- TargetMachine *TM;
+ std::unique_ptr<TargetMachine> TM;
MCContext *Ctx;
LinkingMemoryManager MemMgr;
RuntimeDyld Dyld;
- SmallVector<JITEventListener*, 2> EventListeners;
+ std::vector<JITEventListener*> EventListeners;
OwningModuleContainer OwnedModules;
- SmallVector<object::Archive*, 2> Archives;
+ SmallVector<object::OwningBinary<object::Archive>, 2> Archives;
+ SmallVector<std::unique_ptr<MemoryBuffer>, 2> Buffers;
- typedef SmallVector<ObjectImage *, 2> LoadedObjectList;
- LoadedObjectList LoadedObjects;
+ SmallVector<std::unique_ptr<ObjectImage>, 2> LoadedObjects;
// An optional ObjectCache to be notified of compiled objects and used to
// perform lookup of pre-compiled code to avoid re-compilation.
@@ -238,9 +241,10 @@ public:
/// @name ExecutionEngine interface implementation
/// @{
- void addModule(Module *M) override;
+ void addModule(std::unique_ptr<Module> M) override;
void addObjectFile(std::unique_ptr<object::ObjectFile> O) override;
- void addArchive(object::Archive *O) override;
+ void addObjectFile(object::OwningBinary<object::ObjectFile> O) override;
+ void addArchive(object::OwningBinary<object::Archive> O) override;
bool removeModule(Module *M) override;
/// FindFunctionNamed - Search all of the active modules to find the one that
@@ -276,14 +280,8 @@ public:
/// \param isDtors - Run the destructors instead of constructors.
void runStaticConstructorsDestructors(bool isDtors) override;
- void *getPointerToBasicBlock(BasicBlock *BB) override;
-
void *getPointerToFunction(Function *F) override;
- void *recompileAndRelinkFunction(Function *F) override;
-
- void freeMachineCodeForFunction(Function *F) override;
-
GenericValue runFunction(Function *F,
const std::vector<GenericValue> &ArgValues) override;
@@ -295,7 +293,7 @@ public:
/// found, this function silently returns a null pointer. Otherwise,
/// it prints a message to stderr and aborts.
///
- void *getPointerToNamedFunction(const std::string &Name,
+ void *getPointerToNamedFunction(StringRef Name,
bool AbortOnFailure = true) override;
/// mapSectionAddress - map a section to its target address space value.
@@ -315,7 +313,7 @@ public:
uint64_t getGlobalValueAddress(const std::string &Name) override;
uint64_t getFunctionAddress(const std::string &Name) override;
- TargetMachine *getTargetMachine() override { return TM; }
+ TargetMachine *getTargetMachine() override { return TM.get(); }
/// @}
/// @name (Private) Registration Interfaces
@@ -325,11 +323,10 @@ public:
MCJITCtor = createJIT;
}
- static ExecutionEngine *createJIT(Module *M,
+ static ExecutionEngine *createJIT(std::unique_ptr<Module> M,
std::string *ErrorStr,
RTDyldMemoryManager *MemMgr,
- bool GVsWithCode,
- TargetMachine *TM);
+ std::unique_ptr<TargetMachine> TM);
// @}
@@ -344,7 +341,7 @@ protected:
/// this function call is expected to be the contained module. The module
/// is passed as a parameter here to prepare for multiple module support in
/// the future.
- ObjectBufferStream* emitObject(Module *M);
+ std::unique_ptr<ObjectBufferStream> emitObject(Module *M);
void NotifyObjectEmitted(const ObjectImage& Obj);
void NotifyFreeingObject(const ObjectImage& Obj);
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
index c26e0ad..cf71432 100644
--- a/lib/ExecutionEngine/Makefile
+++ b/lib/ExecutionEngine/Makefile
@@ -11,7 +11,7 @@ LIBRARYNAME = LLVMExecutionEngine
include $(LEVEL)/Makefile.config
-PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld
+PARALLEL_DIRS = Interpreter MCJIT RuntimeDyld
ifeq ($(USE_INTEL_JITEVENTS), 1)
PARALLEL_DIRS += IntelJITEvents
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index fd37a13..5a8ccb6 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -49,12 +49,6 @@ public:
~OProfileJITEventListener();
- virtual void NotifyFunctionEmitted(const Function &F,
- void *FnStart, size_t FnSize,
- const JITEvent_EmittedFunctionDetails &Details);
-
- virtual void NotifyFreeingMachineCode(void *OldPtr);
-
virtual void NotifyObjectEmitted(const ObjectImage &Obj);
virtual void NotifyFreeingObject(const ObjectImage &Obj);
@@ -81,90 +75,6 @@ OProfileJITEventListener::~OProfileJITEventListener() {
}
}
-static debug_line_info LineStartToOProfileFormat(
- const MachineFunction &MF, FilenameCache &Filenames,
- uintptr_t Address, DebugLoc Loc) {
- debug_line_info Result;
- Result.vma = Address;
- Result.lineno = Loc.getLine();
- Result.filename = Filenames.getFilename(
- Loc.getScope(MF.getFunction()->getContext()));
- DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
- << Result.filename << ":" << Result.lineno << "\n");
- return Result;
-}
-
-// Adds the just-emitted function to the symbol table.
-void OProfileJITEventListener::NotifyFunctionEmitted(
- const Function &F, void *FnStart, size_t FnSize,
- const JITEvent_EmittedFunctionDetails &Details) {
- assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
- if (Wrapper.op_write_native_code(F.getName().data(),
- reinterpret_cast<uint64_t>(FnStart),
- FnStart, FnSize) == -1) {
- DEBUG(dbgs() << "Failed to tell OProfile about native function "
- << F.getName() << " at ["
- << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
- return;
- }
-
- if (!Details.LineStarts.empty()) {
- // Now we convert the line number information from the address/DebugLoc
- // format in Details to the address/filename/lineno format that OProfile
- // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore
- // line numbers for addresses above 4G.
- FilenameCache Filenames;
- std::vector<debug_line_info> LineInfo;
- LineInfo.reserve(1 + Details.LineStarts.size());
-
- DebugLoc FirstLoc = Details.LineStarts[0].Loc;
- assert(!FirstLoc.isUnknown()
- && "LineStarts should not contain unknown DebugLocs");
- MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
- DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
- if (FunctionDI.Verify()) {
- // If we have debug info for the function itself, use that as the line
- // number of the first several instructions. Otherwise, after filling
- // LineInfo, we'll adjust the address of the first line number to point at
- // the start of the function.
- debug_line_info line_info;
- line_info.vma = reinterpret_cast<uintptr_t>(FnStart);
- line_info.lineno = FunctionDI.getLineNumber();
- line_info.filename = Filenames.getFilename(FirstLocScope);
- LineInfo.push_back(line_info);
- }
-
- for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
- I = Details.LineStarts.begin(), E = Details.LineStarts.end();
- I != E; ++I) {
- LineInfo.push_back(LineStartToOProfileFormat(
- *Details.MF, Filenames, I->Address, I->Loc));
- }
-
- // In case the function didn't have line info of its own, adjust the first
- // line info's address to include the start of the function.
- LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
-
- if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(),
- &*LineInfo.begin()) == -1) {
- DEBUG(dbgs()
- << "Failed to tell OProfile about line numbers for native function "
- << F.getName() << " at ["
- << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
- }
- }
-}
-
-// Removes the being-deleted function from the symbol table.
-void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
- assert(FnStart && "Invalid function pointer");
- if (Wrapper.op_unload_native_code(reinterpret_cast<uint64_t>(FnStart)) == -1) {
- DEBUG(dbgs()
- << "Failed to tell OProfile about unload of native function at "
- << FnStart << "\n");
- }
-}
-
void OProfileJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
if (!Wrapper.isAgentAvailable()) {
return;
diff --git a/lib/ExecutionEngine/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RTDyldMemoryManager.cpp
index 1646937..51b2d0f 100644
--- a/lib/ExecutionEngine/RTDyldMemoryManager.cpp
+++ b/lib/ExecutionEngine/RTDyldMemoryManager.cpp
@@ -210,7 +210,8 @@ ARM_MATH_IMPORTS(ARM_MATH_DECL)
#undef ARM_MATH_DECL
#endif
-uint64_t RTDyldMemoryManager::getSymbolAddress(const std::string &Name) {
+uint64_t
+RTDyldMemoryManager::getSymbolAddressInProcess(const std::string &Name) {
// This implementation assumes that the host program is the target.
// Clients generating code for a remote target should implement their own
// memory manager.
@@ -253,19 +254,19 @@ uint64_t RTDyldMemoryManager::getSymbolAddress(const std::string &Name) {
// is called before ExecutionEngine::runFunctionAsMain() is called.
if (Name == "__main") return (uint64_t)&jit_noop;
+ // Try to demangle Name before looking it up in the process, otherwise symbol
+ // '_<Name>' (if present) will shadow '<Name>', and there will be no way to
+ // refer to the latter.
+
const char *NameStr = Name.c_str();
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr)
- return (uint64_t)Ptr;
- // If it wasn't found and if it starts with an underscore ('_') character,
- // try again without the underscore.
- if (NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr)
+ if (NameStr[0] == '_')
+ if (void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr + 1))
return (uint64_t)Ptr;
- }
- return 0;
+
+ // If the Name did not require demangling, or we failed to find the demangled
+ // name, try again without demangling.
+ return (uint64_t)sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
}
void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name,
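
Renaming the helper to getSymbolAddressInProcess also makes the extension point clearer: a client that has to provide extra symbols overrides getSymbolAddress on its memory manager and falls back to the in-process lookup. A hedged sketch of that pattern against this revision's interfaces; myRuntimeLookup is a made-up hook, not an LLVM API:

    #include "llvm/ExecutionEngine/SectionMemoryManager.h"
    #include <cstdint>
    #include <string>

    using namespace llvm;

    class MyMemoryManager : public SectionMemoryManager {
    public:
      uint64_t getSymbolAddress(const std::string &Name) override {
        // First chance: symbols supplied by the embedding application.
        if (uint64_t Addr = myRuntimeLookup(Name))
          return Addr;
        // Otherwise fall back to the host process, as the default does.
        return getSymbolAddressInProcess(Name);
      }

    private:
      static uint64_t myRuntimeLookup(const std::string &Name) {
        return 0;  // replace with a real lookup table
      }
    };
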
diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
index 8546571..dfa3a20 100644
--- a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
@@ -101,7 +101,7 @@ private:
/// Lock used to serialize all jit registration events, since they
/// modify global variables.
-llvm::sys::Mutex JITDebugLock;
+ManagedStatic<sys::Mutex> JITDebugLock;
/// Do the registration.
void NotifyDebugger(jit_code_entry* JITCodeEntry) {
@@ -121,7 +121,7 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) {
GDBJITRegistrar::~GDBJITRegistrar() {
// Free all registered object files.
- llvm::MutexGuard locked(JITDebugLock);
+ llvm::MutexGuard locked(*JITDebugLock);
for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(), E = ObjectBufferMap.end();
I != E; ++I) {
// Call the private method that doesn't update the map so our iterator
@@ -137,7 +137,7 @@ void GDBJITRegistrar::registerObject(const ObjectBuffer &Object) {
size_t Size = Object.getBufferSize();
assert(Buffer && "Attempt to register a null object with a debugger.");
- llvm::MutexGuard locked(JITDebugLock);
+ llvm::MutexGuard locked(*JITDebugLock);
assert(ObjectBufferMap.find(Buffer) == ObjectBufferMap.end() &&
"Second attempt to perform debug registration.");
jit_code_entry* JITCodeEntry = new jit_code_entry();
@@ -156,7 +156,7 @@ void GDBJITRegistrar::registerObject(const ObjectBuffer &Object) {
bool GDBJITRegistrar::deregisterObject(const ObjectBuffer& Object) {
const char *Buffer = Object.getBufferStart();
- llvm::MutexGuard locked(JITDebugLock);
+ llvm::MutexGuard locked(*JITDebugLock);
RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(Buffer);
if (I != ObjectBufferMap.end()) {
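
Wrapping the lock in ManagedStatic means the mutex is constructed lazily on first use and destroyed by llvm_shutdown(), instead of being a plain global whose construction order relative to other file-scope objects is unspecified. The pattern in isolation, assuming the usual ManagedStatic dereference operators:

    #include "llvm/Support/ManagedStatic.h"
    #include "llvm/Support/Mutex.h"
    #include "llvm/Support/MutexGuard.h"

    // Lazily constructed on first use; destroyed by llvm_shutdown(), not at an
    // unspecified point during static destruction.
    static llvm::ManagedStatic<llvm::sys::Mutex> RegistryLock;

    void touchRegistry() {
      llvm::MutexGuard Locked(*RegistryLock);  // operator* constructs if needed
      // ... mutate the shared registry while holding the lock ...
    }
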
diff --git a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
index 6a514ea..636011f 100644
--- a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
+++ b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
-#define LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_JITREGISTRAR_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_JITREGISTRAR_H
#include "llvm/ExecutionEngine/ObjectBuffer.h"
@@ -41,4 +41,4 @@ public:
} // end namespace llvm
-#endif // LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
index c3a2182..9bbf6a0d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
-#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
#include "llvm/ExecutionEngine/ObjectBuffer.h"
#include "llvm/ExecutionEngine/ObjectImage.h"
@@ -36,20 +36,17 @@ protected:
// This form of the constructor allows subclasses to use
// format-specific subclasses of ObjectFile directly
- ObjectImageCommon(ObjectBuffer *Input, std::unique_ptr<object::ObjectFile> Obj)
- : ObjectImage(Input), // saves Input as Buffer and takes ownership
- ObjFile(std::move(Obj))
- {
- }
+ ObjectImageCommon(std::unique_ptr<ObjectBuffer> Input,
+ std::unique_ptr<object::ObjectFile> Obj)
+ : ObjectImage(std::move(Input)), ObjFile(std::move(Obj)) {}
public:
- ObjectImageCommon(ObjectBuffer* Input)
- : ObjectImage(Input) // saves Input as Buffer and takes ownership
- {
+ ObjectImageCommon(std::unique_ptr<ObjectBuffer> Input)
+ : ObjectImage(std::move(Input)) {
// FIXME: error checking? createObjectFile returns an ErrorOr<ObjectFile*>
// and should probably be checked for failure.
- std::unique_ptr<MemoryBuffer> Buf(Buffer->getMemBuffer());
- ObjFile.reset(object::ObjectFile::createObjectFile(Buf).get());
+ MemoryBufferRef Buf = Buffer->getMemBuffer();
+ ObjFile = std::move(object::ObjectFile::createObjectFile(Buf).get());
}
ObjectImageCommon(std::unique_ptr<object::ObjectFile> Input)
: ObjectImage(nullptr), ObjFile(std::move(Input)) {}
@@ -86,4 +83,4 @@ public:
} // end namespace llvm
-#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 9dfd167..c7c67f6 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -14,6 +14,7 @@
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "JITRegistrar.h"
#include "ObjectImageCommon.h"
+#include "RuntimeDyldCheckerImpl.h"
#include "RuntimeDyldELF.h"
#include "RuntimeDyldImpl.h"
#include "RuntimeDyldMachO.h"
@@ -40,6 +41,44 @@ void RuntimeDyldImpl::registerEHFrames() {}
void RuntimeDyldImpl::deregisterEHFrames() {}
+#ifndef NDEBUG
+static void dumpSectionMemory(const SectionEntry &S, StringRef State) {
+ dbgs() << "----- Contents of section " << S.Name << " " << State << " -----";
+
+ if (S.Address == nullptr) {
+ dbgs() << "\n <section not emitted>\n";
+ return;
+ }
+
+ const unsigned ColsPerRow = 16;
+
+ uint8_t *DataAddr = S.Address;
+ uint64_t LoadAddr = S.LoadAddress;
+
+ unsigned StartPadding = LoadAddr & (ColsPerRow - 1);
+ unsigned BytesRemaining = S.Size;
+
+ if (StartPadding) {
+ dbgs() << "\n" << format("0x%016" PRIx64, LoadAddr & ~(ColsPerRow - 1)) << ":";
+ while (StartPadding--)
+ dbgs() << " ";
+ }
+
+ while (BytesRemaining > 0) {
+ if ((LoadAddr & (ColsPerRow - 1)) == 0)
+ dbgs() << "\n" << format("0x%016" PRIx64, LoadAddr) << ":";
+
+ dbgs() << " " << format("%02x", *DataAddr);
+
+ ++DataAddr;
+ ++LoadAddr;
+ --BytesRemaining;
+ }
+
+ dbgs() << "\n";
+}
+#endif
+
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
MutexGuard locked(lock);
@@ -55,8 +94,10 @@ void RuntimeDyldImpl::resolveRelocations() {
// entry provides the section to which the relocation will be applied.
uint64_t Addr = Sections[i].LoadAddress;
DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t"
- << format("%p", (uint8_t *)Addr) << "\n");
+ << format("0x%x", Addr) << "\n");
+ DEBUG(dumpSectionMemory(Sections[i], "before relocations"));
resolveRelocationList(Relocations[i], Addr);
+ DEBUG(dumpSectionMemory(Sections[i], "after relocations"));
Relocations.erase(i);
}
}
@@ -88,23 +129,20 @@ static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
if (std::error_code EC = Sym.getSection(SecI))
return EC;
- if (SecI == Obj->section_end()) {
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
-
- uint64_t SectionAddress;
- if (std::error_code EC = SecI->getAddress(SectionAddress))
- return EC;
+ if (SecI == Obj->section_end()) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+ uint64_t SectionAddress = SecI->getAddress();
Result = Address - SectionAddress;
return object_error::success;
}
-ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
+std::unique_ptr<ObjectImage>
+RuntimeDyldImpl::loadObject(std::unique_ptr<ObjectImage> Obj) {
MutexGuard locked(lock);
- std::unique_ptr<ObjectImage> Obj(InputObject);
if (!Obj)
return nullptr;
@@ -158,14 +196,13 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
SymType == object::SymbolRef::ST_Unknown) {
uint64_t SectOffset;
StringRef SectionData;
- bool IsCode;
section_iterator SI = Obj->end_sections();
Check(getOffset(*I, SectOffset));
Check(I->getSection(SI));
if (SI == Obj->end_sections())
continue;
Check(SI->getContents(SectionData));
- Check(SI->isText(IsCode));
+ bool IsCode = SI->isText();
unsigned SectionID =
findOrEmitSection(*Obj, *SI, IsCode, LocalSections);
LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
@@ -195,8 +232,7 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
if (I == E && !ProcessAllSections)
continue;
- bool IsCode = false;
- Check(RelocatedSection->isText(IsCode));
+ bool IsCode = RelocatedSection->isText();
SectionID =
findOrEmitSection(*Obj, *RelocatedSection, IsCode, LocalSections);
DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
@@ -204,12 +240,17 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
for (; I != E;)
I = processRelocationRef(SectionID, I, *Obj, LocalSections, LocalSymbols,
Stubs);
+
+ // If there is an attached checker, notify it about the stubs for this
+ // section so that they can be verified.
+ if (Checker)
+ Checker->registerStubMap(Obj->getImageName(), SectionID, Stubs);
}
// Give the subclasses a chance to tie-up any loose ends.
finalizeLoad(*Obj, LocalSections);
- return Obj.release();
+ return Obj;
}
// A helper method for computeTotalAllocSize.
@@ -245,20 +286,15 @@ void RuntimeDyldImpl::computeTotalAllocSize(ObjectImage &Obj,
SI != SE; ++SI) {
const SectionRef &Section = *SI;
- bool IsRequired;
- Check(Section.isRequiredForExecution(IsRequired));
+ bool IsRequired = Section.isRequiredForExecution();
// Consider only the sections that are required to be loaded for execution
if (IsRequired) {
- uint64_t DataSize = 0;
- uint64_t Alignment64 = 0;
- bool IsCode = false;
- bool IsReadOnly = false;
StringRef Name;
- Check(Section.getSize(DataSize));
- Check(Section.getAlignment(Alignment64));
- Check(Section.isText(IsCode));
- Check(Section.isReadOnlyData(IsReadOnly));
+ uint64_t DataSize = Section.getSize();
+ uint64_t Alignment64 = Section.getAlignment();
+ bool IsCode = Section.isText();
+ bool IsReadOnly = Section.isReadOnlyData();
Check(Section.getName(Name));
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
@@ -340,10 +376,8 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(ObjectImage &Obj,
}
// Get section data size and alignment
- uint64_t Alignment64;
- uint64_t DataSize;
- Check(Section.getSize(DataSize));
- Check(Section.getAlignment(Alignment64));
+ uint64_t DataSize = Section.getSize();
+ uint64_t Alignment64 = Section.getAlignment();
// Add stubbuf size alignment
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
@@ -354,6 +388,36 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(ObjectImage &Obj,
return StubBufSize;
}
+uint64_t RuntimeDyldImpl::readBytesUnaligned(uint8_t *Src,
+ unsigned Size) const {
+ uint64_t Result = 0;
+ if (IsTargetLittleEndian) {
+ Src += Size - 1;
+ while (Size--)
+ Result = (Result << 8) | *Src--;
+ } else
+ while (Size--)
+ Result = (Result << 8) | *Src++;
+
+ return Result;
+}
+
+void RuntimeDyldImpl::writeBytesUnaligned(uint64_t Value, uint8_t *Dst,
+ unsigned Size) const {
+ if (IsTargetLittleEndian) {
+ while (Size--) {
+ *Dst++ = Value & 0xFF;
+ Value >>= 8;
+ }
+ } else {
+ Dst += Size - 1;
+ while (Size--) {
+ *Dst-- = Value & 0xFF;
+ Value >>= 8;
+ }
+ }
+}
+
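Both helpers above move one byte at a time, so they impose no alignment requirement on the buffer, and they order the bytes according to the target's endianness (IsTargetLittleEndian) rather than the host's. A freestanding sketch of the little-endian case, using hypothetical names rather than the RuntimeDyldImpl members:

    #include <cstdint>

    // Hypothetical standalone accessors: unaligned, explicitly little-endian.
    static uint64_t readLE(const uint8_t *Src, unsigned Size) {
      uint64_t Result = 0;
      for (unsigned I = 0; I != Size; ++I)
        Result |= uint64_t(Src[I]) << (8 * I); // least significant byte first
      return Result;
    }

    static void writeLE(uint64_t Value, uint8_t *Dst, unsigned Size) {
      for (unsigned I = 0; I != Size; ++I)
        Dst[I] = uint8_t(Value >> (8 * I));    // least significant byte first
    }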
void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
const CommonSymbolMap &CommonSymbols,
uint64_t TotalSize,
@@ -365,7 +429,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
- Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, 0));
+ Sections.push_back(SectionEntry("<common symbols>", Addr, TotalSize, 0));
memset(Addr, 0, TotalSize);
DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID << " new addr: "
@@ -397,24 +461,18 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
const SectionRef &Section, bool IsCode) {
StringRef data;
- uint64_t Alignment64;
Check(Section.getContents(data));
- Check(Section.getAlignment(Alignment64));
+ uint64_t Alignment64 = Section.getAlignment();
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
- bool IsRequired;
- bool IsVirtual;
- bool IsZeroInit;
- bool IsReadOnly;
- uint64_t DataSize;
unsigned PaddingSize = 0;
unsigned StubBufSize = 0;
StringRef Name;
- Check(Section.isRequiredForExecution(IsRequired));
- Check(Section.isVirtual(IsVirtual));
- Check(Section.isZeroInit(IsZeroInit));
- Check(Section.isReadOnlyData(IsReadOnly));
- Check(Section.getSize(DataSize));
+ bool IsRequired = Section.isRequiredForExecution();
+ bool IsVirtual = Section.isVirtual();
+ bool IsZeroInit = Section.isZeroInit();
+ bool IsReadOnly = Section.isReadOnlyData();
+ uint64_t DataSize = Section.getSize();
Check(Section.getName(Name));
StubBufSize = computeSectionStubBufSize(Obj, Section);
@@ -477,6 +535,10 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
}
Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData));
+
+ if (Checker)
+ Checker->registerSection(Obj.getImageName(), SectionID);
+
return SectionID;
}
@@ -517,34 +579,26 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
}
}
-uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
- if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be ||
- Arch == Triple::arm64 || Arch == Triple::arm64_be) {
+uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr,
+ unsigned AbiVariant) {
+ if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) {
// This stub has to be able to access the full address space,
// since symbol lookup won't necessarily find a handy, in-range,
// PLT stub for functions which could be anywhere.
- uint32_t *StubAddr = (uint32_t *)Addr;
-
// Stub can use ip0 (== x16) to calculate address
- *StubAddr = 0xd2e00010; // movz ip0, #:abs_g3:<addr>
- StubAddr++;
- *StubAddr = 0xf2c00010; // movk ip0, #:abs_g2_nc:<addr>
- StubAddr++;
- *StubAddr = 0xf2a00010; // movk ip0, #:abs_g1_nc:<addr>
- StubAddr++;
- *StubAddr = 0xf2800010; // movk ip0, #:abs_g0_nc:<addr>
- StubAddr++;
- *StubAddr = 0xd61f0200; // br ip0
+ writeBytesUnaligned(0xd2e00010, Addr, 4); // movz ip0, #:abs_g3:<addr>
+ writeBytesUnaligned(0xf2c00010, Addr+4, 4); // movk ip0, #:abs_g2_nc:<addr>
+ writeBytesUnaligned(0xf2a00010, Addr+8, 4); // movk ip0, #:abs_g1_nc:<addr>
+ writeBytesUnaligned(0xf2800010, Addr+12, 4); // movk ip0, #:abs_g0_nc:<addr>
+ writeBytesUnaligned(0xd61f0200, Addr+16, 4); // br ip0
return Addr;
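The movz/movk sequence above is emitted with zero immediates; relocation resolution later patches each instruction with one 16-bit chunk of the 64-bit target address (abs_g3 carries bits 63:48, down to abs_g0 for bits 15:0), and the final br ip0 jumps through the assembled value. A small illustration of that split, with a hypothetical helper name:

    #include <cstdint>

    // Hypothetical helper: split a 64-bit target address into the four 16-bit
    // immediates consumed by the movz/movk sequence (Chunk[3] -> abs_g3, ...,
    // Chunk[0] -> abs_g0).
    static void splitAArch64Addr(uint64_t Addr, uint16_t Chunk[4]) {
      for (int G = 0; G != 4; ++G)
        Chunk[G] = uint16_t(Addr >> (16 * G));
    }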
} else if (Arch == Triple::arm || Arch == Triple::armeb) {
// TODO: There is only ARM far stub now. We should add the Thumb stub,
// and stubs for branches Thumb - ARM and ARM - Thumb.
- uint32_t *StubAddr = (uint32_t *)Addr;
- *StubAddr = 0xe51ff004; // ldr pc,<label>
- return (uint8_t *)++StubAddr;
+ writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc,<label>
+ return Addr + 4;
} else if (Arch == Triple::mipsel || Arch == Triple::mips) {
- uint32_t *StubAddr = (uint32_t *)Addr;
// 0: 3c190000 lui t9,%hi(addr).
// 4: 27390000 addiu t9,t9,%lo(addr).
// 8: 03200008 jr t9.
@@ -552,31 +606,37 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
const unsigned LuiT9Instr = 0x3c190000, AdduiT9Instr = 0x27390000;
const unsigned JrT9Instr = 0x03200008, NopInstr = 0x0;
- *StubAddr = LuiT9Instr;
- StubAddr++;
- *StubAddr = AdduiT9Instr;
- StubAddr++;
- *StubAddr = JrT9Instr;
- StubAddr++;
- *StubAddr = NopInstr;
+ writeBytesUnaligned(LuiT9Instr, Addr, 4);
+ writeBytesUnaligned(AdduiT9Instr, Addr+4, 4);
+ writeBytesUnaligned(JrT9Instr, Addr+8, 4);
+ writeBytesUnaligned(NopInstr, Addr+12, 4);
return Addr;
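As on AArch64, the immediates above are placeholders: relocation resolution fills the lui/addiu pair with the %hi/%lo halves of the target address. Since addiu sign-extends its 16-bit immediate, the high half has to be rounded up whenever bit 15 of the address is set; a sketch of that split with a hypothetical helper name:

    #include <cstdint>

    // Hypothetical helper: compute the %hi/%lo immediates for a lui/addiu pair.
    // addiu sign-extends Lo, so Hi is incremented when bit 15 of Addr is set.
    static void splitMipsAddr(uint32_t Addr, uint16_t &Hi, uint16_t &Lo) {
      Lo = uint16_t(Addr & 0xffff);
      Hi = uint16_t((Addr + 0x8000) >> 16);
    }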
} else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) {
- // PowerPC64 stub: the address points to a function descriptor
- // instead of the function itself. Load the function address
- // on r11 and sets it to control register. Also loads the function
- // TOC in r2 and environment pointer to r11.
+ // Depending on which version of the ELF ABI is in use, we need to
+ // generate one of two variants of the stub. They both start with
+ // the same sequence to load the target address into r12.
writeInt32BE(Addr, 0x3D800000); // lis r12, highest(addr)
writeInt32BE(Addr+4, 0x618C0000); // ori r12, higher(addr)
writeInt32BE(Addr+8, 0x798C07C6); // sldi r12, r12, 32
writeInt32BE(Addr+12, 0x658C0000); // oris r12, r12, h(addr)
writeInt32BE(Addr+16, 0x618C0000); // ori r12, r12, l(addr)
- writeInt32BE(Addr+20, 0xF8410028); // std r2, 40(r1)
- writeInt32BE(Addr+24, 0xE96C0000); // ld r11, 0(r12)
- writeInt32BE(Addr+28, 0xE84C0008); // ld r2, 0(r12)
- writeInt32BE(Addr+32, 0x7D6903A6); // mtctr r11
- writeInt32BE(Addr+36, 0xE96C0010); // ld r11, 16(r2)
- writeInt32BE(Addr+40, 0x4E800420); // bctr
-
+ if (AbiVariant == 2) {
+ // PowerPC64 stub ELFv2 ABI: The address points to the function itself.
+ // The address is already in r12 as required by the ABI. Branch to it.
+ writeInt32BE(Addr+20, 0xF8410018); // std r2, 24(r1)
+ writeInt32BE(Addr+24, 0x7D8903A6); // mtctr r12
+ writeInt32BE(Addr+28, 0x4E800420); // bctr
+ } else {
+ // PowerPC64 stub ELFv1 ABI: The address points to a function descriptor.
+      // Load the function address into r11 and move it to the count register.
+      // Also load the function TOC into r2 and the environment pointer into r11.
+ writeInt32BE(Addr+20, 0xF8410028); // std r2, 40(r1)
+ writeInt32BE(Addr+24, 0xE96C0000); // ld r11, 0(r12)
+ writeInt32BE(Addr+28, 0xE84C0008); // ld r2, 0(r12)
+ writeInt32BE(Addr+32, 0x7D6903A6); // mtctr r11
+ writeInt32BE(Addr+36, 0xE96C0010); // ld r11, 16(r2)
+ writeInt32BE(Addr+40, 0x4E800420); // bctr
+ }
return Addr;
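Which of the two sequences is emitted is controlled by the new AbiVariant parameter. For ELF objects the ABI version is conventionally encoded in the low two bits of the ELF header's e_flags (2 selects ELFv2; 0 and 1 are treated as ELFv1 here). A hedged sketch of that mapping, assuming a raw e_flags value has already been read from the object:

    #include <cstdint>

    // Sketch (assumption, not taken from this patch): derive the PPC64 stub
    // variant from the ELF header flags. A value of 2 in the low two bits
    // selects the ELFv2 stub; anything else falls back to ELFv1.
    static unsigned ppc64AbiVariant(uint32_t EFlags) {
      return (EFlags & 0x3) == 2 ? 2 : 1;
    }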
} else if (Arch == Triple::systemz) {
writeInt16BE(Addr, 0xC418); // lgrl %r1,.+8
@@ -609,6 +669,10 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
// Addr is a uint64_t because we can't assume the pointer width
// of the target is the same as that of the host. Just use a generic
// "big enough" type.
+ DEBUG(dbgs() << "Reassigning address for section "
+ << SectionID << " (" << Sections[SectionID].Name << "): "
+ << format("0x%016" PRIx64, Sections[SectionID].LoadAddress) << " -> "
+ << format("0x%016" PRIx64, Addr) << "\n");
Sections[SectionID].LoadAddress = Addr;
}
@@ -685,25 +749,31 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
Dyld = nullptr;
MM = mm;
ProcessAllSections = false;
+ Checker = nullptr;
}
-RuntimeDyld::~RuntimeDyld() { delete Dyld; }
+RuntimeDyld::~RuntimeDyld() {}
static std::unique_ptr<RuntimeDyldELF>
-createRuntimeDyldELF(RTDyldMemoryManager *MM, bool ProcessAllSections) {
+createRuntimeDyldELF(RTDyldMemoryManager *MM, bool ProcessAllSections,
+ RuntimeDyldCheckerImpl *Checker) {
std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM));
Dyld->setProcessAllSections(ProcessAllSections);
+ Dyld->setRuntimeDyldChecker(Checker);
return Dyld;
}
static std::unique_ptr<RuntimeDyldMachO>
-createRuntimeDyldMachO(RTDyldMemoryManager *MM, bool ProcessAllSections) {
- std::unique_ptr<RuntimeDyldMachO> Dyld(new RuntimeDyldMachO(MM));
+createRuntimeDyldMachO(Triple::ArchType Arch, RTDyldMemoryManager *MM,
+ bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
+ std::unique_ptr<RuntimeDyldMachO> Dyld(RuntimeDyldMachO::create(Arch, MM));
Dyld->setProcessAllSections(ProcessAllSections);
+ Dyld->setRuntimeDyldChecker(Checker);
return Dyld;
}
-ObjectImage *RuntimeDyld::loadObject(std::unique_ptr<ObjectFile> InputObject) {
+std::unique_ptr<ObjectImage>
+RuntimeDyld::loadObject(std::unique_ptr<ObjectFile> InputObject) {
std::unique_ptr<ObjectImage> InputImage;
ObjectFile &Obj = *InputObject;
@@ -711,33 +781,37 @@ ObjectImage *RuntimeDyld::loadObject(std::unique_ptr<ObjectFile> InputObject) {
if (InputObject->isELF()) {
InputImage.reset(RuntimeDyldELF::createObjectImageFromFile(std::move(InputObject)));
if (!Dyld)
- Dyld = createRuntimeDyldELF(MM, ProcessAllSections).release();
+ Dyld = createRuntimeDyldELF(MM, ProcessAllSections, Checker);
} else if (InputObject->isMachO()) {
InputImage.reset(RuntimeDyldMachO::createObjectImageFromFile(std::move(InputObject)));
if (!Dyld)
- Dyld = createRuntimeDyldMachO(MM, ProcessAllSections).release();
+ Dyld = createRuntimeDyldMachO(
+ static_cast<Triple::ArchType>(InputImage->getArch()), MM,
+ ProcessAllSections, Checker);
} else
report_fatal_error("Incompatible object format!");
if (!Dyld->isCompatibleFile(&Obj))
report_fatal_error("Incompatible object format!");
- Dyld->loadObject(InputImage.get());
- return InputImage.release();
+ return Dyld->loadObject(std::move(InputImage));
}
-ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) {
+std::unique_ptr<ObjectImage>
+RuntimeDyld::loadObject(std::unique_ptr<ObjectBuffer> InputBuffer) {
std::unique_ptr<ObjectImage> InputImage;
sys::fs::file_magic Type = sys::fs::identify_magic(InputBuffer->getBuffer());
+ auto *InputBufferPtr = InputBuffer.get();
switch (Type) {
+ case sys::fs::file_magic::elf:
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::elf_executable:
case sys::fs::file_magic::elf_shared_object:
case sys::fs::file_magic::elf_core:
- InputImage.reset(RuntimeDyldELF::createObjectImage(InputBuffer));
+ InputImage = RuntimeDyldELF::createObjectImage(std::move(InputBuffer));
if (!Dyld)
- Dyld = createRuntimeDyldELF(MM, ProcessAllSections).release();
+ Dyld = createRuntimeDyldELF(MM, ProcessAllSections, Checker);
break;
case sys::fs::file_magic::macho_object:
case sys::fs::file_magic::macho_executable:
@@ -749,9 +823,11 @@ ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) {
case sys::fs::file_magic::macho_bundle:
case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
case sys::fs::file_magic::macho_dsym_companion:
- InputImage.reset(RuntimeDyldMachO::createObjectImage(InputBuffer));
+ InputImage = RuntimeDyldMachO::createObjectImage(std::move(InputBuffer));
if (!Dyld)
- Dyld = createRuntimeDyldMachO(MM, ProcessAllSections).release();
+ Dyld = createRuntimeDyldMachO(
+ static_cast<Triple::ArchType>(InputImage->getArch()), MM,
+ ProcessAllSections, Checker);
break;
case sys::fs::file_magic::unknown:
case sys::fs::file_magic::bitcode:
@@ -764,20 +840,19 @@ ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) {
report_fatal_error("Incompatible object format!");
}
- if (!Dyld->isCompatibleFormat(InputBuffer))
+ if (!Dyld->isCompatibleFormat(InputBufferPtr))
report_fatal_error("Incompatible object format!");
- Dyld->loadObject(InputImage.get());
- return InputImage.release();
+ return Dyld->loadObject(std::move(InputImage));
}
-void *RuntimeDyld::getSymbolAddress(StringRef Name) {
+void *RuntimeDyld::getSymbolAddress(StringRef Name) const {
if (!Dyld)
return nullptr;
return Dyld->getSymbolAddress(Name);
}
-uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) {
+uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) const {
if (!Dyld)
return 0;
return Dyld->getSymbolLoadAddress(Name);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index 190bbbf..8818349 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -7,11 +7,13 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/StringRefMemoryObject.h"
+#include "llvm/Support/Path.h"
+#include "RuntimeDyldCheckerImpl.h"
#include "RuntimeDyldImpl.h"
#include <cctype>
#include <memory>
@@ -22,579 +24,696 @@ using namespace llvm;
namespace llvm {
- // Helper class that implements the language evaluated by RuntimeDyldChecker.
- class RuntimeDyldCheckerExprEval {
- public:
-
- RuntimeDyldCheckerExprEval(const RuntimeDyldChecker &Checker,
- llvm::raw_ostream &ErrStream)
- : Checker(Checker), ErrStream(ErrStream) {}
-
- bool evaluate(StringRef Expr) const {
- // Expect equality expression of the form 'LHS = RHS'.
- Expr = Expr.trim();
- size_t EQIdx = Expr.find('=');
-
- // Evaluate LHS.
- StringRef LHSExpr = Expr.substr(0, EQIdx).rtrim();
- StringRef RemainingExpr;
- EvalResult LHSResult;
- std::tie(LHSResult, RemainingExpr) =
- evalComplexExpr(evalSimpleExpr(LHSExpr));
- if (LHSResult.hasError())
- return handleError(Expr, LHSResult);
- if (RemainingExpr != "")
- return handleError(Expr, unexpectedToken(RemainingExpr, LHSExpr, ""));
-
- // Evaluate RHS.
- StringRef RHSExpr = Expr.substr(EQIdx + 1).ltrim();
- EvalResult RHSResult;
- std::tie(RHSResult, RemainingExpr) =
- evalComplexExpr(evalSimpleExpr(RHSExpr));
- if (RHSResult.hasError())
- return handleError(Expr, RHSResult);
- if (RemainingExpr != "")
- return handleError(Expr, unexpectedToken(RemainingExpr, RHSExpr, ""));
-
- if (LHSResult.getValue() != RHSResult.getValue()) {
- ErrStream << "Expression '" << Expr << "' is false: "
- << format("0x%lx", LHSResult.getValue()) << " != "
- << format("0x%lx", RHSResult.getValue()) << "\n";
- return false;
- }
- return true;
+// Helper class that implements the language evaluated by RuntimeDyldChecker.
+class RuntimeDyldCheckerExprEval {
+public:
+ RuntimeDyldCheckerExprEval(const RuntimeDyldCheckerImpl &Checker,
+ raw_ostream &ErrStream)
+ : Checker(Checker) {}
+
+ bool evaluate(StringRef Expr) const {
+ // Expect equality expression of the form 'LHS = RHS'.
+ Expr = Expr.trim();
+ size_t EQIdx = Expr.find('=');
+
+ ParseContext OutsideLoad(false);
+
+ // Evaluate LHS.
+ StringRef LHSExpr = Expr.substr(0, EQIdx).rtrim();
+ StringRef RemainingExpr;
+ EvalResult LHSResult;
+ std::tie(LHSResult, RemainingExpr) =
+ evalComplexExpr(evalSimpleExpr(LHSExpr, OutsideLoad), OutsideLoad);
+ if (LHSResult.hasError())
+ return handleError(Expr, LHSResult);
+ if (RemainingExpr != "")
+ return handleError(Expr, unexpectedToken(RemainingExpr, LHSExpr, ""));
+
+ // Evaluate RHS.
+ StringRef RHSExpr = Expr.substr(EQIdx + 1).ltrim();
+ EvalResult RHSResult;
+ std::tie(RHSResult, RemainingExpr) =
+ evalComplexExpr(evalSimpleExpr(RHSExpr, OutsideLoad), OutsideLoad);
+ if (RHSResult.hasError())
+ return handleError(Expr, RHSResult);
+ if (RemainingExpr != "")
+ return handleError(Expr, unexpectedToken(RemainingExpr, RHSExpr, ""));
+
+ if (LHSResult.getValue() != RHSResult.getValue()) {
+ Checker.ErrStream << "Expression '" << Expr << "' is false: "
+ << format("0x%" PRIx64, LHSResult.getValue())
+ << " != " << format("0x%" PRIx64, RHSResult.getValue())
+ << "\n";
+ return false;
}
+ return true;
+ }
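evaluate() accepts exactly one top-level equality; arithmetic, dereferences, bit-slices, and the builtin calls all live inside the two operands. For illustration, a hypothetical use of the evaluator — the expressions are examples of the grammar, not taken from any existing test:

    // Hypothetical usage sketch; assumes Eval is a constructed
    // RuntimeDyldCheckerExprEval. Each string must have the form 'LHS = RHS'.
    bool A = Eval.evaluate("decode_operand(foo, 0) = 0x2a");
    bool B = Eval.evaluate("*{8}(bar + 8) = next_pc(foo)");
    bool C = Eval.evaluate("(section_addr(file.o, .text))[15:0] = 0x1234");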
+
+private:
+ // RuntimeDyldCheckerExprEval requires some context when parsing exprs. In
+ // particular, it needs to know whether a symbol is being evaluated in the
+ // context of a load, in which case we want the linker's local address for
+ // the symbol, or outside of a load, in which case we want the symbol's
+ // address in the remote target.
+
+ struct ParseContext {
+ bool IsInsideLoad;
+ ParseContext(bool IsInsideLoad) : IsInsideLoad(IsInsideLoad) {}
+ };
+
+ const RuntimeDyldCheckerImpl &Checker;
+
+ enum class BinOpToken : unsigned {
+ Invalid,
+ Add,
+ Sub,
+ BitwiseAnd,
+ BitwiseOr,
+ ShiftLeft,
+ ShiftRight
+ };
+
+ class EvalResult {
+ public:
+ EvalResult() : Value(0), ErrorMsg("") {}
+ EvalResult(uint64_t Value) : Value(Value), ErrorMsg("") {}
+ EvalResult(std::string ErrorMsg) : Value(0), ErrorMsg(ErrorMsg) {}
+ uint64_t getValue() const { return Value; }
+ bool hasError() const { return ErrorMsg != ""; }
+ const std::string &getErrorMsg() const { return ErrorMsg; }
private:
- const RuntimeDyldChecker &Checker;
- llvm::raw_ostream &ErrStream;
-
- enum class BinOpToken : unsigned { Invalid, Add, Sub, BitwiseAnd,
- BitwiseOr, ShiftLeft, ShiftRight };
-
- class EvalResult {
- public:
- EvalResult()
- : Value(0), ErrorMsg("") {}
- EvalResult(uint64_t Value)
- : Value(Value), ErrorMsg("") {}
- EvalResult(std::string ErrorMsg)
- : Value(0), ErrorMsg(ErrorMsg) {}
- uint64_t getValue() const { return Value; }
- bool hasError() const { return ErrorMsg != ""; }
- const std::string& getErrorMsg() const { return ErrorMsg; }
- private:
- uint64_t Value;
- std::string ErrorMsg;
- };
-
- StringRef getTokenForError(StringRef Expr) const {
- if (Expr.empty())
- return "";
-
- StringRef Token, Remaining;
- if (isalpha(Expr[0]))
- std::tie(Token, Remaining) = parseSymbol(Expr);
- else if (isdigit(Expr[0]))
- std::tie(Token, Remaining) = parseNumberString(Expr);
- else {
- unsigned TokLen = 1;
- if (Expr.startswith("<<") || Expr.startswith(">>"))
- TokLen = 2;
- Token = Expr.substr(0, TokLen);
- }
- return Token;
- }
+ uint64_t Value;
+ std::string ErrorMsg;
+ };
- EvalResult unexpectedToken(StringRef TokenStart,
- StringRef SubExpr,
- StringRef ErrText) const {
- std::string ErrorMsg("Encountered unexpected token '");
- ErrorMsg += getTokenForError(TokenStart);
- if (SubExpr != "") {
- ErrorMsg += "' while parsing subexpression '";
- ErrorMsg += SubExpr;
- }
- ErrorMsg += "'";
- if (ErrText != "") {
- ErrorMsg += " ";
- ErrorMsg += ErrText;
- }
- return EvalResult(std::move(ErrorMsg));
+ StringRef getTokenForError(StringRef Expr) const {
+ if (Expr.empty())
+ return "";
+
+ StringRef Token, Remaining;
+ if (isalpha(Expr[0]))
+ std::tie(Token, Remaining) = parseSymbol(Expr);
+ else if (isdigit(Expr[0]))
+ std::tie(Token, Remaining) = parseNumberString(Expr);
+ else {
+ unsigned TokLen = 1;
+ if (Expr.startswith("<<") || Expr.startswith(">>"))
+ TokLen = 2;
+ Token = Expr.substr(0, TokLen);
}
+ return Token;
+ }
- bool handleError(StringRef Expr, const EvalResult &R) const {
- assert(R.hasError() && "Not an error result.");
- ErrStream << "Error evaluating expression '" << Expr << "': "
- << R.getErrorMsg() << "\n";
- return false;
+ EvalResult unexpectedToken(StringRef TokenStart, StringRef SubExpr,
+ StringRef ErrText) const {
+ std::string ErrorMsg("Encountered unexpected token '");
+ ErrorMsg += getTokenForError(TokenStart);
+ if (SubExpr != "") {
+ ErrorMsg += "' while parsing subexpression '";
+ ErrorMsg += SubExpr;
}
+ ErrorMsg += "'";
+ if (ErrText != "") {
+ ErrorMsg += " ";
+ ErrorMsg += ErrText;
+ }
+ return EvalResult(std::move(ErrorMsg));
+ }
- std::pair<BinOpToken, StringRef> parseBinOpToken(StringRef Expr) const {
- if (Expr.empty())
- return std::make_pair(BinOpToken::Invalid, "");
-
- // Handle the two 2-character tokens.
- if (Expr.startswith("<<"))
- return std::make_pair(BinOpToken::ShiftLeft,
- Expr.substr(2).ltrim());
- if (Expr.startswith(">>"))
- return std::make_pair(BinOpToken::ShiftRight,
- Expr.substr(2).ltrim());
-
- // Handle one-character tokens.
- BinOpToken Op;
- switch (Expr[0]) {
- default: return std::make_pair(BinOpToken::Invalid, Expr);
- case '+': Op = BinOpToken::Add; break;
- case '-': Op = BinOpToken::Sub; break;
- case '&': Op = BinOpToken::BitwiseAnd; break;
- case '|': Op = BinOpToken::BitwiseOr; break;
- }
+ bool handleError(StringRef Expr, const EvalResult &R) const {
+ assert(R.hasError() && "Not an error result.");
+ Checker.ErrStream << "Error evaluating expression '" << Expr
+ << "': " << R.getErrorMsg() << "\n";
+ return false;
+ }
- return std::make_pair(Op, Expr.substr(1).ltrim());
+ std::pair<BinOpToken, StringRef> parseBinOpToken(StringRef Expr) const {
+ if (Expr.empty())
+ return std::make_pair(BinOpToken::Invalid, "");
+
+ // Handle the two 2-character tokens.
+ if (Expr.startswith("<<"))
+ return std::make_pair(BinOpToken::ShiftLeft, Expr.substr(2).ltrim());
+ if (Expr.startswith(">>"))
+ return std::make_pair(BinOpToken::ShiftRight, Expr.substr(2).ltrim());
+
+ // Handle one-character tokens.
+ BinOpToken Op;
+ switch (Expr[0]) {
+ default:
+ return std::make_pair(BinOpToken::Invalid, Expr);
+ case '+':
+ Op = BinOpToken::Add;
+ break;
+ case '-':
+ Op = BinOpToken::Sub;
+ break;
+ case '&':
+ Op = BinOpToken::BitwiseAnd;
+ break;
+ case '|':
+ Op = BinOpToken::BitwiseOr;
+ break;
}
- EvalResult computeBinOpResult(BinOpToken Op, const EvalResult &LHSResult,
- const EvalResult &RHSResult) const {
- switch (Op) {
- default: llvm_unreachable("Tried to evaluate unrecognized operation.");
- case BinOpToken::Add:
- return EvalResult(LHSResult.getValue() + RHSResult.getValue());
- case BinOpToken::Sub:
- return EvalResult(LHSResult.getValue() - RHSResult.getValue());
- case BinOpToken::BitwiseAnd:
- return EvalResult(LHSResult.getValue() & RHSResult.getValue());
- case BinOpToken::BitwiseOr:
- return EvalResult(LHSResult.getValue() | RHSResult.getValue());
- case BinOpToken::ShiftLeft:
- return EvalResult(LHSResult.getValue() << RHSResult.getValue());
- case BinOpToken::ShiftRight:
- return EvalResult(LHSResult.getValue() >> RHSResult.getValue());
- }
- }
+ return std::make_pair(Op, Expr.substr(1).ltrim());
+ }
- // Parse a symbol and return a (string, string) pair representing the symbol
- // name and expression remaining to be parsed.
- std::pair<StringRef, StringRef> parseSymbol(StringRef Expr) const {
- size_t FirstNonSymbol =
- Expr.find_first_not_of("0123456789"
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- ":_");
- return std::make_pair(Expr.substr(0, FirstNonSymbol),
- Expr.substr(FirstNonSymbol).ltrim());
+ EvalResult computeBinOpResult(BinOpToken Op, const EvalResult &LHSResult,
+ const EvalResult &RHSResult) const {
+ switch (Op) {
+ default:
+ llvm_unreachable("Tried to evaluate unrecognized operation.");
+ case BinOpToken::Add:
+ return EvalResult(LHSResult.getValue() + RHSResult.getValue());
+ case BinOpToken::Sub:
+ return EvalResult(LHSResult.getValue() - RHSResult.getValue());
+ case BinOpToken::BitwiseAnd:
+ return EvalResult(LHSResult.getValue() & RHSResult.getValue());
+ case BinOpToken::BitwiseOr:
+ return EvalResult(LHSResult.getValue() | RHSResult.getValue());
+ case BinOpToken::ShiftLeft:
+ return EvalResult(LHSResult.getValue() << RHSResult.getValue());
+ case BinOpToken::ShiftRight:
+ return EvalResult(LHSResult.getValue() >> RHSResult.getValue());
}
+ }
- // Evaluate a call to decode_operand. Decode the instruction operand at the
- // given symbol and get the value of the requested operand.
- // Returns an error if the instruction cannot be decoded, or the requested
- // operand is not an immediate.
- // On success, retuns a pair containing the value of the operand, plus
- // the expression remaining to be evaluated.
- std::pair<EvalResult, StringRef> evalDecodeOperand(StringRef Expr) const {
- if (!Expr.startswith("("))
- return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
- StringRef RemainingExpr = Expr.substr(1).ltrim();
- StringRef Symbol;
- std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
-
- if (!Checker.checkSymbolIsValidForLoad(Symbol))
- return std::make_pair(EvalResult(("Cannot decode unknown symbol '" +
- Symbol + "'").str()),
- "");
-
- if (!RemainingExpr.startswith(","))
- return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
- "expected ','"),
- "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
-
- EvalResult OpIdxExpr;
- std::tie(OpIdxExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
- if (OpIdxExpr.hasError())
- return std::make_pair(OpIdxExpr, "");
-
- if (!RemainingExpr.startswith(")"))
- return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
- "expected ')'"),
- "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
-
- MCInst Inst;
- uint64_t Size;
- if (!decodeInst(Symbol, Inst, Size))
- return std::make_pair(EvalResult(("Couldn't decode instruction at '" +
- Symbol + "'").str()),
- "");
-
- unsigned OpIdx = OpIdxExpr.getValue();
- if (OpIdx >= Inst.getNumOperands()) {
- std::string ErrMsg;
- raw_string_ostream ErrMsgStream(ErrMsg);
- ErrMsgStream << "Invalid operand index '" << format("%i", OpIdx)
- << " for instruction '" << Symbol
- << ". Instruction has only "
- << format("%i", Inst.getNumOperands()) << " operands.";
- return std::make_pair(EvalResult(ErrMsgStream.str()), "");
- }
+ // Parse a symbol and return a (string, string) pair representing the symbol
+ // name and expression remaining to be parsed.
+ std::pair<StringRef, StringRef> parseSymbol(StringRef Expr) const {
+ size_t FirstNonSymbol = Expr.find_first_not_of("0123456789"
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ ":_.$");
+ return std::make_pair(Expr.substr(0, FirstNonSymbol),
+ Expr.substr(FirstNonSymbol).ltrim());
+ }
- const MCOperand &Op = Inst.getOperand(OpIdx);
- if (!Op.isImm()) {
- std::string ErrMsg;
- raw_string_ostream ErrMsgStream(ErrMsg);
- ErrMsgStream << "Operand '" << format("%i", OpIdx)
- << "' of instruction '" << Symbol
- << "' is not an immediate.\nInstruction is:\n ";
- Inst.dump_pretty(ErrMsgStream,
- Checker.Disassembler->getContext().getAsmInfo(),
- Checker.InstPrinter);
-
- return std::make_pair(EvalResult(ErrMsgStream.str()), "");
- }
+ // Evaluate a call to decode_operand. Decode the instruction operand at the
+ // given symbol and get the value of the requested operand.
+ // Returns an error if the instruction cannot be decoded, or the requested
+ // operand is not an immediate.
+  // On success, returns a pair containing the value of the operand, plus
+ // the expression remaining to be evaluated.
+ std::pair<EvalResult, StringRef> evalDecodeOperand(StringRef Expr) const {
+ if (!Expr.startswith("("))
+ return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
+ StringRef RemainingExpr = Expr.substr(1).ltrim();
+ StringRef Symbol;
+ std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
+
+ if (!Checker.isSymbolValid(Symbol))
+ return std::make_pair(
+ EvalResult(("Cannot decode unknown symbol '" + Symbol + "'").str()),
+ "");
- return std::make_pair(EvalResult(Op.getImm()), RemainingExpr);
- }
+ if (!RemainingExpr.startswith(","))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, RemainingExpr, "expected ','"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
- // Evaluate a call to next_pc. Decode the instruction at the given
- // symbol and return the following program counter..
- // Returns an error if the instruction cannot be decoded.
- // On success, returns a pair containing the next PC, plus the length of the
- // expression remaining to be evaluated.
- std::pair<EvalResult, StringRef> evalNextPC(StringRef Expr) const {
- if (!Expr.startswith("("))
- return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
- StringRef RemainingExpr = Expr.substr(1).ltrim();
- StringRef Symbol;
- std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
-
- if (!Checker.checkSymbolIsValidForLoad(Symbol))
- return std::make_pair(EvalResult(("Cannot decode unknown symbol '"
- + Symbol + "'").str()),
- "");
-
- if (!RemainingExpr.startswith(")"))
- return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
- "expected ')'"),
- "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
-
- MCInst Inst;
- uint64_t Size;
- if (!decodeInst(Symbol, Inst, Size))
- return std::make_pair(EvalResult(("Couldn't decode instruction at '" +
- Symbol + "'").str()),
- "");
- uint64_t NextPC = Checker.getSymbolAddress(Symbol) + Size;
-
- return std::make_pair(EvalResult(NextPC), RemainingExpr);
- }
+ EvalResult OpIdxExpr;
+ std::tie(OpIdxExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+ if (OpIdxExpr.hasError())
+ return std::make_pair(OpIdxExpr, "");
- // Evaluate an identiefer expr, which may be a symbol, or a call to
- // one of the builtin functions: get_insn_opcode or get_insn_length.
- // Return the result, plus the expression remaining to be parsed.
- std::pair<EvalResult, StringRef> evalIdentifierExpr(StringRef Expr) const {
- StringRef Symbol;
- StringRef RemainingExpr;
- std::tie(Symbol, RemainingExpr) = parseSymbol(Expr);
-
- // Check for builtin function calls.
- if (Symbol == "decode_operand")
- return evalDecodeOperand(RemainingExpr);
- else if (Symbol == "next_pc")
- return evalNextPC(RemainingExpr);
-
- // Looks like a plain symbol reference.
- return std::make_pair(EvalResult(Checker.getSymbolAddress(Symbol)),
- RemainingExpr);
- }
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, RemainingExpr, "expected ')'"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
- // Parse a number (hexadecimal or decimal) and return a (string, string)
- // pair representing the number and the expression remaining to be parsed.
- std::pair<StringRef, StringRef> parseNumberString(StringRef Expr) const {
- size_t FirstNonDigit = StringRef::npos;
- if (Expr.startswith("0x")) {
- FirstNonDigit = Expr.find_first_not_of("0123456789abcdefABCDEF", 2);
- if (FirstNonDigit == StringRef::npos)
- FirstNonDigit = Expr.size();
- } else {
- FirstNonDigit = Expr.find_first_not_of("0123456789");
- if (FirstNonDigit == StringRef::npos)
- FirstNonDigit = Expr.size();
- }
- return std::make_pair(Expr.substr(0, FirstNonDigit),
- Expr.substr(FirstNonDigit));
+ MCInst Inst;
+ uint64_t Size;
+ if (!decodeInst(Symbol, Inst, Size))
+ return std::make_pair(
+ EvalResult(("Couldn't decode instruction at '" + Symbol + "'").str()),
+ "");
+
+ unsigned OpIdx = OpIdxExpr.getValue();
+ if (OpIdx >= Inst.getNumOperands()) {
+ std::string ErrMsg;
+ raw_string_ostream ErrMsgStream(ErrMsg);
+ ErrMsgStream << "Invalid operand index '" << format("%i", OpIdx)
+ << "' for instruction '" << Symbol
+ << "'. Instruction has only "
+ << format("%i", Inst.getNumOperands())
+ << " operands.\nInstruction is:\n ";
+ Inst.dump_pretty(ErrMsgStream,
+ Checker.Disassembler->getContext().getAsmInfo(),
+ Checker.InstPrinter);
+ return std::make_pair(EvalResult(ErrMsgStream.str()), "");
}
- // Evaluate a constant numeric expression (hexidecimal or decimal) and
- // return a pair containing the result, and the expression remaining to be
- // evaluated.
- std::pair<EvalResult, StringRef> evalNumberExpr(StringRef Expr) const {
- StringRef ValueStr;
- StringRef RemainingExpr;
- std::tie(ValueStr, RemainingExpr) = parseNumberString(Expr);
-
- if (ValueStr.empty() || !isdigit(ValueStr[0]))
- return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
- "expected number"),
- "");
- uint64_t Value;
- ValueStr.getAsInteger(0, Value);
- return std::make_pair(EvalResult(Value), RemainingExpr);
+ const MCOperand &Op = Inst.getOperand(OpIdx);
+ if (!Op.isImm()) {
+ std::string ErrMsg;
+ raw_string_ostream ErrMsgStream(ErrMsg);
+ ErrMsgStream << "Operand '" << format("%i", OpIdx) << "' of instruction '"
+ << Symbol << "' is not an immediate.\nInstruction is:\n ";
+ Inst.dump_pretty(ErrMsgStream,
+ Checker.Disassembler->getContext().getAsmInfo(),
+ Checker.InstPrinter);
+
+ return std::make_pair(EvalResult(ErrMsgStream.str()), "");
}
- // Evaluate an expression of the form "(<expr>)" and return a pair
- // containing the result of evaluating <expr>, plus the expression
- // remaining to be parsed.
- std::pair<EvalResult, StringRef> evalParensExpr(StringRef Expr) const {
- assert(Expr.startswith("(") && "Not a parenthesized expression");
- EvalResult SubExprResult;
- StringRef RemainingExpr;
- std::tie(SubExprResult, RemainingExpr) =
- evalComplexExpr(evalSimpleExpr(Expr.substr(1).ltrim()));
- if (SubExprResult.hasError())
- return std::make_pair(SubExprResult, "");
- if (!RemainingExpr.startswith(")"))
- return std::make_pair(unexpectedToken(RemainingExpr, Expr,
- "expected ')'"),
- "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
- return std::make_pair(SubExprResult, RemainingExpr);
- }
+ return std::make_pair(EvalResult(Op.getImm()), RemainingExpr);
+ }
- // Evaluate an expression in one of the following forms:
- // *{<number>}<symbol>
- // *{<number>}(<symbol> + <number>)
- // *{<number>}(<symbol> - <number>)
- // Return a pair containing the result, plus the expression remaining to be
- // parsed.
- std::pair<EvalResult, StringRef> evalLoadExpr(StringRef Expr) const {
- assert(Expr.startswith("*") && "Not a load expression");
- StringRef RemainingExpr = Expr.substr(1).ltrim();
- // Parse read size.
- if (!RemainingExpr.startswith("{"))
- return std::make_pair(EvalResult("Expected '{' following '*'."), "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
- EvalResult ReadSizeExpr;
- std::tie(ReadSizeExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
- if (ReadSizeExpr.hasError())
- return std::make_pair(ReadSizeExpr, RemainingExpr);
- uint64_t ReadSize = ReadSizeExpr.getValue();
- if (ReadSize < 1 || ReadSize > 8)
- return std::make_pair(EvalResult("Invalid size for dereference."), "");
- if (!RemainingExpr.startswith("}"))
- return std::make_pair(EvalResult("Missing '}' for dereference."), "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
-
- // Check for '(symbol +/- constant)' form.
- bool SymbolPlusConstant = false;
- if (RemainingExpr.startswith("(")) {
- SymbolPlusConstant = true;
- RemainingExpr = RemainingExpr.substr(1).ltrim();
- }
+ // Evaluate a call to next_pc.
+ // Decode the instruction at the given symbol and return the following program
+ // counter.
+ // Returns an error if the instruction cannot be decoded.
+  // On success, returns a pair containing the next PC, plus the
+ // expression remaining to be evaluated.
+ std::pair<EvalResult, StringRef> evalNextPC(StringRef Expr,
+ ParseContext PCtx) const {
+ if (!Expr.startswith("("))
+ return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
+ StringRef RemainingExpr = Expr.substr(1).ltrim();
+ StringRef Symbol;
+ std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
+
+ if (!Checker.isSymbolValid(Symbol))
+ return std::make_pair(
+ EvalResult(("Cannot decode unknown symbol '" + Symbol + "'").str()),
+ "");
+
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, RemainingExpr, "expected ')'"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ MCInst Inst;
+ uint64_t InstSize;
+ if (!decodeInst(Symbol, Inst, InstSize))
+ return std::make_pair(
+ EvalResult(("Couldn't decode instruction at '" + Symbol + "'").str()),
+ "");
+
+ uint64_t SymbolAddr = PCtx.IsInsideLoad
+ ? Checker.getSymbolLinkerAddr(Symbol)
+ : Checker.getSymbolRemoteAddr(Symbol);
+ uint64_t NextPC = SymbolAddr + InstSize;
- // Read symbol.
- StringRef Symbol;
- std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
+ return std::make_pair(EvalResult(NextPC), RemainingExpr);
+ }
- if (!Checker.checkSymbolIsValidForLoad(Symbol))
- return std::make_pair(EvalResult(("Cannot dereference unknown symbol '"
- + Symbol + "'").str()),
- "");
+ // Evaluate a call to stub_addr.
+ // Look up and return the address of the stub for the given
+ // (<file name>, <section name>, <symbol name>) tuple.
+ // On success, returns a pair containing the stub address, plus the expression
+ // remaining to be evaluated.
+ std::pair<EvalResult, StringRef> evalStubAddr(StringRef Expr,
+ ParseContext PCtx) const {
+ if (!Expr.startswith("("))
+ return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
+ StringRef RemainingExpr = Expr.substr(1).ltrim();
+
+ // Handle file-name specially, as it may contain characters that aren't
+ // legal for symbols.
+ StringRef FileName;
+ size_t ComaIdx = RemainingExpr.find(',');
+ FileName = RemainingExpr.substr(0, ComaIdx).rtrim();
+ RemainingExpr = RemainingExpr.substr(ComaIdx).ltrim();
+
+ if (!RemainingExpr.startswith(","))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
- // Set up defaut offset.
- int64_t Offset = 0;
+ StringRef SectionName;
+ std::tie(SectionName, RemainingExpr) = parseSymbol(RemainingExpr);
- // Handle "+/- constant)" portion if necessary.
- if (SymbolPlusConstant) {
- char OpChar = RemainingExpr[0];
- if (OpChar != '+' && OpChar != '-')
- return std::make_pair(EvalResult("Invalid operator in load address."),
- "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
+ if (!RemainingExpr.startswith(","))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
- EvalResult OffsetExpr;
- std::tie(OffsetExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+ StringRef Symbol;
+ std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
- Offset = (OpChar == '+') ?
- OffsetExpr.getValue() : -1 * OffsetExpr.getValue();
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, Expr, "expected ')'"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
- if (!RemainingExpr.startswith(")"))
- return std::make_pair(EvalResult("Missing ')' in load address."),
- "");
+ uint64_t StubAddr;
+ std::string ErrorMsg = "";
+ std::tie(StubAddr, ErrorMsg) = Checker.getStubAddrFor(
+ FileName, SectionName, Symbol, PCtx.IsInsideLoad);
- RemainingExpr = RemainingExpr.substr(1).ltrim();
- }
+ if (ErrorMsg != "")
+ return std::make_pair(EvalResult(ErrorMsg), "");
+ return std::make_pair(EvalResult(StubAddr), RemainingExpr);
+ }
+
+ std::pair<EvalResult, StringRef> evalSectionAddr(StringRef Expr,
+ ParseContext PCtx) const {
+ if (!Expr.startswith("("))
+ return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
+ StringRef RemainingExpr = Expr.substr(1).ltrim();
+
+ // Handle file-name specially, as it may contain characters that aren't
+ // legal for symbols.
+ StringRef FileName;
+ size_t ComaIdx = RemainingExpr.find(',');
+ FileName = RemainingExpr.substr(0, ComaIdx).rtrim();
+ RemainingExpr = RemainingExpr.substr(ComaIdx).ltrim();
+
+ if (!RemainingExpr.startswith(","))
return std::make_pair(
- EvalResult(Checker.readMemoryAtSymbol(Symbol, Offset, ReadSize)),
- RemainingExpr);
+ unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ StringRef SectionName;
+ std::tie(SectionName, RemainingExpr) = parseSymbol(RemainingExpr);
+
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, Expr, "expected ')'"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ uint64_t StubAddr;
+ std::string ErrorMsg = "";
+ std::tie(StubAddr, ErrorMsg) = Checker.getSectionAddr(
+ FileName, SectionName, PCtx.IsInsideLoad);
+
+ if (ErrorMsg != "")
+ return std::make_pair(EvalResult(ErrorMsg), "");
+
+ return std::make_pair(EvalResult(StubAddr), RemainingExpr);
+ }
+
+  // Evaluate an identifier expr, which may be a symbol, or a call to one of
+  // the builtin functions: decode_operand, next_pc, stub_addr, section_addr.
+ // Return the result, plus the expression remaining to be parsed.
+ std::pair<EvalResult, StringRef> evalIdentifierExpr(StringRef Expr,
+ ParseContext PCtx) const {
+ StringRef Symbol;
+ StringRef RemainingExpr;
+ std::tie(Symbol, RemainingExpr) = parseSymbol(Expr);
+
+ // Check for builtin function calls.
+ if (Symbol == "decode_operand")
+ return evalDecodeOperand(RemainingExpr);
+ else if (Symbol == "next_pc")
+ return evalNextPC(RemainingExpr, PCtx);
+ else if (Symbol == "stub_addr")
+ return evalStubAddr(RemainingExpr, PCtx);
+ else if (Symbol == "section_addr")
+ return evalSectionAddr(RemainingExpr, PCtx);
+
+ if (!Checker.isSymbolValid(Symbol)) {
+ std::string ErrMsg("No known address for symbol '");
+ ErrMsg += Symbol;
+ ErrMsg += "'";
+ if (Symbol.startswith("L"))
+ ErrMsg += " (this appears to be an assembler local label - "
+ " perhaps drop the 'L'?)";
+
+ return std::make_pair(EvalResult(ErrMsg), "");
}
- // Evaluate a "simple" expression. This is any expression that _isn't_ an
- // un-parenthesized binary expression.
- //
- // "Simple" expressions can be optionally bit-sliced. See evalSlicedExpr.
- //
- // Returns a pair containing the result of the evaluation, plus the
- // expression remaining to be parsed.
- std::pair<EvalResult, StringRef> evalSimpleExpr(StringRef Expr) const {
- EvalResult SubExprResult;
- StringRef RemainingExpr;
-
- if (Expr.empty())
- return std::make_pair(EvalResult("Unexpected end of expression"), "");
-
- if (Expr[0] == '(')
- std::tie(SubExprResult, RemainingExpr) = evalParensExpr(Expr);
- else if (Expr[0] == '*')
- std::tie(SubExprResult, RemainingExpr) = evalLoadExpr(Expr);
- else if (isalpha(Expr[0]))
- std::tie(SubExprResult, RemainingExpr) = evalIdentifierExpr(Expr);
- else if (isdigit(Expr[0]))
- std::tie(SubExprResult, RemainingExpr) = evalNumberExpr(Expr);
-
- if (SubExprResult.hasError())
- return std::make_pair(SubExprResult, RemainingExpr);
-
- // Evaluate bit-slice if present.
- if (RemainingExpr.startswith("["))
- std::tie(SubExprResult, RemainingExpr) =
- evalSliceExpr(std::make_pair(SubExprResult, RemainingExpr));
+ // The value for the symbol depends on the context we're evaluating in:
+ // Inside a load this is the address in the linker's memory, outside a
+    // load it's the address in the target process's memory.
+ uint64_t Value = PCtx.IsInsideLoad ? Checker.getSymbolLinkerAddr(Symbol)
+ : Checker.getSymbolRemoteAddr(Symbol);
- return std::make_pair(SubExprResult, RemainingExpr);
+ // Looks like a plain symbol reference.
+ return std::make_pair(EvalResult(Value), RemainingExpr);
+ }
+
+ // Parse a number (hexadecimal or decimal) and return a (string, string)
+ // pair representing the number and the expression remaining to be parsed.
+ std::pair<StringRef, StringRef> parseNumberString(StringRef Expr) const {
+ size_t FirstNonDigit = StringRef::npos;
+ if (Expr.startswith("0x")) {
+ FirstNonDigit = Expr.find_first_not_of("0123456789abcdefABCDEF", 2);
+ if (FirstNonDigit == StringRef::npos)
+ FirstNonDigit = Expr.size();
+ } else {
+ FirstNonDigit = Expr.find_first_not_of("0123456789");
+ if (FirstNonDigit == StringRef::npos)
+ FirstNonDigit = Expr.size();
}
+ return std::make_pair(Expr.substr(0, FirstNonDigit),
+ Expr.substr(FirstNonDigit));
+ }
+
+  // Evaluate a constant numeric expression (hexadecimal or decimal) and
+ // return a pair containing the result, and the expression remaining to be
+ // evaluated.
+ std::pair<EvalResult, StringRef> evalNumberExpr(StringRef Expr) const {
+ StringRef ValueStr;
+ StringRef RemainingExpr;
+ std::tie(ValueStr, RemainingExpr) = parseNumberString(Expr);
+
+ if (ValueStr.empty() || !isdigit(ValueStr[0]))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, RemainingExpr, "expected number"), "");
+ uint64_t Value;
+ ValueStr.getAsInteger(0, Value);
+ return std::make_pair(EvalResult(Value), RemainingExpr);
+ }
+
+ // Evaluate an expression of the form "(<expr>)" and return a pair
+ // containing the result of evaluating <expr>, plus the expression
+ // remaining to be parsed.
+ std::pair<EvalResult, StringRef> evalParensExpr(StringRef Expr,
+ ParseContext PCtx) const {
+ assert(Expr.startswith("(") && "Not a parenthesized expression");
+ EvalResult SubExprResult;
+ StringRef RemainingExpr;
+ std::tie(SubExprResult, RemainingExpr) =
+ evalComplexExpr(evalSimpleExpr(Expr.substr(1).ltrim(), PCtx), PCtx);
+ if (SubExprResult.hasError())
+ return std::make_pair(SubExprResult, "");
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, Expr, "expected ')'"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+ return std::make_pair(SubExprResult, RemainingExpr);
+ }
- // Evaluate a bit-slice of an expression.
- // A bit-slice has the form "<expr>[high:low]". The result of evaluating a
- // slice is the bits between high and low (inclusive) in the original
- // expression, right shifted so that the "low" bit is in position 0 in the
+ // Evaluate an expression in one of the following forms:
+ // *{<number>}<expr>
+ // Return a pair containing the result, plus the expression remaining to be
+ // parsed.
+ std::pair<EvalResult, StringRef> evalLoadExpr(StringRef Expr) const {
+ assert(Expr.startswith("*") && "Not a load expression");
+ StringRef RemainingExpr = Expr.substr(1).ltrim();
+
+ // Parse read size.
+ if (!RemainingExpr.startswith("{"))
+ return std::make_pair(EvalResult("Expected '{' following '*'."), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+ EvalResult ReadSizeExpr;
+ std::tie(ReadSizeExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+ if (ReadSizeExpr.hasError())
+ return std::make_pair(ReadSizeExpr, RemainingExpr);
+ uint64_t ReadSize = ReadSizeExpr.getValue();
+ if (ReadSize < 1 || ReadSize > 8)
+ return std::make_pair(EvalResult("Invalid size for dereference."), "");
+ if (!RemainingExpr.startswith("}"))
+ return std::make_pair(EvalResult("Missing '}' for dereference."), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ // Evaluate the expression representing the load address.
+ ParseContext LoadCtx(true);
+ EvalResult LoadAddrExprResult;
+ std::tie(LoadAddrExprResult, RemainingExpr) =
+ evalComplexExpr(evalSimpleExpr(RemainingExpr, LoadCtx), LoadCtx);
+
+ if (LoadAddrExprResult.hasError())
+ return std::make_pair(LoadAddrExprResult, "");
+
+ uint64_t LoadAddr = LoadAddrExprResult.getValue();
+
+ return std::make_pair(
+ EvalResult(Checker.readMemoryAtAddr(LoadAddr, ReadSize)),
+ RemainingExpr);
+ }
+
+ // Evaluate a "simple" expression. This is any expression that _isn't_ an
+ // un-parenthesized binary expression.
+ //
+ // "Simple" expressions can be optionally bit-sliced. See evalSlicedExpr.
+ //
+ // Returns a pair containing the result of the evaluation, plus the
+ // expression remaining to be parsed.
+ std::pair<EvalResult, StringRef> evalSimpleExpr(StringRef Expr,
+ ParseContext PCtx) const {
+ EvalResult SubExprResult;
+ StringRef RemainingExpr;
+
+ if (Expr.empty())
+ return std::make_pair(EvalResult("Unexpected end of expression"), "");
+
+ if (Expr[0] == '(')
+ std::tie(SubExprResult, RemainingExpr) = evalParensExpr(Expr, PCtx);
+ else if (Expr[0] == '*')
+ std::tie(SubExprResult, RemainingExpr) = evalLoadExpr(Expr);
+ else if (isalpha(Expr[0]) || Expr[0] == '_')
+ std::tie(SubExprResult, RemainingExpr) = evalIdentifierExpr(Expr, PCtx);
+ else if (isdigit(Expr[0]))
+ std::tie(SubExprResult, RemainingExpr) = evalNumberExpr(Expr);
+ else
+ return std::make_pair(
+ unexpectedToken(Expr, Expr,
+ "expected '(', '*', identifier, or number"), "");
+
+ if (SubExprResult.hasError())
+ return std::make_pair(SubExprResult, RemainingExpr);
+
+ // Evaluate bit-slice if present.
+ if (RemainingExpr.startswith("["))
+ std::tie(SubExprResult, RemainingExpr) =
+ evalSliceExpr(std::make_pair(SubExprResult, RemainingExpr));
+
+ return std::make_pair(SubExprResult, RemainingExpr);
+ }
+
+ // Evaluate a bit-slice of an expression.
+ // A bit-slice has the form "<expr>[high:low]". The result of evaluating a
+ // slice is the bits between high and low (inclusive) in the original
+ // expression, right shifted so that the "low" bit is in position 0 in the
+ // result.
+ // Returns a pair containing the result of the slice operation, plus the
+ // expression remaining to be parsed.
+ std::pair<EvalResult, StringRef>
+ evalSliceExpr(std::pair<EvalResult, StringRef> Ctx) const {
+ EvalResult SubExprResult;
+ StringRef RemainingExpr;
+ std::tie(SubExprResult, RemainingExpr) = Ctx;
+
+ assert(RemainingExpr.startswith("[") && "Not a slice expr.");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ EvalResult HighBitExpr;
+ std::tie(HighBitExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+
+ if (HighBitExpr.hasError())
+ return std::make_pair(HighBitExpr, RemainingExpr);
+
+ if (!RemainingExpr.startswith(":"))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, RemainingExpr, "expected ':'"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ EvalResult LowBitExpr;
+ std::tie(LowBitExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+
+ if (LowBitExpr.hasError())
+ return std::make_pair(LowBitExpr, RemainingExpr);
+
+ if (!RemainingExpr.startswith("]"))
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, RemainingExpr, "expected ']'"), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ unsigned HighBit = HighBitExpr.getValue();
+ unsigned LowBit = LowBitExpr.getValue();
+ uint64_t Mask = ((uint64_t)1 << (HighBit - LowBit + 1)) - 1;
+ uint64_t SlicedValue = (SubExprResult.getValue() >> LowBit) & Mask;
+ return std::make_pair(EvalResult(SlicedValue), RemainingExpr);
+ }
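A slice keeps bits High..Low inclusive and shifts them down so the Low bit lands at position 0; the mask expression assumes the slice is narrower than 64 bits, since HighBit - LowBit + 1 == 64 would shift the 1 out of range. A minimal standalone sketch of the same computation:

    #include <cstdint>

    // Standalone illustration of the slice semantics: extract bits [High:Low]
    // (inclusive) of Value, right-justified. Assumes High >= Low and the slice
    // is narrower than 64 bits, matching the evaluator's mask computation.
    static uint64_t bitSlice(uint64_t Value, unsigned High, unsigned Low) {
      uint64_t Mask = ((uint64_t)1 << (High - Low + 1)) - 1;
      return (Value >> Low) & Mask;
    }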
+
+ // Evaluate a "complex" expression.
+ // Takes an already evaluated subexpression and checks for the presence of a
+ // binary operator, computing the result of the binary operation if one is
+ // found. Used to make arithmetic expressions left-associative.
+ // Returns a pair containing the ultimate result of evaluating the
+ // expression, plus the expression remaining to be evaluated.
+ std::pair<EvalResult, StringRef>
+ evalComplexExpr(std::pair<EvalResult, StringRef> LHSAndRemaining,
+ ParseContext PCtx) const {
+ EvalResult LHSResult;
+ StringRef RemainingExpr;
+ std::tie(LHSResult, RemainingExpr) = LHSAndRemaining;
+
+ // If there was an error, or there's nothing left to evaluate, return the
// result.
- // Returns a pair containing the result of the slice operation, plus the
- // expression remaining to be parsed.
- std::pair<EvalResult, StringRef> evalSliceExpr(
- std::pair<EvalResult, StringRef> Ctx) const{
- EvalResult SubExprResult;
- StringRef RemainingExpr;
- std::tie(SubExprResult, RemainingExpr) = Ctx;
-
- assert(RemainingExpr.startswith("[") && "Not a slice expr.");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
-
- EvalResult HighBitExpr;
- std::tie(HighBitExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
-
- if (HighBitExpr.hasError())
- return std::make_pair(HighBitExpr, RemainingExpr);
-
- if (!RemainingExpr.startswith(":"))
- return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
- "expected ':'"),
- "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
-
- EvalResult LowBitExpr;
- std::tie(LowBitExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
-
- if (LowBitExpr.hasError())
- return std::make_pair(LowBitExpr, RemainingExpr);
-
- if (!RemainingExpr.startswith("]"))
- return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
- "expected ']'"),
- "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
-
- unsigned HighBit = HighBitExpr.getValue();
- unsigned LowBit = LowBitExpr.getValue();
- uint64_t Mask = ((uint64_t)1 << (HighBit - LowBit + 1)) - 1;
- uint64_t SlicedValue = (SubExprResult.getValue() >> LowBit) & Mask;
- return std::make_pair(EvalResult(SlicedValue), RemainingExpr);
- }
+ if (LHSResult.hasError() || RemainingExpr == "")
+ return std::make_pair(LHSResult, RemainingExpr);
- // Evaluate a "complex" expression.
- // Takes an already evaluated subexpression and checks for the presence of a
- // binary operator, computing the result of the binary operation if one is
- // found. Used to make arithmetic expressions left-associative.
- // Returns a pair containing the ultimate result of evaluating the
- // expression, plus the expression remaining to be evaluated.
- std::pair<EvalResult, StringRef> evalComplexExpr(
- std::pair<EvalResult, StringRef> Ctx) const {
- EvalResult LHSResult;
- StringRef RemainingExpr;
- std::tie(LHSResult, RemainingExpr) = Ctx;
-
- // If there was an error, or there's nothing left to evaluate, return the
- // result.
- if (LHSResult.hasError() || RemainingExpr == "")
- return std::make_pair(LHSResult, RemainingExpr);
-
- // Otherwise check if this is a binary expressioan.
- BinOpToken BinOp;
- std::tie(BinOp, RemainingExpr) = parseBinOpToken(RemainingExpr);
-
- // If this isn't a recognized expression just return.
- if (BinOp == BinOpToken::Invalid)
- return std::make_pair(LHSResult, RemainingExpr);
-
- // This is a recognized bin-op. Evaluate the RHS, then evaluate the binop.
- EvalResult RHSResult;
- std::tie(RHSResult, RemainingExpr) = evalSimpleExpr(RemainingExpr);
-
- // If there was an error evaluating the RHS, return it.
- if (RHSResult.hasError())
- return std::make_pair(RHSResult, RemainingExpr);
-
- // This is a binary expression - evaluate and try to continue as a
- // complex expr.
- EvalResult ThisResult(computeBinOpResult(BinOp, LHSResult, RHSResult));
-
- return evalComplexExpr(std::make_pair(ThisResult, RemainingExpr));
- }
+    // Otherwise check if this is a binary expression.
+ BinOpToken BinOp;
+ std::tie(BinOp, RemainingExpr) = parseBinOpToken(RemainingExpr);
- bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size) const {
- MCDisassembler *Dis = Checker.Disassembler;
- StringRef SectionMem = Checker.getSubsectionStartingAt(Symbol);
- StringRefMemoryObject SectionBytes(SectionMem, 0);
+ // If this isn't a recognized expression just return.
+ if (BinOp == BinOpToken::Invalid)
+ return std::make_pair(LHSResult, RemainingExpr);
- MCDisassembler::DecodeStatus S =
- Dis->getInstruction(Inst, Size, SectionBytes, 0, nulls(), nulls());
+ // This is a recognized bin-op. Evaluate the RHS, then evaluate the binop.
+ EvalResult RHSResult;
+ std::tie(RHSResult, RemainingExpr) = evalSimpleExpr(RemainingExpr, PCtx);
- return (S == MCDisassembler::Success);
- }
+ // If there was an error evaluating the RHS, return it.
+ if (RHSResult.hasError())
+ return std::make_pair(RHSResult, RemainingExpr);
- };
+ // This is a binary expression - evaluate and try to continue as a
+ // complex expr.
+ EvalResult ThisResult(computeBinOpResult(BinOp, LHSResult, RHSResult));
+
+ return evalComplexExpr(std::make_pair(ThisResult, RemainingExpr), PCtx);
+ }
+ bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size) const {
+ MCDisassembler *Dis = Checker.Disassembler;
+ StringRef SectionMem = Checker.getSubsectionStartingAt(Symbol);
+ ArrayRef<uint8_t> SectionBytes(
+ reinterpret_cast<const uint8_t *>(SectionMem.data()),
+ SectionMem.size());
+
+ MCDisassembler::DecodeStatus S =
+ Dis->getInstruction(Inst, Size, SectionBytes, 0, nulls(), nulls());
+
+ return (S == MCDisassembler::Success);
+ }
+};
}
-bool RuntimeDyldChecker::check(StringRef CheckExpr) const {
+RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld,
+ MCDisassembler *Disassembler,
+ MCInstPrinter *InstPrinter,
+ raw_ostream &ErrStream)
+ : RTDyld(RTDyld), Disassembler(Disassembler), InstPrinter(InstPrinter),
+ ErrStream(ErrStream) {
+ RTDyld.Checker = this;
+}
+
+bool RuntimeDyldCheckerImpl::check(StringRef CheckExpr) const {
CheckExpr = CheckExpr.trim();
- DEBUG(llvm::dbgs() << "RuntimeDyldChecker: Checking '" << CheckExpr
- << "'...\n");
+ DEBUG(dbgs() << "RuntimeDyldChecker: Checking '" << CheckExpr << "'...\n");
RuntimeDyldCheckerExprEval P(*this, ErrStream);
bool Result = P.evaluate(CheckExpr);
(void)Result;
- DEBUG(llvm::dbgs() << "RuntimeDyldChecker: '" << CheckExpr << "' "
- << (Result ? "passed" : "FAILED") << ".\n");
+ DEBUG(dbgs() << "RuntimeDyldChecker: '" << CheckExpr << "' "
+ << (Result ? "passed" : "FAILED") << ".\n");
return Result;
}
-bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix,
- MemoryBuffer* MemBuf) const {
+bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
+ MemoryBuffer *MemBuf) const {
bool DidAllTestsPass = true;
unsigned NumRules = 0;
const char *LineStart = MemBuf->getBufferStart();
// Eat whitespace.
- while (LineStart != MemBuf->getBufferEnd() &&
- std::isspace(*LineStart))
+ while (LineStart != MemBuf->getBufferEnd() && std::isspace(*LineStart))
++LineStart;
while (LineStart != MemBuf->getBufferEnd() && *LineStart != '\0') {
const char *LineEnd = LineStart;
- while (LineEnd != MemBuf->getBufferEnd() &&
- *LineEnd != '\r' && *LineEnd != '\n')
+ while (LineEnd != MemBuf->getBufferEnd() && *LineEnd != '\r' &&
+ *LineEnd != '\n')
++LineEnd;
StringRef Line(LineStart, LineEnd - LineStart);
@@ -605,37 +724,210 @@ bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix,
// Eat whitespace.
LineStart = LineEnd;
- while (LineStart != MemBuf->getBufferEnd() &&
- std::isspace(*LineStart))
+ while (LineStart != MemBuf->getBufferEnd() && std::isspace(*LineStart))
++LineStart;
}
return DidAllTestsPass && (NumRules != 0);
}
-bool RuntimeDyldChecker::checkSymbolIsValidForLoad(StringRef Symbol) const {
- return RTDyld.getSymbolAddress(Symbol) != nullptr;
+bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
+ return getRTDyld().getSymbolAddress(Symbol) != nullptr;
}
-uint64_t RuntimeDyldChecker::getSymbolAddress(StringRef Symbol) const {
- return RTDyld.getAnySymbolRemoteAddress(Symbol);
+uint64_t RuntimeDyldCheckerImpl::getSymbolLinkerAddr(StringRef Symbol) const {
+ return static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(getRTDyld().getSymbolAddress(Symbol)));
}
-uint64_t RuntimeDyldChecker::readMemoryAtSymbol(StringRef Symbol,
- int64_t Offset,
- unsigned Size) const {
- uint8_t *Src = RTDyld.getSymbolAddress(Symbol);
- uint64_t Result = 0;
- memcpy(&Result, Src + Offset, Size);
- return Result;
+uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
+ return getRTDyld().getAnySymbolRemoteAddress(Symbol);
+}
+
+uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr,
+ unsigned Size) const {
+ uintptr_t PtrSizedAddr = static_cast<uintptr_t>(SrcAddr);
+ assert(PtrSizedAddr == SrcAddr && "Linker memory pointer out-of-range.");
+ uint8_t *Src = reinterpret_cast<uint8_t*>(PtrSizedAddr);
+ return getRTDyld().readBytesUnaligned(Src, Size);
}
-StringRef RuntimeDyldChecker::getSubsectionStartingAt(StringRef Name) const {
+
+std::pair<const RuntimeDyldCheckerImpl::SectionAddressInfo*, std::string>
+RuntimeDyldCheckerImpl::findSectionAddrInfo(StringRef FileName,
+ StringRef SectionName) const {
+
+ auto SectionMapItr = Stubs.find(FileName);
+ if (SectionMapItr == Stubs.end()) {
+ std::string ErrorMsg = "File '";
+ ErrorMsg += FileName;
+ ErrorMsg += "' not found. ";
+ if (Stubs.empty())
+ ErrorMsg += "No stubs registered.";
+ else {
+ ErrorMsg += "Available files are:";
+ for (const auto& StubEntry : Stubs) {
+ ErrorMsg += " '";
+ ErrorMsg += StubEntry.first;
+ ErrorMsg += "'";
+ }
+ }
+ ErrorMsg += "\n";
+ return std::make_pair(nullptr, ErrorMsg);
+ }
+
+ auto SectionInfoItr = SectionMapItr->second.find(SectionName);
+ if (SectionInfoItr == SectionMapItr->second.end())
+ return std::make_pair(nullptr,
+ ("Section '" + SectionName + "' not found in file '" +
+ FileName + "'\n").str());
+
+ return std::make_pair(&SectionInfoItr->second, std::string(""));
+}
+
+std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getSectionAddr(
+ StringRef FileName, StringRef SectionName, bool IsInsideLoad) const {
+
+ const SectionAddressInfo *SectionInfo = nullptr;
+ {
+ std::string ErrorMsg;
+ std::tie(SectionInfo, ErrorMsg) =
+ findSectionAddrInfo(FileName, SectionName);
+ if (ErrorMsg != "")
+ return std::make_pair(0, ErrorMsg);
+ }
+
+ unsigned SectionID = SectionInfo->SectionID;
+ uint64_t Addr;
+ if (IsInsideLoad)
+ Addr =
+ static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(getRTDyld().Sections[SectionID].Address));
+ else
+ Addr = getRTDyld().Sections[SectionID].LoadAddress;
+
+ return std::make_pair(Addr, std::string(""));
+}
+
+std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getStubAddrFor(
+ StringRef FileName, StringRef SectionName, StringRef SymbolName,
+ bool IsInsideLoad) const {
+
+ const SectionAddressInfo *SectionInfo = nullptr;
+ {
+ std::string ErrorMsg;
+ std::tie(SectionInfo, ErrorMsg) =
+ findSectionAddrInfo(FileName, SectionName);
+ if (ErrorMsg != "")
+ return std::make_pair(0, ErrorMsg);
+ }
+
+ unsigned SectionID = SectionInfo->SectionID;
+ const StubOffsetsMap &SymbolStubs = SectionInfo->StubOffsets;
+ auto StubOffsetItr = SymbolStubs.find(SymbolName);
+ if (StubOffsetItr == SymbolStubs.end())
+ return std::make_pair(0,
+ ("Stub for symbol '" + SymbolName + "' not found. "
+ "If '" + SymbolName + "' is an internal symbol this "
+ "may indicate that the stub target offset is being "
+ "computed incorrectly.\n").str());
+
+ uint64_t StubOffset = StubOffsetItr->second;
+
+ uint64_t Addr;
+ if (IsInsideLoad) {
+ uintptr_t SectionBase =
+ reinterpret_cast<uintptr_t>(getRTDyld().Sections[SectionID].Address);
+ Addr = static_cast<uint64_t>(SectionBase) + StubOffset;
+ } else {
+ uint64_t SectionBase = getRTDyld().Sections[SectionID].LoadAddress;
+ Addr = SectionBase + StubOffset;
+ }
+
+ return std::make_pair(Addr, std::string(""));
+}
+
+StringRef
+RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const {
RuntimeDyldImpl::SymbolTableMap::const_iterator pos =
- RTDyld.GlobalSymbolTable.find(Name);
- if (pos == RTDyld.GlobalSymbolTable.end())
+ getRTDyld().GlobalSymbolTable.find(Name);
+ if (pos == getRTDyld().GlobalSymbolTable.end())
return StringRef();
RuntimeDyldImpl::SymbolLoc Loc = pos->second;
- uint8_t *SectionAddr = RTDyld.getSectionAddress(Loc.first);
- return StringRef(reinterpret_cast<const char*>(SectionAddr) + Loc.second,
- RTDyld.Sections[Loc.first].Size - Loc.second);
+ uint8_t *SectionAddr = getRTDyld().getSectionAddress(Loc.first);
+ return StringRef(reinterpret_cast<const char *>(SectionAddr) + Loc.second,
+ getRTDyld().Sections[Loc.first].Size - Loc.second);
+}
+
+void RuntimeDyldCheckerImpl::registerSection(
+ StringRef FilePath, unsigned SectionID) {
+ StringRef FileName = sys::path::filename(FilePath);
+ const SectionEntry &Section = getRTDyld().Sections[SectionID];
+ StringRef SectionName = Section.Name;
+
+ Stubs[FileName][SectionName].SectionID = SectionID;
+}
+
+void RuntimeDyldCheckerImpl::registerStubMap(
+ StringRef FilePath, unsigned SectionID,
+ const RuntimeDyldImpl::StubMap &RTDyldStubs) {
+ StringRef FileName = sys::path::filename(FilePath);
+ const SectionEntry &Section = getRTDyld().Sections[SectionID];
+ StringRef SectionName = Section.Name;
+
+ Stubs[FileName][SectionName].SectionID = SectionID;
+
+ for (auto &StubMapEntry : RTDyldStubs) {
+ std::string SymbolName = "";
+
+ if (StubMapEntry.first.SymbolName)
+ SymbolName = StubMapEntry.first.SymbolName;
+ else {
+ // If this is a (Section, Offset) pair, do a reverse lookup in the
+ // global symbol table to find the name.
+ for (auto &GSTEntry : getRTDyld().GlobalSymbolTable) {
+ if (GSTEntry.second.first == StubMapEntry.first.SectionID &&
+ GSTEntry.second.second ==
+ static_cast<uint64_t>(StubMapEntry.first.Offset)) {
+ SymbolName = GSTEntry.first();
+ break;
+ }
+ }
+ }
+
+ if (SymbolName != "")
+ Stubs[FileName][SectionName].StubOffsets[SymbolName] =
+ StubMapEntry.second;
+ }
+}
+
+RuntimeDyldChecker::RuntimeDyldChecker(RuntimeDyld &RTDyld,
+ MCDisassembler *Disassembler,
+ MCInstPrinter *InstPrinter,
+ raw_ostream &ErrStream)
+ : Impl(make_unique<RuntimeDyldCheckerImpl>(RTDyld, Disassembler,
+ InstPrinter, ErrStream)) {}
+
+RuntimeDyldChecker::~RuntimeDyldChecker() {}
+
+RuntimeDyld& RuntimeDyldChecker::getRTDyld() {
+ return Impl->RTDyld;
+}
+
+const RuntimeDyld& RuntimeDyldChecker::getRTDyld() const {
+ return Impl->RTDyld;
+}
+
+bool RuntimeDyldChecker::check(StringRef CheckExpr) const {
+ return Impl->check(CheckExpr);
+}
+
+bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix,
+ MemoryBuffer *MemBuf) const {
+ return Impl->checkAllRulesInBuffer(RulePrefix, MemBuf);
+}
+
+std::pair<uint64_t, std::string>
+RuntimeDyldChecker::getSectionAddr(StringRef FileName, StringRef SectionName,
+ bool LinkerAddress) {
+ return Impl->getSectionAddr(FileName, SectionName, LinkerAddress);
}
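
For reference, the evalComplexExpr/evalSimpleExpr changes above keep the checker's left-to-right folding strategy: evaluate one operand, look for a binary operator, evaluate the right-hand side, combine, and re-enter with the new left-hand side. A minimal standalone sketch of that shape, using plain ints and single-digit operands as hypothetical stand-ins for EvalResult and the parse context, could look like this:

#include <cassert>
#include <cctype>
#include <string>
#include <tuple>
#include <utility>

// Parse one operand (a single digit) and return it with the remaining text.
static std::pair<int, std::string> evalSimple(std::string Expr) {
  assert(!Expr.empty() && std::isdigit(static_cast<unsigned char>(Expr[0])) &&
         "expected a digit");
  return {Expr[0] - '0', Expr.substr(1)};
}

// Fold "<lhs> (op <rhs>)*" left-to-right, mirroring evalComplexExpr.
static std::pair<int, std::string> evalComplex(std::pair<int, std::string> Ctx) {
  int LHS = Ctx.first;
  std::string Rest = Ctx.second;
  if (Rest.empty() || (Rest[0] != '+' && Rest[0] != '-'))
    return {LHS, Rest};               // No recognized binop: we're done.
  char Op = Rest[0];
  int RHS;
  std::tie(RHS, Rest) = evalSimple(Rest.substr(1));
  int This = (Op == '+') ? LHS + RHS : LHS - RHS;
  return evalComplex({This, Rest});   // Continue folding with the new LHS.
}

With this shape, evalComplex(evalSimple("1+2+3")) yields {6, ""}, just as the checker folds a chain of binops into a single EvalResult before returning.
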
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
new file mode 100644
index 0000000..de20c1e
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -0,0 +1,76 @@
+//===-- RuntimeDyldCheckerImpl.h -- RuntimeDyld test framework --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_RUNTIMEDYLDCHECKERIMPL_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_RUNTIMEDYLDCHECKERIMPL_H
+
+#include "RuntimeDyldImpl.h"
+#include <set>
+
+namespace llvm {
+
+class RuntimeDyldCheckerImpl {
+ friend class RuntimeDyldChecker;
+ friend class RuntimeDyldImpl;
+ friend class RuntimeDyldCheckerExprEval;
+
+public:
+ RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld, MCDisassembler *Disassembler,
+ MCInstPrinter *InstPrinter,
+ llvm::raw_ostream &ErrStream);
+
+ bool check(StringRef CheckExpr) const;
+ bool checkAllRulesInBuffer(StringRef RulePrefix, MemoryBuffer *MemBuf) const;
+
+private:
+
+ // StubMap typedefs.
+ typedef std::map<std::string, uint64_t> StubOffsetsMap;
+ struct SectionAddressInfo {
+ uint64_t SectionID;
+ StubOffsetsMap StubOffsets;
+ };
+ typedef std::map<std::string, SectionAddressInfo> SectionMap;
+ typedef std::map<std::string, SectionMap> StubMap;
+
+ RuntimeDyldImpl &getRTDyld() const { return *RTDyld.Dyld; }
+
+ bool isSymbolValid(StringRef Symbol) const;
+ uint64_t getSymbolLinkerAddr(StringRef Symbol) const;
+ uint64_t getSymbolRemoteAddr(StringRef Symbol) const;
+ uint64_t readMemoryAtAddr(uint64_t Addr, unsigned Size) const;
+
+ std::pair<const SectionAddressInfo*, std::string> findSectionAddrInfo(
+ StringRef FileName,
+ StringRef SectionName) const;
+
+ std::pair<uint64_t, std::string> getSectionAddr(StringRef FileName,
+ StringRef SectionName,
+ bool IsInsideLoad) const;
+
+ std::pair<uint64_t, std::string> getStubAddrFor(StringRef FileName,
+ StringRef SectionName,
+ StringRef Symbol,
+ bool IsInsideLoad) const;
+ StringRef getSubsectionStartingAt(StringRef Name) const;
+
+ void registerSection(StringRef FilePath, unsigned SectionID);
+ void registerStubMap(StringRef FilePath, unsigned SectionID,
+ const RuntimeDyldImpl::StubMap &RTDyldStubs);
+
+ RuntimeDyld &RTDyld;
+ MCDisassembler *Disassembler;
+ MCInstPrinter *InstPrinter;
+ llvm::raw_ostream &ErrStream;
+
+ StubMap Stubs;
+};
+}
+
+#endif
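
The new RuntimeDyldChecker defined at the end of RuntimeDyldChecker.cpp above is now a thin pimpl wrapper around this class: it owns a RuntimeDyldCheckerImpl and forwards each public call. A generic sketch of that pattern, with hypothetical Widget/WidgetImpl names and C++14 std::make_unique standing in for llvm::make_unique, looks like:

#include <memory>

class WidgetImpl {
public:
  bool check() const { return true; } // the real work lives in the impl
};

class Widget {
public:
  Widget() : Impl(std::make_unique<WidgetImpl>()) {}
  bool check() const { return Impl->check(); } // forward to the impl
private:
  std::unique_ptr<WidgetImpl> Impl; // sole data member, hidden from clients
};

In the real headers the destructor is declared in the public class and defined out of line (as RuntimeDyldChecker::~RuntimeDyldChecker() is above), so the public header only needs a forward declaration of the impl type.
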
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 80e489c..d95cffe 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -23,6 +23,7 @@
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
@@ -55,9 +56,9 @@ template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> {
public:
DyldELFObject(std::unique_ptr<ObjectFile> UnderlyingFile,
- std::unique_ptr<MemoryBuffer> Wrapper, std::error_code &ec);
+ MemoryBufferRef Wrapper, std::error_code &ec);
- DyldELFObject(std::unique_ptr<MemoryBuffer> Wrapper, std::error_code &ec);
+ DyldELFObject(MemoryBufferRef Wrapper, std::error_code &ec);
void updateSectionAddress(const SectionRef &Sec, uint64_t Addr);
void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr);
@@ -76,8 +77,10 @@ template <class ELFT> class ELFObjectImage : public ObjectImageCommon {
bool Registered;
public:
- ELFObjectImage(ObjectBuffer *Input, std::unique_ptr<DyldELFObject<ELFT>> Obj)
- : ObjectImageCommon(Input, std::move(Obj)), Registered(false) {}
+ ELFObjectImage(std::unique_ptr<ObjectBuffer> Input,
+ std::unique_ptr<DyldELFObject<ELFT>> Obj)
+ : ObjectImageCommon(std::move(Input), std::move(Obj)), Registered(false) {
+ }
virtual ~ELFObjectImage() {
if (Registered)
@@ -109,17 +112,15 @@ public:
// actual memory. Ultimately, the Binary parent class will take ownership of
// this MemoryBuffer object but not the underlying memory.
template <class ELFT>
-DyldELFObject<ELFT>::DyldELFObject(std::unique_ptr<MemoryBuffer> Wrapper,
- std::error_code &EC)
- : ELFObjectFile<ELFT>(std::move(Wrapper), EC) {
+DyldELFObject<ELFT>::DyldELFObject(MemoryBufferRef Wrapper, std::error_code &EC)
+ : ELFObjectFile<ELFT>(Wrapper, EC) {
this->isDyldELFObject = true;
}
template <class ELFT>
DyldELFObject<ELFT>::DyldELFObject(std::unique_ptr<ObjectFile> UnderlyingFile,
- std::unique_ptr<MemoryBuffer> Wrapper,
- std::error_code &EC)
- : ELFObjectFile<ELFT>(std::move(Wrapper), EC),
+ MemoryBufferRef Wrapper, std::error_code &EC)
+ : ELFObjectFile<ELFT>(Wrapper, EC),
UnderlyingFile(std::move(UnderlyingFile)) {
this->isDyldELFObject = true;
}
@@ -185,36 +186,36 @@ RuntimeDyldELF::createObjectImageFromFile(std::unique_ptr<object::ObjectFile> Ob
return nullptr;
std::error_code ec;
- std::unique_ptr<MemoryBuffer> Buffer(
- MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false));
+ MemoryBufferRef Buffer = ObjFile->getMemoryBufferRef();
if (ObjFile->getBytesInAddress() == 4 && ObjFile->isLittleEndian()) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::little, 2, false>>>(
- std::move(ObjFile), std::move(Buffer), ec);
+ std::move(ObjFile), Buffer, ec);
return new ELFObjectImage<ELFType<support::little, 2, false>>(
nullptr, std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 4 && !ObjFile->isLittleEndian()) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::big, 2, false>>>(
- std::move(ObjFile), std::move(Buffer), ec);
+ std::move(ObjFile), Buffer, ec);
return new ELFObjectImage<ELFType<support::big, 2, false>>(nullptr, std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 8 && !ObjFile->isLittleEndian()) {
auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 2, true>>>(
- std::move(ObjFile), std::move(Buffer), ec);
+ std::move(ObjFile), Buffer, ec);
return new ELFObjectImage<ELFType<support::big, 2, true>>(nullptr,
std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 8 && ObjFile->isLittleEndian()) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::little, 2, true>>>(
- std::move(ObjFile), std::move(Buffer), ec);
+ std::move(ObjFile), Buffer, ec);
return new ELFObjectImage<ELFType<support::little, 2, true>>(
nullptr, std::move(Obj));
} else
llvm_unreachable("Unexpected ELF format");
}
-ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
+std::unique_ptr<ObjectImage>
+RuntimeDyldELF::createObjectImage(std::unique_ptr<ObjectBuffer> Buffer) {
if (Buffer->getBufferSize() < ELF::EI_NIDENT)
llvm_unreachable("Unexpected ELF object size");
std::pair<unsigned char, unsigned char> Ident =
@@ -222,34 +223,36 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
(uint8_t)Buffer->getBufferStart()[ELF::EI_DATA]);
std::error_code ec;
- std::unique_ptr<MemoryBuffer> Buf(Buffer->getMemBuffer());
+ MemoryBufferRef Buf = Buffer->getMemBuffer();
if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::little, 4, false>>>(
- std::move(Buf), ec);
- return new ELFObjectImage<ELFType<support::little, 4, false>>(
- Buffer, std::move(Obj));
- } else if (Ident.first == ELF::ELFCLASS32 &&
- Ident.second == ELF::ELFDATA2MSB) {
+ Buf, ec);
+ return llvm::make_unique<
+ ELFObjectImage<ELFType<support::little, 4, false>>>(std::move(Buffer),
+ std::move(Obj));
+ }
+ if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) {
auto Obj =
- llvm::make_unique<DyldELFObject<ELFType<support::big, 4, false>>>(
- std::move(Buf), ec);
- return new ELFObjectImage<ELFType<support::big, 4, false>>(Buffer,
- std::move(Obj));
- } else if (Ident.first == ELF::ELFCLASS64 &&
- Ident.second == ELF::ELFDATA2MSB) {
+ llvm::make_unique<DyldELFObject<ELFType<support::big, 4, false>>>(Buf,
+ ec);
+ return llvm::make_unique<ELFObjectImage<ELFType<support::big, 4, false>>>(
+ std::move(Buffer), std::move(Obj));
+ }
+ if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) {
auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 8, true>>>(
- std::move(Buf), ec);
- return new ELFObjectImage<ELFType<support::big, 8, true>>(Buffer, std::move(Obj));
- } else if (Ident.first == ELF::ELFCLASS64 &&
- Ident.second == ELF::ELFDATA2LSB) {
- auto Obj =
- llvm::make_unique<DyldELFObject<ELFType<support::little, 8, true>>>(
- std::move(Buf), ec);
- return new ELFObjectImage<ELFType<support::little, 8, true>>(Buffer, std::move(Obj));
- } else
- llvm_unreachable("Unexpected ELF format");
+ Buf, ec);
+ return llvm::make_unique<ELFObjectImage<ELFType<support::big, 8, true>>>(
+ std::move(Buffer), std::move(Obj));
+ }
+ assert(Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB &&
+ "Unexpected ELF format");
+ auto Obj =
+ llvm::make_unique<DyldELFObject<ELFType<support::little, 8, true>>>(Buf,
+ ec);
+ return llvm::make_unique<ELFObjectImage<ELFType<support::little, 8, true>>>(
+ std::move(Buffer), std::move(Obj));
}
RuntimeDyldELF::~RuntimeDyldELF() {}
@@ -263,10 +266,9 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
llvm_unreachable("Relocation type not implemented yet!");
break;
case ELF::R_X86_64_64: {
- uint64_t *Target = reinterpret_cast<uint64_t *>(Section.Address + Offset);
- *Target = Value + Addend;
+ support::ulittle64_t::ref(Section.Address + Offset) = Value + Addend;
DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at "
- << format("%p\n", Target));
+ << format("%p\n", Section.Address + Offset));
break;
}
case ELF::R_X86_64_32:
@@ -276,17 +278,15 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
(Type == ELF::R_X86_64_32S &&
((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN)));
uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
- uint32_t *Target = reinterpret_cast<uint32_t *>(Section.Address + Offset);
- *Target = TruncatedAddr;
+ support::ulittle32_t::ref(Section.Address + Offset) = TruncatedAddr;
DEBUG(dbgs() << "Writing " << format("%p", TruncatedAddr) << " at "
- << format("%p\n", Target));
+ << format("%p\n", Section.Address + Offset));
break;
}
case ELF::R_X86_64_GOTPCREL: {
// findGOTEntry returns the 'G + GOT' part of the relocation calculation
// based on the load/target address of the GOT (not the current/local addr).
uint64_t GOTAddr = findGOTEntry(Value, SymOffset);
- uint32_t *Target = reinterpret_cast<uint32_t *>(Section.Address + Offset);
uint64_t FinalAddress = Section.LoadAddress + Offset;
// The processRelocationRef method combines the symbol offset and the addend
// and in most cases that's what we want. For this relocation type, we need
@@ -294,30 +294,29 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
int64_t RealOffset = GOTAddr + Addend - SymOffset - FinalAddress;
assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
- *Target = TruncOffset;
+ support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset;
break;
}
case ELF::R_X86_64_PC32: {
// Get the placeholder value from the generated object since
// a previous relocation attempt may have overwritten the loaded version
- uint32_t *Placeholder =
- reinterpret_cast<uint32_t *>(Section.ObjAddress + Offset);
- uint32_t *Target = reinterpret_cast<uint32_t *>(Section.Address + Offset);
+ support::ulittle32_t::ref Placeholder(
+ (void *)(Section.ObjAddress + Offset));
uint64_t FinalAddress = Section.LoadAddress + Offset;
- int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+ int64_t RealOffset = Placeholder + Value + Addend - FinalAddress;
assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
- *Target = TruncOffset;
+ support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset;
break;
}
case ELF::R_X86_64_PC64: {
// Get the placeholder value from the generated object since
// a previous relocation attempt may have overwritten the loaded version
- uint64_t *Placeholder =
- reinterpret_cast<uint64_t *>(Section.ObjAddress + Offset);
- uint64_t *Target = reinterpret_cast<uint64_t *>(Section.Address + Offset);
+ support::ulittle64_t::ref Placeholder(
+ (void *)(Section.ObjAddress + Offset));
uint64_t FinalAddress = Section.LoadAddress + Offset;
- *Target = *Placeholder + Value + Addend - FinalAddress;
+ support::ulittle64_t::ref(Section.Address + Offset) =
+ Placeholder + Value + Addend - FinalAddress;
break;
}
}
@@ -330,21 +329,20 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section,
case ELF::R_386_32: {
// Get the placeholder value from the generated object since
// a previous relocation attempt may have overwritten the loaded version
- uint32_t *Placeholder =
- reinterpret_cast<uint32_t *>(Section.ObjAddress + Offset);
- uint32_t *Target = reinterpret_cast<uint32_t *>(Section.Address + Offset);
- *Target = *Placeholder + Value + Addend;
+ support::ulittle32_t::ref Placeholder(
+ (void *)(Section.ObjAddress + Offset));
+ support::ulittle32_t::ref(Section.Address + Offset) =
+ Placeholder + Value + Addend;
break;
}
case ELF::R_386_PC32: {
// Get the placeholder value from the generated object since
// a previous relocation attempt may have overwritten the loaded version
- uint32_t *Placeholder =
- reinterpret_cast<uint32_t *>(Section.ObjAddress + Offset);
- uint32_t *Target = reinterpret_cast<uint32_t *>(Section.Address + Offset);
+ support::ulittle32_t::ref Placeholder(
+ (void *)(Section.ObjAddress + Offset));
uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF);
- uint32_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
- *Target = RealOffset;
+ uint32_t RealOffset = Placeholder + Value + Addend - FinalAddress;
+ support::ulittle32_t::ref(Section.Address + Offset) = RealOffset;
break;
}
default:
@@ -704,8 +702,7 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj,
section_iterator tsi(Obj.end_sections());
check(TargetSymbol->getSection(tsi));
- bool IsCode = false;
- tsi->isText(IsCode);
+ bool IsCode = tsi->isText();
Rel.SectionID = findOrEmitSection(Obj, (*tsi), IsCode, LocalSections);
Rel.Addend = (intptr_t)Addend;
return;
@@ -911,8 +908,6 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
break;
case Triple::aarch64:
case Triple::aarch64_be:
- case Triple::arm64:
- case Triple::arm64_be:
resolveAArch64Relocation(Section, Offset, Value, Type, Addend);
break;
case Triple::arm: // Fall through.
@@ -987,9 +982,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
if (si == Obj.end_sections())
llvm_unreachable("Symbol section not found, bad object file format!");
DEBUG(dbgs() << "\t\tThis is section symbol\n");
- // Default to 'true' in case isText fails (though it never does).
- bool isCode = true;
- si->isText(isCode);
+ bool isCode = si->isText();
Value.SectionID = findOrEmitSection(Obj, (*si), isCode, ObjSectionToID);
Value.Addend = Addend;
break;
@@ -1018,8 +1011,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
DEBUG(dbgs() << "\t\tSectionID: " << SectionID << " Offset: " << Offset
<< "\n");
- if ((Arch == Triple::aarch64 || Arch == Triple::aarch64_be ||
- Arch == Triple::arm64 || Arch == Triple::arm64_be) &&
+ if ((Arch == Triple::aarch64 || Arch == Triple::aarch64_be) &&
(RelType == ELF::R_AARCH64_CALL26 || RelType == ELF::R_AARCH64_JUMP26)) {
// This is an AArch64 branch relocation, need to use a stub function.
DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation.");
@@ -1141,6 +1133,10 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
}
} else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) {
if (RelType == ELF::R_PPC64_REL24) {
+ // Determine ABI variant in use for this object.
+ unsigned AbiVariant;
+ Obj.getObjectFile()->getPlatformFlags(AbiVariant);
+ AbiVariant &= ELF::EF_PPC64_ABI;
// A PPC branch relocation will need a stub function if the target is
// an external symbol (Symbol::ST_Unknown) or if the target address
// is not within the signed 24-bits branch address.
@@ -1148,10 +1144,18 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
uint8_t *Target = Section.Address + Offset;
bool RangeOverflow = false;
if (SymType != SymbolRef::ST_Unknown) {
- // A function call may points to the .opd entry, so the final symbol
- // value
- // in calculated based in the relocation values in .opd section.
- findOPDEntrySection(Obj, ObjSectionToID, Value);
+ if (AbiVariant != 2) {
+ // In the ELFv1 ABI, a function call may point to the .opd entry,
+ // so the final symbol value is calculated based on the relocation
+ // values in the .opd section.
+ findOPDEntrySection(Obj, ObjSectionToID, Value);
+ } else {
+ // In the ELFv2 ABI, a function symbol may provide a local entry
+ // point, which must be used for direct calls.
+ uint8_t SymOther;
+ Symbol->getOther(SymOther);
+ Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther);
+ }
uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend;
int32_t delta = static_cast<int32_t>(Target - RelocTarget);
// If it is within 24-bits branch range, just set the branch target
@@ -1179,7 +1183,8 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
DEBUG(dbgs() << " Create a new stub function\n");
Stubs[Value] = Section.StubOffset;
uint8_t *StubTargetAddr =
- createStubFunction(Section.Address + Section.StubOffset);
+ createStubFunction(Section.Address + Section.StubOffset,
+ AbiVariant);
RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
ELF::R_PPC64_ADDR64, Value.Addend);
@@ -1217,9 +1222,13 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
RelType, 0);
Section.StubOffset += getMaxStubSize();
}
- if (SymType == SymbolRef::ST_Unknown)
+ if (SymType == SymbolRef::ST_Unknown) {
// Restore the TOC for external calls
- writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1)
+ if (AbiVariant == 2)
+ writeInt32BE(Target + 4, 0xE8410018); // ld r2,24(r1)
+ else
+ writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1)
+ }
}
} else if (RelType == ELF::R_PPC64_TOC16 ||
RelType == ELF::R_PPC64_TOC16_DS ||
@@ -1306,7 +1315,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
Stubs[Value] = StubOffset;
createStubFunction((uint8_t *)StubAddress);
RelocationEntry RE(SectionID, StubOffset + 8, ELF::R_390_64,
- Value.Addend - Addend);
+ Value.Offset);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
@@ -1414,8 +1423,6 @@ size_t RuntimeDyldELF::getGOTEntrySize() {
case Triple::x86_64:
case Triple::aarch64:
case Triple::aarch64_be:
- case Triple::arm64:
- case Triple::arm64_be:
case Triple::ppc64:
case Triple::ppc64le:
case Triple::systemz:
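
A recurring change in the x86 and x86-64 relocation hunks above is replacing raw uint32_t*/uint64_t* stores with little-endian reference types, so that possibly unaligned, target-endian relocation slots are read and written safely. A standalone sketch of that kind of proxy, not LLVM's support::ulittle32_t implementation but the same idea, might be:

#include <cstdint>

// Proxy for a possibly unaligned little-endian 32-bit slot in memory.
// Reads and writes go byte-by-byte, so they are safe on strict-alignment
// and big-endian hosts alike.
struct ULittle32Ref {
  uint8_t *Addr;
  explicit ULittle32Ref(void *P) : Addr(static_cast<uint8_t *>(P)) {}
  operator uint32_t() const {
    uint32_t V = 0;
    for (int i = 0; i != 4; ++i)
      V |= uint32_t(Addr[i]) << (8 * i);
    return V;
  }
  ULittle32Ref &operator=(uint32_t V) {
    for (int i = 0; i != 4; ++i)
      Addr[i] = uint8_t(V >> (8 * i));
    return *this;
  }
};

With a proxy like this, a store along the lines of ULittle32Ref(Section.Address + Offset) = TruncatedAddr never relies on the host's alignment or byte order, which is what the switch away from reinterpret_cast buys in the code above.
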
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 59fdfbe..4aeab81 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_RUNTIME_DYLD_ELF_H
-#define LLVM_RUNTIME_DYLD_ELF_H
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_RUNTIMEDYLDELF_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_RUNTIMEDYLDELF_H
#include "RuntimeDyldImpl.h"
#include "llvm/ADT/DenseMap.h"
@@ -58,8 +58,7 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
uint64_t Value, uint32_t Type, int64_t Addend);
unsigned getMaxStubSize() override {
- if (Arch == Triple::aarch64 || Arch == Triple::arm64 ||
- Arch == Triple::aarch64_be || Arch == Triple::arm64_be)
+ if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be)
return 20; // movz; movk; movk; movk; br
if (Arch == Triple::arm || Arch == Triple::thumb)
return 8; // 32-bit instruction and 32-bit address
@@ -120,7 +119,8 @@ public:
ObjSectionToIDMap &SectionMap) override;
virtual ~RuntimeDyldELF();
- static ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
+ static std::unique_ptr<ObjectImage>
+ createObjectImage(std::unique_ptr<ObjectBuffer> InputBuffer);
static ObjectImage *createObjectImageFromFile(std::unique_ptr<object::ObjectFile> Obj);
};
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 0336cba..69ea3b4 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_RUNTIME_DYLD_IMPL_H
-#define LLVM_RUNTIME_DYLD_IMPL_H
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_RUNTIMEDYLDIMPL_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_RUNTIMEDYLDIMPL_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -70,7 +70,7 @@ public:
SectionEntry(StringRef name, uint8_t *address, size_t size,
uintptr_t objAddress)
: Name(name), Address(address), Size(size),
- LoadAddress((uintptr_t)address), StubOffset(size),
+ LoadAddress(reinterpret_cast<uintptr_t>(address)), StubOffset(size),
ObjAddress(objAddress) {}
};
@@ -159,7 +159,7 @@ public:
};
class RuntimeDyldImpl {
- friend class RuntimeDyldChecker;
+ friend class RuntimeDyldCheckerImpl;
private:
uint64_t getAnySymbolRemoteAddress(StringRef Symbol) {
@@ -172,6 +172,9 @@ protected:
// The MemoryManager to load objects into.
RTDyldMemoryManager *MemMgr;
+ // Attached RuntimeDyldChecker instance. Null if no instance attached.
+ RuntimeDyldCheckerImpl *Checker;
+
// A list of all sections emitted by the dynamic linker. These sections are
// referenced in the code by means of their index in this list - SectionID.
typedef SmallVector<SectionEntry, 64> SectionList;
@@ -211,6 +214,7 @@ protected:
// modules. This map is indexed by symbol name.
StringMap<RelocationList> ExternalSymbolRelocations;
+
typedef std::map<RelocationValueRef, uintptr_t> StubMap;
Triple::ArchType Arch;
@@ -245,11 +249,11 @@ protected:
return true;
}
- uint64_t getSectionLoadAddress(unsigned SectionID) {
+ uint64_t getSectionLoadAddress(unsigned SectionID) const {
return Sections[SectionID].LoadAddress;
}
- uint8_t *getSectionAddress(unsigned SectionID) {
+ uint8_t *getSectionAddress(unsigned SectionID) const {
return (uint8_t *)Sections[SectionID].Address;
}
@@ -282,6 +286,13 @@ protected:
*(Addr + 7) = Value & 0xFF;
}
+ /// Endian-aware read. Read the least significant Size bytes from Src.
+ uint64_t readBytesUnaligned(uint8_t *Src, unsigned Size) const;
+
+ /// Endian-aware write. Write the least significant Size bytes from Value to
+ /// Dst.
+ void writeBytesUnaligned(uint64_t Value, uint8_t *Dst, unsigned Size) const;
+
/// \brief Given the common symbols discovered in the object file, emit a
/// new section for them and update the symbol mappings in the object and
/// symbol table.
@@ -312,7 +323,7 @@ protected:
/// \brief Emits long jump instruction to Addr.
/// \return Pointer to the memory area for emitting target address.
- uint8_t *createStubFunction(uint8_t *Addr);
+ uint8_t *createStubFunction(uint8_t *Addr, unsigned AbiVariant = 0);
/// \brief Resolves relocations from Relocs list with address from Value.
void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
@@ -349,7 +360,7 @@ protected:
public:
RuntimeDyldImpl(RTDyldMemoryManager *mm)
- : MemMgr(mm), ProcessAllSections(false), HasError(false) {
+ : MemMgr(mm), Checker(nullptr), ProcessAllSections(false), HasError(false) {
}
virtual ~RuntimeDyldImpl();
@@ -358,9 +369,14 @@ public:
this->ProcessAllSections = ProcessAllSections;
}
- ObjectImage *loadObject(ObjectImage *InputObject);
+ void setRuntimeDyldChecker(RuntimeDyldCheckerImpl *Checker) {
+ this->Checker = Checker;
+ }
+
+ std::unique_ptr<ObjectImage>
+ loadObject(std::unique_ptr<ObjectImage> InputObject);
- uint8_t* getSymbolAddress(StringRef Name) {
+ uint8_t* getSymbolAddress(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name);
@@ -370,7 +386,7 @@ public:
return getSectionAddress(Loc.first) + Loc.second;
}
- uint64_t getSymbolLoadAddress(StringRef Name) {
+ uint64_t getSymbolLoadAddress(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name);
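
The readBytesUnaligned/writeBytesUnaligned helpers declared in the hunk above (and used by the Mach-O FDE processing further down) amount to byte-at-a-time copies in the target's byte order. A self-contained sketch, with an explicit IsTargetLittleEndian flag as an assumption standing in for however the real helpers determine target endianness, could be:

#include <cstdint>

// Read the least significant Size bytes starting at Src, honoring the
// target's byte order rather than the host's.
static uint64_t readBytesUnaligned(const uint8_t *Src, unsigned Size,
                                   bool IsTargetLittleEndian) {
  uint64_t Result = 0;
  for (unsigned i = 0; i != Size; ++i) {
    unsigned Shift = IsTargetLittleEndian ? 8 * i : 8 * (Size - i - 1);
    Result |= uint64_t(Src[i]) << Shift;
  }
  return Result;
}

// Write the least significant Size bytes of Value to Dst in target order.
static void writeBytesUnaligned(uint64_t Value, uint8_t *Dst, unsigned Size,
                                bool IsTargetLittleEndian) {
  for (unsigned i = 0; i != Size; ++i) {
    unsigned Shift = IsTargetLittleEndian ? 8 * i : 8 * (Size - i - 1);
    Dst[i] = uint8_t(Value >> Shift);
  }
}

For example, reading four bytes {0x78, 0x56, 0x34, 0x12} with IsTargetLittleEndian = true yields 0x12345678.
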
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 4eb516c..d3d6f5d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -14,8 +14,12 @@
#include "RuntimeDyldMachO.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "ObjectImageCommon.h"
-#include "JITRegistrar.h"
+
+#include "Targets/RuntimeDyldMachOARM.h"
+#include "Targets/RuntimeDyldMachOAArch64.h"
+#include "Targets/RuntimeDyldMachOI386.h"
+#include "Targets/RuntimeDyldMachOX86_64.h"
+
using namespace llvm;
using namespace llvm::object;
@@ -23,164 +27,231 @@ using namespace llvm::object;
namespace llvm {
-class MachOObjectImage : public ObjectImageCommon {
-private:
- typedef SmallVector<uint64_t, 1> SectionAddrList;
- SectionAddrList OldSectionAddrList;
-
-protected:
- bool is64;
- bool Registered;
-
-private:
- void initOldAddress() {
- MachOObjectFile *objf = static_cast<MachOObjectFile *>(ObjFile.get());
- // Unfortunately we need to do this, since there's information encoded
- // in the original addr of the section that we could not otherwise
- // recover. The reason for this is that symbols do not actually store
- // their file offset, but only their vmaddr. This means that in order
- // to locate the symbol correctly in the object file, we need to know
- // where the original start of the section was (including any padding,
- // etc).
- for (section_iterator i = objf->section_begin(), e = objf->section_end();
- i != e; ++i) {
- uint64_t Addr;
- i->getAddress(Addr);
- OldSectionAddrList[i->getRawDataRefImpl().d.a] = Addr;
+int64_t RuntimeDyldMachO::memcpyAddend(const RelocationEntry &RE) const {
+ unsigned NumBytes = 1 << RE.Size;
+ uint8_t *Src = Sections[RE.SectionID].Address + RE.Offset;
+
+ return static_cast<int64_t>(readBytesUnaligned(Src, NumBytes));
+}
+
+RelocationValueRef RuntimeDyldMachO::getRelocationValueRef(
+ ObjectImage &ObjImg, const relocation_iterator &RI,
+ const RelocationEntry &RE, ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols) {
+
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile &>(*ObjImg.getObjectFile());
+ MachO::any_relocation_info RelInfo =
+ Obj.getRelocation(RI->getRawDataRefImpl());
+ RelocationValueRef Value;
+
+ bool IsExternal = Obj.getPlainRelocationExternal(RelInfo);
+ if (IsExternal) {
+ symbol_iterator Symbol = RI->getSymbol();
+ StringRef TargetName;
+ Symbol->getName(TargetName);
+ SymbolTableMap::const_iterator SI = Symbols.find(TargetName.data());
+ if (SI != Symbols.end()) {
+ Value.SectionID = SI->second.first;
+ Value.Offset = SI->second.second + RE.Addend;
+ } else {
+ SI = GlobalSymbolTable.find(TargetName.data());
+ if (SI != GlobalSymbolTable.end()) {
+ Value.SectionID = SI->second.first;
+ Value.Offset = SI->second.second + RE.Addend;
+ } else {
+ Value.SymbolName = TargetName.data();
+ Value.Offset = RE.Addend;
+ }
}
+ } else {
+ SectionRef Sec = Obj.getRelocationSection(RelInfo);
+ bool IsCode = Sec.isText();
+ Value.SectionID = findOrEmitSection(ObjImg, Sec, IsCode, ObjSectionToID);
+ uint64_t Addr = Sec.getAddress();
+ Value.Offset = RE.Addend - Addr;
}
-public:
- MachOObjectImage(ObjectBuffer *Input, bool is64)
- : ObjectImageCommon(Input),
- OldSectionAddrList(ObjFile->section_end()->getRawDataRefImpl().d.a, 0),
- is64(is64), Registered(false) {
- initOldAddress();
- }
+ return Value;
+}
- MachOObjectImage(std::unique_ptr<object::ObjectFile> Input, bool is64)
- : ObjectImageCommon(std::move(Input)),
- OldSectionAddrList(ObjFile->section_end()->getRawDataRefImpl().d.a, 0),
- is64(is64), Registered(false) {
- initOldAddress();
+void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value,
+ ObjectImage &ObjImg,
+ const relocation_iterator &RI,
+ unsigned OffsetToNextPC) {
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile &>(*ObjImg.getObjectFile());
+ MachO::any_relocation_info RelInfo =
+ Obj.getRelocation(RI->getRawDataRefImpl());
+
+ bool IsPCRel = Obj.getAnyRelocationPCRel(RelInfo);
+ if (IsPCRel) {
+ uint64_t RelocAddr = 0;
+ RI->getAddress(RelocAddr);
+ Value.Offset += RelocAddr + OffsetToNextPC;
}
+}
+
+void RuntimeDyldMachO::dumpRelocationToResolve(const RelocationEntry &RE,
+ uint64_t Value) const {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *LocalAddress = Section.Address + RE.Offset;
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+
+ dbgs() << "resolveRelocation Section: " << RE.SectionID
+ << " LocalAddress: " << format("%p", LocalAddress)
+ << " FinalAddress: " << format("0x%016" PRIx64, FinalAddress)
+ << " Value: " << format("0x%016" PRIx64, Value) << " Addend: " << RE.Addend
+ << " isPCRel: " << RE.IsPCRel << " MachoType: " << RE.RelType
+ << " Size: " << (1 << RE.Size) << "\n";
+}
- virtual ~MachOObjectImage() {
- if (Registered)
- deregisterWithDebugger();
+section_iterator
+RuntimeDyldMachO::getSectionByAddress(const MachOObjectFile &Obj,
+ uint64_t Addr) {
+ section_iterator SI = Obj.section_begin();
+ section_iterator SE = Obj.section_end();
+
+ for (; SI != SE; ++SI) {
+ uint64_t SAddr = SI->getAddress();
+ uint64_t SSize = SI->getSize();
+ if ((Addr >= SAddr) && (Addr < SAddr + SSize))
+ return SI;
}
- // Subclasses can override these methods to update the image with loaded
- // addresses for sections and common symbols
- virtual void updateSectionAddress(const SectionRef &Sec, uint64_t Addr) {
- MachOObjectFile *objf = static_cast<MachOObjectFile *>(ObjFile.get());
- char *data =
- const_cast<char *>(objf->getSectionPointer(Sec.getRawDataRefImpl()));
+ return SE;
+}
- uint64_t oldAddr = OldSectionAddrList[Sec.getRawDataRefImpl().d.a];
- if (is64) {
- ((MachO::section_64 *)data)->addr = Addr;
- } else {
- ((MachO::section *)data)->addr = Addr;
- }
+// Populate __pointers section.
+void RuntimeDyldMachO::populateIndirectSymbolPointersSection(
+ MachOObjectFile &Obj,
+ const SectionRef &PTSection,
+ unsigned PTSectionID) {
+ assert(!Obj.is64Bit() &&
+ "Pointer table section not supported in 64-bit MachO.");
- for (symbol_iterator i = objf->symbol_begin(), e = objf->symbol_end();
- i != e; ++i) {
- section_iterator symSec(objf->section_end());
- (*i).getSection(symSec);
- if (*symSec == Sec) {
- uint64_t symAddr;
- (*i).getAddress(symAddr);
- updateSymbolAddress(*i, symAddr + Addr - oldAddr);
- }
- }
- }
+ MachO::dysymtab_command DySymTabCmd = Obj.getDysymtabLoadCommand();
+ MachO::section Sec32 = Obj.getSection(PTSection.getRawDataRefImpl());
+ uint32_t PTSectionSize = Sec32.size;
+ unsigned FirstIndirectSymbol = Sec32.reserved1;
+ const unsigned PTEntrySize = 4;
+ unsigned NumPTEntries = PTSectionSize / PTEntrySize;
+ unsigned PTEntryOffset = 0;
- uint64_t getOldSectionAddr(const SectionRef &Sec) const {
- return OldSectionAddrList[Sec.getRawDataRefImpl().d.a];
- }
+ assert((PTSectionSize % PTEntrySize) == 0 &&
+ "Pointers section does not contain a whole number of stubs?");
- virtual void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr) {
- char *data = const_cast<char *>(
- reinterpret_cast<const char *>(Sym.getRawDataRefImpl().p));
- if (is64)
- ((MachO::nlist_64 *)data)->n_value = Addr;
- else
- ((MachO::nlist *)data)->n_value = Addr;
- }
+ DEBUG(dbgs() << "Populating pointer table section "
+ << Sections[PTSectionID].Name
+ << ", Section ID " << PTSectionID << ", "
+ << NumPTEntries << " entries, " << PTEntrySize
+ << " bytes each:\n");
- virtual void registerWithDebugger() {
- JITRegistrar::getGDBRegistrar().registerObject(*Buffer);
- Registered = true;
+ for (unsigned i = 0; i < NumPTEntries; ++i) {
+ unsigned SymbolIndex =
+ Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i);
+ symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex);
+ StringRef IndirectSymbolName;
+ SI->getName(IndirectSymbolName);
+ DEBUG(dbgs() << " " << IndirectSymbolName << ": index " << SymbolIndex
+ << ", PT offset: " << PTEntryOffset << "\n");
+ RelocationEntry RE(PTSectionID, PTEntryOffset,
+ MachO::GENERIC_RELOC_VANILLA, 0, false, 2);
+ addRelocationForSymbol(RE, IndirectSymbolName);
+ PTEntryOffset += PTEntrySize;
}
+}
- virtual void deregisterWithDebugger() {
- JITRegistrar::getGDBRegistrar().deregisterObject(*Buffer);
- }
-};
-
-ObjectImage *RuntimeDyldMachO::createObjectImage(ObjectBuffer *Buffer) {
- uint32_t magic = *((const uint32_t *)Buffer->getBufferStart());
- bool is64 = (magic == MachO::MH_MAGIC_64);
- assert((magic == MachO::MH_MAGIC_64 || magic == MachO::MH_MAGIC) &&
- "Unrecognized Macho Magic");
- return new MachOObjectImage(Buffer, is64);
+bool
+RuntimeDyldMachO::isCompatibleFormat(const ObjectBuffer *InputBuffer) const {
+ if (InputBuffer->getBufferSize() < 4)
+ return false;
+ StringRef Magic(InputBuffer->getBufferStart(), 4);
+ if (Magic == "\xFE\xED\xFA\xCE")
+ return true;
+ if (Magic == "\xCE\xFA\xED\xFE")
+ return true;
+ if (Magic == "\xFE\xED\xFA\xCF")
+ return true;
+ if (Magic == "\xCF\xFA\xED\xFE")
+ return true;
+ return false;
}
-ObjectImage *RuntimeDyldMachO::createObjectImageFromFile(
- std::unique_ptr<object::ObjectFile> ObjFile) {
- if (!ObjFile)
- return nullptr;
+bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile *Obj) const {
+ return Obj->isMachO();
+}
- MemoryBuffer *Buffer =
- MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false);
+template <typename Impl>
+void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(ObjectImage &ObjImg,
+ ObjSectionToIDMap &SectionMap) {
+ unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
+ unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
+ unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
+ ObjSectionToIDMap::iterator i, e;
- uint32_t magic = *((const uint32_t *)Buffer->getBufferStart());
- bool is64 = (magic == MachO::MH_MAGIC_64);
- assert((magic == MachO::MH_MAGIC_64 || magic == MachO::MH_MAGIC) &&
- "Unrecognized Macho Magic");
- return new MachOObjectImage(std::move(ObjFile), is64);
+ for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
+ const SectionRef &Section = i->first;
+ StringRef Name;
+ Section.getName(Name);
+ if (Name == "__eh_frame")
+ EHFrameSID = i->second;
+ else if (Name == "__text")
+ TextSID = i->second;
+ else if (Name == "__gcc_except_tab")
+ ExceptTabSID = i->second;
+ else
+ impl().finalizeSection(ObjImg, i->second, Section);
+ }
+ UnregisteredEHFrameSections.push_back(
+ EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID));
}
-static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText,
- intptr_t DeltaForEH) {
+template <typename Impl>
+unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P,
+ int64_t DeltaForText,
+ int64_t DeltaForEH) {
+ typedef typename Impl::TargetPtrT TargetPtrT;
+
DEBUG(dbgs() << "Processing FDE: Delta for text: " << DeltaForText
<< ", Delta for EH: " << DeltaForEH << "\n");
- uint32_t Length = *((uint32_t *)P);
+ uint32_t Length = readBytesUnaligned(P, 4);
P += 4;
unsigned char *Ret = P + Length;
- uint32_t Offset = *((uint32_t *)P);
+ uint32_t Offset = readBytesUnaligned(P, 4);
if (Offset == 0) // is a CIE
return Ret;
P += 4;
- intptr_t FDELocation = *((intptr_t *)P);
- intptr_t NewLocation = FDELocation - DeltaForText;
- *((intptr_t *)P) = NewLocation;
- P += sizeof(intptr_t);
+ TargetPtrT FDELocation = readBytesUnaligned(P, sizeof(TargetPtrT));
+ TargetPtrT NewLocation = FDELocation - DeltaForText;
+ writeBytesUnaligned(NewLocation, P, sizeof(TargetPtrT));
+
+ P += sizeof(TargetPtrT);
// Skip the FDE address range
- P += sizeof(intptr_t);
+ P += sizeof(TargetPtrT);
uint8_t Augmentationsize = *P;
P += 1;
if (Augmentationsize != 0) {
- intptr_t LSDA = *((intptr_t *)P);
- intptr_t NewLSDA = LSDA - DeltaForEH;
- *((intptr_t *)P) = NewLSDA;
+ TargetPtrT LSDA = readBytesUnaligned(P, sizeof(TargetPtrT));
+ TargetPtrT NewLSDA = LSDA - DeltaForEH;
+ writeBytesUnaligned(NewLSDA, P, sizeof(TargetPtrT));
}
return Ret;
}
-static intptr_t computeDelta(SectionEntry *A, SectionEntry *B) {
- intptr_t ObjDistance = A->ObjAddress - B->ObjAddress;
- intptr_t MemDistance = A->LoadAddress - B->LoadAddress;
+static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
+ int64_t ObjDistance = A->ObjAddress - B->ObjAddress;
+ int64_t MemDistance = A->LoadAddress - B->LoadAddress;
return ObjDistance - MemDistance;
}
-void RuntimeDyldMachO::registerEHFrames() {
+template <typename Impl>
+void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
if (!MemMgr)
return;
@@ -195,8 +266,8 @@ void RuntimeDyldMachO::registerEHFrames() {
if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID)
ExceptTab = &Sections[SectionInfo.ExceptTabSID];
- intptr_t DeltaForText = computeDelta(Text, EHFrame);
- intptr_t DeltaForEH = 0;
+ int64_t DeltaForText = computeDelta(Text, EHFrame);
+ int64_t DeltaForEH = 0;
if (ExceptTab)
DeltaForEH = computeDelta(ExceptTab, EHFrame);
@@ -212,615 +283,17 @@ void RuntimeDyldMachO::registerEHFrames() {
UnregisteredEHFrameSections.clear();
}
-void RuntimeDyldMachO::finalizeLoad(ObjectImage &ObjImg,
- ObjSectionToIDMap &SectionMap) {
- unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
- unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
- unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
- ObjSectionToIDMap::iterator i, e;
- for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
- const SectionRef &Section = i->first;
- StringRef Name;
- Section.getName(Name);
- if (Name == "__eh_frame")
- EHFrameSID = i->second;
- else if (Name == "__text")
- TextSID = i->second;
- else if (Name == "__gcc_except_tab")
- ExceptTabSID = i->second;
- else if (Name == "__jump_table")
- populateJumpTable(cast<MachOObjectFile>(*ObjImg.getObjectFile()),
- Section, i->second);
- else if (Name == "__pointers")
- populatePointersSection(cast<MachOObjectFile>(*ObjImg.getObjectFile()),
- Section, i->second);
- }
- UnregisteredEHFrameSections.push_back(
- EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID));
-}
-
-// The target location for the relocation is described by RE.SectionID and
-// RE.Offset. RE.SectionID can be used to find the SectionEntry. Each
-// SectionEntry has three members describing its location.
-// SectionEntry::Address is the address at which the section has been loaded
-// into memory in the current (host) process. SectionEntry::LoadAddress is the
-// address that the section will have in the target process.
-// SectionEntry::ObjAddress is the address of the bits for this section in the
-// original emitted object image (also in the current address space).
-//
-// Relocations will be applied as if the section were loaded at
-// SectionEntry::LoadAddress, but they will be applied at an address based
-// on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer to
-// Target memory contents if they are required for value calculations.
-//
-// The Value parameter here is the load address of the symbol for the
-// relocation to be applied. For relocations which refer to symbols in the
-// current object Value will be the LoadAddress of the section in which
-// the symbol resides (RE.Addend provides additional information about the
-// symbol location). For external symbols, Value will be the address of the
-// symbol in the target address space.
-void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE,
- uint64_t Value) {
- DEBUG (
- const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t* LocalAddress = Section.Address + RE.Offset;
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
-
- dbgs() << "resolveRelocation Section: " << RE.SectionID
- << " LocalAddress: " << format("%p", LocalAddress)
- << " FinalAddress: " << format("%p", FinalAddress)
- << " Value: " << format("%p", Value)
- << " Addend: " << RE.Addend
- << " isPCRel: " << RE.IsPCRel
- << " MachoType: " << RE.RelType
- << " Size: " << (1 << RE.Size) << "\n";
- );
-
- // This just dispatches to the proper target specific routine.
+std::unique_ptr<RuntimeDyldMachO>
+llvm::RuntimeDyldMachO::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) {
switch (Arch) {
default:
- llvm_unreachable("Unsupported CPU type!");
- case Triple::x86_64:
- resolveX86_64Relocation(RE, Value);
- break;
- case Triple::x86:
- resolveI386Relocation(RE, Value);
- break;
- case Triple::arm: // Fall through.
- case Triple::thumb:
- resolveARMRelocation(RE, Value);
- break;
- case Triple::aarch64:
- case Triple::arm64:
- resolveAArch64Relocation(RE, Value);
- break;
- }
-}
-
-bool RuntimeDyldMachO::resolveI386Relocation(const RelocationEntry &RE,
- uint64_t Value) {
- const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t* LocalAddress = Section.Address + RE.Offset;
-
- if (RE.IsPCRel) {
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
- Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation.
- }
-
- switch (RE.RelType) {
- default:
- llvm_unreachable("Invalid relocation type!");
- case MachO::GENERIC_RELOC_VANILLA:
- return applyRelocationValue(LocalAddress, Value + RE.Addend,
- 1 << RE.Size);
- case MachO::GENERIC_RELOC_SECTDIFF:
- case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
- uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress;
- uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress;
- assert((Value == SectionABase || Value == SectionBBase) &&
- "Unexpected SECTDIFF relocation value.");
- Value = SectionABase - SectionBBase + RE.Addend;
- return applyRelocationValue(LocalAddress, Value, 1 << RE.Size);
- }
- case MachO::GENERIC_RELOC_PB_LA_PTR:
- return Error("Relocation type not implemented yet!");
- }
-}
-
-bool RuntimeDyldMachO::resolveX86_64Relocation(const RelocationEntry &RE,
- uint64_t Value) {
- const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t* LocalAddress = Section.Address + RE.Offset;
-
- // If the relocation is PC-relative, the value to be encoded is the
- // pointer difference.
- if (RE.IsPCRel) {
- // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
- // address. Is that expected? Only for branches, perhaps?
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
- Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation.
- }
-
- switch (RE.RelType) {
- default:
- llvm_unreachable("Invalid relocation type!");
- case MachO::X86_64_RELOC_SIGNED_1:
- case MachO::X86_64_RELOC_SIGNED_2:
- case MachO::X86_64_RELOC_SIGNED_4:
- case MachO::X86_64_RELOC_SIGNED:
- case MachO::X86_64_RELOC_UNSIGNED:
- case MachO::X86_64_RELOC_BRANCH:
- return applyRelocationValue(LocalAddress, Value + RE.Addend, 1 << RE.Size);
- case MachO::X86_64_RELOC_GOT_LOAD:
- case MachO::X86_64_RELOC_GOT:
- case MachO::X86_64_RELOC_SUBTRACTOR:
- case MachO::X86_64_RELOC_TLV:
- return Error("Relocation type not implemented yet!");
- }
-}
-
-bool RuntimeDyldMachO::resolveARMRelocation(const RelocationEntry &RE,
- uint64_t Value) {
- const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t* LocalAddress = Section.Address + RE.Offset;
-
- // If the relocation is PC-relative, the value to be encoded is the
- // pointer difference.
- if (RE.IsPCRel) {
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
- Value -= FinalAddress;
- // ARM PCRel relocations have an effective-PC offset of two instructions
- // (four bytes in Thumb mode, 8 bytes in ARM mode).
- // FIXME: For now, assume ARM mode.
- Value -= 8;
- }
-
- switch (RE.RelType) {
- default:
- llvm_unreachable("Invalid relocation type!");
- case MachO::ARM_RELOC_VANILLA:
- return applyRelocationValue(LocalAddress, Value, 1 << RE.Size);
- case MachO::ARM_RELOC_BR24: {
- // Mask the value into the target address. We know instructions are
- // 32-bit aligned, so we can do it all at once.
- uint32_t *p = (uint32_t *)LocalAddress;
- // The low two bits of the value are not encoded.
- Value >>= 2;
- // Mask the value to 24 bits.
- uint64_t FinalValue = Value & 0xffffff;
- // Check for overflow.
- if (Value != FinalValue)
- return Error("ARM BR24 relocation out of range.");
- // FIXME: If the destination is a Thumb function (and the instruction
- // is a non-predicated BL instruction), we need to change it to a BLX
- // instruction instead.
-
- // Insert the value into the instruction.
- *p = (*p & ~0xffffff) | FinalValue;
+ llvm_unreachable("Unsupported target for RuntimeDyldMachO.");
break;
+ case Triple::arm: return make_unique<RuntimeDyldMachOARM>(MM);
+ case Triple::aarch64: return make_unique<RuntimeDyldMachOAArch64>(MM);
+ case Triple::x86: return make_unique<RuntimeDyldMachOI386>(MM);
+ case Triple::x86_64: return make_unique<RuntimeDyldMachOX86_64>(MM);
}
- case MachO::ARM_THUMB_RELOC_BR22:
- case MachO::ARM_THUMB_32BIT_BRANCH:
- case MachO::ARM_RELOC_HALF:
- case MachO::ARM_RELOC_HALF_SECTDIFF:
- case MachO::ARM_RELOC_PAIR:
- case MachO::ARM_RELOC_SECTDIFF:
- case MachO::ARM_RELOC_LOCAL_SECTDIFF:
- case MachO::ARM_RELOC_PB_LA_PTR:
- return Error("Relocation type not implemented yet!");
- }
- return false;
-}
-
-bool RuntimeDyldMachO::resolveAArch64Relocation(const RelocationEntry &RE,
- uint64_t Value) {
- const SectionEntry &Section = Sections[RE.SectionID];
- uint8_t* LocalAddress = Section.Address + RE.Offset;
-
- // If the relocation is PC-relative, the value to be encoded is the
- // pointer difference.
- if (RE.IsPCRel) {
- uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
- Value -= FinalAddress;
- }
-
- switch (RE.RelType) {
- default:
- llvm_unreachable("Invalid relocation type!");
- case MachO::ARM64_RELOC_UNSIGNED:
- return applyRelocationValue(LocalAddress, Value, 1 << RE.Size);
- case MachO::ARM64_RELOC_BRANCH26: {
- // Mask the value into the target address. We know instructions are
- // 32-bit aligned, so we can do it all at once.
- uint32_t *p = (uint32_t *)LocalAddress;
- // The low two bits of the value are not encoded.
- Value >>= 2;
- // Mask the value to 26 bits.
- uint64_t FinalValue = Value & 0x3ffffff;
- // Check for overflow.
- if (FinalValue != Value)
- return Error("ARM64 BRANCH26 relocation out of range.");
- // Insert the value into the instruction.
- *p = (*p & ~0x3ffffff) | FinalValue;
- break;
- }
- case MachO::ARM64_RELOC_SUBTRACTOR:
- case MachO::ARM64_RELOC_PAGE21:
- case MachO::ARM64_RELOC_PAGEOFF12:
- case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
- case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
- case MachO::ARM64_RELOC_POINTER_TO_GOT:
- case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
- case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
- case MachO::ARM64_RELOC_ADDEND:
- return Error("Relocation type not implemented yet!");
- }
- return false;
-}
-
-void RuntimeDyldMachO::populateJumpTable(MachOObjectFile &Obj,
- const SectionRef &JTSection,
- unsigned JTSectionID) {
- assert(!Obj.is64Bit() &&
- "__jump_table section not supported in 64-bit MachO.");
-
- MachO::dysymtab_command DySymTabCmd = Obj.getDysymtabLoadCommand();
- MachO::section Sec32 = Obj.getSection(JTSection.getRawDataRefImpl());
- uint32_t JTSectionSize = Sec32.size;
- unsigned FirstIndirectSymbol = Sec32.reserved1;
- unsigned JTEntrySize = Sec32.reserved2;
- unsigned NumJTEntries = JTSectionSize / JTEntrySize;
- uint8_t* JTSectionAddr = getSectionAddress(JTSectionID);
- unsigned JTEntryOffset = 0;
-
- assert((JTSectionSize % JTEntrySize) == 0 &&
- "Jump-table section does not contain a whole number of stubs?");
-
- for (unsigned i = 0; i < NumJTEntries; ++i) {
- unsigned SymbolIndex =
- Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i);
- symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex);
- StringRef IndirectSymbolName;
- SI->getName(IndirectSymbolName);
- uint8_t* JTEntryAddr = JTSectionAddr + JTEntryOffset;
- createStubFunction(JTEntryAddr);
- RelocationEntry RE(JTSectionID, JTEntryOffset + 1,
- MachO::GENERIC_RELOC_VANILLA, 0, true, 2);
- addRelocationForSymbol(RE, IndirectSymbolName);
- JTEntryOffset += JTEntrySize;
- }
-}
-
-void RuntimeDyldMachO::populatePointersSection(MachOObjectFile &Obj,
- const SectionRef &PTSection,
- unsigned PTSectionID) {
- assert(!Obj.is64Bit() &&
- "__pointers section not supported in 64-bit MachO.");
-
- MachO::dysymtab_command DySymTabCmd = Obj.getDysymtabLoadCommand();
- MachO::section Sec32 = Obj.getSection(PTSection.getRawDataRefImpl());
- uint32_t PTSectionSize = Sec32.size;
- unsigned FirstIndirectSymbol = Sec32.reserved1;
- const unsigned PTEntrySize = 4;
- unsigned NumPTEntries = PTSectionSize / PTEntrySize;
- unsigned PTEntryOffset = 0;
-
- assert((PTSectionSize % PTEntrySize) == 0 &&
- "Pointers section does not contain a whole number of stubs?");
-
- DEBUG(dbgs() << "Populating __pointers, Section ID " << PTSectionID
- << ", " << NumPTEntries << " entries, "
- << PTEntrySize << " bytes each:\n");
-
- for (unsigned i = 0; i < NumPTEntries; ++i) {
- unsigned SymbolIndex =
- Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i);
- symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex);
- StringRef IndirectSymbolName;
- SI->getName(IndirectSymbolName);
- DEBUG(dbgs() << " " << IndirectSymbolName << ": index " << SymbolIndex
- << ", PT offset: " << PTEntryOffset << "\n");
- RelocationEntry RE(PTSectionID, PTEntryOffset,
- MachO::GENERIC_RELOC_VANILLA, 0, false, 2);
- addRelocationForSymbol(RE, IndirectSymbolName);
- PTEntryOffset += PTEntrySize;
- }
-}
-
-
-section_iterator getSectionByAddress(const MachOObjectFile &Obj,
- uint64_t Addr) {
- section_iterator SI = Obj.section_begin();
- section_iterator SE = Obj.section_end();
-
- for (; SI != SE; ++SI) {
- uint64_t SAddr, SSize;
- SI->getAddress(SAddr);
- SI->getSize(SSize);
- if ((Addr >= SAddr) && (Addr < SAddr + SSize))
- return SI;
- }
-
- return SE;
-}
-
-relocation_iterator RuntimeDyldMachO::processSECTDIFFRelocation(
- unsigned SectionID,
- relocation_iterator RelI,
- ObjectImage &Obj,
- ObjSectionToIDMap &ObjSectionToID) {
- const MachOObjectFile *MachO =
- static_cast<const MachOObjectFile*>(Obj.getObjectFile());
- MachO::any_relocation_info RE =
- MachO->getRelocation(RelI->getRawDataRefImpl());
-
- SectionEntry &Section = Sections[SectionID];
- uint32_t RelocType = MachO->getAnyRelocationType(RE);
- bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
- unsigned Size = MachO->getAnyRelocationLength(RE);
- uint64_t Offset;
- RelI->getOffset(Offset);
- uint8_t *LocalAddress = Section.Address + Offset;
- unsigned NumBytes = 1 << Size;
- int64_t Addend = 0;
- memcpy(&Addend, LocalAddress, NumBytes);
-
- ++RelI;
- MachO::any_relocation_info RE2 =
- MachO->getRelocation(RelI->getRawDataRefImpl());
-
- uint32_t AddrA = MachO->getScatteredRelocationValue(RE);
- section_iterator SAI = getSectionByAddress(*MachO, AddrA);
- assert(SAI != MachO->section_end() && "Can't find section for address A");
- uint64_t SectionABase;
- SAI->getAddress(SectionABase);
- uint64_t SectionAOffset = AddrA - SectionABase;
- SectionRef SectionA = *SAI;
- bool IsCode;
- SectionA.isText(IsCode);
- uint32_t SectionAID = findOrEmitSection(Obj, SectionA, IsCode,
- ObjSectionToID);
-
- uint32_t AddrB = MachO->getScatteredRelocationValue(RE2);
- section_iterator SBI = getSectionByAddress(*MachO, AddrB);
- assert(SBI != MachO->section_end() && "Can't find section for address B");
- uint64_t SectionBBase;
- SBI->getAddress(SectionBBase);
- uint64_t SectionBOffset = AddrB - SectionBBase;
- SectionRef SectionB = *SBI;
- uint32_t SectionBID = findOrEmitSection(Obj, SectionB, IsCode,
- ObjSectionToID);
-
- if (Addend != AddrA - AddrB)
- Error("Unexpected SECTDIFF relocation addend.");
-
- DEBUG(dbgs() << "Found SECTDIFF: AddrA: " << AddrA << ", AddrB: " << AddrB
- << ", Addend: " << Addend << ", SectionA ID: "
- << SectionAID << ", SectionAOffset: " << SectionAOffset
- << ", SectionB ID: " << SectionBID << ", SectionBOffset: "
- << SectionBOffset << "\n");
- RelocationEntry R(SectionID, Offset, RelocType, 0,
- SectionAID, SectionAOffset, SectionBID, SectionBOffset,
- IsPCRel, Size);
-
- addRelocationForSection(R, SectionAID);
- addRelocationForSection(R, SectionBID);
-
- return ++RelI;
-}
-
-relocation_iterator RuntimeDyldMachO::processI386ScatteredVANILLA(
- unsigned SectionID,
- relocation_iterator RelI,
- ObjectImage &Obj,
- ObjSectionToIDMap &ObjSectionToID) {
- const MachOObjectFile *MachO =
- static_cast<const MachOObjectFile*>(Obj.getObjectFile());
- MachO::any_relocation_info RE =
- MachO->getRelocation(RelI->getRawDataRefImpl());
-
- SectionEntry &Section = Sections[SectionID];
- uint32_t RelocType = MachO->getAnyRelocationType(RE);
- bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
- unsigned Size = MachO->getAnyRelocationLength(RE);
- uint64_t Offset;
- RelI->getOffset(Offset);
- uint8_t *LocalAddress = Section.Address + Offset;
- unsigned NumBytes = 1 << Size;
- int64_t Addend = 0;
- memcpy(&Addend, LocalAddress, NumBytes);
-
- unsigned SymbolBaseAddr = MachO->getScatteredRelocationValue(RE);
- section_iterator TargetSI = getSectionByAddress(*MachO, SymbolBaseAddr);
- assert(TargetSI != MachO->section_end() && "Can't find section for symbol");
- uint64_t SectionBaseAddr;
- TargetSI->getAddress(SectionBaseAddr);
- SectionRef TargetSection = *TargetSI;
- bool IsCode;
- TargetSection.isText(IsCode);
- uint32_t TargetSectionID = findOrEmitSection(Obj, TargetSection, IsCode,
- ObjSectionToID);
-
- Addend -= SectionBaseAddr;
- RelocationEntry R(SectionID, Offset, RelocType, Addend,
- IsPCRel, Size);
-
- addRelocationForSection(R, TargetSectionID);
-
- return ++RelI;
-}
-
-relocation_iterator RuntimeDyldMachO::processRelocationRef(
- unsigned SectionID, relocation_iterator RelI, ObjectImage &Obj,
- ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols,
- StubMap &Stubs) {
- const ObjectFile *OF = Obj.getObjectFile();
- const MachOObjectImage &MachOObj = *static_cast<MachOObjectImage *>(&Obj);
- const MachOObjectFile *MachO = static_cast<const MachOObjectFile *>(OF);
- MachO::any_relocation_info RE =
- MachO->getRelocation(RelI->getRawDataRefImpl());
-
- uint32_t RelType = MachO->getAnyRelocationType(RE);
-
- // FIXME: Properly handle scattered relocations.
- // Special case the couple of scattered relocations that we know how
- // to handle: SECTDIFF relocations, and scattered VANILLA relocations
- // on I386.
- // For all other scattered relocations, just bail out and hope for the
- // best, since the offsets computed by scattered relocations have often
- // been optimisticaly filled in by the compiler. This will fail
- // horribly where the relocations *do* need to be applied, but that was
- // already the case.
- if (MachO->isRelocationScattered(RE)) {
- if (RelType == MachO::GENERIC_RELOC_SECTDIFF ||
- RelType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)
- return processSECTDIFFRelocation(SectionID, RelI, Obj, ObjSectionToID);
- else if (Arch == Triple::x86 && RelType == MachO::GENERIC_RELOC_VANILLA)
- return processI386ScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID);
- else
- return ++RelI;
- }
-
- RelocationValueRef Value;
- SectionEntry &Section = Sections[SectionID];
-
- bool IsExtern = MachO->getPlainRelocationExternal(RE);
- bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
- unsigned Size = MachO->getAnyRelocationLength(RE);
- uint64_t Offset;
- RelI->getOffset(Offset);
- uint8_t *LocalAddress = Section.Address + Offset;
- unsigned NumBytes = 1 << Size;
- uint64_t Addend = 0;
- memcpy(&Addend, LocalAddress, NumBytes);
-
- if (IsExtern) {
- // Obtain the symbol name which is referenced in the relocation
- symbol_iterator Symbol = RelI->getSymbol();
- StringRef TargetName;
- Symbol->getName(TargetName);
- // First search for the symbol in the local symbol table
- SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data());
- if (lsi != Symbols.end()) {
- Value.SectionID = lsi->second.first;
- Value.Addend = lsi->second.second + Addend;
- } else {
- // Search for the symbol in the global symbol table
- SymbolTableMap::const_iterator gsi =
- GlobalSymbolTable.find(TargetName.data());
- if (gsi != GlobalSymbolTable.end()) {
- Value.SectionID = gsi->second.first;
- Value.Addend = gsi->second.second + Addend;
- } else {
- Value.SymbolName = TargetName.data();
- Value.Addend = Addend;
- }
- }
-
- // Addends for external, PC-rel relocations on i386 point back to the zero
- // offset. Calculate the final offset from the relocation target instead.
- // This allows us to use the same logic for both external and internal
- // relocations in resolveI386RelocationRef.
- if (Arch == Triple::x86 && IsPCRel) {
- uint64_t RelocAddr = 0;
- RelI->getAddress(RelocAddr);
- Value.Addend += RelocAddr + 4;
- }
-
- } else {
- SectionRef Sec = MachO->getRelocationSection(RE);
- bool IsCode = false;
- Sec.isText(IsCode);
- Value.SectionID = findOrEmitSection(Obj, Sec, IsCode, ObjSectionToID);
- uint64_t Addr = MachOObj.getOldSectionAddr(Sec);
- DEBUG(dbgs() << "\nAddr: " << Addr << "\nAddend: " << Addend);
- Value.Addend = Addend - Addr;
- if (IsPCRel)
- Value.Addend += Offset + NumBytes;
- }
-
- if (Arch == Triple::x86_64 && (RelType == MachO::X86_64_RELOC_GOT ||
- RelType == MachO::X86_64_RELOC_GOT_LOAD)) {
- assert(IsPCRel);
- assert(Size == 2);
-
- // FIXME: Teach the generic code above not to prematurely conflate
- // relocation addends and symbol offsets.
- Value.Addend -= Addend;
- StubMap::const_iterator i = Stubs.find(Value);
- uint8_t *Addr;
- if (i != Stubs.end()) {
- Addr = Section.Address + i->second;
- } else {
- Stubs[Value] = Section.StubOffset;
- uint8_t *GOTEntry = Section.Address + Section.StubOffset;
- RelocationEntry GOTRE(SectionID, Section.StubOffset,
- MachO::X86_64_RELOC_UNSIGNED, Value.Addend, false,
- 3);
- if (Value.SymbolName)
- addRelocationForSymbol(GOTRE, Value.SymbolName);
- else
- addRelocationForSection(GOTRE, Value.SectionID);
- Section.StubOffset += 8;
- Addr = GOTEntry;
- }
- RelocationEntry TargetRE(SectionID, Offset,
- MachO::X86_64_RELOC_UNSIGNED, Addend, true,
- 2);
- resolveRelocation(TargetRE, (uint64_t)Addr);
- } else if (Arch == Triple::arm && (RelType & 0xf) == MachO::ARM_RELOC_BR24) {
- // This is an ARM branch relocation, need to use a stub function.
-
- // Look up for existing stub.
- StubMap::const_iterator i = Stubs.find(Value);
- uint8_t *Addr;
- if (i != Stubs.end()) {
- Addr = Section.Address + i->second;
- } else {
- // Create a new stub function.
- Stubs[Value] = Section.StubOffset;
- uint8_t *StubTargetAddr =
- createStubFunction(Section.Address + Section.StubOffset);
- RelocationEntry StubRE(SectionID, StubTargetAddr - Section.Address,
- MachO::GENERIC_RELOC_VANILLA, Value.Addend);
- if (Value.SymbolName)
- addRelocationForSymbol(StubRE, Value.SymbolName);
- else
- addRelocationForSection(StubRE, Value.SectionID);
- Addr = Section.Address + Section.StubOffset;
- Section.StubOffset += getMaxStubSize();
- }
- RelocationEntry TargetRE(Value.SectionID, Offset, RelType, 0, IsPCRel,
- Size);
- resolveRelocation(TargetRE, (uint64_t)Addr);
- } else {
- RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, IsPCRel, Size);
- if (Value.SymbolName)
- addRelocationForSymbol(RE, Value.SymbolName);
- else
- addRelocationForSection(RE, Value.SectionID);
- }
- return ++RelI;
-}
-
-bool
-RuntimeDyldMachO::isCompatibleFormat(const ObjectBuffer *InputBuffer) const {
- if (InputBuffer->getBufferSize() < 4)
- return false;
- StringRef Magic(InputBuffer->getBufferStart(), 4);
- if (Magic == "\xFE\xED\xFA\xCE")
- return true;
- if (Magic == "\xCE\xFA\xED\xFE")
- return true;
- if (Magic == "\xFE\xED\xFA\xCF")
- return true;
- if (Magic == "\xCF\xFA\xED\xFE")
- return true;
- return false;
-}
-
-bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile *Obj) const {
- return Obj->isMachO();
}
} // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 35f0720..7583474 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -11,74 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_RUNTIME_DYLD_MACHO_H
-#define LLVM_RUNTIME_DYLD_MACHO_H
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_RUNTIMEDYLDMACHO_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_RUNTIMEDYLDMACHO_H
#include "ObjectImageCommon.h"
#include "RuntimeDyldImpl.h"
-#include "llvm/ADT/IndexedMap.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/Format.h"
+#define DEBUG_TYPE "dyld"
+
using namespace llvm;
using namespace llvm::object;
namespace llvm {
class RuntimeDyldMachO : public RuntimeDyldImpl {
-private:
-
- /// Write the least significant 'Size' bytes in 'Value' out at the address
- /// pointed to by Addr.
- bool applyRelocationValue(uint8_t *Addr, uint64_t Value, unsigned Size) {
- for (unsigned i = 0; i < Size; ++i) {
- *Addr++ = (uint8_t)Value;
- Value >>= 8;
- }
-
- return false;
- }
-
- bool resolveI386Relocation(const RelocationEntry &RE, uint64_t Value);
- bool resolveX86_64Relocation(const RelocationEntry &RE, uint64_t Value);
- bool resolveARMRelocation(const RelocationEntry &RE, uint64_t Value);
- bool resolveAArch64Relocation(const RelocationEntry &RE, uint64_t Value);
-
- // Populate stubs in __jump_table section.
- void populateJumpTable(MachOObjectFile &Obj, const SectionRef &JTSection,
- unsigned JTSectionID);
-
- // Populate __pointers section.
- void populatePointersSection(MachOObjectFile &Obj, const SectionRef &PTSection,
- unsigned PTSectionID);
-
- unsigned getMaxStubSize() override {
- if (Arch == Triple::arm || Arch == Triple::thumb)
- return 8; // 32-bit instruction and 32-bit address
- else if (Arch == Triple::x86_64)
- return 8; // GOT entry
- else
- return 0;
- }
-
- unsigned getStubAlignment() override { return 1; }
-
- relocation_iterator processSECTDIFFRelocation(
- unsigned SectionID,
- relocation_iterator RelI,
- ObjectImage &ObjImg,
- ObjSectionToIDMap &ObjSectionToID);
-
- relocation_iterator processI386ScatteredVANILLA(
- unsigned SectionID,
- relocation_iterator RelI,
- ObjectImage &ObjImg,
- ObjSectionToIDMap &ObjSectionToID);
+protected:
+ struct SectionOffsetPair {
+ unsigned SectionID;
+ uint64_t Offset;
+ };
struct EHFrameRelatedSections {
EHFrameRelatedSections()
: EHFrameSID(RTDYLD_INVALID_SECTION_ID),
TextSID(RTDYLD_INVALID_SECTION_ID),
ExceptTabSID(RTDYLD_INVALID_SECTION_ID) {}
+
EHFrameRelatedSections(SID EH, SID T, SID Ex)
: EHFrameSID(EH), TextSID(T), ExceptTabSID(Ex) {}
SID EHFrameSID;
@@ -91,25 +50,116 @@ private:
// EH frame sections with the memory manager.
SmallVector<EHFrameRelatedSections, 2> UnregisteredEHFrameSections;
-public:
RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
- void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override;
- relocation_iterator
- processRelocationRef(unsigned SectionID, relocation_iterator RelI,
- ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID,
- const SymbolTableMap &Symbols, StubMap &Stubs) override;
+ /// This convenience method uses memcpy to extract a contiguous addend (the
+ /// addend size and offset are taken from the corresponding fields of the RE).
+ int64_t memcpyAddend(const RelocationEntry &RE) const;
+
+ /// Given a relocation_iterator for a non-scattered relocation, construct a
+ /// RelocationEntry and fill in the common fields. The 'Addend' field is *not*
+ /// filled in, since immediate encodings are highly target/opcode specific.
+ /// For targets/opcodes with simple, contiguous immediates (e.g. X86) the
+ /// memcpyAddend method can be used to read the immediate.
+ RelocationEntry getRelocationEntry(unsigned SectionID, ObjectImage &ObjImg,
+ const relocation_iterator &RI) const {
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile &>(*ObjImg.getObjectFile());
+ MachO::any_relocation_info RelInfo =
+ Obj.getRelocation(RI->getRawDataRefImpl());
+
+ bool IsPCRel = Obj.getAnyRelocationPCRel(RelInfo);
+ unsigned Size = Obj.getAnyRelocationLength(RelInfo);
+ uint64_t Offset;
+ RI->getOffset(Offset);
+ MachO::RelocationInfoType RelType =
+ static_cast<MachO::RelocationInfoType>(Obj.getAnyRelocationType(RelInfo));
+
+ return RelocationEntry(SectionID, Offset, RelType, 0, IsPCRel, Size);
+ }
+
+ /// Construct a RelocationValueRef representing the relocation target.
+ /// For Symbols in known sections, this will return a RelocationValueRef
+ /// representing a (SectionID, Offset) pair.
+ /// For Symbols whose section is not known, this will return a
+ /// (SymbolName, Offset) pair, where the Offset is taken from the instruction
+ /// immediate (held in RE.Addend).
+ /// In both cases the Addend field is *NOT* fixed up to be PC-relative. That
+ /// should be done by the caller where appropriate by calling makePCRel on
+ /// the RelocationValueRef.
+ RelocationValueRef getRelocationValueRef(ObjectImage &ObjImg,
+ const relocation_iterator &RI,
+ const RelocationEntry &RE,
+ ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols);
+
+ /// Make the RelocationValueRef addend PC-relative.
+ void makeValueAddendPCRel(RelocationValueRef &Value, ObjectImage &ObjImg,
+ const relocation_iterator &RI,
+ unsigned OffsetToNextPC);
+
+ /// Dump information about the relocation entry (RE) and resolved value.
+ void dumpRelocationToResolve(const RelocationEntry &RE, uint64_t Value) const;
+
+ // Return a section iterator for the section containing the given address.
+ static section_iterator getSectionByAddress(const MachOObjectFile &Obj,
+ uint64_t Addr);
+
+ // Populate __pointers section.
+ void populateIndirectSymbolPointersSection(MachOObjectFile &Obj,
+ const SectionRef &PTSection,
+ unsigned PTSectionID);
+
+public:
+ /// Create an ObjectImage from the given ObjectBuffer.
+ static std::unique_ptr<ObjectImage>
+ createObjectImage(std::unique_ptr<ObjectBuffer> InputBuffer) {
+ return llvm::make_unique<ObjectImageCommon>(std::move(InputBuffer));
+ }
+
+ /// Create an ObjectImage from the given ObjectFile.
+ static ObjectImage *
+ createObjectImageFromFile(std::unique_ptr<object::ObjectFile> InputObject) {
+ return new ObjectImageCommon(std::move(InputObject));
+ }
+
+ /// Create a RuntimeDyldMachO instance for the given target architecture.
+ static std::unique_ptr<RuntimeDyldMachO> create(Triple::ArchType Arch,
+ RTDyldMemoryManager *mm);
+
+ SectionEntry &getSection(unsigned SectionID) { return Sections[SectionID]; }
+
bool isCompatibleFormat(const ObjectBuffer *Buffer) const override;
bool isCompatibleFile(const object::ObjectFile *Obj) const override;
- void registerEHFrames() override;
+};
+
+/// RuntimeDyldMachOCRTPBase - Templated base class for generic MachO linker
+/// algorithms and data structures.
+///
+/// Concrete, target-specific subclasses are accessed via the impl() methods
+/// (i.e. the RuntimeDyldMachO hierarchy uses the Curiously Recurring Template
+/// Idiom). Concrete subclasses for each target can be found in ./Targets.
+template <typename Impl>
+class RuntimeDyldMachOCRTPBase : public RuntimeDyldMachO {
+private:
+ Impl &impl() { return static_cast<Impl &>(*this); }
+ const Impl &impl() const { return static_cast<const Impl &>(*this); }
+
+ unsigned char *processFDE(unsigned char *P, int64_t DeltaForText,
+ int64_t DeltaForEH);
+
+public:
+ RuntimeDyldMachOCRTPBase(RTDyldMemoryManager *mm) : RuntimeDyldMachO(mm) {}
+
void finalizeLoad(ObjectImage &ObjImg,
ObjSectionToIDMap &SectionMap) override;
-
- static ObjectImage *createObjectImage(ObjectBuffer *Buffer);
- static ObjectImage *
- createObjectImageFromFile(std::unique_ptr<object::ObjectFile> InputObject);
+ void registerEHFrames() override;
};
} // end namespace llvm
+#undef DEBUG_TYPE
+
#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
new file mode 100644
index 0000000..f5cf9ac
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -0,0 +1,405 @@
+//===-- RuntimeDyldMachOAArch64.h -- MachO/AArch64 specific code. -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOAARCH64_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOAARCH64_H
+
+#include "../RuntimeDyldMachO.h"
+#include "llvm/Support/Endian.h"
+
+#define DEBUG_TYPE "dyld"
+
+namespace llvm {
+
+class RuntimeDyldMachOAArch64
+ : public RuntimeDyldMachOCRTPBase<RuntimeDyldMachOAArch64> {
+public:
+
+ typedef uint64_t TargetPtrT;
+
+ RuntimeDyldMachOAArch64(RTDyldMemoryManager *MM)
+ : RuntimeDyldMachOCRTPBase(MM) {}
+
+ unsigned getMaxStubSize() override { return 8; }
+
+ unsigned getStubAlignment() override { return 8; }
+
+ /// Extract the addend encoded in the instruction / memory location.
+ int64_t decodeAddend(const RelocationEntry &RE) const {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *LocalAddress = Section.Address + RE.Offset;
+ unsigned NumBytes = 1 << RE.Size;
+ int64_t Addend = 0;
+ // Verify that the relocation has the correct size and alignment.
+ switch (RE.RelType) {
+ default:
+ llvm_unreachable("Unsupported relocation type!");
+ case MachO::ARM64_RELOC_UNSIGNED:
+ assert((NumBytes == 4 || NumBytes == 8) && "Invalid relocation size.");
+ break;
+ case MachO::ARM64_RELOC_BRANCH26:
+ case MachO::ARM64_RELOC_PAGE21:
+ case MachO::ARM64_RELOC_PAGEOFF12:
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
+ assert(NumBytes == 4 && "Invalid relocation size.");
+ assert((((uintptr_t)LocalAddress & 0x3) == 0) &&
+ "Instruction address is not aligned to 4 bytes.");
+ break;
+ }
+
+ switch (RE.RelType) {
+ default:
+ llvm_unreachable("Unsupported relocation type!");
+ case MachO::ARM64_RELOC_UNSIGNED:
+ // This could be an unaligned memory location.
+ if (NumBytes == 4)
+ Addend = *reinterpret_cast<support::ulittle32_t *>(LocalAddress);
+ else
+ Addend = *reinterpret_cast<support::ulittle64_t *>(LocalAddress);
+ break;
+ case MachO::ARM64_RELOC_BRANCH26: {
+ // Verify that the relocation points to the expected branch instruction.
+ auto *p = reinterpret_cast<support::aligned_ulittle32_t *>(LocalAddress);
+ assert((*p & 0xFC000000) == 0x14000000 && "Expected branch instruction.");
+
+      // Get the 26-bit addend encoded in the branch instruction and
+      // sign-extend it to 64 bits. The lower 2 bits are always zero and are
+      // therefore implicit (<< 2).
+ Addend = (*p & 0x03FFFFFF) << 2;
+ Addend = SignExtend64(Addend, 28);
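+      // For example, an instruction word of 0x17FFFFFD carries
+      // imm26 = 0x3FFFFFD; shifted left by 2 and sign-extended from 28 bits
+      // this gives an addend of -12 bytes (three instructions backwards).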
+ break;
+ }
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
+ case MachO::ARM64_RELOC_PAGE21: {
+ // Verify that the relocation points to the expected adrp instruction.
+ auto *p = reinterpret_cast<support::aligned_ulittle32_t *>(LocalAddress);
+ assert((*p & 0x9F000000) == 0x90000000 && "Expected adrp instruction.");
+
+      // Get the 21-bit addend encoded in the adrp instruction and sign-extend
+      // it to 64 bits. The lower 12 bits (4096-byte page) are always zero and
+      // are therefore implicit (<< 12).
+ Addend = ((*p & 0x60000000) >> 29) | ((*p & 0x01FFFFE0) >> 3) << 12;
+ Addend = SignExtend64(Addend, 33);
+ break;
+ }
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12: {
+ // Verify that the relocation points to one of the expected load / store
+ // instructions.
+ auto *p = reinterpret_cast<support::aligned_ulittle32_t *>(LocalAddress);
+ (void)p;
+ assert((*p & 0x3B000000) == 0x39000000 &&
+ "Only expected load / store instructions.");
+ } // fall-through
+ case MachO::ARM64_RELOC_PAGEOFF12: {
+ // Verify that the relocation points to one of the expected load / store
+ // or add / sub instructions.
+ auto *p = reinterpret_cast<support::aligned_ulittle32_t *>(LocalAddress);
+ assert((((*p & 0x3B000000) == 0x39000000) ||
+ ((*p & 0x11C00000) == 0x11000000) ) &&
+ "Expected load / store or add/sub instruction.");
+
+ // Get the 12 bit addend encoded in the instruction.
+ Addend = (*p & 0x003FFC00) >> 10;
+
+ // Check which instruction we are decoding to obtain the implicit shift
+ // factor of the instruction.
+ int ImplicitShift = 0;
+ if ((*p & 0x3B000000) == 0x39000000) { // << load / store
+ // For load / store instructions the size is encoded in bits 31:30.
+ ImplicitShift = ((*p >> 30) & 0x3);
+ if (ImplicitShift == 0) {
+          // Check if this is a vector op to get the correct shift value.
+ if ((*p & 0x04800000) == 0x04800000)
+ ImplicitShift = 4;
+ }
+ }
+ // Compensate for implicit shift.
+ Addend <<= ImplicitShift;
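+      // For example, a 64-bit LDR (size bits 0b11, ImplicitShift = 3) encodes
+      // a byte offset of 0x28 as imm12 = 0x5; shifting left by 3 restores 0x28.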
+ break;
+ }
+ }
+ return Addend;
+ }
+
+  /// Encode the addend into the instruction / memory location.
+ void encodeAddend(uint8_t *LocalAddress, unsigned NumBytes,
+ MachO::RelocationInfoType RelType, int64_t Addend) const {
+ // Verify that the relocation has the correct alignment.
+ switch (RelType) {
+ default:
+ llvm_unreachable("Unsupported relocation type!");
+ case MachO::ARM64_RELOC_UNSIGNED:
+ assert((NumBytes == 4 || NumBytes == 8) && "Invalid relocation size.");
+ break;
+ case MachO::ARM64_RELOC_BRANCH26:
+ case MachO::ARM64_RELOC_PAGE21:
+ case MachO::ARM64_RELOC_PAGEOFF12:
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
+ assert(NumBytes == 4 && "Invalid relocation size.");
+ assert((((uintptr_t)LocalAddress & 0x3) == 0) &&
+ "Instruction address is not aligned to 4 bytes.");
+ break;
+ }
+
+ switch (RelType) {
+ default:
+ llvm_unreachable("Unsupported relocation type!");
+ case MachO::ARM64_RELOC_UNSIGNED:
+ // This could be an unaligned memory location.
+ if (NumBytes == 4)
+ *reinterpret_cast<support::ulittle32_t *>(LocalAddress) = Addend;
+ else
+ *reinterpret_cast<support::ulittle64_t *>(LocalAddress) = Addend;
+ break;
+ case MachO::ARM64_RELOC_BRANCH26: {
+ auto *p = reinterpret_cast<support::aligned_ulittle32_t *>(LocalAddress);
+ // Verify that the relocation points to the expected branch instruction.
+ assert((*p & 0xFC000000) == 0x14000000 && "Expected branch instruction.");
+
+ // Verify addend value.
+ assert((Addend & 0x3) == 0 && "Branch target is not aligned");
+ assert(isInt<28>(Addend) && "Branch target is out of range.");
+
+ // Encode the addend as 26 bit immediate in the branch instruction.
+ *p = (*p & 0xFC000000) | ((uint32_t)(Addend >> 2) & 0x03FFFFFF);
+ break;
+ }
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
+ case MachO::ARM64_RELOC_PAGE21: {
+ // Verify that the relocation points to the expected adrp instruction.
+ auto *p = reinterpret_cast<support::aligned_ulittle32_t *>(LocalAddress);
+ assert((*p & 0x9F000000) == 0x90000000 && "Expected adrp instruction.");
+
+ // Check that the addend fits into 21 bits (+ 12 lower bits).
+ assert((Addend & 0xFFF) == 0 && "ADRP target is not page aligned.");
+ assert(isInt<33>(Addend) && "Invalid page reloc value.");
+
+ // Encode the addend into the instruction.
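+      // immlo (addend bits 13:12) goes into instruction bits 30:29; immhi
+      // (addend bits 32:14) goes into instruction bits 23:5 of the adrp.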
+ uint32_t ImmLoValue = (uint32_t)(Addend << 17) & 0x60000000;
+ uint32_t ImmHiValue = (uint32_t)(Addend >> 9) & 0x00FFFFE0;
+ *p = (*p & 0x9F00001F) | ImmHiValue | ImmLoValue;
+ break;
+ }
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12: {
+ // Verify that the relocation points to one of the expected load / store
+ // instructions.
+ auto *p = reinterpret_cast<support::aligned_ulittle32_t *>(LocalAddress);
+ assert((*p & 0x3B000000) == 0x39000000 &&
+ "Only expected load / store instructions.");
+ (void)p;
+ } // fall-through
+ case MachO::ARM64_RELOC_PAGEOFF12: {
+ // Verify that the relocation points to one of the expected load / store
+ // or add / sub instructions.
+ auto *p = reinterpret_cast<support::aligned_ulittle32_t *>(LocalAddress);
+ assert((((*p & 0x3B000000) == 0x39000000) ||
+ ((*p & 0x11C00000) == 0x11000000) ) &&
+ "Expected load / store or add/sub instruction.");
+
+ // Check which instruction we are decoding to obtain the implicit shift
+ // factor of the instruction and verify alignment.
+ int ImplicitShift = 0;
+ if ((*p & 0x3B000000) == 0x39000000) { // << load / store
+ // For load / store instructions the size is encoded in bits 31:30.
+ ImplicitShift = ((*p >> 30) & 0x3);
+ switch (ImplicitShift) {
+ case 0:
+          // Check if this is a vector op to get the correct shift value.
+ if ((*p & 0x04800000) == 0x04800000) {
+ ImplicitShift = 4;
+ assert(((Addend & 0xF) == 0) &&
+ "128-bit LDR/STR not 16-byte aligned.");
+ }
+ break;
+ case 1:
+ assert(((Addend & 0x1) == 0) && "16-bit LDR/STR not 2-byte aligned.");
+ break;
+ case 2:
+ assert(((Addend & 0x3) == 0) && "32-bit LDR/STR not 4-byte aligned.");
+ break;
+ case 3:
+ assert(((Addend & 0x7) == 0) && "64-bit LDR/STR not 8-byte aligned.");
+ break;
+ }
+ }
+ // Compensate for implicit shift.
+ Addend >>= ImplicitShift;
+ assert(isUInt<12>(Addend) && "Addend cannot be encoded.");
+
+ // Encode the addend into the instruction.
+ *p = (*p & 0xFFC003FF) | ((uint32_t)(Addend << 10) & 0x003FFC00);
+ break;
+ }
+ }
+ }
+
+ relocation_iterator
+ processRelocationRef(unsigned SectionID, relocation_iterator RelI,
+ ObjectImage &ObjImg, ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols, StubMap &Stubs) override {
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile &>(*ObjImg.getObjectFile());
+ MachO::any_relocation_info RelInfo =
+ Obj.getRelocation(RelI->getRawDataRefImpl());
+
+    assert(!Obj.isRelocationScattered(RelInfo) &&
+           "Scattered relocations not supported on AArch64.");
+
+ // ARM64 has an ARM64_RELOC_ADDEND relocation type that carries an explicit
+ // addend for the following relocation. If found: (1) store the associated
+ // addend, (2) consume the next relocation, and (3) use the stored addend to
+ // override the addend.
+ int64_t ExplicitAddend = 0;
+ if (Obj.getAnyRelocationType(RelInfo) == MachO::ARM64_RELOC_ADDEND) {
+ assert(!Obj.getPlainRelocationExternal(RelInfo));
+ assert(!Obj.getAnyRelocationPCRel(RelInfo));
+ assert(Obj.getAnyRelocationLength(RelInfo) == 2);
+ int64_t RawAddend = Obj.getPlainRelocationSymbolNum(RelInfo);
+      // Sign-extend the 24-bit addend to 64 bits.
+ ExplicitAddend = SignExtend64(RawAddend, 24);
+ ++RelI;
+ RelInfo = Obj.getRelocation(RelI->getRawDataRefImpl());
+ }
+
+ RelocationEntry RE(getRelocationEntry(SectionID, ObjImg, RelI));
+ RE.Addend = decodeAddend(RE);
+ RelocationValueRef Value(
+ getRelocationValueRef(ObjImg, RelI, RE, ObjSectionToID, Symbols));
+
+    assert((ExplicitAddend == 0 || RE.Addend == 0) &&
+           "Relocation has ARM64_RELOC_ADDEND and embedded addend in the "
+           "instruction.");
+ if (ExplicitAddend) {
+ RE.Addend = ExplicitAddend;
+ Value.Offset = ExplicitAddend;
+ }
+
+ bool IsExtern = Obj.getPlainRelocationExternal(RelInfo);
+ if (!IsExtern && RE.IsPCRel)
+ makeValueAddendPCRel(Value, ObjImg, RelI, 1 << RE.Size);
+
+ RE.Addend = Value.Offset;
+
+ if (RE.RelType == MachO::ARM64_RELOC_GOT_LOAD_PAGE21 ||
+ RE.RelType == MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12)
+ processGOTRelocation(RE, Value, Stubs);
+ else {
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
+
+ return ++RelI;
+ }
+
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ DEBUG(dumpRelocationToResolve(RE, Value));
+
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *LocalAddress = Section.Address + RE.Offset;
+ MachO::RelocationInfoType RelType =
+ static_cast<MachO::RelocationInfoType>(RE.RelType);
+
+ switch (RelType) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case MachO::ARM64_RELOC_UNSIGNED: {
+ assert(!RE.IsPCRel && "PCRel and ARM64_RELOC_UNSIGNED not supported");
+      // Write the target value. encodeAddend performs an unaligned
+      // little-endian store, so no alignment guarantee is needed here.
+ if (RE.Size < 2)
+ llvm_unreachable("Invalid size for ARM64_RELOC_UNSIGNED");
+
+ encodeAddend(LocalAddress, 1 << RE.Size, RelType, Value + RE.Addend);
+ break;
+ }
+ case MachO::ARM64_RELOC_BRANCH26: {
+ assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_BRANCH26 not supported");
+ // Check if branch is in range.
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ int64_t PCRelVal = Value - FinalAddress + RE.Addend;
+ encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal);
+ break;
+ }
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
+ case MachO::ARM64_RELOC_PAGE21: {
+ assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_PAGE21 not supported");
+ // Adjust for PC-relative relocation and offset.
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ int64_t PCRelVal =
+ ((Value + RE.Addend) & (-4096)) - (FinalAddress & (-4096));
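+      // PCRelVal is the signed distance between the target's 4 KiB page and
+      // the page containing the adrp instruction.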
+ encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal);
+ break;
+ }
+ case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
+ case MachO::ARM64_RELOC_PAGEOFF12: {
+ assert(!RE.IsPCRel && "PCRel and ARM64_RELOC_PAGEOFF21 not supported");
+ // Add the offset from the symbol.
+ Value += RE.Addend;
+ // Mask out the page address and only use the lower 12 bits.
+ Value &= 0xFFF;
+ encodeAddend(LocalAddress, /*Size=*/4, RelType, Value);
+ break;
+ }
+ case MachO::ARM64_RELOC_SUBTRACTOR:
+ case MachO::ARM64_RELOC_POINTER_TO_GOT:
+ case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
+ case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
+ llvm_unreachable("Relocation type not yet implemented!");
+ case MachO::ARM64_RELOC_ADDEND:
+ llvm_unreachable("ARM64_RELOC_ADDEND should have been handeled by "
+ "processRelocationRef!");
+ }
+ }
+
+ void finalizeSection(ObjectImage &ObjImg, unsigned SectionID,
+ const SectionRef &Section) {}
+
+private:
+ void processGOTRelocation(const RelocationEntry &RE,
+ RelocationValueRef &Value, StubMap &Stubs) {
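+    // Emit (or reuse) a pointer-sized GOT-equivalent entry in the stub area:
+    // the entry receives an ARM64_RELOC_UNSIGNED relocation against the
+    // target, and the original GOT_LOAD site is redirected to that entry.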
+ assert(RE.Size == 2);
+ SectionEntry &Section = Sections[RE.SectionID];
+ StubMap::const_iterator i = Stubs.find(Value);
+ int64_t Offset;
+ if (i != Stubs.end())
+ Offset = static_cast<int64_t>(i->second);
+ else {
+      // FIXME: There must be a better way to do this than checking and fixing
+      // the alignment every time.
+ uintptr_t BaseAddress = uintptr_t(Section.Address);
+ uintptr_t StubAlignment = getStubAlignment();
+ uintptr_t StubAddress =
+ (BaseAddress + Section.StubOffset + StubAlignment - 1) &
+ -StubAlignment;
+ unsigned StubOffset = StubAddress - BaseAddress;
+ Stubs[Value] = StubOffset;
+ assert(((StubAddress % getStubAlignment()) == 0) &&
+ "GOT entry not aligned");
+ RelocationEntry GOTRE(RE.SectionID, StubOffset,
+ MachO::ARM64_RELOC_UNSIGNED, Value.Offset,
+ /*IsPCRel=*/false, /*Size=*/3);
+ if (Value.SymbolName)
+ addRelocationForSymbol(GOTRE, Value.SymbolName);
+ else
+ addRelocationForSection(GOTRE, Value.SectionID);
+ Section.StubOffset = StubOffset + getMaxStubSize();
+ Offset = static_cast<int64_t>(StubOffset);
+ }
+ RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, Offset,
+ RE.IsPCRel, RE.Size);
+ addRelocationForSection(TargetRE, RE.SectionID);
+ }
+};
+}
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
new file mode 100644
index 0000000..9766751
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -0,0 +1,277 @@
+//===----- RuntimeDyldMachOARM.h ---- MachO/ARM specific code. ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOARM_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOARM_H
+
+#include "../RuntimeDyldMachO.h"
+
+#define DEBUG_TYPE "dyld"
+
+namespace llvm {
+
+class RuntimeDyldMachOARM
+ : public RuntimeDyldMachOCRTPBase<RuntimeDyldMachOARM> {
+private:
+ typedef RuntimeDyldMachOCRTPBase<RuntimeDyldMachOARM> ParentT;
+
+public:
+
+ typedef uint32_t TargetPtrT;
+
+ RuntimeDyldMachOARM(RTDyldMemoryManager *MM) : RuntimeDyldMachOCRTPBase(MM) {}
+
+ unsigned getMaxStubSize() override { return 8; }
+
+ unsigned getStubAlignment() override { return 4; }
+
+ int64_t decodeAddend(const RelocationEntry &RE) const {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *LocalAddress = Section.Address + RE.Offset;
+
+ switch (RE.RelType) {
+ default:
+ return memcpyAddend(RE);
+ case MachO::ARM_RELOC_BR24: {
+ uint32_t Temp = readBytesUnaligned(LocalAddress, 4);
+ Temp &= 0x00ffffff; // Mask out the opcode.
+      // Now we have the encoded (right-shifted) immediate: shift it back left
+      // by 2, sign-extend from 26 bits, and return.
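+      // For example, 0xEBFFFFFE ("bl ." in ARM mode) has imm24 = 0xFFFFFE,
+      // which becomes -8 after the shift and sign-extension, compensating for
+      // the ARM PC + 8 bias so the branch targets itself.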
+ return SignExtend32<26>(Temp << 2);
+ }
+ }
+ }
+
+ relocation_iterator
+ processRelocationRef(unsigned SectionID, relocation_iterator RelI,
+ ObjectImage &ObjImg, ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols, StubMap &Stubs) override {
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile &>(*ObjImg.getObjectFile());
+ MachO::any_relocation_info RelInfo =
+ Obj.getRelocation(RelI->getRawDataRefImpl());
+ uint32_t RelType = Obj.getAnyRelocationType(RelInfo);
+
+ if (Obj.isRelocationScattered(RelInfo)) {
+ if (RelType == MachO::ARM_RELOC_HALF_SECTDIFF)
+ return processHALFSECTDIFFRelocation(SectionID, RelI, ObjImg,
+ ObjSectionToID);
+ else
+ return ++++RelI;
+ }
+
+ RelocationEntry RE(getRelocationEntry(SectionID, ObjImg, RelI));
+ RE.Addend = decodeAddend(RE);
+ RelocationValueRef Value(
+ getRelocationValueRef(ObjImg, RelI, RE, ObjSectionToID, Symbols));
+
+ if (RE.IsPCRel)
+ makeValueAddendPCRel(Value, ObjImg, RelI, 8);
+
+ if ((RE.RelType & 0xf) == MachO::ARM_RELOC_BR24)
+ processBranchRelocation(RE, Value, Stubs);
+ else {
+ RE.Addend = Value.Offset;
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
+
+ return ++RelI;
+ }
+
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ DEBUG(dumpRelocationToResolve(RE, Value));
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *LocalAddress = Section.Address + RE.Offset;
+
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (RE.IsPCRel) {
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ Value -= FinalAddress;
+ // ARM PCRel relocations have an effective-PC offset of two instructions
+      // (4 bytes in Thumb mode, 8 bytes in ARM mode).
+ // FIXME: For now, assume ARM mode.
+ Value -= 8;
+ }
+
+ switch (RE.RelType) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case MachO::ARM_RELOC_VANILLA:
+ writeBytesUnaligned(Value + RE.Addend, LocalAddress, 1 << RE.Size);
+ break;
+ case MachO::ARM_RELOC_BR24: {
+ // Mask the value into the target address. We know instructions are
+ // 32-bit aligned, so we can do it all at once.
+ Value += RE.Addend;
+ // The low two bits of the value are not encoded.
+ Value >>= 2;
+ // Mask the value to 24 bits.
+ uint64_t FinalValue = Value & 0xffffff;
+ // FIXME: If the destination is a Thumb function (and the instruction
+ // is a non-predicated BL instruction), we need to change it to a BLX
+ // instruction instead.
+
+ // Insert the value into the instruction.
+ uint32_t Temp = readBytesUnaligned(LocalAddress, 4);
+ writeBytesUnaligned((Temp & ~0xffffff) | FinalValue, LocalAddress, 4);
+
+ break;
+ }
+ case MachO::ARM_RELOC_HALF_SECTDIFF: {
+ uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress;
+ uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress;
+ assert((Value == SectionABase || Value == SectionBBase) &&
+ "Unexpected HALFSECTDIFF relocation value.");
+ Value = SectionABase - SectionBBase + RE.Addend;
+ if (RE.Size & 0x1) // :upper16:
+ Value = (Value >> 16);
+ Value &= 0xffff;
+
+ uint32_t Insn = readBytesUnaligned(LocalAddress, 4);
+ Insn = (Insn & 0xfff0f000) | ((Value & 0xf000) << 4) | (Value & 0x0fff);
+ writeBytesUnaligned(Insn, LocalAddress, 4);
+ break;
+ }
+
+ case MachO::ARM_THUMB_RELOC_BR22:
+ case MachO::ARM_THUMB_32BIT_BRANCH:
+ case MachO::ARM_RELOC_HALF:
+ case MachO::ARM_RELOC_PAIR:
+ case MachO::ARM_RELOC_SECTDIFF:
+ case MachO::ARM_RELOC_LOCAL_SECTDIFF:
+ case MachO::ARM_RELOC_PB_LA_PTR:
+ Error("Relocation type not implemented yet!");
+ return;
+ }
+ }
+
+ void finalizeSection(ObjectImage &ObjImg, unsigned SectionID,
+ const SectionRef &Section) {
+ StringRef Name;
+ Section.getName(Name);
+
+ if (Name == "__nl_symbol_ptr")
+ populateIndirectSymbolPointersSection(
+ cast<MachOObjectFile>(*ObjImg.getObjectFile()),
+ Section, SectionID);
+ }
+
+private:
+
+ void processBranchRelocation(const RelocationEntry &RE,
+ const RelocationValueRef &Value,
+ StubMap &Stubs) {
+    // This is an ARM branch relocation; it needs to be routed through a stub
+    // function. Look for an existing stub first.
+ SectionEntry &Section = Sections[RE.SectionID];
+ RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value);
+ uint8_t *Addr;
+ if (i != Stubs.end()) {
+ Addr = Section.Address + i->second;
+ } else {
+ // Create a new stub function.
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr =
+ createStubFunction(Section.Address + Section.StubOffset);
+ RelocationEntry StubRE(RE.SectionID, StubTargetAddr - Section.Address,
+ MachO::GENERIC_RELOC_VANILLA, Value.Offset, false,
+ 2);
+ if (Value.SymbolName)
+ addRelocationForSymbol(StubRE, Value.SymbolName);
+ else
+ addRelocationForSection(StubRE, Value.SectionID);
+ Addr = Section.Address + Section.StubOffset;
+ Section.StubOffset += getMaxStubSize();
+ }
+ RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, 0,
+ RE.IsPCRel, RE.Size);
+ resolveRelocation(TargetRE, (uint64_t)Addr);
+ }
+
+ relocation_iterator
+ processHALFSECTDIFFRelocation(unsigned SectionID, relocation_iterator RelI,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID) {
+ const MachOObjectFile *MachO =
+ static_cast<const MachOObjectFile *>(Obj.getObjectFile());
+ MachO::any_relocation_info RE =
+ MachO->getRelocation(RelI->getRawDataRefImpl());
+
+ // For a half-diff relocation the length bits actually record whether this
+ // is a movw/movt, and whether this is arm or thumb.
+ // Bit 0 indicates movw (b0 == 0) or movt (b0 == 1).
+ // Bit 1 indicates arm (b1 == 0) or thumb (b1 == 1).
+ unsigned HalfDiffKindBits = MachO->getAnyRelocationLength(RE);
+ if (HalfDiffKindBits & 0x2)
+ llvm_unreachable("Thumb not yet supported.");
+
+ SectionEntry &Section = Sections[SectionID];
+ uint32_t RelocType = MachO->getAnyRelocationType(RE);
+ bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
+ uint64_t Offset;
+ RelI->getOffset(Offset);
+ uint8_t *LocalAddress = Section.Address + Offset;
+    // Copy the whole instruction out.
+    int64_t Immediate = readBytesUnaligned(LocalAddress, 4);
+ Immediate = ((Immediate >> 4) & 0xf000) | (Immediate & 0xfff);
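+    // For example, "movw r0, #0x1234" is encoded as 0xE3010234: imm4 = 0x1
+    // lives in bits 19:16 and imm12 = 0x234 in bits 11:0, and the line above
+    // reassembles them into 0x1234.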
+
+ ++RelI;
+ MachO::any_relocation_info RE2 =
+ MachO->getRelocation(RelI->getRawDataRefImpl());
+ uint32_t AddrA = MachO->getScatteredRelocationValue(RE);
+ section_iterator SAI = getSectionByAddress(*MachO, AddrA);
+ assert(SAI != MachO->section_end() && "Can't find section for address A");
+ uint64_t SectionABase = SAI->getAddress();
+ uint64_t SectionAOffset = AddrA - SectionABase;
+ SectionRef SectionA = *SAI;
+ bool IsCode = SectionA.isText();
+ uint32_t SectionAID =
+ findOrEmitSection(Obj, SectionA, IsCode, ObjSectionToID);
+
+ uint32_t AddrB = MachO->getScatteredRelocationValue(RE2);
+ section_iterator SBI = getSectionByAddress(*MachO, AddrB);
+ assert(SBI != MachO->section_end() && "Can't find section for address B");
+ uint64_t SectionBBase = SBI->getAddress();
+ uint64_t SectionBOffset = AddrB - SectionBBase;
+ SectionRef SectionB = *SBI;
+ uint32_t SectionBID =
+ findOrEmitSection(Obj, SectionB, IsCode, ObjSectionToID);
+
+ uint32_t OtherHalf = MachO->getAnyRelocationAddress(RE2) & 0xffff;
+ unsigned Shift = (HalfDiffKindBits & 0x1) ? 16 : 0;
+ uint32_t FullImmVal = (Immediate << Shift) | (OtherHalf << (16 - Shift));
+    // addend = Encoded - Expected
+    //        = Encoded - (AddrA - AddrB)
+    int64_t Addend = FullImmVal - (AddrA - AddrB);
+
+ DEBUG(dbgs() << "Found SECTDIFF: AddrA: " << AddrA << ", AddrB: " << AddrB
+ << ", Addend: " << Addend << ", SectionA ID: " << SectionAID
+ << ", SectionAOffset: " << SectionAOffset
+ << ", SectionB ID: " << SectionBID
+ << ", SectionBOffset: " << SectionBOffset << "\n");
+ RelocationEntry R(SectionID, Offset, RelocType, Addend, SectionAID,
+ SectionAOffset, SectionBID, SectionBOffset, IsPCRel,
+ HalfDiffKindBits);
+
+ addRelocationForSection(R, SectionAID);
+ addRelocationForSection(R, SectionBID);
+
+ return ++RelI;
+ }
+
+};
+}
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
new file mode 100644
index 0000000..258b847
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -0,0 +1,261 @@
+//===---- RuntimeDyldMachOI386.h ---- MachO/I386 specific code. ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOI386_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOI386_H
+
+#include "../RuntimeDyldMachO.h"
+
+#define DEBUG_TYPE "dyld"
+
+namespace llvm {
+
+class RuntimeDyldMachOI386
+ : public RuntimeDyldMachOCRTPBase<RuntimeDyldMachOI386> {
+public:
+
+ typedef uint32_t TargetPtrT;
+
+ RuntimeDyldMachOI386(RTDyldMemoryManager *MM)
+ : RuntimeDyldMachOCRTPBase(MM) {}
+
+ unsigned getMaxStubSize() override { return 0; }
+
+ unsigned getStubAlignment() override { return 1; }
+
+ relocation_iterator
+ processRelocationRef(unsigned SectionID, relocation_iterator RelI,
+ ObjectImage &ObjImg, ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols, StubMap &Stubs) override {
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile &>(*ObjImg.getObjectFile());
+ MachO::any_relocation_info RelInfo =
+ Obj.getRelocation(RelI->getRawDataRefImpl());
+ uint32_t RelType = Obj.getAnyRelocationType(RelInfo);
+
+ if (Obj.isRelocationScattered(RelInfo)) {
+ if (RelType == MachO::GENERIC_RELOC_SECTDIFF ||
+ RelType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)
+ return processSECTDIFFRelocation(SectionID, RelI, ObjImg,
+ ObjSectionToID);
+ else if (RelType == MachO::GENERIC_RELOC_VANILLA)
+ return processI386ScatteredVANILLA(SectionID, RelI, ObjImg,
+ ObjSectionToID);
+ llvm_unreachable("Unhandled scattered relocation.");
+ }
+
+ RelocationEntry RE(getRelocationEntry(SectionID, ObjImg, RelI));
+ RE.Addend = memcpyAddend(RE);
+ RelocationValueRef Value(
+ getRelocationValueRef(ObjImg, RelI, RE, ObjSectionToID, Symbols));
+
+ // Addends for external, PC-rel relocations on i386 point back to the zero
+ // offset. Calculate the final offset from the relocation target instead.
+ // This allows us to use the same logic for both external and internal
+ // relocations in resolveI386RelocationRef.
+ // bool IsExtern = Obj.getPlainRelocationExternal(RelInfo);
+ // if (IsExtern && RE.IsPCRel) {
+ // uint64_t RelocAddr = 0;
+ // RelI->getAddress(RelocAddr);
+ // Value.Addend += RelocAddr + 4;
+ // }
+ if (RE.IsPCRel)
+ makeValueAddendPCRel(Value, ObjImg, RelI, 1 << RE.Size);
+
+ RE.Addend = Value.Offset;
+
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+
+ return ++RelI;
+ }
+
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ DEBUG(dumpRelocationToResolve(RE, Value));
+
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *LocalAddress = Section.Address + RE.Offset;
+
+ if (RE.IsPCRel) {
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation.
+ }
+
+ switch (RE.RelType) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case MachO::GENERIC_RELOC_VANILLA:
+ writeBytesUnaligned(Value + RE.Addend, LocalAddress, 1 << RE.Size);
+ break;
+ case MachO::GENERIC_RELOC_SECTDIFF:
+ case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
+ uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress;
+ uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress;
+ assert((Value == SectionABase || Value == SectionBBase) &&
+ "Unexpected SECTDIFF relocation value.");
+ Value = SectionABase - SectionBBase + RE.Addend;
+ writeBytesUnaligned(Value, LocalAddress, 1 << RE.Size);
+ break;
+ }
+ case MachO::GENERIC_RELOC_PB_LA_PTR:
+ Error("Relocation type not implemented yet!");
+ }
+ }
+
+ void finalizeSection(ObjectImage &ObjImg, unsigned SectionID,
+ const SectionRef &Section) {
+ StringRef Name;
+ Section.getName(Name);
+
+ if (Name == "__jump_table")
+ populateJumpTable(cast<MachOObjectFile>(*ObjImg.getObjectFile()), Section,
+ SectionID);
+ else if (Name == "__pointers")
+ populateIndirectSymbolPointersSection(
+ cast<MachOObjectFile>(*ObjImg.getObjectFile()),
+ Section, SectionID);
+ }
+
+private:
+ relocation_iterator
+ processSECTDIFFRelocation(unsigned SectionID, relocation_iterator RelI,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID) {
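+    // A SECTDIFF relocation records the difference AddrA - AddrB between two
+    // addresses. It is emitted as a pair of scattered relocations: this one
+    // supplies address A, and the PAIR relocation that follows supplies B.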
+ const MachOObjectFile *MachO =
+ static_cast<const MachOObjectFile *>(Obj.getObjectFile());
+ MachO::any_relocation_info RE =
+ MachO->getRelocation(RelI->getRawDataRefImpl());
+
+ SectionEntry &Section = Sections[SectionID];
+ uint32_t RelocType = MachO->getAnyRelocationType(RE);
+ bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
+ unsigned Size = MachO->getAnyRelocationLength(RE);
+ uint64_t Offset;
+ RelI->getOffset(Offset);
+ uint8_t *LocalAddress = Section.Address + Offset;
+ unsigned NumBytes = 1 << Size;
+ uint64_t Addend = readBytesUnaligned(LocalAddress, NumBytes);
+
+ ++RelI;
+ MachO::any_relocation_info RE2 =
+ MachO->getRelocation(RelI->getRawDataRefImpl());
+
+ uint32_t AddrA = MachO->getScatteredRelocationValue(RE);
+ section_iterator SAI = getSectionByAddress(*MachO, AddrA);
+ assert(SAI != MachO->section_end() && "Can't find section for address A");
+ uint64_t SectionABase = SAI->getAddress();
+ uint64_t SectionAOffset = AddrA - SectionABase;
+ SectionRef SectionA = *SAI;
+ bool IsCode = SectionA.isText();
+ uint32_t SectionAID =
+ findOrEmitSection(Obj, SectionA, IsCode, ObjSectionToID);
+
+ uint32_t AddrB = MachO->getScatteredRelocationValue(RE2);
+ section_iterator SBI = getSectionByAddress(*MachO, AddrB);
+ assert(SBI != MachO->section_end() && "Can't find section for address B");
+ uint64_t SectionBBase = SBI->getAddress();
+ uint64_t SectionBOffset = AddrB - SectionBBase;
+ SectionRef SectionB = *SBI;
+ uint32_t SectionBID =
+ findOrEmitSection(Obj, SectionB, IsCode, ObjSectionToID);
+
+ if (Addend != AddrA - AddrB)
+ Error("Unexpected SECTDIFF relocation addend.");
+
+ DEBUG(dbgs() << "Found SECTDIFF: AddrA: " << AddrA << ", AddrB: " << AddrB
+ << ", Addend: " << Addend << ", SectionA ID: " << SectionAID
+ << ", SectionAOffset: " << SectionAOffset
+ << ", SectionB ID: " << SectionBID
+ << ", SectionBOffset: " << SectionBOffset << "\n");
+ RelocationEntry R(SectionID, Offset, RelocType, 0, SectionAID,
+ SectionAOffset, SectionBID, SectionBOffset, IsPCRel,
+ Size);
+
+ addRelocationForSection(R, SectionAID);
+ addRelocationForSection(R, SectionBID);
+
+ return ++RelI;
+ }
+
+ relocation_iterator processI386ScatteredVANILLA(
+ unsigned SectionID, relocation_iterator RelI, ObjectImage &Obj,
+ RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID) {
+ const MachOObjectFile *MachO =
+ static_cast<const MachOObjectFile *>(Obj.getObjectFile());
+ MachO::any_relocation_info RE =
+ MachO->getRelocation(RelI->getRawDataRefImpl());
+
+ SectionEntry &Section = Sections[SectionID];
+ uint32_t RelocType = MachO->getAnyRelocationType(RE);
+ bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
+ unsigned Size = MachO->getAnyRelocationLength(RE);
+ uint64_t Offset;
+ RelI->getOffset(Offset);
+ uint8_t *LocalAddress = Section.Address + Offset;
+ unsigned NumBytes = 1 << Size;
+ int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes);
+
+ unsigned SymbolBaseAddr = MachO->getScatteredRelocationValue(RE);
+ section_iterator TargetSI = getSectionByAddress(*MachO, SymbolBaseAddr);
+ assert(TargetSI != MachO->section_end() && "Can't find section for symbol");
+ uint64_t SectionBaseAddr = TargetSI->getAddress();
+ SectionRef TargetSection = *TargetSI;
+ bool IsCode = TargetSection.isText();
+ uint32_t TargetSectionID =
+ findOrEmitSection(Obj, TargetSection, IsCode, ObjSectionToID);
+
+ Addend -= SectionBaseAddr;
+ RelocationEntry R(SectionID, Offset, RelocType, Addend, IsPCRel, Size);
+
+ addRelocationForSection(R, TargetSectionID);
+
+ return ++RelI;
+ }
+
+ // Populate stubs in __jump_table section.
+ void populateJumpTable(MachOObjectFile &Obj, const SectionRef &JTSection,
+ unsigned JTSectionID) {
+ assert(!Obj.is64Bit() &&
+ "__jump_table section not supported in 64-bit MachO.");
+
+ MachO::dysymtab_command DySymTabCmd = Obj.getDysymtabLoadCommand();
+ MachO::section Sec32 = Obj.getSection(JTSection.getRawDataRefImpl());
+ uint32_t JTSectionSize = Sec32.size;
+ unsigned FirstIndirectSymbol = Sec32.reserved1;
+ unsigned JTEntrySize = Sec32.reserved2;
+ unsigned NumJTEntries = JTSectionSize / JTEntrySize;
+ uint8_t *JTSectionAddr = getSectionAddress(JTSectionID);
+ unsigned JTEntryOffset = 0;
+
+ assert((JTSectionSize % JTEntrySize) == 0 &&
+ "Jump-table section does not contain a whole number of stubs?");
+
+ for (unsigned i = 0; i < NumJTEntries; ++i) {
+ unsigned SymbolIndex =
+ Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i);
+ symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex);
+ StringRef IndirectSymbolName;
+ SI->getName(IndirectSymbolName);
+ uint8_t *JTEntryAddr = JTSectionAddr + JTEntryOffset;
+ createStubFunction(JTEntryAddr);
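+      // The stub is expected to be an i386 "jmp rel32"; the relocation below
+      // patches its 32-bit PC-relative immediate, which starts one byte into
+      // the entry (hence the JTEntryOffset + 1).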
+ RelocationEntry RE(JTSectionID, JTEntryOffset + 1,
+ MachO::GENERIC_RELOC_VANILLA, 0, true, 2);
+ addRelocationForSymbol(RE, IndirectSymbolName);
+ JTEntryOffset += JTEntrySize;
+ }
+ }
+
+};
+}
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
new file mode 100644
index 0000000..84d9e80
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -0,0 +1,136 @@
+//===-- RuntimeDyldMachOX86_64.h ---- MachO/X86_64 specific code. -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOX86_64_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOX86_64_H
+
+#include "../RuntimeDyldMachO.h"
+
+#define DEBUG_TYPE "dyld"
+
+namespace llvm {
+
+class RuntimeDyldMachOX86_64
+ : public RuntimeDyldMachOCRTPBase<RuntimeDyldMachOX86_64> {
+public:
+
+ typedef uint64_t TargetPtrT;
+
+ RuntimeDyldMachOX86_64(RTDyldMemoryManager *MM)
+ : RuntimeDyldMachOCRTPBase(MM) {}
+
+ unsigned getMaxStubSize() override { return 8; }
+
+ unsigned getStubAlignment() override { return 1; }
+
+ relocation_iterator
+ processRelocationRef(unsigned SectionID, relocation_iterator RelI,
+ ObjectImage &ObjImg, ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols, StubMap &Stubs) override {
+ const MachOObjectFile &Obj =
+ static_cast<const MachOObjectFile &>(*ObjImg.getObjectFile());
+ MachO::any_relocation_info RelInfo =
+ Obj.getRelocation(RelI->getRawDataRefImpl());
+
+ assert(!Obj.isRelocationScattered(RelInfo) &&
+ "Scattered relocations not supported on X86_64");
+
+ RelocationEntry RE(getRelocationEntry(SectionID, ObjImg, RelI));
+ RE.Addend = memcpyAddend(RE);
+ RelocationValueRef Value(
+ getRelocationValueRef(ObjImg, RelI, RE, ObjSectionToID, Symbols));
+
+ bool IsExtern = Obj.getPlainRelocationExternal(RelInfo);
+ if (!IsExtern && RE.IsPCRel)
+ makeValueAddendPCRel(Value, ObjImg, RelI, 1 << RE.Size);
+
+ if (RE.RelType == MachO::X86_64_RELOC_GOT ||
+ RE.RelType == MachO::X86_64_RELOC_GOT_LOAD)
+ processGOTRelocation(RE, Value, Stubs);
+ else {
+ RE.Addend = Value.Offset;
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
+
+ return ++RelI;
+ }
+
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ DEBUG(dumpRelocationToResolve(RE, Value));
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *LocalAddress = Section.Address + RE.Offset;
+
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (RE.IsPCRel) {
+ // FIXME: It seems this value needs to be adjusted by 4 for an effective
+ // PC address. Is that expected? Only for branches, perhaps?
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ Value -= FinalAddress + 4;
+ }
+
+ switch (RE.RelType) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case MachO::X86_64_RELOC_SIGNED_1:
+ case MachO::X86_64_RELOC_SIGNED_2:
+ case MachO::X86_64_RELOC_SIGNED_4:
+ case MachO::X86_64_RELOC_SIGNED:
+ case MachO::X86_64_RELOC_UNSIGNED:
+ case MachO::X86_64_RELOC_BRANCH:
+ writeBytesUnaligned(Value + RE.Addend, LocalAddress, 1 << RE.Size);
+ break;
+ case MachO::X86_64_RELOC_GOT_LOAD:
+ case MachO::X86_64_RELOC_GOT:
+ case MachO::X86_64_RELOC_SUBTRACTOR:
+ case MachO::X86_64_RELOC_TLV:
+ Error("Relocation type not implemented yet!");
+ }
+ }
+
+ void finalizeSection(ObjectImage &ObjImg, unsigned SectionID,
+ const SectionRef &Section) {}
+
+private:
+ void processGOTRelocation(const RelocationEntry &RE,
+ RelocationValueRef &Value, StubMap &Stubs) {
+ SectionEntry &Section = Sections[RE.SectionID];
+ assert(RE.IsPCRel);
+ assert(RE.Size == 2);
+ Value.Offset -= RE.Addend;
+ RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value);
+ uint8_t *Addr;
+ if (i != Stubs.end()) {
+ Addr = Section.Address + i->second;
+ } else {
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *GOTEntry = Section.Address + Section.StubOffset;
+ RelocationEntry GOTRE(RE.SectionID, Section.StubOffset,
+ MachO::X86_64_RELOC_UNSIGNED, Value.Offset, false,
+ 3);
+ if (Value.SymbolName)
+ addRelocationForSymbol(GOTRE, Value.SymbolName);
+ else
+ addRelocationForSection(GOTRE, Value.SectionID);
+ Section.StubOffset += 8;
+ Addr = GOTEntry;
+ }
+ RelocationEntry TargetRE(RE.SectionID, RE.Offset,
+ MachO::X86_64_RELOC_UNSIGNED, RE.Addend, true, 2);
+ resolveRelocation(TargetRE, (uint64_t)Addr);
+ }
+};
+}
+
+#undef DEBUG_TYPE
+
+#endif
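
For the PC-relative cases in resolveRelocation above, the bytes written at the fixup are Target + Addend - (FixupAddress + 4), stored as a 32-bit displacement when RE.Size is 2. A standalone sketch of that computation follows; the addresses are illustrative, and a little-endian host is assumed (as it is on x86_64).

// Sketch of the PC-relative displacement math performed above.
#include <cstdint>
#include <cstdio>
#include <cstring>

static void writeDisp32(uint8_t *Loc, uint64_t Target, uint64_t FixupAddr,
                        int64_t Addend) {
  // Value -= FinalAddress + 4; then Value + Addend is stored at Loc.
  int64_t Disp = static_cast<int64_t>(Target) -
                 static_cast<int64_t>(FixupAddr + 4) + Addend;
  int32_t Disp32 = static_cast<int32_t>(Disp);
  std::memcpy(Loc, &Disp32, sizeof(Disp32)); // unaligned little-endian store
}

int main() {
  uint8_t Fixup[4] = {};
  writeDisp32(Fixup, /*Target=*/0x1000, /*FixupAddr=*/0x2000, /*Addend=*/0);
  std::printf("%02x %02x %02x %02x\n", Fixup[0], Fixup[1], Fixup[2], Fixup[3]);
  return 0;
}
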
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index b10d51f..e6679cf 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -30,7 +30,7 @@ TargetMachine *EngineBuilder::selectTarget() {
// MCJIT can generate code for remote targets, but the old JIT and Interpreter
// must use the host architecture.
- if (UseMCJIT && WhichEngine != EngineKind::Interpreter && M)
+ if (WhichEngine != EngineKind::Interpreter && M)
TT.setTriple(M->getTargetTriple());
return selectTarget(TT, MArch, MCPU, MAttrs);
@@ -89,8 +89,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
}
// FIXME: non-iOS ARM FastISel is broken with MCJIT.
- if (UseMCJIT &&
- TheTriple.getArch() == Triple::arm &&
+ if (TheTriple.getArch() == Triple::arm &&
!TheTriple.isiOS() &&
OptLevel == CodeGenOpt::None) {
OptLevel = CodeGenOpt::Less;
diff --git a/lib/IR/Android.mk b/lib/IR/Android.mk
index c51b241..a3632cf 100644
--- a/lib/IR/Android.mk
+++ b/lib/IR/Android.mk
@@ -41,6 +41,7 @@ vmcore_SRC_FILES := \
Type.cpp \
TypeFinder.cpp \
Use.cpp \
+ UseListOrder.cpp \
User.cpp \
Value.cpp \
ValueSymbolTable.cpp \
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index a7499bc..1961a20 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -49,6 +49,213 @@ AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
// Helper Functions
//===----------------------------------------------------------------------===//
+namespace {
+struct OrderMap {
+ DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
+
+ unsigned size() const { return IDs.size(); }
+ std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }
+ std::pair<unsigned, bool> lookup(const Value *V) const {
+ return IDs.lookup(V);
+ }
+ void index(const Value *V) {
+ // Explicitly sequence get-size and insert-value operations to avoid UB.
+ unsigned ID = IDs.size() + 1;
+ IDs[V].first = ID;
+ }
+};
+}
+
+static void orderValue(const Value *V, OrderMap &OM) {
+ if (OM.lookup(V).first)
+ return;
+
+ if (const Constant *C = dyn_cast<Constant>(V))
+ if (C->getNumOperands() && !isa<GlobalValue>(C))
+ for (const Value *Op : C->operands())
+ if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
+ orderValue(Op, OM);
+
+ // Note: we cannot cache this lookup above, since inserting into the map
+ // changes the map's size, and thus affects the other IDs.
+ OM.index(V);
+}
+
+static OrderMap orderModule(const Module *M) {
+ // This needs to match the order used by ValueEnumerator::ValueEnumerator()
+ // and ValueEnumerator::incorporateFunction().
+ OrderMap OM;
+
+ for (const GlobalVariable &G : M->globals()) {
+ if (G.hasInitializer())
+ if (!isa<GlobalValue>(G.getInitializer()))
+ orderValue(G.getInitializer(), OM);
+ orderValue(&G, OM);
+ }
+ for (const GlobalAlias &A : M->aliases()) {
+ if (!isa<GlobalValue>(A.getAliasee()))
+ orderValue(A.getAliasee(), OM);
+ orderValue(&A, OM);
+ }
+ for (const Function &F : *M) {
+ if (F.hasPrefixData())
+ if (!isa<GlobalValue>(F.getPrefixData()))
+ orderValue(F.getPrefixData(), OM);
+ orderValue(&F, OM);
+
+ if (F.isDeclaration())
+ continue;
+
+ for (const Argument &A : F.args())
+ orderValue(&A, OM);
+ for (const BasicBlock &BB : F) {
+ orderValue(&BB, OM);
+ for (const Instruction &I : BB) {
+ for (const Value *Op : I.operands())
+ if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
+ isa<InlineAsm>(*Op))
+ orderValue(Op, OM);
+ orderValue(&I, OM);
+ }
+ }
+ }
+ return OM;
+}
+
+static void predictValueUseListOrderImpl(const Value *V, const Function *F,
+ unsigned ID, const OrderMap &OM,
+ UseListOrderStack &Stack) {
+ // Predict use-list order for this one.
+ typedef std::pair<const Use *, unsigned> Entry;
+ SmallVector<Entry, 64> List;
+ for (const Use &U : V->uses())
+ // Check if this user will be serialized.
+ if (OM.lookup(U.getUser()).first)
+ List.push_back(std::make_pair(&U, List.size()));
+
+ if (List.size() < 2)
+ // We may have lost some users.
+ return;
+
+ bool GetsReversed =
+ !isa<GlobalVariable>(V) && !isa<Function>(V) && !isa<BasicBlock>(V);
+ if (auto *BA = dyn_cast<BlockAddress>(V))
+ ID = OM.lookup(BA->getBasicBlock()).first;
+ std::sort(List.begin(), List.end(), [&](const Entry &L, const Entry &R) {
+ const Use *LU = L.first;
+ const Use *RU = R.first;
+ if (LU == RU)
+ return false;
+
+ auto LID = OM.lookup(LU->getUser()).first;
+ auto RID = OM.lookup(RU->getUser()).first;
+
+ // If ID is 4, then expect: 7 6 5 1 2 3.
+ if (LID < RID) {
+ if (GetsReversed)
+ if (RID <= ID)
+ return true;
+ return false;
+ }
+ if (RID < LID) {
+ if (GetsReversed)
+ if (LID <= ID)
+ return false;
+ return true;
+ }
+
+ // LID and RID are equal, so we have different operands of the same user.
+ // Assume operands are added in order for all instructions.
+ if (GetsReversed)
+ if (LID <= ID)
+ return LU->getOperandNo() < RU->getOperandNo();
+ return LU->getOperandNo() > RU->getOperandNo();
+ });
+
+ if (std::is_sorted(
+ List.begin(), List.end(),
+ [](const Entry &L, const Entry &R) { return L.second < R.second; }))
+ // Order is already correct.
+ return;
+
+ // Store the shuffle.
+ Stack.emplace_back(V, F, List.size());
+ assert(List.size() == Stack.back().Shuffle.size() && "Wrong size");
+ for (size_t I = 0, E = List.size(); I != E; ++I)
+ Stack.back().Shuffle[I] = List[I].second;
+}
+
+static void predictValueUseListOrder(const Value *V, const Function *F,
+ OrderMap &OM, UseListOrderStack &Stack) {
+ auto &IDPair = OM[V];
+ assert(IDPair.first && "Unmapped value");
+ if (IDPair.second)
+ // Already predicted.
+ return;
+
+ // Do the actual prediction.
+ IDPair.second = true;
+ if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
+ predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);
+
+ // Recursive descent into constants.
+ if (const Constant *C = dyn_cast<Constant>(V))
+ if (C->getNumOperands()) // Visit GlobalValues.
+ for (const Value *Op : C->operands())
+ if (isa<Constant>(Op)) // Visit GlobalValues.
+ predictValueUseListOrder(Op, F, OM, Stack);
+}
+
+static UseListOrderStack predictUseListOrder(const Module *M) {
+ OrderMap OM = orderModule(M);
+
+ // Use-list orders need to be serialized after all the users have been added
+ // to a value, or else the shuffles will be incomplete. Store them per
+ // function in a stack.
+ //
+ // Aside from function order, the order of values doesn't matter much here.
+ UseListOrderStack Stack;
+
+ // We want to visit the functions backward now so we can list function-local
+ // constants in the last Function they're used in. Module-level constants
+ // have already been visited above.
+ for (auto I = M->rbegin(), E = M->rend(); I != E; ++I) {
+ const Function &F = *I;
+ if (F.isDeclaration())
+ continue;
+ for (const BasicBlock &BB : F)
+ predictValueUseListOrder(&BB, &F, OM, Stack);
+ for (const Argument &A : F.args())
+ predictValueUseListOrder(&A, &F, OM, Stack);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ for (const Value *Op : I.operands())
+ if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
+ predictValueUseListOrder(Op, &F, OM, Stack);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
+ predictValueUseListOrder(&I, &F, OM, Stack);
+ }
+
+ // Visit globals last.
+ for (const GlobalVariable &G : M->globals())
+ predictValueUseListOrder(&G, nullptr, OM, Stack);
+ for (const Function &F : *M)
+ predictValueUseListOrder(&F, nullptr, OM, Stack);
+ for (const GlobalAlias &A : M->aliases())
+ predictValueUseListOrder(&A, nullptr, OM, Stack);
+ for (const GlobalVariable &G : M->globals())
+ if (G.hasInitializer())
+ predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
+ for (const GlobalAlias &A : M->aliases())
+ predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
+ for (const Function &F : *M)
+ if (F.hasPrefixData())
+ predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
+
+ return Stack;
+}
+
static const Module *getModuleFromVal(const Value *V) {
if (const Argument *MA = dyn_cast<Argument>(V))
return MA->getParent() ? MA->getParent()->getParent() : nullptr;
@@ -78,6 +285,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+ case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
@@ -347,6 +555,8 @@ public:
FunctionProcessed = false;
}
+ const Function *getFunction() const { return TheFunction; }
+
/// After calling incorporateFunction, use this method to remove the
/// most recently incorporated function from the SlotTracker. This
/// will reset the state of the machine back to just the module contents.
@@ -508,7 +718,7 @@ void SlotTracker::processFunction() {
ST_DEBUG("Inserting Instructions:\n");
- SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDForInst;
// Add all of the basic blocks and instructions with no names.
for (Function::const_iterator BB = TheFunction->begin(),
@@ -1279,6 +1489,9 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
void AssemblyWriter::printModule(const Module *M) {
Machine.initialize();
+ if (shouldPreserveAssemblyUseListOrder())
+ UseListOrders = predictUseListOrder(M);
+
if (!M->getModuleIdentifier().empty() &&
// Don't print the ID if it will start a new line (which would
// require a comment char before it).
@@ -1339,9 +1552,13 @@ void AssemblyWriter::printModule(const Module *M) {
I != E; ++I)
printAlias(I);
+ // Output global use-lists.
+ printUseLists(nullptr);
+
// Output all of the functions.
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
printFunction(I);
+ assert(UseListOrders.empty() && "All use-lists should have been consumed");
// Output all attribute groups.
if (!Machine.as_empty()) {
@@ -1509,6 +1726,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
PrintLLVMName(Out, GA);
Out << " = ";
}
+ PrintLinkage(GA->getLinkage(), Out);
PrintVisibility(GA->getVisibility(), Out);
PrintDLLStorageClass(GA->getDLLStorageClass(), Out);
PrintThreadLocalModel(GA->getThreadLocalMode(), Out);
@@ -1517,8 +1735,6 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
Out << "alias ";
- PrintLinkage(GA->getLinkage(), Out);
-
const Constant *Aliasee = GA->getAliasee();
if (!Aliasee) {
@@ -1693,6 +1909,9 @@ void AssemblyWriter::printFunction(const Function *F) {
for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
printBasicBlock(I);
+ // Output the function's use-lists.
+ printUseLists(F);
+
Out << "}\n";
}
@@ -1956,6 +2175,14 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ";
writeParamOperand(CI->getArgOperand(op), PAL, op + 1);
}
+
+ // Emit an ellipsis if this is a musttail call in a vararg function. This
+ // is only to aid readability; musttail calls forward varargs by default.
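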
+ if (CI->isMustTailCall() && CI->getParent() &&
+ CI->getParent()->getParent() &&
+ CI->getParent()->getParent()->isVarArg())
+ Out << ", ...";
+
Out << ')';
if (PAL.hasAttributes(AttributeSet::FunctionIndex))
Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
@@ -2088,7 +2315,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
// Print Metadata info.
- SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
+ SmallVector<std::pair<unsigned, MDNode *>, 4> InstMD;
I.getAllMetadata(InstMD);
if (!InstMD.empty()) {
SmallVector<StringRef, 8> MDNames;
@@ -2114,7 +2341,7 @@ static void WriteMDNodeComment(const MDNode *Node,
return;
Value *Op = Node->getOperand(0);
- if (!Op || !isa<ConstantInt>(Op) || cast<ConstantInt>(Op)->getBitWidth() < 32)
+ if (!Op || !isa<MDString>(Op))
return;
DIDescriptor Desc(Node);
@@ -2170,6 +2397,45 @@ void AssemblyWriter::writeAllAttributeGroups() {
} // namespace llvm
+void AssemblyWriter::printUseListOrder(const UseListOrder &Order) {
+ bool IsInFunction = Machine.getFunction();
+ if (IsInFunction)
+ Out << " ";
+
+ Out << "uselistorder";
+ if (const BasicBlock *BB =
+ IsInFunction ? nullptr : dyn_cast<BasicBlock>(Order.V)) {
+ Out << "_bb ";
+ writeOperand(BB->getParent(), false);
+ Out << ", ";
+ writeOperand(BB, false);
+ } else {
+ Out << " ";
+ writeOperand(Order.V, true);
+ }
+ Out << ", { ";
+
+ assert(Order.Shuffle.size() >= 2 && "Shuffle too small");
+ Out << Order.Shuffle[0];
+ for (unsigned I = 1, E = Order.Shuffle.size(); I != E; ++I)
+ Out << ", " << Order.Shuffle[I];
+ Out << " }\n";
+}
+
+void AssemblyWriter::printUseLists(const Function *F) {
+ auto hasMore =
+ [&]() { return !UseListOrders.empty() && UseListOrders.back().F == F; };
+ if (!hasMore())
+ // Nothing to do.
+ return;
+
+ Out << "\n; uselistorder directives\n";
+ while (hasMore()) {
+ printUseListOrder(UseListOrders.back());
+ UseListOrders.pop_back();
+ }
+}
+
//===----------------------------------------------------------------------===//
// External Interface declarations
//===----------------------------------------------------------------------===//
@@ -2291,7 +2557,7 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) cons
void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
// Type::dump - allow easy printing of Types from the debugger.
-void Type::dump() const { print(dbgs()); }
+void Type::dump() const { print(dbgs()); dbgs() << '\n'; }
// Module::dump() - Allow printing of Modules from the debugger.
void Module::dump() const { print(dbgs(), nullptr); }
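
The comparator in predictValueUseListOrderImpl above is the subtle part: in the reversed case, users whose IDs exceed the value's own ID come first in descending order, and the remaining users follow in ascending order, which is exactly the "7 6 5 1 2 3" comment for ID 4. A plain-C++ sketch of just that comparison rule, using arbitrary example IDs:

// Reproduces the reversed-case ordering rule from the comparator above.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const unsigned ID = 4;                            // the value's own ID
  std::vector<unsigned> Users = {1, 2, 3, 5, 6, 7}; // user IDs, any order
  std::sort(Users.begin(), Users.end(), [&](unsigned L, unsigned R) {
    if (L < R)
      return R <= ID; // both at or below ID: keep ascending
    if (R < L)
      return L > ID;  // L is a "late" user: it sorts to the front
    return false;
  });
  for (unsigned U : Users)
    std::printf("%u ", U); // prints: 7 6 5 1 2 3
  std::printf("\n");
  return 0;
}
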
diff --git a/lib/IR/AsmWriter.h b/lib/IR/AsmWriter.h
index aef9c8a..60da5ad 100644
--- a/lib/IR/AsmWriter.h
+++ b/lib/IR/AsmWriter.h
@@ -12,14 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_IR_ASSEMBLYWRITER_H
-#define LLVM_IR_ASSEMBLYWRITER_H
+#ifndef LLVM_LIB_IR_ASMWRITER_H
+#define LLVM_LIB_IR_ASMWRITER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/UseListOrder.h"
#include "llvm/Support/FormattedStream.h"
namespace llvm {
@@ -73,6 +74,7 @@ private:
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
SetVector<const Comdat *> Comdats;
+ UseListOrderStack UseListOrders;
public:
/// Construct an AssemblyWriter with an external SlotTracker
@@ -111,6 +113,9 @@ public:
void printInstructionLine(const Instruction &I);
void printInstruction(const Instruction &I);
+ void printUseListOrder(const UseListOrder &Order);
+ void printUseLists(const Function *F);
+
private:
void init();
@@ -121,4 +126,4 @@ private:
} // namespace llvm
-#endif //LLVM_IR_ASMWRITER_H
+#endif
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 9f3fd3e..0448dc1 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -13,8 +13,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ATTRIBUTESIMPL_H
-#define LLVM_ATTRIBUTESIMPL_H
+#ifndef LLVM_LIB_IR_ATTRIBUTEIMPL_H
+#define LLVM_LIB_IR_ATTRIBUTEIMPL_H
#include "llvm/ADT/FoldingSet.h"
#include "llvm/IR/Attributes.h"
@@ -39,7 +39,7 @@ class AttributeImpl : public FoldingSetNode {
protected:
enum AttrEntryKind {
EnumAttrEntry,
- AlignAttrEntry,
+ IntAttrEntry,
StringAttrEntry
};
@@ -49,7 +49,7 @@ public:
virtual ~AttributeImpl();
bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
- bool isAlignAttribute() const { return KindID == AlignAttrEntry; }
+ bool isIntAttribute() const { return KindID == IntAttrEntry; }
bool isStringAttribute() const { return KindID == StringAttrEntry; }
bool hasAttribute(Attribute::AttrKind A) const;
@@ -67,7 +67,7 @@ public:
void Profile(FoldingSetNodeID &ID) const {
if (isEnumAttribute())
Profile(ID, getKindAsEnum(), 0);
- else if (isAlignAttribute())
+ else if (isIntAttribute())
Profile(ID, getKindAsEnum(), getValueAsInt());
else
Profile(ID, getKindAsString(), getValueAsString());
@@ -108,19 +108,20 @@ public:
Attribute::AttrKind getEnumKind() const { return Kind; }
};
-class AlignAttributeImpl : public EnumAttributeImpl {
+class IntAttributeImpl : public EnumAttributeImpl {
void anchor() override;
- unsigned Align;
+ uint64_t Val;
public:
- AlignAttributeImpl(Attribute::AttrKind Kind, unsigned Align)
- : EnumAttributeImpl(AlignAttrEntry, Kind), Align(Align) {
+ IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val)
+ : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) {
assert(
- (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment) &&
- "Wrong kind for alignment attribute!");
+ (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment ||
+ Kind == Attribute::Dereferenceable) &&
+ "Wrong kind for int attribute!");
}
- unsigned getAlignment() const { return Align; }
+ uint64_t getValue() const { return Val; }
};
class StringAttributeImpl : public AttributeImpl {
@@ -164,6 +165,7 @@ public:
unsigned getAlignment() const;
unsigned getStackAlignment() const;
+ uint64_t getDereferenceableBytes() const;
std::string getAsString(bool InAttrGrp) const;
typedef const Attribute *iterator;
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 48a2ce8..04545ea 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -47,7 +47,7 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
if (!Val)
PA = new EnumAttributeImpl(Kind);
else
- PA = new AlignAttributeImpl(Kind, Val);
+ PA = new IntAttributeImpl(Kind, Val);
pImpl->AttrsSet.InsertNode(PA, InsertPoint);
}
@@ -88,6 +88,12 @@ Attribute Attribute::getWithStackAlignment(LLVMContext &Context,
return get(Context, StackAlignment, Align);
}
+Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context,
+ uint64_t Bytes) {
+ assert(Bytes && "Bytes must be non-zero.");
+ return get(Context, Dereferenceable, Bytes);
+}
+
//===----------------------------------------------------------------------===//
// Attribute Accessor Methods
//===----------------------------------------------------------------------===//
@@ -96,8 +102,8 @@ bool Attribute::isEnumAttribute() const {
return pImpl && pImpl->isEnumAttribute();
}
-bool Attribute::isAlignAttribute() const {
- return pImpl && pImpl->isAlignAttribute();
+bool Attribute::isIntAttribute() const {
+ return pImpl && pImpl->isIntAttribute();
}
bool Attribute::isStringAttribute() const {
@@ -106,15 +112,15 @@ bool Attribute::isStringAttribute() const {
Attribute::AttrKind Attribute::getKindAsEnum() const {
if (!pImpl) return None;
- assert((isEnumAttribute() || isAlignAttribute()) &&
+ assert((isEnumAttribute() || isIntAttribute()) &&
"Invalid attribute type to get the kind as an enum!");
return pImpl ? pImpl->getKindAsEnum() : None;
}
uint64_t Attribute::getValueAsInt() const {
if (!pImpl) return 0;
- assert(isAlignAttribute() &&
- "Expected the attribute to be an alignment attribute!");
+ assert(isIntAttribute() &&
+ "Expected the attribute to be an integer attribute!");
return pImpl ? pImpl->getValueAsInt() : 0;
}
@@ -156,6 +162,14 @@ unsigned Attribute::getStackAlignment() const {
return pImpl->getValueAsInt();
}
+/// This returns the number of dereferenceable bytes.
+uint64_t Attribute::getDereferenceableBytes() const {
+ assert(hasAttribute(Attribute::Dereferenceable) &&
+ "Trying to get dereferenceable bytes from "
+ "non-dereferenceable attribute!");
+ return pImpl->getValueAsInt();
+}
+
std::string Attribute::getAsString(bool InAttrGrp) const {
if (!pImpl) return "";
@@ -263,6 +277,20 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return Result;
}
+ if (hasAttribute(Attribute::Dereferenceable)) {
+ std::string Result;
+ Result += "dereferenceable";
+ if (InAttrGrp) {
+ Result += "=";
+ Result += utostr(getValueAsInt());
+ } else {
+ Result += "(";
+ Result += utostr(getValueAsInt());
+ Result += ")";
+ }
+ return Result;
+ }
+
// Convert target-dependent attributes to strings of the form:
//
// "kind"
@@ -296,7 +324,7 @@ bool Attribute::operator<(Attribute A) const {
// Pin the vtables to this file.
AttributeImpl::~AttributeImpl() {}
void EnumAttributeImpl::anchor() {}
-void AlignAttributeImpl::anchor() {}
+void IntAttributeImpl::anchor() {}
void StringAttributeImpl::anchor() {}
bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
@@ -310,13 +338,13 @@ bool AttributeImpl::hasAttribute(StringRef Kind) const {
}
Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
- assert(isEnumAttribute() || isAlignAttribute());
+ assert(isEnumAttribute() || isIntAttribute());
return static_cast<const EnumAttributeImpl *>(this)->getEnumKind();
}
uint64_t AttributeImpl::getValueAsInt() const {
- assert(isAlignAttribute());
- return static_cast<const AlignAttributeImpl *>(this)->getAlignment();
+ assert(isIntAttribute());
+ return static_cast<const IntAttributeImpl *>(this)->getValue();
}
StringRef AttributeImpl::getKindAsString() const {
@@ -334,18 +362,18 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
// relative to their enum value) and then strings.
if (isEnumAttribute()) {
if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
- if (AI.isAlignAttribute()) return true;
+ if (AI.isIntAttribute()) return true;
if (AI.isStringAttribute()) return true;
}
- if (isAlignAttribute()) {
+ if (isIntAttribute()) {
if (AI.isEnumAttribute()) return false;
- if (AI.isAlignAttribute()) return getValueAsInt() < AI.getValueAsInt();
+ if (AI.isIntAttribute()) return getValueAsInt() < AI.getValueAsInt();
if (AI.isStringAttribute()) return true;
}
if (AI.isEnumAttribute()) return false;
- if (AI.isAlignAttribute()) return false;
+ if (AI.isIntAttribute()) return false;
if (getKindAsString() == AI.getKindAsString())
return getValueAsString() < AI.getValueAsString();
return getKindAsString() < AI.getKindAsString();
@@ -398,6 +426,8 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::InAlloca: return 1ULL << 43;
case Attribute::NonNull: return 1ULL << 44;
case Attribute::JumpTable: return 1ULL << 45;
+ case Attribute::Dereferenceable:
+ llvm_unreachable("dereferenceable attribute not supported in raw format");
}
llvm_unreachable("Unsupported attribute type");
}
@@ -482,6 +512,13 @@ unsigned AttributeSetNode::getStackAlignment() const {
return 0;
}
+uint64_t AttributeSetNode::getDereferenceableBytes() const {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (I->hasAttribute(Attribute::Dereferenceable))
+ return I->getDereferenceableBytes();
+ return 0;
+}
+
std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
std::string Str;
for (iterator I = begin(), E = end(); I != E; ++I) {
@@ -515,6 +552,8 @@ uint64_t AttributeSetImpl::Raw(unsigned Index) const {
Mask |= (Log2_32(ASN->getAlignment()) + 1) << 16;
else if (Kind == Attribute::StackAlignment)
Mask |= (Log2_32(ASN->getStackAlignment()) + 1) << 26;
+ else if (Kind == Attribute::Dereferenceable)
+ llvm_unreachable("dereferenceable not supported in bit mask");
else
Mask |= AttributeImpl::getAttrMask(Kind);
}
@@ -620,6 +659,10 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
else if (Kind == Attribute::StackAlignment)
Attrs.push_back(std::make_pair(Index, Attribute::
getWithStackAlignment(C, B.getStackAlignment())));
+ else if (Kind == Attribute::Dereferenceable)
+ Attrs.push_back(std::make_pair(Index,
+ Attribute::getWithDereferenceableBytes(C,
+ B.getDereferenceableBytes())));
else
Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
}
@@ -877,6 +920,11 @@ unsigned AttributeSet::getStackAlignment(unsigned Index) const {
return ASN ? ASN->getStackAlignment() : 0;
}
+uint64_t AttributeSet::getDereferenceableBytes(unsigned Index) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getDereferenceableBytes() : 0;
+}
+
std::string AttributeSet::getAsString(unsigned Index,
bool InAttrGrp) const {
AttributeSetNode *ASN = getAttributes(Index);
@@ -956,7 +1004,7 @@ void AttributeSet::dump() const {
//===----------------------------------------------------------------------===//
AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
- : Attrs(0), Alignment(0), StackAlignment(0) {
+ : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) {
AttributeSetImpl *pImpl = AS.pImpl;
if (!pImpl) return;
@@ -973,13 +1021,14 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
void AttrBuilder::clear() {
Attrs.reset();
- Alignment = StackAlignment = 0;
+ Alignment = StackAlignment = DerefBytes = 0;
}
AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!");
assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment &&
- "Adding alignment attribute without adding alignment value!");
+ Val != Attribute::Dereferenceable &&
+ "Adding integer attribute without adding a value!");
Attrs[Val] = true;
return *this;
}
@@ -997,6 +1046,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
Alignment = Attr.getAlignment();
else if (Kind == Attribute::StackAlignment)
StackAlignment = Attr.getStackAlignment();
+ else if (Kind == Attribute::Dereferenceable)
+ DerefBytes = Attr.getDereferenceableBytes();
return *this;
}
@@ -1013,6 +1064,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
Alignment = 0;
else if (Val == Attribute::StackAlignment)
StackAlignment = 0;
+ else if (Val == Attribute::Dereferenceable)
+ DerefBytes = 0;
return *this;
}
@@ -1029,7 +1082,7 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
Attribute Attr = *I;
- if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
+ if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
Attribute::AttrKind Kind = I->getKindAsEnum();
Attrs[Kind] = false;
@@ -1037,6 +1090,8 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
Alignment = 0;
else if (Kind == Attribute::StackAlignment)
StackAlignment = 0;
+ else if (Kind == Attribute::Dereferenceable)
+ DerefBytes = 0;
} else {
assert(Attr.isStringAttribute() && "Invalid attribute type!");
std::map<std::string, std::string>::iterator
@@ -1079,6 +1134,14 @@ AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) {
return *this;
}
+AttrBuilder &AttrBuilder::addDereferenceableAttr(uint64_t Bytes) {
+ if (Bytes == 0) return *this;
+
+ Attrs[Attribute::Dereferenceable] = true;
+ DerefBytes = Bytes;
+ return *this;
+}
+
AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
// FIXME: What if both have alignments, but they don't match?!
if (!Alignment)
@@ -1087,6 +1150,9 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
if (!StackAlignment)
StackAlignment = B.StackAlignment;
+ if (!DerefBytes)
+ DerefBytes = B.DerefBytes;
+
Attrs |= B.Attrs;
for (td_const_iterator I = B.TargetDepAttrs.begin(),
@@ -1117,7 +1183,7 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot);
I != E; ++I) {
Attribute Attr = *I;
- if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
+ if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
if (Attrs[I->getKindAsEnum()])
return true;
} else {
@@ -1142,7 +1208,8 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
if (B.TargetDepAttrs.find(I->first) == B.TargetDepAttrs.end())
return false;
- return Alignment == B.Alignment && StackAlignment == B.StackAlignment;
+ return Alignment == B.Alignment && StackAlignment == B.StackAlignment &&
+ DerefBytes == B.DerefBytes;
}
AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
@@ -1151,6 +1218,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
I = Attribute::AttrKind(I + 1)) {
+ if (I == Attribute::Dereferenceable)
+ continue;
if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) {
Attrs[I] = true;
@@ -1184,6 +1253,7 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) {
.addAttribute(Attribute::NoAlias)
.addAttribute(Attribute::NoCapture)
.addAttribute(Attribute::NonNull)
+ .addDereferenceableAttr(1) // the int here is ignored
.addAttribute(Attribute::ReadNone)
.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::StructRet)
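
The dereferenceable support added above threads a byte count from Attribute::getWithDereferenceableBytes through AttrBuilder, AttributeSetNode and AttributeSet. A hedged usage sketch of the new entry points follows; it assumes LLVM headers and libraries of this vintage are on the include and link paths, and the 16-byte count is an arbitrary example.

// Sketch: attach and query a dereferenceable(16) return attribute.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;

  AttrBuilder B;
  B.addDereferenceableAttr(16); // new AttrBuilder entry point from this patch

  AttributeSet AS = AttributeSet::get(Ctx, AttributeSet::ReturnIndex, B);
  // New AttributeSet query added above; reported via the exit code for brevity.
  return AS.getDereferenceableBytes(AttributeSet::ReturnIndex) == 16 ? 0 : 1;
}
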
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index 6554b3c..c24dfea 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
@@ -43,6 +44,22 @@ static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
return true;
}
+// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
+// arguments have changed their type from i32 to i8.
+static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
+ Function *&NewFn) {
+ // Check that the last argument is an i32.
+ Type *LastArgType = F->getFunctionType()->getParamType(
+ F->getFunctionType()->getNumParams() - 1);
+ if (!LastArgType->isIntegerTy(32))
+ return false;
+
+ // Move this function aside and map down.
+ F->setName(F->getName() + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+ return true;
+}
+
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
@@ -90,6 +107,20 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
+ case 'd': {
+ if (Name.startswith("dbg.declare") && F->arg_size() == 2) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_declare);
+ return true;
+ }
+ if (Name.startswith("dbg.value") && F->arg_size() == 3) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
+ return true;
+ }
+ break;
+ }
+
case 'o':
// We only need to change the name to match the mangling including the
// address space.
@@ -130,6 +161,51 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (Name == "x86.sse41.ptestnzc")
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
+ // Several blend and other instructions with masks used the wrong number of
+ // bits.
+ if (Name == "x86.sse41.pblendw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw,
+ NewFn);
+ if (Name == "x86.sse41.blendpd")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd,
+ NewFn);
+ if (Name == "x86.sse41.blendps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps,
+ NewFn);
+ if (Name == "x86.sse41.insertps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
+ NewFn);
+ if (Name == "x86.sse41.dppd")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
+ NewFn);
+ if (Name == "x86.sse41.dpps")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
+ NewFn);
+ if (Name == "x86.sse41.mpsadbw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
+ NewFn);
+ if (Name == "x86.avx.blend.pd.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx_blend_pd_256, NewFn);
+ if (Name == "x86.avx.blend.ps.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx_blend_ps_256, NewFn);
+ if (Name == "x86.avx.dp.ps.256")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
+ NewFn);
+ if (Name == "x86.avx2.pblendw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw,
+ NewFn);
+ if (Name == "x86.avx2.pblendd.128")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx2_pblendd_128, NewFn);
+ if (Name == "x86.avx2.pblendd.256")
+ return UpgradeX86IntrinsicsWith8BitMask(
+ F, Intrinsic::x86_avx2_pblendd_256, NewFn);
+ if (Name == "x86.avx2.mpsadbw")
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
+ NewFn);
+
// frcz.ss/sd may need to have an argument dropped
if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
F->setName(Name + ".old");
@@ -173,66 +249,27 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
return Upgraded;
}
-static bool UpgradeGlobalStructors(GlobalVariable *GV) {
- ArrayType *ATy = dyn_cast<ArrayType>(GV->getType()->getElementType());
- StructType *OldTy =
- ATy ? dyn_cast<StructType>(ATy->getElementType()) : nullptr;
-
- // Only upgrade an array of a two field struct with the appropriate field
- // types.
- if (!OldTy || OldTy->getNumElements() != 2)
- return false;
-
- // Get the upgraded 3 element type.
- PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo();
- Type *Tys[3] = {
- OldTy->getElementType(0),
- OldTy->getElementType(1),
- VoidPtrTy
- };
- StructType *NewTy =
- StructType::get(GV->getContext(), Tys, /*isPacked=*/false);
-
- // Build new constants with a null third field filled in.
- Constant *OldInitC = GV->getInitializer();
- ConstantArray *OldInit = dyn_cast<ConstantArray>(OldInitC);
- if (!OldInit && !isa<ConstantAggregateZero>(OldInitC))
- return false;
- std::vector<Constant *> Initializers;
- if (OldInit) {
- for (Use &U : OldInit->operands()) {
- ConstantStruct *Init = cast<ConstantStruct>(&U);
- Constant *NewInit =
- ConstantStruct::get(NewTy, Init->getOperand(0), Init->getOperand(1),
- Constant::getNullValue(VoidPtrTy), nullptr);
- Initializers.push_back(NewInit);
- }
- }
- assert(Initializers.size() == ATy->getNumElements());
-
- // Replace the old GV with a new one.
- ATy = ArrayType::get(NewTy, Initializers.size());
- Constant *NewInit = ConstantArray::get(ATy, Initializers);
- GlobalVariable *NewGV = new GlobalVariable(
- *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "",
- GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(),
- GV->isExternallyInitialized());
- NewGV->copyAttributesFrom(GV);
- NewGV->takeName(GV);
- assert(GV->use_empty() && "program cannot use initializer list");
- GV->eraseFromParent();
- return true;
-}
-
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
- if (GV->getName() == "llvm.global_ctors" ||
- GV->getName() == "llvm.global_dtors")
- return UpgradeGlobalStructors(GV);
-
// Nothing to do yet.
return false;
}
+static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
+ if (!DbgNode || Elt >= DbgNode->getNumOperands())
+ return nullptr;
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt));
+}
+
+static DIExpression getExpression(Value *VarOperand, Function *F) {
+ // Old-style DIVariables have an optional expression as the 8th element.
+ DIExpression Expr(getNodeField(cast<MDNode>(VarOperand), 8));
+ if (!Expr) {
+ DIBuilder DIB(*F->getParent());
+ Expr = DIB.createExpression();
+ }
+ return Expr;
+}
+
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
@@ -396,12 +433,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
std::string Name = CI->getName().str();
- CI->setName(Name + ".old");
+ if (!Name.empty())
+ CI->setName(Name + ".old");
switch (NewFn->getIntrinsicID()) {
default:
llvm_unreachable("Unknown function for CallInst upgrade.");
+ // Upgrade debug intrinsics to use an additional DIExpression argument.
+ case Intrinsic::dbg_declare: {
+ auto NewCI =
+ Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
+ getExpression(CI->getArgOperand(1), F), Name);
+ NewCI->setDebugLoc(CI->getDebugLoc());
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ return;
+ }
+ case Intrinsic::dbg_value: {
+ auto NewCI = Builder.CreateCall4(
+ NewFn, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
+ getExpression(CI->getArgOperand(2), F), Name);
+ NewCI->setDebugLoc(CI->getDebugLoc());
+ CI->replaceAllUsesWith(NewCI);
+ CI->eraseFromParent();
+ return;
+ }
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
@@ -419,14 +476,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
return;
- case Intrinsic::arm_neon_vclz: {
- // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
- CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
- Builder.getFalse(),
- "llvm.ctlz." + Name.substr(14)));
- CI->eraseFromParent();
- return;
- }
case Intrinsic::ctpop: {
CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
CI->eraseFromParent();
@@ -468,6 +517,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
return;
}
+
+ case Intrinsic::x86_sse41_pblendw:
+ case Intrinsic::x86_sse41_blendpd:
+ case Intrinsic::x86_sse41_blendps:
+ case Intrinsic::x86_sse41_insertps:
+ case Intrinsic::x86_sse41_dppd:
+ case Intrinsic::x86_sse41_dpps:
+ case Intrinsic::x86_sse41_mpsadbw:
+ case Intrinsic::x86_avx_blend_pd_256:
+ case Intrinsic::x86_avx_blend_ps_256:
+ case Intrinsic::x86_avx_dp_ps_256:
+ case Intrinsic::x86_avx2_pblendw:
+ case Intrinsic::x86_avx2_pblendd_128:
+ case Intrinsic::x86_avx2_pblendd_256:
+ case Intrinsic::x86_avx2_mpsadbw: {
+ // Need to truncate the last argument from i32 to i8 -- this argument models
+ // an inherently 8-bit immediate operand to these x86 instructions.
+ SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+ CI->arg_operands().end());
+
+ // Replace the last argument with a trunc.
+ Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
+
+ CallInst *NewCall = Builder.CreateCall(NewFn, Args);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ return;
+ }
}
}
@@ -580,7 +657,9 @@ bool llvm::UpgradeDebugInfo(Module &M) {
void llvm::UpgradeMDStringConstant(std::string &String) {
const std::string OldPrefix = "llvm.vectorizer.";
- if (String.find(OldPrefix) == 0) {
- String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");
+ if (String == "llvm.vectorizer.unroll") {
+ String = "llvm.loop.interleave.count";
+ } else if (String.find(OldPrefix) == 0) {
+ String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");
}
}
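
The metadata-string upgrade at the end of AutoUpgrade.cpp above now special-cases "llvm.vectorizer.unroll" (which becomes "llvm.loop.interleave.count") before applying the generic "llvm.vectorizer." to "llvm.loop.vectorize." prefix rewrite. A standalone restatement of just that mapping, runnable without LLVM:

// Mirrors the string mapping in UpgradeMDStringConstant above.
#include <cstdio>
#include <string>

static void upgradeMDString(std::string &S) {
  const std::string OldPrefix = "llvm.vectorizer.";
  if (S == "llvm.vectorizer.unroll")
    S = "llvm.loop.interleave.count";
  else if (S.compare(0, OldPrefix.size(), OldPrefix) == 0)
    S.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");
}

int main() {
  std::string A = "llvm.vectorizer.unroll";
  std::string B = "llvm.vectorizer.width";
  upgradeMDString(A); // llvm.loop.interleave.count
  upgradeMDString(B); // llvm.loop.vectorize.width
  std::printf("%s\n%s\n", A.c_str(), B.c_str());
  return 0;
}
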
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index ba07433..5ed9bed 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -50,17 +50,24 @@ BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
// Make sure that we get added to a function
LeakDetector::addGarbageObject(this);
- if (InsertBefore) {
- assert(NewParent &&
+ if (NewParent)
+ insertInto(NewParent, InsertBefore);
+ else
+ assert(!InsertBefore &&
"Cannot insert block before another block with no function!");
- NewParent->getBasicBlockList().insert(InsertBefore, this);
- } else if (NewParent) {
- NewParent->getBasicBlockList().push_back(this);
- }
setName(Name);
}
+void BasicBlock::insertInto(Function *NewParent, BasicBlock *InsertBefore) {
+ assert(NewParent && "Expected a parent");
+ assert(!Parent && "Already has a parent");
+
+ if (InsertBefore)
+ NewParent->getBasicBlockList().insert(InsertBefore, this);
+ else
+ NewParent->getBasicBlockList().push_back(this);
+}
BasicBlock::~BasicBlock() {
// If the address of the block is taken and it is being deleted (e.g. because
@@ -131,6 +138,37 @@ const TerminatorInst *BasicBlock::getTerminator() const {
return dyn_cast<TerminatorInst>(&InstList.back());
}
+CallInst *BasicBlock::getTerminatingMustTailCall() {
+ if (InstList.empty())
+ return nullptr;
+ ReturnInst *RI = dyn_cast<ReturnInst>(&InstList.back());
+ if (!RI || RI == &InstList.front())
+ return nullptr;
+
+ Instruction *Prev = RI->getPrevNode();
+ if (!Prev)
+ return nullptr;
+
+ if (Value *RV = RI->getReturnValue()) {
+ if (RV != Prev)
+ return nullptr;
+
+ // Look through the optional bitcast.
+ if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
+ RV = BI->getOperand(0);
+ Prev = BI->getPrevNode();
+ if (!Prev || RV != Prev)
+ return nullptr;
+ }
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(Prev)) {
+ if (CI->isMustTailCall())
+ return CI;
+ }
+ return nullptr;
+}
+
Instruction* BasicBlock::getFirstNonPHI() {
BasicBlock::iterator i = begin();
// All valid basic blocks should have a terminator,
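
getTerminatingMustTailCall above walks backward from the return instruction, looking through an optional bitcast of the returned value, and yields the musttail call feeding it. Below is a hedged sketch of calling the new helper from client code; it assumes LLVM headers of this vintage, and the printing is illustrative only.

// Sketch: report whether a block ends in the musttail pattern handled above.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void reportMustTail(BasicBlock &BB) {
  if (CallInst *CI = BB.getTerminatingMustTailCall())
    errs() << "block '" << BB.getName() << "' musttail-calls '"
           << CI->getCalledValue()->getName() << "'\n";
  // Shapes recognized by the walk above (second one looks through a bitcast):
  //   %r = musttail call i32 @f(...)      %p = musttail call i8* @g(...)
  //   ret i32 %r                          %q = bitcast i8* %p to i32*
  //                                       ret i32* %q
}
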
diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt
index 38a80b1..b3889e6 100644
--- a/lib/IR/CMakeLists.txt
+++ b/lib/IR/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_library(LLVMCore
Type.cpp
TypeFinder.cpp
Use.cpp
+ UseListOrder.cpp
User.cpp
Value.cpp
ValueSymbolTable.cpp
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 395ac39..cdfb41f 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -593,8 +593,13 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
bool ignored;
uint64_t x[2];
uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
- (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
- APFloat::rmTowardZero, &ignored);
+ if (APFloat::opInvalidOp ==
+ V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored)) {
+ // Undefined behavior invoked - the destination type can't represent
+ // the input constant.
+ return UndefValue::get(DestTy);
+ }
APInt Val(DestBitWidth, x);
return ConstantInt::get(FPC->getContext(), Val);
}
@@ -653,9 +658,13 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
APInt api = CI->getValue();
APFloat apf(DestTy->getFltSemantics(),
APInt::getNullValue(DestTy->getPrimitiveSizeInBits()));
- (void)apf.convertFromAPInt(api,
- opc==Instruction::SIToFP,
- APFloat::rmNearestTiesToEven);
+ if (APFloat::opOverflow &
+ apf.convertFromAPInt(api, opc==Instruction::SIToFP,
+ APFloat::rmNearestTiesToEven)) {
+ // Undefined behavior invoked - the destination type can't represent
+ // the input constant.
+ return UndefValue::get(DestTy);
+ }
return ConstantFP::get(V->getContext(), apf);
}
return nullptr;
@@ -674,6 +683,9 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
}
return nullptr;
case Instruction::Trunc: {
+ if (V->getType()->isVectorTy())
+ return nullptr;
+
uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
return ConstantInt::get(V->getContext(),
@@ -2144,9 +2156,10 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
// If all indices are known integers and normalized, we can do a simple
// check for the "inbounds" property.
- if (!Unknown && !inBounds &&
- isa<GlobalVariable>(C) && isInBoundsIndices(Idxs))
- return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
+ if (!Unknown && !inBounds)
+ if (auto *GV = dyn_cast<GlobalVariable>(C))
+ if (!GV->hasExternalWeakLinkage() && isInBoundsIndices(Idxs))
+ return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
return nullptr;
}
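
The folds above now detect when a floating-point constant cannot be represented by the integer destination (or an integer constant overflows the FP destination) and return undef, since such conversions invoke undefined behavior. A small sketch of the same APFloat range check used for the FPToSI case; the 1e40 constant is simply an example that cannot fit in an i32, and LLVM headers are assumed to be available.

// Sketch: does this FP constant fit an i32 under fptosi? Mirrors the check above.
#include "llvm/ADT/APFloat.h"
#include <cstdint>
using namespace llvm;

static bool fitsInSignedI32(const APFloat &V) {
  bool Ignored;
  uint64_t X[2];
  return V.convertToInteger(X, 32, /*isSigned=*/true, APFloat::rmTowardZero,
                            &Ignored) != APFloat::opInvalidOp;
}

int main() {
  // 42.9 truncates to 42 and fits; 1e40 does not, so its fold yields undef.
  return (fitsInSignedI32(APFloat(42.9)) && !fitsInSignedI32(APFloat(1.0e40)))
             ? 0
             : 1;
}
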
diff --git a/lib/IR/ConstantFold.h b/lib/IR/ConstantFold.h
index e12f27a..a516abe 100644
--- a/lib/IR/ConstantFold.h
+++ b/lib/IR/ConstantFold.h
@@ -16,8 +16,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CONSTANTFOLDING_H
-#define CONSTANTFOLDING_H
+#ifndef LLVM_LIB_IR_CONSTANTFOLD_H
+#define LLVM_LIB_IR_CONSTANTFOLD_H
#include "llvm/ADT/ArrayRef.h"
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index b815936..e0cb835 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -107,6 +107,28 @@ bool Constant::isAllOnesValue() const {
return false;
}
+bool Constant::isOneValue() const {
+ // Check for 1 integers
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isOne();
+
+ // Check for FP which are bitcasted from 1 integers
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->getValueAPF().bitcastToAPInt() == 1;
+
+ // Check for constant vectors which are splats of 1 values.
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isOneValue();
+
+ // Check for constant vectors which are splats of 1 values.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isOneValue();
+
+ return false;
+}
+
bool Constant::isMinSignedValue() const {
// Check for INT_MIN integers
if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
@@ -129,6 +151,29 @@ bool Constant::isMinSignedValue() const {
return false;
}
+bool Constant::isNotMinSignedValue() const {
+ // Check for INT_MIN integers
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return !CI->isMinValue(/*isSigned=*/true);
+
+ // Check for FP which are bitcasted from INT_MIN integers
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return !CFP->getValueAPF().bitcastToAPInt().isMinSignedValue();
+
+ // Check for constant vectors which are splats of INT_MIN values.
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isNotMinSignedValue();
+
+ // Check for constant vectors which are splats of INT_MIN values.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isNotMinSignedValue();
+
+ // It *may* contain INT_MIN, we can't tell.
+ return false;
+}
+
// Constructor to create a '0' constant of arbitrary type...
Constant *Constant::getNullValue(Type *Ty) {
switch (Ty->getTypeID()) {
@@ -261,7 +306,7 @@ void Constant::destroyConstantImpl() {
}
static bool canTrapImpl(const Constant *C,
- SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) {
+ SmallPtrSetImpl<const ConstantExpr *> &NonTrappingOps) {
assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
// The only thing that could possibly trap are constant exprs.
const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
@@ -271,7 +316,7 @@ static bool canTrapImpl(const Constant *C,
// ConstantExpr traps if any operands can trap.
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
- if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps))
+ if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps))
return true;
}
}
@@ -318,7 +363,7 @@ ConstHasGlobalValuePredicate(const Constant *C,
const Constant *ConstOp = dyn_cast<Constant>(Op);
if (!ConstOp)
continue;
- if (Visited.insert(ConstOp))
+ if (Visited.insert(ConstOp).second)
WorkList.push_back(ConstOp);
}
}
@@ -781,6 +826,11 @@ ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
}
Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
+ if (Constant *C = getImpl(Ty, V))
+ return C;
+ return Ty->getContext().pImpl->ArrayConstants.getOrCreate(Ty, V);
+}
+Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef<Constant*> V) {
// Empty arrays are canonicalized to ConstantAggregateZero.
if (V.empty())
return ConstantAggregateZero::get(Ty);
@@ -789,7 +839,6 @@ Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
assert(V[i]->getType() == Ty->getElementType() &&
"Wrong type in array element initializer");
}
- LLVMContextImpl *pImpl = Ty->getContext().pImpl;
// If this is an all-zero array, return a ConstantAggregateZero object. If
// all undef, return an UndefValue, if "all simple", then return a
@@ -871,7 +920,7 @@ Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
}
// Otherwise, we really do want to create a ConstantArray.
- return pImpl->ArrayConstants.getOrCreate(Ty, V);
+ return nullptr;
}
/// getTypeForElements - Return an anonymous struct type to use for a constant
@@ -959,9 +1008,14 @@ ConstantVector::ConstantVector(VectorType *T, ArrayRef<Constant *> V)
// ConstantVector accessors.
Constant *ConstantVector::get(ArrayRef<Constant*> V) {
+ if (Constant *C = getImpl(V))
+ return C;
+ VectorType *Ty = VectorType::get(V.front()->getType(), V.size());
+ return Ty->getContext().pImpl->VectorConstants.getOrCreate(Ty, V);
+}
+Constant *ConstantVector::getImpl(ArrayRef<Constant*> V) {
assert(!V.empty() && "Vectors can't be empty");
VectorType *T = VectorType::get(V.front()->getType(), V.size());
- LLVMContextImpl *pImpl = T->getContext().pImpl;
// If this is an all-undef or all-zero vector, return a
// ConstantAggregateZero or UndefValue.
@@ -1053,7 +1107,7 @@ Constant *ConstantVector::get(ArrayRef<Constant*> V) {
// Otherwise, the element type isn't compatible with ConstantDataVector, or
// the operand list contains a ConstantExpr or something else strange.
- return pImpl->VectorConstants.getOrCreate(T, V);
+ return nullptr;
}
Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) {
@@ -1141,8 +1195,8 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
/// getWithOperands - This returns the current constant expression with the
/// operands replaced with the specified values. The specified array must
/// have the same number of operands as our current one.
-Constant *ConstantExpr::
-getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
+Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
+ bool OnlyIfReduced) const {
assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
bool AnyChange = Ty != getType();
for (unsigned i = 0; i != Ops.size(); ++i)
@@ -1151,6 +1205,7 @@ getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
if (!AnyChange) // No operands changed, return self.
return const_cast<ConstantExpr*>(this);
+ Type *OnlyIfReducedTy = OnlyIfReduced ? Ty : nullptr;
switch (getOpcode()) {
case Instruction::Trunc:
case Instruction::ZExt:
@@ -1165,28 +1220,34 @@ getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
case Instruction::IntToPtr:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
- return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
+ return ConstantExpr::getCast(getOpcode(), Ops[0], Ty, OnlyIfReduced);
case Instruction::Select:
- return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2], OnlyIfReducedTy);
case Instruction::InsertElement:
- return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2],
+ OnlyIfReducedTy);
case Instruction::ExtractElement:
- return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1], OnlyIfReducedTy);
case Instruction::InsertValue:
- return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices());
+ return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices(),
+ OnlyIfReducedTy);
case Instruction::ExtractValue:
- return ConstantExpr::getExtractValue(Ops[0], getIndices());
+ return ConstantExpr::getExtractValue(Ops[0], getIndices(), OnlyIfReducedTy);
case Instruction::ShuffleVector:
- return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2],
+ OnlyIfReducedTy);
case Instruction::GetElementPtr:
return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
- cast<GEPOperator>(this)->isInBounds());
+ cast<GEPOperator>(this)->isInBounds(),
+ OnlyIfReducedTy);
case Instruction::ICmp:
case Instruction::FCmp:
- return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
+ return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1],
+ OnlyIfReducedTy);
default:
assert(getNumOperands() == 2 && "Must be binary operator?");
- return ConstantExpr::get(getOpcode(), Ops[0], Ops[1], SubclassOptionalData);
+ return ConstantExpr::get(getOpcode(), Ops[0], Ops[1], SubclassOptionalData,
+ OnlyIfReducedTy);
}
}
@@ -1447,27 +1508,21 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
// and return early.
BlockAddress *&NewBA =
getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
- if (!NewBA) {
- getBasicBlock()->AdjustBlockAddressRefCount(-1);
-
- // Remove the old entry, this can't cause the map to rehash (just a
- // tombstone will get added).
- getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(),
- getBasicBlock()));
- NewBA = this;
- setOperand(0, NewF);
- setOperand(1, NewBB);
- getBasicBlock()->AdjustBlockAddressRefCount(1);
+ if (NewBA) {
+ replaceUsesOfWithOnConstantImpl(NewBA);
return;
}
- // Otherwise, I do need to replace this with an existing value.
- assert(NewBA != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(NewBA);
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
- destroyConstant();
+ // Remove the old entry, this can't cause the map to rehash (just a
+ // tombstone will get added).
+ getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(),
+ getBasicBlock()));
+ NewBA = this;
+ setOperand(0, NewF);
+ setOperand(1, NewBB);
+ getBasicBlock()->AdjustBlockAddressRefCount(1);
}
//---- ConstantExpr::get() implementations.
@@ -1475,22 +1530,26 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
/// This is a utility function to handle folding of casts and lookup of the
/// cast in the ExprConstants map. It is used by the various get* methods below.
-static inline Constant *getFoldedCast(
- Instruction::CastOps opc, Constant *C, Type *Ty) {
+static Constant *getFoldedCast(Instruction::CastOps opc, Constant *C, Type *Ty,
+ bool OnlyIfReduced = false) {
assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
// Fold a few common cases
if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
return FC;
+ if (OnlyIfReduced)
+ return nullptr;
+
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
// Look up the constant in the table first to ensure uniqueness.
- ExprMapKeyType Key(opc, C);
+ ConstantExprKeyType Key(opc, C);
return pImpl->ExprConstants.getOrCreate(Ty, Key);
}
-Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
+Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty,
+ bool OnlyIfReduced) {
Instruction::CastOps opc = Instruction::CastOps(oc);
assert(Instruction::isCast(opc) && "opcode out of range");
assert(C && Ty && "Null arguments to getCast");
@@ -1499,19 +1558,32 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
switch (opc) {
default:
llvm_unreachable("Invalid cast opcode");
- case Instruction::Trunc: return getTrunc(C, Ty);
- case Instruction::ZExt: return getZExt(C, Ty);
- case Instruction::SExt: return getSExt(C, Ty);
- case Instruction::FPTrunc: return getFPTrunc(C, Ty);
- case Instruction::FPExt: return getFPExtend(C, Ty);
- case Instruction::UIToFP: return getUIToFP(C, Ty);
- case Instruction::SIToFP: return getSIToFP(C, Ty);
- case Instruction::FPToUI: return getFPToUI(C, Ty);
- case Instruction::FPToSI: return getFPToSI(C, Ty);
- case Instruction::PtrToInt: return getPtrToInt(C, Ty);
- case Instruction::IntToPtr: return getIntToPtr(C, Ty);
- case Instruction::BitCast: return getBitCast(C, Ty);
- case Instruction::AddrSpaceCast: return getAddrSpaceCast(C, Ty);
+ case Instruction::Trunc:
+ return getTrunc(C, Ty, OnlyIfReduced);
+ case Instruction::ZExt:
+ return getZExt(C, Ty, OnlyIfReduced);
+ case Instruction::SExt:
+ return getSExt(C, Ty, OnlyIfReduced);
+ case Instruction::FPTrunc:
+ return getFPTrunc(C, Ty, OnlyIfReduced);
+ case Instruction::FPExt:
+ return getFPExtend(C, Ty, OnlyIfReduced);
+ case Instruction::UIToFP:
+ return getUIToFP(C, Ty, OnlyIfReduced);
+ case Instruction::SIToFP:
+ return getSIToFP(C, Ty, OnlyIfReduced);
+ case Instruction::FPToUI:
+ return getFPToUI(C, Ty, OnlyIfReduced);
+ case Instruction::FPToSI:
+ return getFPToSI(C, Ty, OnlyIfReduced);
+ case Instruction::PtrToInt:
+ return getPtrToInt(C, Ty, OnlyIfReduced);
+ case Instruction::IntToPtr:
+ return getIntToPtr(C, Ty, OnlyIfReduced);
+ case Instruction::BitCast:
+ return getBitCast(C, Ty, OnlyIfReduced);
+ case Instruction::AddrSpaceCast:
+ return getAddrSpaceCast(C, Ty, OnlyIfReduced);
}
}
@@ -1584,7 +1656,7 @@ Constant *ConstantExpr::getFPCast(Constant *C, Type *Ty) {
return getCast(opcode, C, Ty);
}
-Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1595,10 +1667,10 @@ Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty) {
assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
"SrcTy must be larger than DestTy for Trunc!");
- return getFoldedCast(Instruction::Trunc, C, Ty);
+ return getFoldedCast(Instruction::Trunc, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getSExt(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getSExt(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1609,10 +1681,10 @@ Constant *ConstantExpr::getSExt(Constant *C, Type *Ty) {
assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"SrcTy must be smaller than DestTy for SExt!");
- return getFoldedCast(Instruction::SExt, C, Ty);
+ return getFoldedCast(Instruction::SExt, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getZExt(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getZExt(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1623,10 +1695,10 @@ Constant *ConstantExpr::getZExt(Constant *C, Type *Ty) {
assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"SrcTy must be smaller than DestTy for ZExt!");
- return getFoldedCast(Instruction::ZExt, C, Ty);
+ return getFoldedCast(Instruction::ZExt, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1635,10 +1707,10 @@ Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
"This is an illegal floating point truncation!");
- return getFoldedCast(Instruction::FPTrunc, C, Ty);
+ return getFoldedCast(Instruction::FPTrunc, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1647,10 +1719,10 @@ Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty) {
assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"This is an illegal floating point extension!");
- return getFoldedCast(Instruction::FPExt, C, Ty);
+ return getFoldedCast(Instruction::FPExt, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1658,10 +1730,10 @@ Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty) {
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
"This is an illegal uint to floating point cast!");
- return getFoldedCast(Instruction::UIToFP, C, Ty);
+ return getFoldedCast(Instruction::UIToFP, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1669,10 +1741,10 @@ Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty) {
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
"This is an illegal sint to floating point cast!");
- return getFoldedCast(Instruction::SIToFP, C, Ty);
+ return getFoldedCast(Instruction::SIToFP, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1680,10 +1752,10 @@ Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty) {
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
"This is an illegal floating point to uint cast!");
- return getFoldedCast(Instruction::FPToUI, C, Ty);
+ return getFoldedCast(Instruction::FPToUI, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
+Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty, bool OnlyIfReduced) {
#ifndef NDEBUG
bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
bool toVec = Ty->getTypeID() == Type::VectorTyID;
@@ -1691,10 +1763,11 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
"This is an illegal floating point to sint cast!");
- return getFoldedCast(Instruction::FPToSI, C, Ty);
+ return getFoldedCast(Instruction::FPToSI, C, Ty, OnlyIfReduced);
}
-Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
+Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy,
+ bool OnlyIfReduced) {
assert(C->getType()->getScalarType()->isPointerTy() &&
"PtrToInt source must be pointer or pointer vector");
assert(DstTy->getScalarType()->isIntegerTy() &&
@@ -1703,10 +1776,11 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
if (isa<VectorType>(C->getType()))
assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&&
"Invalid cast between a different number of vector elements");
- return getFoldedCast(Instruction::PtrToInt, C, DstTy);
+ return getFoldedCast(Instruction::PtrToInt, C, DstTy, OnlyIfReduced);
}
-Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
+Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy,
+ bool OnlyIfReduced) {
assert(C->getType()->getScalarType()->isIntegerTy() &&
"IntToPtr source must be integer or integer vector");
assert(DstTy->getScalarType()->isPointerTy() &&
@@ -1715,10 +1789,11 @@ Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
if (isa<VectorType>(C->getType()))
assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&&
"Invalid cast between a different number of vector elements");
- return getFoldedCast(Instruction::IntToPtr, C, DstTy);
+ return getFoldedCast(Instruction::IntToPtr, C, DstTy, OnlyIfReduced);
}
-Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) {
+Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy,
+ bool OnlyIfReduced) {
assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) &&
"Invalid constantexpr bitcast!");
@@ -1726,10 +1801,11 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) {
// speedily.
if (C->getType() == DstTy) return C;
- return getFoldedCast(Instruction::BitCast, C, DstTy);
+ return getFoldedCast(Instruction::BitCast, C, DstTy, OnlyIfReduced);
}
-Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy) {
+Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
+ bool OnlyIfReduced) {
assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) &&
"Invalid constantexpr addrspacecast!");
@@ -1746,11 +1822,11 @@ Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy) {
}
C = getBitCast(C, MidTy);
}
- return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy);
+ return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced);
}
Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
- unsigned Flags) {
+ unsigned Flags, Type *OnlyIfReducedTy) {
// Check the operands for consistency first.
assert(Opcode >= Instruction::BinaryOpsBegin &&
Opcode < Instruction::BinaryOpsEnd &&
@@ -1819,8 +1895,11 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
return FC; // Fold a few common cases.
+ if (OnlyIfReducedTy == C1->getType())
+ return nullptr;
+
Constant *ArgVec[] = { C1, C2 };
- ExprMapKeyType Key(Opcode, ArgVec, 0, Flags);
+ ConstantExprKeyType Key(Opcode, ArgVec, 0, Flags);
LLVMContextImpl *pImpl = C1->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
@@ -1840,7 +1919,7 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) {
// alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
Type *AligningTy =
- StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
+ StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, nullptr);
Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo(0));
Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
@@ -1868,8 +1947,8 @@ Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) {
Type::getInt64Ty(Ty->getContext()));
}
-Constant *ConstantExpr::getCompare(unsigned short Predicate,
- Constant *C1, Constant *C2) {
+Constant *ConstantExpr::getCompare(unsigned short Predicate, Constant *C1,
+ Constant *C2, bool OnlyIfReduced) {
assert(C1->getType() == C2->getType() && "Op types should be identical!");
switch (Predicate) {
@@ -1880,31 +1959,35 @@ Constant *ConstantExpr::getCompare(unsigned short Predicate,
case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
case CmpInst::FCMP_TRUE:
- return getFCmp(Predicate, C1, C2);
+ return getFCmp(Predicate, C1, C2, OnlyIfReduced);
case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SLE:
- return getICmp(Predicate, C1, C2);
+ return getICmp(Predicate, C1, C2, OnlyIfReduced);
}
}
-Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
+Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2,
+ Type *OnlyIfReducedTy) {
assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
return SC; // Fold common cases
+ if (OnlyIfReducedTy == V1->getType())
+ return nullptr;
+
Constant *ArgVec[] = { C, V1, V2 };
- ExprMapKeyType Key(Instruction::Select, ArgVec);
+ ConstantExprKeyType Key(Instruction::Select, ArgVec);
LLVMContextImpl *pImpl = C->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
}
Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
- bool InBounds) {
+ bool InBounds, Type *OnlyIfReducedTy) {
assert(C->getType()->isPtrOrPtrVectorTy() &&
"Non-pointer type for constant GetElementPtr expression");
@@ -1919,6 +2002,9 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
if (VectorType *VecTy = dyn_cast<VectorType>(C->getType()))
ReqTy = VectorType::get(ReqTy, VecTy->getNumElements());
+ if (OnlyIfReducedTy == ReqTy)
+ return nullptr;
+
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec;
ArgVec.reserve(1 + Idxs.size());
@@ -1932,15 +2018,15 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
"getelementptr index type missmatch");
ArgVec.push_back(cast<Constant>(Idxs[i]));
}
- const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
- InBounds ? GEPOperator::IsInBounds : 0);
+ const ConstantExprKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
+ InBounds ? GEPOperator::IsInBounds : 0);
LLVMContextImpl *pImpl = C->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
-Constant *
-ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS,
+ Constant *RHS, bool OnlyIfReduced) {
assert(LHS->getType() == RHS->getType());
assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
@@ -1948,10 +2034,13 @@ ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
return FC; // Fold a few common cases...
+ if (OnlyIfReduced)
+ return nullptr;
+
// Look up the constant in the table first to ensure uniqueness
Constant *ArgVec[] = { LHS, RHS };
// Get the key type with both the opcode and predicate
- const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+ const ConstantExprKeyType Key(Instruction::ICmp, ArgVec, pred);
Type *ResultTy = Type::getInt1Ty(LHS->getContext());
if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
@@ -1961,18 +2050,21 @@ ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
}
-Constant *
-ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+Constant *ConstantExpr::getFCmp(unsigned short pred, Constant *LHS,
+ Constant *RHS, bool OnlyIfReduced) {
assert(LHS->getType() == RHS->getType());
assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
return FC; // Fold a few common cases...
+ if (OnlyIfReduced)
+ return nullptr;
+
// Look up the constant in the table first to ensure uniqueness
Constant *ArgVec[] = { LHS, RHS };
// Get the key type with both the opcode and predicate
- const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
+ const ConstantExprKeyType Key(Instruction::FCmp, ArgVec, pred);
Type *ResultTy = Type::getInt1Ty(LHS->getContext());
if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
@@ -1982,7 +2074,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
}
-Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
+Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx,
+ Type *OnlyIfReducedTy) {
assert(Val->getType()->isVectorTy() &&
"Tried to create extractelement operation on non-vector type!");
assert(Idx->getType()->isIntegerTy() &&
@@ -1991,17 +2084,20 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
return FC; // Fold a few common cases.
+ Type *ReqTy = Val->getType()->getVectorElementType();
+ if (OnlyIfReducedTy == ReqTy)
+ return nullptr;
+
// Look up the constant in the table first to ensure uniqueness
Constant *ArgVec[] = { Val, Idx };
- const ExprMapKeyType Key(Instruction::ExtractElement, ArgVec);
+ const ConstantExprKeyType Key(Instruction::ExtractElement, ArgVec);
LLVMContextImpl *pImpl = Val->getContext().pImpl;
- Type *ReqTy = Val->getType()->getVectorElementType();
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
-Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
- Constant *Idx) {
+Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
+ Constant *Idx, Type *OnlyIfReducedTy) {
assert(Val->getType()->isVectorTy() &&
"Tried to create insertelement operation on non-vector type!");
assert(Elt->getType() == Val->getType()->getVectorElementType() &&
@@ -2011,16 +2107,20 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
return FC; // Fold a few common cases.
+
+ if (OnlyIfReducedTy == Val->getType())
+ return nullptr;
+
// Look up the constant in the table first to ensure uniqueness
Constant *ArgVec[] = { Val, Elt, Idx };
- const ExprMapKeyType Key(Instruction::InsertElement, ArgVec);
+ const ConstantExprKeyType Key(Instruction::InsertElement, ArgVec);
LLVMContextImpl *pImpl = Val->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
}
-Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
- Constant *Mask) {
+Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
+ Constant *Mask, Type *OnlyIfReducedTy) {
assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
"Invalid shuffle vector constant expr operands!");
@@ -2031,16 +2131,20 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
Type *EltTy = V1->getType()->getVectorElementType();
Type *ShufTy = VectorType::get(EltTy, NElts);
+ if (OnlyIfReducedTy == ShufTy)
+ return nullptr;
+
// Look up the constant in the table first to ensure uniqueness
Constant *ArgVec[] = { V1, V2, Mask };
- const ExprMapKeyType Key(Instruction::ShuffleVector, ArgVec);
+ const ConstantExprKeyType Key(Instruction::ShuffleVector, ArgVec);
LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
}
Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> Idxs) {
+ ArrayRef<unsigned> Idxs,
+ Type *OnlyIfReducedTy) {
assert(Agg->getType()->isFirstClassType() &&
"Non-first-class type for constant insertvalue expression");
@@ -2052,15 +2156,18 @@ Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
if (Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs))
return FC;
+ if (OnlyIfReducedTy == ReqTy)
+ return nullptr;
+
Constant *ArgVec[] = { Agg, Val };
- const ExprMapKeyType Key(Instruction::InsertValue, ArgVec, 0, 0, Idxs);
+ const ConstantExprKeyType Key(Instruction::InsertValue, ArgVec, 0, 0, Idxs);
LLVMContextImpl *pImpl = Agg->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
-Constant *ConstantExpr::getExtractValue(Constant *Agg,
- ArrayRef<unsigned> Idxs) {
+Constant *ConstantExpr::getExtractValue(Constant *Agg, ArrayRef<unsigned> Idxs,
+ Type *OnlyIfReducedTy) {
assert(Agg->getType()->isFirstClassType() &&
"Tried to create extractelement operation on non-first-class type!");
@@ -2073,8 +2180,11 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg,
if (Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs))
return FC;
+ if (OnlyIfReducedTy == ReqTy)
+ return nullptr;
+
Constant *ArgVec[] = { Agg };
- const ExprMapKeyType Key(Instruction::ExtractValue, ArgVec, 0, 0, Idxs);
+ const ConstantExprKeyType Key(Instruction::ExtractValue, ArgVec, 0, 0, Idxs);
LLVMContextImpl *pImpl = Agg->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
@@ -2326,14 +2436,16 @@ Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
return ConstantAggregateZero::get(Ty);
// Do a lookup to see if we have already formed one of these.
- StringMap<ConstantDataSequential*>::MapEntryTy &Slot =
- Ty->getContext().pImpl->CDSConstants.GetOrCreateValue(Elements);
+ auto &Slot =
+ *Ty->getContext()
+ .pImpl->CDSConstants.insert(std::make_pair(Elements, nullptr))
+ .first;
// The bucket can point to a linked list of different CDS's that have the same
// body but different types. For example, 0,0,0,1 could be a 4 element array
// of i8, or a 1-element array of i32. They'll both end up in the same
/// StringMap bucket, linked up by their Next pointers. Walk the list.
- ConstantDataSequential **Entry = &Slot.getValue();
+ ConstantDataSequential **Entry = &Slot.second;
for (ConstantDataSequential *Node = *Entry; Node;
Entry = &Node->Next, Node = *Entry)
if (Node->getType() == Ty)
@@ -2342,10 +2454,10 @@ Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
// Okay, we didn't get a hit. Create a node of the right class, link it in,
// and return it.
if (isa<ArrayType>(Ty))
- return *Entry = new ConstantDataArray(Ty, Slot.getKeyData());
+ return *Entry = new ConstantDataArray(Ty, Slot.first().data());
assert(isa<VectorType>(Ty));
- return *Entry = new ConstantDataVector(Ty, Slot.getKeyData());
+ return *Entry = new ConstantDataVector(Ty, Slot.first().data());
}
void ConstantDataSequential::destroyConstant() {
@@ -2431,7 +2543,7 @@ Constant *ConstantDataArray::getString(LLVMContext &Context,
StringRef Str, bool AddNull) {
if (!AddNull) {
const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data());
- return get(Context, ArrayRef<uint8_t>(const_cast<uint8_t *>(Data),
+ return get(Context, makeArrayRef(const_cast<uint8_t *>(Data),
Str.size()));
}
@@ -2602,7 +2714,7 @@ bool ConstantDataSequential::isCString() const {
}
/// getSplatValue - If this is a splat constant, meaning that all of the
-/// elements have the same value, return that value. Otherwise return NULL.
+/// elements have the same value, return that value. Otherwise return nullptr.
Constant *ConstantDataVector::getSplatValue() const {
const char *Base = getRawDataValues().data();
@@ -2630,16 +2742,23 @@ Constant *ConstantDataVector::getSplatValue() const {
/// work, but would be really slow because it would have to unique each updated
/// array instance.
///
+void Constant::replaceUsesOfWithOnConstantImpl(Constant *Replacement) {
+ // I do need to replace this with an existing value.
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
Constant *ToC = cast<Constant>(To);
- LLVMContextImpl *pImpl = getType()->getContext().pImpl;
-
SmallVector<Constant*, 8> Values;
- LLVMContextImpl::ArrayConstantsTy::LookupKey Lookup;
- Lookup.first = cast<ArrayType>(getType());
Values.reserve(getNumOperands()); // Build replacement array.
// Fill values with the modified operands of the constant array. Also,
@@ -2658,51 +2777,25 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
AllSame &= Val == ToC;
}
- Constant *Replacement = nullptr;
if (AllSame && ToC->isNullValue()) {
- Replacement = ConstantAggregateZero::get(getType());
- } else if (AllSame && isa<UndefValue>(ToC)) {
- Replacement = UndefValue::get(getType());
- } else {
- // Check to see if we have this array type already.
- Lookup.second = makeArrayRef(Values);
- LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
- pImpl->ArrayConstants.find(Lookup);
-
- if (I != pImpl->ArrayConstants.map_end()) {
- Replacement = I->first;
- } else {
- // Okay, the new shape doesn't exist in the system yet. Instead of
- // creating a new constant array, inserting it, replaceallusesof'ing the
- // old with the new, then deleting the old... just update the current one
- // in place!
- pImpl->ArrayConstants.remove(this);
-
- // Update to the new value. Optimize for the case when we have a single
- // operand that we're changing, but handle bulk updates efficiently.
- if (NumUpdated == 1) {
- unsigned OperandToUpdate = U - OperandList;
- assert(getOperand(OperandToUpdate) == From &&
- "ReplaceAllUsesWith broken!");
- setOperand(OperandToUpdate, ToC);
- } else {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i) == From)
- setOperand(i, ToC);
- }
- pImpl->ArrayConstants.insert(this);
- return;
- }
+ replaceUsesOfWithOnConstantImpl(ConstantAggregateZero::get(getType()));
+ return;
+ }
+ if (AllSame && isa<UndefValue>(ToC)) {
+ replaceUsesOfWithOnConstantImpl(UndefValue::get(getType()));
+ return;
}
- // Otherwise, I do need to replace this with an existing value.
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(Replacement);
+ // Check for any other type of constant-folding.
+ if (Constant *C = getImpl(getType(), Values)) {
+ replaceUsesOfWithOnConstantImpl(C);
+ return;
+ }
- // Delete the old constant!
- destroyConstant();
+ // Update to the new value.
+ if (Constant *C = getContext().pImpl->ArrayConstants.replaceOperandsInPlace(
+ Values, this, From, ToC, NumUpdated, U - OperandList))
+ replaceUsesOfWithOnConstantImpl(C);
}
void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
@@ -2714,8 +2807,6 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
SmallVector<Constant*, 8> Values;
- LLVMContextImpl::StructConstantsTy::LookupKey Lookup;
- Lookup.first = cast<StructType>(getType());
Values.reserve(getNumOperands()); // Build replacement struct.
// Fill values with the modified operands of the constant struct. Also,
@@ -2742,64 +2833,47 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
}
Values[OperandToUpdate] = ToC;
- LLVMContextImpl *pImpl = getContext().pImpl;
-
- Constant *Replacement = nullptr;
if (isAllZeros) {
- Replacement = ConstantAggregateZero::get(getType());
- } else if (isAllUndef) {
- Replacement = UndefValue::get(getType());
- } else {
- // Check to see if we have this struct type already.
- Lookup.second = makeArrayRef(Values);
- LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
- pImpl->StructConstants.find(Lookup);
-
- if (I != pImpl->StructConstants.map_end()) {
- Replacement = I->first;
- } else {
- // Okay, the new shape doesn't exist in the system yet. Instead of
- // creating a new constant struct, inserting it, replaceallusesof'ing the
- // old with the new, then deleting the old... just update the current one
- // in place!
- pImpl->StructConstants.remove(this);
-
- // Update to the new value.
- setOperand(OperandToUpdate, ToC);
- pImpl->StructConstants.insert(this);
- return;
- }
+ replaceUsesOfWithOnConstantImpl(ConstantAggregateZero::get(getType()));
+ return;
+ }
+ if (isAllUndef) {
+ replaceUsesOfWithOnConstantImpl(UndefValue::get(getType()));
+ return;
}
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(Replacement);
-
- // Delete the old constant!
- destroyConstant();
+ // Update to the new value.
+ if (Constant *C = getContext().pImpl->StructConstants.replaceOperandsInPlace(
+ Values, this, From, ToC))
+ replaceUsesOfWithOnConstantImpl(C);
}
void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
SmallVector<Constant*, 8> Values;
Values.reserve(getNumOperands()); // Build replacement array...
+ unsigned NumUpdated = 0;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
Constant *Val = getOperand(i);
- if (Val == From) Val = cast<Constant>(To);
+ if (Val == From) {
+ ++NumUpdated;
+ Val = ToC;
+ }
Values.push_back(Val);
}
- Constant *Replacement = get(Values);
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(Replacement);
+ if (Constant *C = getImpl(Values)) {
+ replaceUsesOfWithOnConstantImpl(C);
+ return;
+ }
- // Delete the old constant!
- destroyConstant();
+ // Update to the new value.
+ if (Constant *C = getContext().pImpl->VectorConstants.replaceOperandsInPlace(
+ Values, this, From, ToC, NumUpdated, U - OperandList))
+ replaceUsesOfWithOnConstantImpl(C);
}
void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
@@ -2808,19 +2882,26 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
Constant *To = cast<Constant>(ToV);
SmallVector<Constant*, 8> NewOps;
+ unsigned NumUpdated = 0;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
Constant *Op = getOperand(i);
- NewOps.push_back(Op == From ? To : Op);
+ if (Op == From) {
+ ++NumUpdated;
+ Op = To;
+ }
+ NewOps.push_back(Op);
}
+ assert(NumUpdated && "I didn't contain From!");
- Constant *Replacement = getWithOperands(NewOps);
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(Replacement);
+ if (Constant *C = getWithOperands(NewOps, getType(), true)) {
+ replaceUsesOfWithOnConstantImpl(C);
+ return;
+ }
- // Delete the old constant!
- destroyConstant();
+ // Update to the new value.
+ if (Constant *C = getContext().pImpl->ExprConstants.replaceOperandsInPlace(
+ NewOps, this, From, To, NumUpdated, U - OperandList))
+ replaceUsesOfWithOnConstantImpl(C);
}
Instruction *ConstantExpr::getAsInstruction() {
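The Constants.cpp hunks above thread a new OnlyIfReduced/OnlyIfReducedTy parameter through the ConstantExpr::get* entry points: each getter still tries the usual constant folding first, but when a caller asks only for a reduced result it returns nullptr instead of uniquing a fresh ConstantExpr node (this is what the ConstantExpr::replaceUsesOfWithOnConstant hunk relies on via getWithOperands(NewOps, getType(), true)). A minimal caller-side sketch of that convention, using only the signatures shown in this diff; the helper name tryReduceTrunc is hypothetical and not part of the patch:

#include "llvm/IR/Constants.h"
using namespace llvm;

// Sketch only: caller-side use of the OnlyIfReduced flag added above.
static Constant *tryReduceTrunc(Constant *C, Type *Ty) {
  // With OnlyIfReduced=true the getter still folds the common cases, but it
  // returns nullptr instead of uniquing a fresh trunc ConstantExpr otherwise.
  if (Constant *Folded = ConstantExpr::getTrunc(C, Ty, /*OnlyIfReduced=*/true))
    return Folded;
  return nullptr; // caller keeps the original constant or emits an instruction
}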
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
index f06509f..571dec2 100644
--- a/lib/IR/ConstantsContext.h
+++ b/lib/IR/ConstantsContext.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CONSTANTSCONTEXT_H
-#define LLVM_CONSTANTSCONTEXT_H
+#ifndef LLVM_LIB_IR_CONSTANTSCONTEXT_H
+#define LLVM_LIB_IR_CONSTANTSCONTEXT_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
@@ -29,8 +29,6 @@
#define DEBUG_TYPE "ir"
namespace llvm {
-template<class ValType>
-struct ConstantTraits;
/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement unary constant exprs.
@@ -169,11 +167,10 @@ public:
void *operator new(size_t s) {
return User::operator new(s, 1);
}
- ExtractValueConstantExpr(Constant *Agg,
- const SmallVector<unsigned, 4> &IdxList,
+ ExtractValueConstantExpr(Constant *Agg, ArrayRef<unsigned> IdxList,
Type *DestTy)
- : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
- Indices(IdxList) {
+ : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
+ Indices(IdxList.begin(), IdxList.end()) {
Op<0>() = Agg;
}
@@ -196,10 +193,9 @@ public:
return User::operator new(s, 2);
}
InsertValueConstantExpr(Constant *Agg, Constant *Val,
- const SmallVector<unsigned, 4> &IdxList,
- Type *DestTy)
- : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
- Indices(IdxList) {
+ ArrayRef<unsigned> IdxList, Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
+ Indices(IdxList.begin(), IdxList.end()) {
Op<0>() = Agg;
Op<1>() = Val;
}
@@ -316,379 +312,241 @@ struct OperandTraits<CompareConstantExpr> :
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
-struct ExprMapKeyType {
- ExprMapKeyType(unsigned opc,
- ArrayRef<Constant*> ops,
- unsigned short flags = 0,
- unsigned short optionalflags = 0,
- ArrayRef<unsigned> inds = None)
- : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
- operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
- uint8_t opcode;
- uint8_t subclassoptionaldata;
- uint16_t subclassdata;
- std::vector<Constant*> operands;
- SmallVector<unsigned, 4> indices;
- bool operator==(const ExprMapKeyType& that) const {
- return this->opcode == that.opcode &&
- this->subclassdata == that.subclassdata &&
- this->subclassoptionaldata == that.subclassoptionaldata &&
- this->operands == that.operands &&
- this->indices == that.indices;
- }
- bool operator<(const ExprMapKeyType & that) const {
- return std::tie(opcode, operands, subclassdata, subclassoptionaldata,
- indices) <
- std::tie(that.opcode, that.operands, that.subclassdata,
- that.subclassoptionaldata, that.indices);
- }
-
- bool operator!=(const ExprMapKeyType& that) const {
- return !(*this == that);
- }
-};
-
-struct InlineAsmKeyType {
- InlineAsmKeyType(StringRef AsmString,
- StringRef Constraints, bool hasSideEffects,
- bool isAlignStack, InlineAsm::AsmDialect asmDialect)
- : asm_string(AsmString), constraints(Constraints),
- has_side_effects(hasSideEffects), is_align_stack(isAlignStack),
- asm_dialect(asmDialect) {}
- std::string asm_string;
- std::string constraints;
- bool has_side_effects;
- bool is_align_stack;
- InlineAsm::AsmDialect asm_dialect;
- bool operator==(const InlineAsmKeyType& that) const {
- return this->asm_string == that.asm_string &&
- this->constraints == that.constraints &&
- this->has_side_effects == that.has_side_effects &&
- this->is_align_stack == that.is_align_stack &&
- this->asm_dialect == that.asm_dialect;
- }
- bool operator<(const InlineAsmKeyType& that) const {
- return std::tie(asm_string, constraints, has_side_effects, is_align_stack,
- asm_dialect) <
- std::tie(that.asm_string, that.constraints, that.has_side_effects,
- that.is_align_stack, that.asm_dialect);
- }
-
- bool operator!=(const InlineAsmKeyType& that) const {
- return !(*this == that);
- }
-};
+template <class ConstantClass> struct ConstantAggrKeyType;
+struct InlineAsmKeyType;
+struct ConstantExprKeyType;
-// The number of operands for each ConstantCreator::create method is
-// determined by the ConstantTraits template.
-// ConstantCreator - A class that is used to create constants by
-// ConstantUniqueMap*. This class should be partially specialized if there is
-// something strange that needs to be done to interface to the ctor for the
-// constant.
-//
-template<typename T, typename Alloc>
-struct ConstantTraits< std::vector<T, Alloc> > {
- static unsigned uses(const std::vector<T, Alloc>& v) {
- return v.size();
- }
-};
-
-template<>
-struct ConstantTraits<Constant *> {
- static unsigned uses(Constant * const & v) {
- return 1;
- }
+template <class ConstantClass> struct ConstantInfo;
+template <> struct ConstantInfo<ConstantExpr> {
+ typedef ConstantExprKeyType ValType;
+ typedef Type TypeClass;
};
-
-template<class ConstantClass, class TypeClass, class ValType>
-struct ConstantCreator {
- static ConstantClass *create(TypeClass *Ty, const ValType &V) {
- return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
+template <> struct ConstantInfo<InlineAsm> {
+ typedef InlineAsmKeyType ValType;
+ typedef PointerType TypeClass;
+};
+template <> struct ConstantInfo<ConstantArray> {
+ typedef ConstantAggrKeyType<ConstantArray> ValType;
+ typedef ArrayType TypeClass;
+};
+template <> struct ConstantInfo<ConstantStruct> {
+ typedef ConstantAggrKeyType<ConstantStruct> ValType;
+ typedef StructType TypeClass;
+};
+template <> struct ConstantInfo<ConstantVector> {
+ typedef ConstantAggrKeyType<ConstantVector> ValType;
+ typedef VectorType TypeClass;
+};
+
+template <class ConstantClass> struct ConstantAggrKeyType {
+ ArrayRef<Constant *> Operands;
+ ConstantAggrKeyType(ArrayRef<Constant *> Operands) : Operands(Operands) {}
+ ConstantAggrKeyType(ArrayRef<Constant *> Operands, const ConstantClass *)
+ : Operands(Operands) {}
+ ConstantAggrKeyType(const ConstantClass *C,
+ SmallVectorImpl<Constant *> &Storage) {
+ assert(Storage.empty() && "Expected empty storage");
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
+ Storage.push_back(C->getOperand(I));
+ Operands = Storage;
+ }
+
+ bool operator==(const ConstantAggrKeyType &X) const {
+ return Operands == X.Operands;
+ }
+ bool operator==(const ConstantClass *C) const {
+ if (Operands.size() != C->getNumOperands())
+ return false;
+ for (unsigned I = 0, E = Operands.size(); I != E; ++I)
+ if (Operands[I] != C->getOperand(I))
+ return false;
+ return true;
}
-};
-
-template<class ConstantClass, class TypeClass>
-struct ConstantArrayCreator {
- static ConstantClass *create(TypeClass *Ty, ArrayRef<Constant*> V) {
- return new(V.size()) ConstantClass(Ty, V);
+ unsigned getHash() const {
+ return hash_combine_range(Operands.begin(), Operands.end());
}
-};
-template<class ConstantClass>
-struct ConstantKeyData {
- typedef void ValType;
- static ValType getValType(ConstantClass *C) {
- llvm_unreachable("Unknown Constant type!");
+ typedef typename ConstantInfo<ConstantClass>::TypeClass TypeClass;
+ ConstantClass *create(TypeClass *Ty) const {
+ return new (Operands.size()) ConstantClass(Ty, Operands);
}
};
-template<>
-struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
- static ConstantExpr *create(Type *Ty, const ExprMapKeyType &V,
- unsigned short pred = 0) {
- if (Instruction::isCast(V.opcode))
- return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
- if ((V.opcode >= Instruction::BinaryOpsBegin &&
- V.opcode < Instruction::BinaryOpsEnd))
- return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1],
- V.subclassoptionaldata);
- if (V.opcode == Instruction::Select)
- return new SelectConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::ExtractElement)
- return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::InsertElement)
- return new InsertElementConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::ShuffleVector)
- return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::InsertValue)
- return new InsertValueConstantExpr(V.operands[0], V.operands[1],
- V.indices, Ty);
- if (V.opcode == Instruction::ExtractValue)
- return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
- if (V.opcode == Instruction::GetElementPtr) {
- std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
- return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty,
- V.subclassoptionaldata);
+struct InlineAsmKeyType {
+ StringRef AsmString;
+ StringRef Constraints;
+ bool HasSideEffects;
+ bool IsAlignStack;
+ InlineAsm::AsmDialect AsmDialect;
+
+ InlineAsmKeyType(StringRef AsmString, StringRef Constraints,
+ bool HasSideEffects, bool IsAlignStack,
+ InlineAsm::AsmDialect AsmDialect)
+ : AsmString(AsmString), Constraints(Constraints),
+ HasSideEffects(HasSideEffects), IsAlignStack(IsAlignStack),
+ AsmDialect(AsmDialect) {}
+ InlineAsmKeyType(const InlineAsm *Asm, SmallVectorImpl<Constant *> &)
+ : AsmString(Asm->getAsmString()), Constraints(Asm->getConstraintString()),
+ HasSideEffects(Asm->hasSideEffects()),
+ IsAlignStack(Asm->isAlignStack()), AsmDialect(Asm->getDialect()) {}
+
+ bool operator==(const InlineAsmKeyType &X) const {
+ return HasSideEffects == X.HasSideEffects &&
+ IsAlignStack == X.IsAlignStack && AsmDialect == X.AsmDialect &&
+ AsmString == X.AsmString && Constraints == X.Constraints;
+ }
+ bool operator==(const InlineAsm *Asm) const {
+ return HasSideEffects == Asm->hasSideEffects() &&
+ IsAlignStack == Asm->isAlignStack() &&
+ AsmDialect == Asm->getDialect() &&
+ AsmString == Asm->getAsmString() &&
+ Constraints == Asm->getConstraintString();
+ }
+ unsigned getHash() const {
+ return hash_combine(AsmString, Constraints, HasSideEffects, IsAlignStack,
+ AsmDialect);
+ }
+
+ typedef ConstantInfo<InlineAsm>::TypeClass TypeClass;
+ InlineAsm *create(TypeClass *Ty) const {
+ return new InlineAsm(Ty, AsmString, Constraints, HasSideEffects,
+ IsAlignStack, AsmDialect);
+ }
+};
+
+struct ConstantExprKeyType {
+ uint8_t Opcode;
+ uint8_t SubclassOptionalData;
+ uint16_t SubclassData;
+ ArrayRef<Constant *> Ops;
+ ArrayRef<unsigned> Indexes;
+
+ ConstantExprKeyType(unsigned Opcode, ArrayRef<Constant *> Ops,
+ unsigned short SubclassData = 0,
+ unsigned short SubclassOptionalData = 0,
+ ArrayRef<unsigned> Indexes = None)
+ : Opcode(Opcode), SubclassOptionalData(SubclassOptionalData),
+ SubclassData(SubclassData), Ops(Ops), Indexes(Indexes) {}
+ ConstantExprKeyType(ArrayRef<Constant *> Operands, const ConstantExpr *CE)
+ : Opcode(CE->getOpcode()),
+ SubclassOptionalData(CE->getRawSubclassOptionalData()),
+ SubclassData(CE->isCompare() ? CE->getPredicate() : 0), Ops(Operands),
+ Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef<unsigned>()) {}
+ ConstantExprKeyType(const ConstantExpr *CE,
+ SmallVectorImpl<Constant *> &Storage)
+ : Opcode(CE->getOpcode()),
+ SubclassOptionalData(CE->getRawSubclassOptionalData()),
+ SubclassData(CE->isCompare() ? CE->getPredicate() : 0),
+ Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef<unsigned>()) {
+ assert(Storage.empty() && "Expected empty storage");
+ for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I)
+ Storage.push_back(CE->getOperand(I));
+ Ops = Storage;
+ }
+
+ bool operator==(const ConstantExprKeyType &X) const {
+ return Opcode == X.Opcode && SubclassData == X.SubclassData &&
+ SubclassOptionalData == X.SubclassOptionalData && Ops == X.Ops &&
+ Indexes == X.Indexes;
+ }
+
+ bool operator==(const ConstantExpr *CE) const {
+ if (Opcode != CE->getOpcode())
+ return false;
+ if (SubclassOptionalData != CE->getRawSubclassOptionalData())
+ return false;
+ if (Ops.size() != CE->getNumOperands())
+ return false;
+ if (SubclassData != (CE->isCompare() ? CE->getPredicate() : 0))
+ return false;
+ for (unsigned I = 0, E = Ops.size(); I != E; ++I)
+ if (Ops[I] != CE->getOperand(I))
+ return false;
+ if (Indexes != (CE->hasIndices() ? CE->getIndices() : ArrayRef<unsigned>()))
+ return false;
+ return true;
+ }
+
+ unsigned getHash() const {
+ return hash_combine(Opcode, SubclassOptionalData, SubclassData,
+ hash_combine_range(Ops.begin(), Ops.end()),
+ hash_combine_range(Indexes.begin(), Indexes.end()));
+ }
+
+ typedef ConstantInfo<ConstantExpr>::TypeClass TypeClass;
+ ConstantExpr *create(TypeClass *Ty) const {
+ switch (Opcode) {
+ default:
+ if (Instruction::isCast(Opcode))
+ return new UnaryConstantExpr(Opcode, Ops[0], Ty);
+ if ((Opcode >= Instruction::BinaryOpsBegin &&
+ Opcode < Instruction::BinaryOpsEnd))
+ return new BinaryConstantExpr(Opcode, Ops[0], Ops[1],
+ SubclassOptionalData);
+ llvm_unreachable("Invalid ConstantExpr!");
+ case Instruction::Select:
+ return new SelectConstantExpr(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return new ExtractElementConstantExpr(Ops[0], Ops[1]);
+ case Instruction::InsertElement:
+ return new InsertElementConstantExpr(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ShuffleVector:
+ return new ShuffleVectorConstantExpr(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertValue:
+ return new InsertValueConstantExpr(Ops[0], Ops[1], Indexes, Ty);
+ case Instruction::ExtractValue:
+ return new ExtractValueConstantExpr(Ops[0], Indexes, Ty);
+ case Instruction::GetElementPtr:
+ return GetElementPtrConstantExpr::Create(Ops[0], Ops.slice(1), Ty,
+ SubclassOptionalData);
+ case Instruction::ICmp:
+ return new CompareConstantExpr(Ty, Instruction::ICmp, SubclassData,
+ Ops[0], Ops[1]);
+ case Instruction::FCmp:
+ return new CompareConstantExpr(Ty, Instruction::FCmp, SubclassData,
+ Ops[0], Ops[1]);
}
-
- // The compare instructions are weird. We have to encode the predicate
- // value and it is combined with the instruction opcode by multiplying
- // the opcode by one hundred. We must decode this to get the predicate.
- if (V.opcode == Instruction::ICmp)
- return new CompareConstantExpr(Ty, Instruction::ICmp, V.subclassdata,
- V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::FCmp)
- return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
- V.operands[0], V.operands[1]);
- llvm_unreachable("Invalid ConstantExpr!");
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantExpr> {
- typedef ExprMapKeyType ValType;
- static ValType getValType(ConstantExpr *CE) {
- std::vector<Constant*> Operands;
- Operands.reserve(CE->getNumOperands());
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- Operands.push_back(cast<Constant>(CE->getOperand(i)));
- return ExprMapKeyType(CE->getOpcode(), Operands,
- CE->isCompare() ? CE->getPredicate() : 0,
- CE->getRawSubclassOptionalData(),
- CE->hasIndices() ?
- CE->getIndices() : ArrayRef<unsigned>());
- }
-};
-
-template<>
-struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
- static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) {
- return new InlineAsm(Ty, Key.asm_string, Key.constraints,
- Key.has_side_effects, Key.is_align_stack,
- Key.asm_dialect);
}
};
-template<>
-struct ConstantKeyData<InlineAsm> {
- typedef InlineAsmKeyType ValType;
- static ValType getValType(InlineAsm *Asm) {
- return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(),
- Asm->hasSideEffects(), Asm->isAlignStack(),
- Asm->getDialect());
- }
-};
-
-template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
- bool HasLargeKey = false /*true for arrays and structs*/ >
-class ConstantUniqueMap {
-public:
- typedef std::pair<TypeClass*, ValType> MapKey;
- typedef std::map<MapKey, ConstantClass *> MapTy;
- typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
-private:
- /// Map - This is the main map from the element descriptor to the Constants.
- /// This is the primary way we avoid creating two of the same shape
- /// constant.
- MapTy Map;
-
- /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
- /// from the constants to their element in Map. This is important for
- /// removal of constants from the array, which would otherwise have to scan
- /// through the map with very large keys.
- InverseMapTy InverseMap;
-
-public:
- typename MapTy::iterator map_begin() { return Map.begin(); }
- typename MapTy::iterator map_end() { return Map.end(); }
-
- void freeConstants() {
- for (typename MapTy::iterator I=Map.begin(), E=Map.end();
- I != E; ++I) {
- // Asserts that use_empty().
- delete I->second;
- }
- }
-
- /// InsertOrGetItem - Return an iterator for the specified element.
- /// If the element exists in the map, the returned iterator points to the
- /// entry and Exists=true. If not, the iterator points to the newly
- /// inserted entry and returns Exists=false. Newly inserted entries have
- /// I->second == 0, and should be filled in.
- typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, ConstantClass *>
- &InsertVal,
- bool &Exists) {
- std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
- Exists = !IP.second;
- return IP.first;
- }
-
-private:
- typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
- if (HasLargeKey) {
- typename InverseMapTy::iterator IMI = InverseMap.find(CP);
- assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
- IMI->second->second == CP &&
- "InverseMap corrupt!");
- return IMI->second;
- }
-
- typename MapTy::iterator I =
- Map.find(MapKey(static_cast<TypeClass*>(CP->getType()),
- ConstantKeyData<ConstantClass>::getValType(CP)));
- if (I == Map.end() || I->second != CP) {
- // FIXME: This should not use a linear scan. If this gets to be a
- // performance problem, someone should look at this.
- for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
- /* empty */;
- }
- return I;
- }
-
- ConstantClass *Create(TypeClass *Ty, ValRefType V,
- typename MapTy::iterator I) {
- ConstantClass* Result =
- ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
-
- assert(Result->getType() == Ty && "Type specified is not correct!");
- I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
-
- if (HasLargeKey) // Remember the reverse mapping if needed.
- InverseMap.insert(std::make_pair(Result, I));
-
- return Result;
- }
+template <class ConstantClass> class ConstantUniqueMap {
public:
-
- /// getOrCreate - Return the specified constant from the map, creating it if
- /// necessary.
- ConstantClass *getOrCreate(TypeClass *Ty, ValRefType V) {
- MapKey Lookup(Ty, V);
- ConstantClass* Result = nullptr;
-
- typename MapTy::iterator I = Map.find(Lookup);
- // Is it in the map?
- if (I != Map.end())
- Result = I->second;
-
- if (!Result) {
- // If no preexisting value, create one now...
- Result = Create(Ty, V, I);
- }
-
- return Result;
- }
-
- void remove(ConstantClass *CP) {
- typename MapTy::iterator I = FindExistingElement(CP);
- assert(I != Map.end() && "Constant not found in constant table!");
- assert(I->second == CP && "Didn't find correct element?");
-
- if (HasLargeKey) // Remember the reverse mapping if needed.
- InverseMap.erase(CP);
-
- Map.erase(I);
- }
-
- /// MoveConstantToNewSlot - If we are about to change C to be the element
- /// specified by I, update our internal data structures to reflect this
- /// fact.
- void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
- // First, remove the old location of the specified constant in the map.
- typename MapTy::iterator OldI = FindExistingElement(C);
- assert(OldI != Map.end() && "Constant not found in constant table!");
- assert(OldI->second == C && "Didn't find correct element?");
-
- // Remove the old entry from the map.
- Map.erase(OldI);
-
- // Update the inverse map so that we know that this constant is now
- // located at descriptor I.
- if (HasLargeKey) {
- assert(I->second == C && "Bad inversemap entry!");
- InverseMap[C] = I;
- }
- }
+ typedef typename ConstantInfo<ConstantClass>::ValType ValType;
+ typedef typename ConstantInfo<ConstantClass>::TypeClass TypeClass;
+ typedef std::pair<TypeClass *, ValType> LookupKey;
- void dump() const {
- DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n");
- }
-};
-
-// Unique map for aggregate constants
-template<class TypeClass, class ConstantClass>
-class ConstantAggrUniqueMap {
-public:
- typedef ArrayRef<Constant*> Operands;
- typedef std::pair<TypeClass*, Operands> LookupKey;
private:
struct MapInfo {
- typedef DenseMapInfo<ConstantClass*> ConstantClassInfo;
- typedef DenseMapInfo<Constant*> ConstantInfo;
- typedef DenseMapInfo<TypeClass*> TypeClassInfo;
- static inline ConstantClass* getEmptyKey() {
+ typedef DenseMapInfo<ConstantClass *> ConstantClassInfo;
+ static inline ConstantClass *getEmptyKey() {
return ConstantClassInfo::getEmptyKey();
}
- static inline ConstantClass* getTombstoneKey() {
+ static inline ConstantClass *getTombstoneKey() {
return ConstantClassInfo::getTombstoneKey();
}
static unsigned getHashValue(const ConstantClass *CP) {
- SmallVector<Constant*, 8> CPOperands;
- CPOperands.reserve(CP->getNumOperands());
- for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
- CPOperands.push_back(CP->getOperand(I));
- return getHashValue(LookupKey(CP->getType(), CPOperands));
+ SmallVector<Constant *, 8> Storage;
+ return getHashValue(LookupKey(CP->getType(), ValType(CP, Storage)));
}
static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
return LHS == RHS;
}
static unsigned getHashValue(const LookupKey &Val) {
- return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
- Val.second.end()));
+ return hash_combine(Val.first, Val.second.getHash());
}
static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
- if (LHS.first != RHS->getType()
- || LHS.second.size() != RHS->getNumOperands())
+ if (LHS.first != RHS->getType())
return false;
- for (unsigned I = 0, E = RHS->getNumOperands(); I < E; ++I) {
- if (LHS.second[I] != RHS->getOperand(I))
- return false;
- }
- return true;
+ return LHS.second == RHS;
}
};
+
public:
typedef DenseMap<ConstantClass *, char, MapInfo> MapTy;
private:
- /// Map - This is the main map from the element descriptor to the Constants.
- /// This is the primary way we avoid creating two of the same shape
- /// constant.
MapTy Map;
public:
@@ -696,44 +554,33 @@ public:
typename MapTy::iterator map_end() { return Map.end(); }
void freeConstants() {
- for (typename MapTy::iterator I=Map.begin(), E=Map.end();
- I != E; ++I) {
+ for (auto &I : Map)
// Asserts that use_empty().
- delete I->first;
- }
+ delete I.first;
}
private:
- typename MapTy::iterator findExistingElement(ConstantClass *CP) {
- return Map.find(CP);
- }
-
- ConstantClass *Create(TypeClass *Ty, Operands V, typename MapTy::iterator I) {
- ConstantClass* Result =
- ConstantArrayCreator<ConstantClass,TypeClass>::create(Ty, V);
+ ConstantClass *create(TypeClass *Ty, ValType V) {
+ ConstantClass *Result = V.create(Ty);
assert(Result->getType() == Ty && "Type specified is not correct!");
- Map[Result] = '\0';
+ insert(Result);
return Result;
}
-public:
- /// getOrCreate - Return the specified constant from the map, creating it if
- /// necessary.
- ConstantClass *getOrCreate(TypeClass *Ty, Operands V) {
+public:
+ /// Return the specified constant from the map, creating it if necessary.
+ ConstantClass *getOrCreate(TypeClass *Ty, ValType V) {
LookupKey Lookup(Ty, V);
- ConstantClass* Result = nullptr;
+ ConstantClass *Result = nullptr;
- typename MapTy::iterator I = Map.find_as(Lookup);
- // Is it in the map?
- if (I != Map.end())
+ auto I = find(Lookup);
+ if (I == Map.end())
+ Result = create(Ty, V);
+ else
Result = I->first;
-
- if (!Result) {
- // If no preexisting value, create one now...
- Result = Create(Ty, V, I);
- }
+ assert(Result && "Unexpected nullptr");
return Result;
}
@@ -744,23 +591,44 @@ public:
}
/// Insert the constant into its proper slot.
- void insert(ConstantClass *CP) {
- Map[CP] = '\0';
- }
+ void insert(ConstantClass *CP) { Map[CP] = '\0'; }
/// Remove this constant from the map
void remove(ConstantClass *CP) {
- typename MapTy::iterator I = findExistingElement(CP);
+ typename MapTy::iterator I = Map.find(CP);
assert(I != Map.end() && "Constant not found in constant table!");
assert(I->first == CP && "Didn't find correct element?");
Map.erase(I);
}
- void dump() const {
- DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n");
+ ConstantClass *replaceOperandsInPlace(ArrayRef<Constant *> Operands,
+ ConstantClass *CP, Value *From,
+ Constant *To, unsigned NumUpdated = 0,
+ unsigned OperandNo = ~0u) {
+ LookupKey Lookup(CP->getType(), ValType(Operands, CP));
+ auto I = find(Lookup);
+ if (I != Map.end())
+ return I->first;
+
+ // Update to the new value. Optimize for the case when we have a single
+ // operand that we're changing, but handle bulk updates efficiently.
+ remove(CP);
+ if (NumUpdated == 1) {
+ assert(OperandNo < CP->getNumOperands() && "Invalid index");
+ assert(CP->getOperand(OperandNo) != To && "I didn't contain From!");
+ CP->setOperand(OperandNo, To);
+ } else {
+ for (unsigned I = 0, E = CP->getNumOperands(); I != E; ++I)
+ if (CP->getOperand(I) == From)
+ CP->setOperand(I, To);
+ }
+ insert(CP);
+ return nullptr;
}
+
+ void dump() const { DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n"); }
};
-}
+} // end namespace llvm
#endif
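The rewritten ConstantsContext.h replaces the old ConstantCreator/ConstantKeyData traits with per-class key objects (ConstantAggrKeyType, InlineAsmKeyType, ConstantExprKeyType) that carry hashing, comparison against a live constant, and node construction, so the single ConstantUniqueMap<ConstantClass> template can look up candidates without building heavyweight std::map keys. A rough sketch of the resulting getOrCreate call shape; getSelectLike is a hypothetical name, the body mirrors the ConstantExpr::getSelect hunk in the Constants.cpp diff above, and it assumes the file-private types from this header plus LLVMContextImpl:

// Sketch, not part of the patch: how the new key objects drive uniquing.
static Constant *getSelectLike(Constant *C, Constant *V1, Constant *V2) {
  Constant *ArgVec[] = {C, V1, V2};
  ConstantExprKeyType Key(Instruction::Select, ArgVec); // ArrayRef-backed, no copies
  LLVMContextImpl *pImpl = C->getContext().pImpl;
  // getOrCreate hashes via Key.getHash(), compares candidates against live
  // ConstantExprs with operator==(const ConstantExpr *), and only calls
  // Key.create(Ty) when no existing node matches.
  return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
}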
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 87099a6..3576137 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -183,20 +183,22 @@ void LLVMDumpModule(LLVMModuleRef M) {
LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
char **ErrorMessage) {
- std::string error;
- raw_fd_ostream dest(Filename, error, sys::fs::F_Text);
- if (!error.empty()) {
- *ErrorMessage = strdup(error.c_str());
+ std::error_code EC;
+ raw_fd_ostream dest(Filename, EC, sys::fs::F_Text);
+ if (EC) {
+ *ErrorMessage = strdup(EC.message().c_str());
return true;
}
unwrap(M)->print(dest, nullptr);
- if (!error.empty()) {
- *ErrorMessage = strdup(error.c_str());
+ dest.close();
+
+ if (dest.has_error()) {
+ *ErrorMessage = strdup("Error printing to file");
return true;
}
- dest.flush();
+
return false;
}
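The hunk above migrates LLVMPrintModuleToFile from raw_fd_ostream's old std::string error parameter to the std::error_code constructor, and checks has_error() after an explicit close() instead of flushing. A small stand-alone sketch of the same error-handling shape, assuming only the raw_fd_ostream constructor shown in this hunk; the function name and the "out.ll" path are illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

// Sketch, not part of the patch: std::error_code-based raw_fd_ostream usage.
static bool writeSomething(llvm::StringRef Text) {
  std::error_code EC;
  llvm::raw_fd_ostream OS("out.ll", EC, llvm::sys::fs::F_Text);
  if (EC)            // creation failed; EC.message() describes why
    return true;
  OS << Text;
  OS.close();        // flush and close before asking about write errors
  return OS.has_error();
}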
@@ -558,8 +560,8 @@ LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
}
void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
- unwrap<Instruction>(Inst)->setMetadata(KindID,
- MD ? unwrap<MDNode>(MD) : nullptr);
+ unwrap<Instruction>(Inst)
+ ->setMetadata(KindID, MD ? unwrap<MDNode>(MD) : nullptr);
}
/*--.. Conversion functions ................................................--*/
@@ -603,6 +605,11 @@ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
return wrap(cast<User>(V)->getOperand(Index));
}
+LLVMUseRef LLVMGetOperandUse(LLVMValueRef Val, unsigned Index) {
+ Value *V = unwrap(Val);
+ return wrap(&cast<User>(V)->getOperandUse(Index));
+}
+
void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
unwrap<User>(Val)->setOperand(Index, unwrap(Op));
}
@@ -767,6 +774,27 @@ long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) {
return unwrap<ConstantInt>(ConstantVal)->getSExtValue();
}
+double LLVMConstRealGetDouble(LLVMValueRef ConstantVal, LLVMBool *LosesInfo) {
+ ConstantFP *cFP = unwrap<ConstantFP>(ConstantVal);
+ Type *Ty = cFP->getType();
+
+ if (Ty->isFloatTy()) {
+ *LosesInfo = false;
+ return cFP->getValueAPF().convertToFloat();
+ }
+
+ if (Ty->isDoubleTy()) {
+ *LosesInfo = false;
+ return cFP->getValueAPF().convertToDouble();
+ }
+
+ bool APFLosesInfo;
+ APFloat APF = cFP->getValueAPF();
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &APFLosesInfo);
+ *LosesInfo = APFLosesInfo;
+ return APF.convertToDouble();
+}
+
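Usage sketch for the new accessor: LosesInfo stays false for float and double constants and reports whether a wider type (e.g. x86_fp80 or fp128) had to be rounded to fit an IEEE double:

    LLVMBool LosesInfo;
    LLVMValueRef C = LLVMConstReal(LLVMFloatType(), 1.25);
    double D = LLVMConstRealGetDouble(C, &LosesInfo);
    // D == 1.25 and LosesInfo == 0: this float converts to double exactly.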
/*--.. Operations on composite constants ...................................--*/
LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
@@ -790,11 +818,27 @@ LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length,
DontNullTerminate);
}
+
+LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef c, unsigned idx) {
+ return wrap(static_cast<ConstantDataSequential*>(unwrap(c))->getElementAsConstant(idx));
+}
+
+LLVMBool LLVMIsConstantString(LLVMValueRef c) {
+ return static_cast<ConstantDataSequential*>(unwrap(c))->isString();
+}
+
+const char *LLVMGetAsString(LLVMValueRef c, size_t* Length) {
+ StringRef str = static_cast<ConstantDataSequential*>(unwrap(c))->getAsString();
+ *Length = str.size();
+ return str.data();
+}
+
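Usage sketch for the string accessors, assuming an existing LLVMContextRef Ctx; the returned pointer aliases the constant's storage and is not guaranteed to be NUL-terminated, hence the explicit length:

    size_t Len;
    LLVMValueRef S = LLVMConstStringInContext(Ctx, "hi", 2, /*DontNullTerminate=*/1);
    if (LLVMIsConstantString(S)) {
      const char *Data = LLVMGetAsString(S, &Len);  // Len == 2, Data aliases "hi"
      (void)Data;
    }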
LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
LLVMValueRef *ConstantVals, unsigned Length) {
ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
}
+
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
LLVMBool Packed) {
return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
@@ -1859,12 +1903,27 @@ LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) {
return (LLVMIntPredicate)0;
}
+LLVMRealPredicate LLVMGetFCmpPredicate(LLVMValueRef Inst) {
+ if (FCmpInst *I = dyn_cast<FCmpInst>(unwrap(Inst)))
+ return (LLVMRealPredicate)I->getPredicate();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(unwrap(Inst)))
+ if (CE->getOpcode() == Instruction::FCmp)
+ return (LLVMRealPredicate)CE->getPredicate();
+ return (LLVMRealPredicate)0;
+}
+
LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst) {
if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst)))
return map_to_llvmopcode(C->getOpcode());
return (LLVMOpcode)0;
}
+LLVMValueRef LLVMInstructionClone(LLVMValueRef Inst) {
+ if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst)))
+ return wrap(C->clone());
+ return nullptr;
+}
+
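Usage sketch for LLVMInstructionClone, assuming an existing instruction Inst and an LLVMBuilderRef Builder positioned at the insertion point; the clone is unparented until it is inserted:

    LLVMValueRef Copy = LLVMInstructionClone(Inst);
    LLVMInsertIntoBuilder(Builder, Copy);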
/*--.. Call and invoke instructions ........................................--*/
unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
@@ -1926,6 +1985,34 @@ void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
unwrap<CallInst>(Call)->setTailCall(isTailCall);
}
+/*--.. Operations on terminators ...........................................--*/
+
+unsigned LLVMGetNumSuccessors(LLVMValueRef Term) {
+ return unwrap<TerminatorInst>(Term)->getNumSuccessors();
+}
+
+LLVMBasicBlockRef LLVMGetSuccessor(LLVMValueRef Term, unsigned i) {
+ return wrap(unwrap<TerminatorInst>(Term)->getSuccessor(i));
+}
+
+void LLVMSetSuccessor(LLVMValueRef Term, unsigned i, LLVMBasicBlockRef block) {
+ return unwrap<TerminatorInst>(Term)->setSuccessor(i,unwrap(block));
+}
+
+/*--.. Operations on branch instructions (only) ............................--*/
+
+LLVMBool LLVMIsConditional(LLVMValueRef Branch) {
+ return unwrap<BranchInst>(Branch)->isConditional();
+}
+
+LLVMValueRef LLVMGetCondition(LLVMValueRef Branch) {
+ return wrap(unwrap<BranchInst>(Branch)->getCondition());
+}
+
+void LLVMSetCondition(LLVMValueRef Branch, LLVMValueRef Cond) {
+ return unwrap<BranchInst>(Branch)->setCondition(unwrap(Cond));
+}
+
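Usage sketch for the terminator and branch accessors, assuming an existing LLVMBasicBlockRef BB; the branch-only calls should be guarded because they unwrap to BranchInst:

    LLVMValueRef Term = LLVMGetBasicBlockTerminator(BB);  // NULL if BB has no terminator
    for (unsigned i = 0, n = LLVMGetNumSuccessors(Term); i != n; ++i) {
      LLVMBasicBlockRef Succ = LLVMGetSuccessor(Term, i);
      (void)Succ;
    }
    if (LLVMGetInstructionOpcode(Term) == LLVMBr && LLVMIsConditional(Term)) {
      LLVMValueRef Cond = LLVMGetCondition(Term);
      (void)Cond;
    }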
/*--.. Operations on switch instructions (only) ............................--*/
LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef Switch) {
@@ -2313,7 +2400,7 @@ static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering) {
case LLVMAtomicOrderingSequentiallyConsistent:
return SequentiallyConsistent;
}
-
+
llvm_unreachable("Invalid LLVMAtomicOrdering value!");
}
@@ -2632,10 +2719,9 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(
const char *BufferName,
LLVMBool RequiresNullTerminator) {
- return wrap(MemoryBuffer::getMemBuffer(
- StringRef(InputData, InputDataLength),
- StringRef(BufferName),
- RequiresNullTerminator));
+ return wrap(MemoryBuffer::getMemBuffer(StringRef(InputData, InputDataLength),
+ StringRef(BufferName),
+ RequiresNullTerminator).release());
}
LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(
@@ -2643,9 +2729,9 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(
size_t InputDataLength,
const char *BufferName) {
- return wrap(MemoryBuffer::getMemBufferCopy(
- StringRef(InputData, InputDataLength),
- StringRef(BufferName)));
+ return wrap(
+ MemoryBuffer::getMemBufferCopy(StringRef(InputData, InputDataLength),
+ StringRef(BufferName)).release());
}
const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf) {
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 218787c..4fe2be6 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -23,10 +23,29 @@
using namespace llvm;
using namespace llvm::dwarf;
-static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
- assert((Tag & LLVMDebugVersionMask) == 0 &&
- "Tag too large for debug encoding!");
- return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+namespace {
+class HeaderBuilder {
+ SmallVector<char, 256> Chars;
+
+public:
+ explicit HeaderBuilder(Twine T) { T.toVector(Chars); }
+ HeaderBuilder(const HeaderBuilder &X) : Chars(X.Chars) {}
+ HeaderBuilder(HeaderBuilder &&X) : Chars(std::move(X.Chars)) {}
+
+ template <class Twineable> HeaderBuilder &concat(Twineable &&X) {
+ Chars.push_back(0);
+ Twine(X).toVector(Chars);
+ return *this;
+ }
+
+ MDString *get(LLVMContext &Context) const {
+ return MDString::get(Context, StringRef(Chars.begin(), Chars.size()));
+ }
+
+ static HeaderBuilder get(unsigned Tag) {
+ return HeaderBuilder("0x" + Twine::utohexstr(Tag));
+ }
+};
}
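HeaderBuilder replaces the old per-field ConstantInt/MDString operands with a single MDString header: the tag is rendered as 0x<hex> and each concat() appends a NUL separator followed by the next field's text. For example (a sketch; DW_TAG_base_type is 0x24):

    // HeaderBuilder::get(dwarf::DW_TAG_base_type).concat("int").concat(0).get(Ctx)
    // yields an MDString whose bytes are  "0x24" '\0' "int" '\0' "0"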
DIBuilder::DIBuilder(Module &m)
@@ -34,7 +53,6 @@ DIBuilder::DIBuilder(Module &m)
TempRetainTypes(nullptr), TempSubprograms(nullptr), TempGVs(nullptr),
DeclareFn(nullptr), ValueFn(nullptr) {}
-/// finalize - Construct any deferred debug info descriptors.
void DIBuilder::finalize() {
DIArray Enums = getOrCreateArray(AllEnumTypes);
DIType(TempEnumTypes).replaceAllUsesWith(Enums);
@@ -46,7 +64,7 @@ void DIBuilder::finalize() {
// TrackingVHs back into Values.
SmallPtrSet<Value *, 16> RetainSet;
for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++)
- if (RetainSet.insert(AllRetainTypes[I]))
+ if (RetainSet.insert(AllRetainTypes[I]).second)
RetainValues.push_back(AllRetainTypes[I]);
DIArray RetainTypes = getOrCreateArray(RetainValues);
DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
@@ -55,13 +73,10 @@ void DIBuilder::finalize() {
DIType(TempSubprograms).replaceAllUsesWith(SPs);
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
DISubprogram SP(SPs.getElement(i));
- SmallVector<Value *, 4> Variables;
- if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) {
- for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii)
- Variables.push_back(NMD->getOperand(ii));
- NMD->eraseFromParent();
- }
if (MDNode *Temp = SP.getVariablesNodes()) {
+ SmallVector<Value *, 4> Variables;
+ for (Value *V : PreservedVariables.lookup(SP))
+ Variables.push_back(V);
DIArray AV = getOrCreateArray(Variables);
DIType(Temp).replaceAllUsesWith(AV);
}
@@ -77,8 +92,7 @@ void DIBuilder::finalize() {
DIType(TempImportedModules).replaceAllUsesWith(IMs);
}
-/// getNonCompileUnitScope - If N is compile unit return NULL otherwise return
-/// N.
+/// If N is compile unit return NULL otherwise return N.
static MDNode *getNonCompileUnitScope(MDNode *N) {
if (DIDescriptor(N).isCompileUnit())
return nullptr;
@@ -95,8 +109,6 @@ static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
return MDNode::get(VMContext, Pair);
}
-/// createCompileUnit - A CompileUnit provides an anchor for all debugging
-/// information generated during this instance of compilation.
DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
StringRef Directory,
StringRef Producer, bool isOptimized,
@@ -110,7 +122,7 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
"Invalid Language tag");
assert(!Filename.empty() &&
"Unable to create compile unit without filename");
- Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ Value *TElts[] = {HeaderBuilder::get(DW_TAG_base_type).get(VMContext)};
TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
TempRetainTypes = MDNode::getTemporary(VMContext, TElts);
@@ -121,22 +133,18 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
TempImportedModules = MDNode::getTemporary(VMContext, TElts);
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
- createFilePathPair(VMContext, Filename, Directory),
- ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
- MDString::get(VMContext, Producer),
- ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
- MDString::get(VMContext, Flags),
- ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
- TempEnumTypes,
- TempRetainTypes,
- TempSubprograms,
- TempGVs,
- TempImportedModules,
- MDString::get(VMContext, SplitName),
- ConstantInt::get(Type::getInt32Ty(VMContext), Kind)
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_compile_unit)
+ .concat(Lang)
+ .concat(Producer)
+ .concat(isOptimized)
+ .concat(Flags)
+ .concat(RunTimeVer)
+ .concat(SplitName)
+ .concat(Kind)
+ .get(VMContext),
+ createFilePathPair(VMContext, Filename, Directory),
+ TempEnumTypes, TempRetainTypes, TempSubprograms, TempGVs,
+ TempImportedModules};
MDNode *CUNode = MDNode::get(VMContext, Elts);
@@ -158,24 +166,9 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context,
Value *NS, unsigned Line, StringRef Name,
SmallVectorImpl<TrackingVH<MDNode>> &AllImportedModules) {
const MDNode *R;
- if (Name.empty()) {
- Value *Elts[] = {
- GetTagConstant(C, Tag),
- Context,
- NS,
- ConstantInt::get(Type::getInt32Ty(C), Line),
- };
- R = MDNode::get(C, Elts);
- } else {
- Value *Elts[] = {
- GetTagConstant(C, Tag),
- Context,
- NS,
- ConstantInt::get(Type::getInt32Ty(C), Line),
- MDString::get(C, Name)
- };
- R = MDNode::get(C, Elts);
- }
+ Value *Elts[] = {HeaderBuilder::get(Tag).concat(Line).concat(Name).get(C),
+ Context, NS};
+ R = MDNode::get(C, Elts);
DIImportedEntity M(R);
assert(M.Verify() && "Imported module should be valid");
AllImportedModules.push_back(TrackingVH<MDNode>(M));
@@ -197,10 +190,13 @@ DIImportedEntity DIBuilder::createImportedModule(DIScope Context,
}
DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
- DIScope Decl,
+ DIDescriptor Decl,
unsigned Line, StringRef Name) {
+ // Make sure to use the unique identifier based metadata reference for
+ // types that have one.
+ Value *V = Decl.isType() ? static_cast<Value*>(DIType(Decl).getRef()) : Decl;
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
- Context, Decl.getRef(), Line, Name,
+ Context, V, Line, Name,
AllImportedModules);
}
@@ -211,54 +207,44 @@ DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
Context, Imp, Line, Name, AllImportedModules);
}
-/// createFile - Create a file descriptor to hold debugging information
-/// for a file.
DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
- createFilePathPair(VMContext, Filename, Directory)
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_file_type).get(VMContext),
+ createFilePathPair(VMContext, Filename, Directory)};
return DIFile(MDNode::get(VMContext, Elts));
}
-/// createEnumerator - Create a single enumerator value.
DIEnumerator DIBuilder::createEnumerator(StringRef Name, int64_t Val) {
assert(!Name.empty() && "Unable to create enumerator without name");
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt64Ty(VMContext), Val)
- };
+ HeaderBuilder::get(dwarf::DW_TAG_enumerator).concat(Name).concat(Val).get(
+ VMContext)};
return DIEnumerator(MDNode::get(VMContext, Elts));
}
-/// \brief Create a DWARF unspecified type.
DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) {
assert(!Name.empty() && "Unable to create type without name");
// Unspecified types are encoded in DIBasicType format. Line number, filename,
// size, alignment, offset and flags are always empty here.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
- nullptr, // Filename
- nullptr, // Unused
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
- ConstantInt::get(Type::getInt32Ty(VMContext), 0) // Encoding
+ HeaderBuilder::get(dwarf::DW_TAG_unspecified_type)
+ .concat(Name)
+ .concat(0)
+ .concat(0)
+ .concat(0)
+ .concat(0)
+ .concat(0)
+ .concat(0)
+ .get(VMContext),
+ nullptr, // Filename
+ nullptr // Unused
};
return DIBasicType(MDNode::get(VMContext, Elts));
}
-/// \brief Create C++11 nullptr type.
DIBasicType DIBuilder::createNullPtrType() {
return createUnspecifiedType("decltype(nullptr)");
}
-/// createBasicType - Create debugging information entry for a basic
-/// type, e.g 'char'.
DIBasicType
DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
uint64_t AlignInBits, unsigned Encoding) {
@@ -266,160 +252,139 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
// Basic types are encoded in DIBasicType format. Line number, filename,
// offset and flags are always empty here.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
- nullptr, // File/directory name
- nullptr, // Unused
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
- ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+ HeaderBuilder::get(dwarf::DW_TAG_base_type)
+ .concat(Name)
+ .concat(0) // Line
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .concat(Encoding)
+ .get(VMContext),
+ nullptr, // Filename
+ nullptr // Unused
};
return DIBasicType(MDNode::get(VMContext, Elts));
}
-/// createQualifiedType - Create debugging information entry for a qualified
-/// type, e.g. 'const int'.
DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
// Qualified types are encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- nullptr, // Filename
- nullptr, // Unused
- MDString::get(VMContext, StringRef()), // Empty name.
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- FromTy.getRef()
- };
+ Value *Elts[] = {HeaderBuilder::get(Tag)
+ .concat(StringRef()) // Name
+ .concat(0) // Line
+ .concat(0) // Size
+ .concat(0) // Align
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .get(VMContext),
+ nullptr, // Filename
+ nullptr, // Unused
+ FromTy.getRef()};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createPointerType - Create debugging information entry for a pointer.
DIDerivedType
DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
uint64_t AlignInBits, StringRef Name) {
// Pointer types are encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
- nullptr, // Filename
- nullptr, // Unused
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- PointeeTy.getRef()
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_pointer_type)
+ .concat(Name)
+ .concat(0) // Line
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .get(VMContext),
+ nullptr, // Filename
+ nullptr, // Unused
+ PointeeTy.getRef()};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy,
DIType Base) {
// Pointer types are encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
- nullptr, // Filename
- nullptr, // Unused
- nullptr,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- PointeeTy.getRef(),
- Base.getRef()
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_ptr_to_member_type)
+ .concat(StringRef())
+ .concat(0) // Line
+ .concat(0) // Size
+ .concat(0) // Align
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .get(VMContext),
+ nullptr, // Filename
+ nullptr, // Unused
+ PointeeTy.getRef(), Base.getRef()};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createReferenceType - Create debugging information entry for a reference
-/// type.
DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
assert(RTy.isType() && "Unable to create reference type");
// References are encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- nullptr, // Filename
- nullptr, // TheCU,
- nullptr, // Name
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- RTy.getRef()
- };
+ Value *Elts[] = {HeaderBuilder::get(Tag)
+ .concat(StringRef()) // Name
+ .concat(0) // Line
+ .concat(0) // Size
+ .concat(0) // Align
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .get(VMContext),
+ nullptr, // Filename
+ nullptr, // TheCU,
+ RTy.getRef()};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createTypedef - Create debugging information entry for a typedef.
DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
unsigned LineNo, DIDescriptor Context) {
// typedefs are encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- Ty.getRef()
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_typedef)
+ .concat(Name)
+ .concat(LineNo)
+ .concat(0) // Size
+ .concat(0) // Align
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .get(VMContext),
+ File.getFileNode(),
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
+ Ty.getRef()};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createFriend - Create debugging information entry for a 'friend'.
DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
// typedefs are encoded in DIDerivedType format.
assert(Ty.isType() && "Invalid type!");
assert(FriendTy.isType() && "Invalid friend type!");
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_friend),
- nullptr,
- Ty.getRef(),
- nullptr, // Name
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- FriendTy.getRef()
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_friend)
+ .concat(StringRef()) // Name
+ .concat(0) // Line
+ .concat(0) // Size
+ .concat(0) // Align
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .get(VMContext),
+ nullptr, Ty.getRef(), FriendTy.getRef()};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createInheritance - Create debugging information entry to establish
-/// inheritance relationship between two types.
DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
uint64_t BaseOffset,
unsigned Flags) {
assert(Ty.isType() && "Unable to create inheritance");
// TAG_inheritance is encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
- nullptr,
- Ty.getRef(),
- nullptr, // Name
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- BaseTy.getRef()
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_inheritance)
+ .concat(StringRef()) // Name
+ .concat(0) // Line
+ .concat(0) // Size
+ .concat(0) // Align
+ .concat(BaseOffset)
+ .concat(Flags)
+ .get(VMContext),
+ nullptr, Ty.getRef(), BaseTy.getRef()};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createMemberType - Create debugging information entry for a member.
DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNumber,
uint64_t SizeInBits,
@@ -427,76 +392,41 @@ DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
uint64_t OffsetInBits, unsigned Flags,
DIType Ty) {
// TAG_member is encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_member),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty.getRef()
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
+ .concat(Name)
+ .concat(LineNumber)
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(OffsetInBits)
+ .concat(Flags)
+ .get(VMContext),
+ File.getFileNode(),
+ DIScope(getNonCompileUnitScope(Scope)).getRef(),
+ Ty.getRef()};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createStaticMemberType - Create debugging information entry for a
-/// C++ static data member.
-DIDerivedType
-DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- DIType Ty, unsigned Flags,
- llvm::Value *Val) {
+DIDerivedType DIBuilder::createStaticMemberType(DIDescriptor Scope,
+ StringRef Name, DIFile File,
+ unsigned LineNumber, DIType Ty,
+ unsigned Flags,
+ llvm::Constant *Val) {
// TAG_member is encoded in DIDerivedType format.
Flags |= DIDescriptor::FlagStaticMember;
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_member),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty.getRef(),
- Val
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
+ .concat(Name)
+ .concat(LineNumber)
+ .concat(0) // Size
+ .concat(0) // Align
+ .concat(0) // Offset
+ .concat(Flags)
+ .get(VMContext),
+ File.getFileNode(),
+ DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(),
+ Val};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createObjCIVar - Create debugging information entry for Objective-C
-/// instance variable.
-DIDerivedType
-DIBuilder::createObjCIVar(StringRef Name, DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags, DIType Ty,
- StringRef PropertyName, StringRef GetterName,
- StringRef SetterName, unsigned PropertyAttributes) {
- // TAG_member is encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_member),
- File.getFileNode(),
- getNonCompileUnitScope(File),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty,
- MDString::get(VMContext, PropertyName),
- MDString::get(VMContext, GetterName),
- MDString::get(VMContext, SetterName),
- ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
- };
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-/// createObjCIVar - Create debugging information entry for Objective-C
-/// instance variable.
DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -504,88 +434,66 @@ DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File,
uint64_t OffsetInBits, unsigned Flags,
DIType Ty, MDNode *PropertyNode) {
// TAG_member is encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_member),
- File.getFileNode(),
- getNonCompileUnitScope(File),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty,
- PropertyNode
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
+ .concat(Name)
+ .concat(LineNumber)
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(OffsetInBits)
+ .concat(Flags)
+ .get(VMContext),
+ File.getFileNode(), getNonCompileUnitScope(File), Ty,
+ PropertyNode};
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-/// createObjCProperty - Create debugging information entry for Objective-C
-/// property.
DIObjCProperty
DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber,
StringRef GetterName, StringRef SetterName,
unsigned PropertyAttributes, DIType Ty) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- MDString::get(VMContext, GetterName),
- MDString::get(VMContext, SetterName),
- ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes),
- Ty
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_APPLE_property)
+ .concat(Name)
+ .concat(LineNumber)
+ .concat(GetterName)
+ .concat(SetterName)
+ .concat(PropertyAttributes)
+ .get(VMContext),
+ File, Ty};
return DIObjCProperty(MDNode::get(VMContext, Elts));
}
-/// createTemplateTypeParameter - Create debugging information for template
-/// type parameter.
DITemplateTypeParameter
DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
DIType Ty, MDNode *File, unsigned LineNo,
unsigned ColumnNo) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- MDString::get(VMContext, Name),
- Ty.getRef(),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_template_type_parameter)
+ .concat(Name)
+ .concat(LineNo)
+ .concat(ColumnNo)
+ .get(VMContext),
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
+ Ty.getRef(), File};
return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
}
-DITemplateValueParameter
-DIBuilder::createTemplateValueParameter(unsigned Tag, DIDescriptor Context,
- StringRef Name, DIType Ty,
- Value *Val, MDNode *File,
- unsigned LineNo,
- unsigned ColumnNo) {
+static DITemplateValueParameter createTemplateValueParameterHelper(
+ LLVMContext &VMContext, unsigned Tag, DIDescriptor Context, StringRef Name,
+ DIType Ty, Value *Val, MDNode *File, unsigned LineNo, unsigned ColumnNo) {
Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- MDString::get(VMContext, Name),
- Ty.getRef(),
- Val,
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
- };
+ HeaderBuilder::get(Tag).concat(Name).concat(LineNo).concat(ColumnNo).get(
+ VMContext),
+ DIScope(getNonCompileUnitScope(Context)).getRef(), Ty.getRef(), Val,
+ File};
return DITemplateValueParameter(MDNode::get(VMContext, Elts));
}
-/// createTemplateValueParameter - Create debugging information for template
-/// value parameter.
DITemplateValueParameter
DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
- DIType Ty, Value *Val,
- MDNode *File, unsigned LineNo,
- unsigned ColumnNo) {
- return createTemplateValueParameter(dwarf::DW_TAG_template_value_parameter,
- Context, Name, Ty, Val, File, LineNo,
- ColumnNo);
+ DIType Ty, Constant *Val, MDNode *File,
+ unsigned LineNo, unsigned ColumnNo) {
+ return createTemplateValueParameterHelper(
+ VMContext, dwarf::DW_TAG_template_value_parameter, Context, Name, Ty, Val,
+ File, LineNo, ColumnNo);
}
DITemplateValueParameter
@@ -593,8 +501,8 @@ DIBuilder::createTemplateTemplateParameter(DIDescriptor Context, StringRef Name,
DIType Ty, StringRef Val,
MDNode *File, unsigned LineNo,
unsigned ColumnNo) {
- return createTemplateValueParameter(
- dwarf::DW_TAG_GNU_template_template_param, Context, Name, Ty,
+ return createTemplateValueParameterHelper(
+ VMContext, dwarf::DW_TAG_GNU_template_template_param, Context, Name, Ty,
MDString::get(VMContext, Val), File, LineNo, ColumnNo);
}
@@ -603,12 +511,11 @@ DIBuilder::createTemplateParameterPack(DIDescriptor Context, StringRef Name,
DIType Ty, DIArray Val,
MDNode *File, unsigned LineNo,
unsigned ColumnNo) {
- return createTemplateValueParameter(dwarf::DW_TAG_GNU_template_parameter_pack,
- Context, Name, Ty, Val, File, LineNo,
- ColumnNo);
+ return createTemplateValueParameterHelper(
+ VMContext, dwarf::DW_TAG_GNU_template_parameter_pack, Context, Name, Ty,
+ Val, File, LineNo, ColumnNo);
}
-/// createClassType - Create debugging information entry for a class.
DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
DIFile File, unsigned LineNumber,
uint64_t SizeInBits,
@@ -623,23 +530,19 @@ DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
"createClassType should be called with a valid Context");
// TAG_class_type is encoded in DICompositeType format.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom.getRef(),
- Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- VTableHolder.getRef(),
- TemplateParams,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)
- };
+ HeaderBuilder::get(dwarf::DW_TAG_class_type)
+ .concat(Name)
+ .concat(LineNumber)
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(OffsetInBits)
+ .concat(Flags)
+ .concat(0)
+ .get(VMContext),
+ File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(),
+ DerivedFrom.getRef(), Elements, VTableHolder.getRef(), TemplateParams,
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)};
DICompositeType R(MDNode::get(VMContext, Elts));
assert(R.isCompositeType() &&
"createClassType should return a DICompositeType");
@@ -648,7 +551,6 @@ DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
return R;
}
-/// createStructType - Create debugging information entry for a struct.
DICompositeType DIBuilder::createStructType(DIDescriptor Context,
StringRef Name, DIFile File,
unsigned LineNumber,
@@ -661,23 +563,19 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
StringRef UniqueIdentifier) {
// TAG_structure_type is encoded in DICompositeType format.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom.getRef(),
- Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
- VTableHolder.getRef(),
- nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)
- };
+ HeaderBuilder::get(dwarf::DW_TAG_structure_type)
+ .concat(Name)
+ .concat(LineNumber)
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(0)
+ .concat(Flags)
+ .concat(RunTimeLang)
+ .get(VMContext),
+ File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(),
+ DerivedFrom.getRef(), Elements, VTableHolder.getRef(), nullptr,
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)};
DICompositeType R(MDNode::get(VMContext, Elts));
assert(R.isCompositeType() &&
"createStructType should return a DICompositeType");
@@ -686,7 +584,6 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
return R;
}
-/// createUnionType - Create debugging information entry for an union.
DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNumber,
uint64_t SizeInBits,
@@ -696,79 +593,64 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
StringRef UniqueIdentifier) {
// TAG_union_type is encoded in DICompositeType format.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- nullptr,
- Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
- nullptr,
- nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)
- };
+ HeaderBuilder::get(dwarf::DW_TAG_union_type)
+ .concat(Name)
+ .concat(LineNumber)
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(0) // Offset
+ .concat(Flags)
+ .concat(RunTimeLang)
+ .get(VMContext),
+ File.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(),
+ nullptr, Elements, nullptr, nullptr,
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)};
DICompositeType R(MDNode::get(VMContext, Elts));
if (!UniqueIdentifier.empty())
retainType(R);
return R;
}
-/// createSubroutineType - Create subroutine type.
-DICompositeType DIBuilder::createSubroutineType(DIFile File,
- DIArray ParameterTypes,
- unsigned Flags) {
+DISubroutineType DIBuilder::createSubroutineType(DIFile File,
+ DITypeArray ParameterTypes,
+ unsigned Flags) {
// TAG_subroutine_type is encoded in DICompositeType format.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- nullptr,
- MDString::get(VMContext, ""),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags), // Flags
- nullptr,
- ParameterTypes,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- nullptr,
- nullptr,
- nullptr // Type Identifer
+ HeaderBuilder::get(dwarf::DW_TAG_subroutine_type)
+ .concat(StringRef())
+ .concat(0) // Line
+ .concat(0) // Size
+ .concat(0) // Align
+ .concat(0) // Offset
+ .concat(Flags) // Flags
+ .concat(0)
+ .get(VMContext),
+ nullptr, nullptr, nullptr, ParameterTypes, nullptr, nullptr,
+ nullptr // Type Identifer
};
- return DICompositeType(MDNode::get(VMContext, Elts));
+ return DISubroutineType(MDNode::get(VMContext, Elts));
}
-/// createEnumerationType - Create debugging information entry for an
-/// enumeration.
DICompositeType DIBuilder::createEnumerationType(
DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
DIType UnderlyingType, StringRef UniqueIdentifier) {
// TAG_enumeration_type is encoded in DICompositeType format.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- UnderlyingType.getRef(),
- Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- nullptr,
- nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)
- };
+ HeaderBuilder::get(dwarf::DW_TAG_enumeration_type)
+ .concat(Name)
+ .concat(LineNumber)
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .concat(0)
+ .get(VMContext),
+ File.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(),
+ UnderlyingType.getRef(), Elements, nullptr, nullptr,
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)};
DICompositeType CTy(MDNode::get(VMContext, Elts));
AllEnumTypes.push_back(CTy);
if (!UniqueIdentifier.empty())
@@ -776,114 +658,96 @@ DICompositeType DIBuilder::createEnumerationType(
return CTy;
}
-/// createArrayType - Create debugging information entry for an array.
DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
DIType Ty, DIArray Subscripts) {
// TAG_array_type is encoded in DICompositeType format.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
- nullptr, // Filename/Directory,
- nullptr, // Unused
- MDString::get(VMContext, ""),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), Size),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- Ty.getRef(),
- Subscripts,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- nullptr,
- nullptr,
- nullptr // Type Identifer
+ HeaderBuilder::get(dwarf::DW_TAG_array_type)
+ .concat(StringRef())
+ .concat(0) // Line
+ .concat(Size)
+ .concat(AlignInBits)
+ .concat(0) // Offset
+ .concat(0) // Flags
+ .concat(0)
+ .get(VMContext),
+ nullptr, // Filename/Directory,
+ nullptr, // Unused
+ Ty.getRef(), Subscripts, nullptr, nullptr,
+ nullptr // Type Identifer
};
return DICompositeType(MDNode::get(VMContext, Elts));
}
-/// createVectorType - Create debugging information entry for a vector.
DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
DIType Ty, DIArray Subscripts) {
// A vector is an array type with the FlagVector flag applied.
Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
- nullptr, // Filename/Directory,
- nullptr, // Unused
- MDString::get(VMContext, ""),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), Size),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), DIType::FlagVector),
- Ty.getRef(),
- Subscripts,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- nullptr,
- nullptr,
- nullptr // Type Identifer
+ HeaderBuilder::get(dwarf::DW_TAG_array_type)
+ .concat("")
+ .concat(0) // Line
+ .concat(Size)
+ .concat(AlignInBits)
+ .concat(0) // Offset
+ .concat(DIType::FlagVector)
+ .concat(0)
+ .get(VMContext),
+ nullptr, // Filename/Directory,
+ nullptr, // Unused
+ Ty.getRef(), Subscripts, nullptr, nullptr,
+ nullptr // Type Identifer
};
return DICompositeType(MDNode::get(VMContext, Elts));
}
-/// createArtificialType - Create a new DIType with "artificial" flag set.
-DIType DIBuilder::createArtificialType(DIType Ty) {
- if (Ty.isArtificial())
- return Ty;
+static HeaderBuilder setTypeFlagsInHeader(StringRef Header,
+ unsigned FlagsToSet) {
+ DIHeaderFieldIterator I(Header);
+ std::advance(I, 6);
+ unsigned Flags;
+ if (I->getAsInteger(0, Flags))
+ Flags = 0;
+ Flags |= FlagsToSet;
+
+ return HeaderBuilder(Twine(I.getPrefix())).concat(Flags).concat(
+ I.getSuffix());
+}
+
+static DIType createTypeWithFlags(LLVMContext &Context, DIType Ty,
+ unsigned FlagsToSet) {
SmallVector<Value *, 9> Elts;
MDNode *N = Ty;
- assert (N && "Unexpected input DIType!");
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Elts.push_back(N->getOperand(i));
-
- unsigned CurFlags = Ty.getFlags();
- CurFlags = CurFlags | DIType::FlagArtificial;
+ assert(N && "Unexpected input DIType!");
+ // Update header field.
+ Elts.push_back(setTypeFlagsInHeader(Ty.getHeader(), FlagsToSet).get(Context));
+ for (unsigned I = 1, E = N->getNumOperands(); I != E; ++I)
+ Elts.push_back(N->getOperand(I));
- // Flags are stored at this slot.
- // FIXME: Add an enum for this magic value.
- Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+ return DIType(MDNode::get(Context, Elts));
+}
- return DIType(MDNode::get(VMContext, Elts));
+DIType DIBuilder::createArtificialType(DIType Ty) {
+ if (Ty.isArtificial())
+ return Ty;
+ return createTypeWithFlags(VMContext, Ty, DIType::FlagArtificial);
}
-/// createObjectPointerType - Create a new type with both the object pointer
-/// and artificial flags set.
DIType DIBuilder::createObjectPointerType(DIType Ty) {
if (Ty.isObjectPointer())
return Ty;
-
- SmallVector<Value *, 9> Elts;
- MDNode *N = Ty;
- assert (N && "Unexpected input DIType!");
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Elts.push_back(N->getOperand(i));
-
- unsigned CurFlags = Ty.getFlags();
- CurFlags = CurFlags | (DIType::FlagObjectPointer | DIType::FlagArtificial);
-
- // Flags are stored at this slot.
- // FIXME: Add an enum for this magic value.
- Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
-
- return DIType(MDNode::get(VMContext, Elts));
+ unsigned Flags = DIType::FlagObjectPointer | DIType::FlagArtificial;
+ return createTypeWithFlags(VMContext, Ty, Flags);
}
-/// retainType - Retain DIType in a module even if it is not referenced
-/// through debug info anchors.
void DIBuilder::retainType(DIType T) {
AllRetainTypes.push_back(TrackingVH<MDNode>(T));
}
-/// createUnspecifiedParameter - Create unspeicified type descriptor
-/// for the subroutine type.
-DIDescriptor DIBuilder::createUnspecifiedParameter() {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
- };
- return DIDescriptor(MDNode::get(VMContext, Elts));
+DIBasicType DIBuilder::createUnspecifiedParameter() {
+ return DIBasicType();
}
-/// createForwardDecl - Create a temporary forward-declared type that
-/// can be RAUW'd if the full type is seen.
DICompositeType
DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
DIFile F, unsigned Line, unsigned RuntimeLang,
@@ -891,23 +755,20 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
StringRef UniqueIdentifier) {
// Create a temporary MDNode.
Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- F.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), Line),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), DIDescriptor::FlagFwdDecl),
- nullptr,
- DIArray(),
- ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
- nullptr,
- nullptr, //TemplateParams
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)
- };
+ HeaderBuilder::get(Tag)
+ .concat(Name)
+ .concat(Line)
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(0) // Offset
+ .concat(DIDescriptor::FlagFwdDecl)
+ .concat(RuntimeLang)
+ .get(VMContext),
+ F.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr,
+ DIArray(), nullptr,
+ nullptr, // TemplateParams
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)};
MDNode *Node = MDNode::get(VMContext, Elts);
DICompositeType RetTy(Node);
assert(RetTy.isCompositeType() &&
@@ -917,123 +778,102 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
return RetTy;
}
-/// createForwardDecl - Create a temporary forward-declared type that
-/// can be RAUW'd if the full type is seen.
DICompositeType DIBuilder::createReplaceableForwardDecl(
unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line,
unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits,
StringRef UniqueIdentifier) {
// Create a temporary MDNode.
Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- F.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), Line),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), DIDescriptor::FlagFwdDecl),
- nullptr,
- DIArray(),
- ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
- nullptr,
- nullptr, //TemplateParams
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)
- };
+ HeaderBuilder::get(Tag)
+ .concat(Name)
+ .concat(Line)
+ .concat(SizeInBits)
+ .concat(AlignInBits)
+ .concat(0) // Offset
+ .concat(DIDescriptor::FlagFwdDecl)
+ .concat(RuntimeLang)
+ .get(VMContext),
+ F.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr,
+ DIArray(), nullptr,
+ nullptr, // TemplateParams
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)};
MDNode *Node = MDNode::getTemporary(VMContext, Elts);
DICompositeType RetTy(Node);
assert(RetTy.isCompositeType() &&
- "createForwardDecl result should be a DIType");
+ "createReplaceableForwardDecl result should be a DIType");
if (!UniqueIdentifier.empty())
retainType(RetTy);
return RetTy;
}
-/// getOrCreateArray - Get a DIArray, create one if required.
DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
return DIArray(MDNode::get(VMContext, Elements));
}
-/// getOrCreateSubrange - Create a descriptor for a value range. This
-/// implicitly uniques the values returned.
+DITypeArray DIBuilder::getOrCreateTypeArray(ArrayRef<Value *> Elements) {
+ SmallVector<llvm::Value *, 16> Elts;
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] && isa<MDNode>(Elements[i]))
+ Elts.push_back(DIType(cast<MDNode>(Elements[i])).getRef());
+ else
+ Elts.push_back(Elements[i]);
+ }
+ return DITypeArray(MDNode::get(VMContext, Elts));
+}
+
DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
- ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
- ConstantInt::get(Type::getInt64Ty(VMContext), Count)
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subrange_type)
+ .concat(Lo)
+ .concat(Count)
+ .get(VMContext)};
return DISubrange(MDNode::get(VMContext, Elts));
}
-/// \brief Create a new descriptor for the specified global.
-DIGlobalVariable DIBuilder::createGlobalVariable(StringRef Name,
- StringRef LinkageName,
- DIFile F, unsigned LineNumber,
- DITypeRef Ty, bool isLocalToUnit,
- Value *Val) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_variable),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- nullptr, // TheCU,
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, LinkageName),
- F,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- Ty,
- ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
- Val,
- DIDescriptor()
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
- AllGVs.push_back(Node);
- return DIGlobalVariable(Node);
-}
-
-/// \brief Create a new descriptor for the specified global.
-DIGlobalVariable DIBuilder::createGlobalVariable(StringRef Name, DIFile F,
- unsigned LineNumber,
- DITypeRef Ty,
- bool isLocalToUnit,
- Value *Val) {
- return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit,
- Val);
-}
-
-/// createStaticVariable - Create a new descriptor for the specified static
-/// variable.
-DIGlobalVariable DIBuilder::createStaticVariable(DIDescriptor Context,
- StringRef Name,
- StringRef LinkageName,
- DIFile F, unsigned LineNumber,
- DITypeRef Ty,
- bool isLocalToUnit,
- Value *Val, MDNode *Decl) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_variable),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, LinkageName),
- F,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- Ty,
- ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
- Val,
- DIDescriptor(Decl)
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
- AllGVs.push_back(Node);
- return DIGlobalVariable(Node);
+static DIGlobalVariable createGlobalVariableHelper(
+ LLVMContext &VMContext, DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F, unsigned LineNumber, DITypeRef Ty,
+ bool isLocalToUnit, Constant *Val, MDNode *Decl, bool isDefinition,
+ std::function<MDNode *(ArrayRef<Value *>)> CreateFunc) {
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_variable)
+ .concat(Name)
+ .concat(Name)
+ .concat(LinkageName)
+ .concat(LineNumber)
+ .concat(isLocalToUnit)
+ .concat(isDefinition)
+ .get(VMContext),
+ DIScope(getNonCompileUnitScope(Context)).getRef(), F, Ty, Val,
+ DIDescriptor(Decl)};
+
+ return DIGlobalVariable(CreateFunc(Elts));
+}
+
+DIGlobalVariable DIBuilder::createGlobalVariable(
+ DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
+ unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
+ MDNode *Decl) {
+ return createGlobalVariableHelper(VMContext, Context, Name, LinkageName, F,
+ LineNumber, Ty, isLocalToUnit, Val, Decl, true,
+ [&] (ArrayRef<Value *> Elts) -> MDNode * {
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ AllGVs.push_back(Node);
+ return Node;
+ });
+}
+
+DIGlobalVariable DIBuilder::createTempGlobalVariableFwdDecl(
+ DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
+ unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
+ MDNode *Decl) {
+ return createGlobalVariableHelper(VMContext, Context, Name, LinkageName, F,
+ LineNumber, Ty, isLocalToUnit, Val, Decl, false,
+ [&] (ArrayRef<Value *> Elts) {
+ return MDNode::getTemporary(VMContext, Elts);
+ });
}
-/// createVariable - Create a new descriptor for the specified variable.
DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
StringRef Name, DIFile File,
unsigned LineNo, DITypeRef Ty,
@@ -1042,24 +882,20 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
DIDescriptor Context(getNonCompileUnitScope(Scope));
assert((!Context || Context.isScope()) &&
"createLocalVariable should be called with a valid Context");
- Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
- Ty,
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
- };
+ Value *Elts[] = {HeaderBuilder::get(Tag)
+ .concat(Name)
+ .concat(LineNo | (ArgNo << 24))
+ .concat(Flags)
+ .get(VMContext),
+ getNonCompileUnitScope(Scope), File, Ty};
MDNode *Node = MDNode::get(VMContext, Elts);
if (AlwaysPreserve) {
// The optimizer may remove local variable. If there is an interest
// to preserve variable info in such situation then stash it in a
// named mdnode.
DISubprogram Fn(getDISubprogram(Scope));
- NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn);
- FnLocals->addOperand(Node);
+ assert(Fn && "Missing subprogram for local variable");
+ PreservedVariables[Fn].push_back(Node);
}
DIVariable RetVar(Node);
assert(RetVar.isVariable() &&
@@ -1067,33 +903,20 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
return RetVar;
}
-/// createComplexVariable - Create a new descriptor for the specified variable
-/// which has a complex address expression for its address.
-DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
- StringRef Name, DIFile F,
- unsigned LineNo,
- DITypeRef Ty,
- ArrayRef<Value *> Addr,
- unsigned ArgNo) {
- assert(Addr.size() > 0 && "complex address is empty");
- Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- F,
- ConstantInt::get(Type::getInt32Ty(VMContext),
- (LineNo | (ArgNo << 24))),
- Ty,
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- MDNode::get(VMContext, Addr)
- };
- return DIVariable(MDNode::get(VMContext, Elts));
+DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Addr) {
+ auto Header = HeaderBuilder::get(DW_TAG_expression);
+ for (int64_t I : Addr)
+ Header.concat(I);
+ Value *Elts[] = {Header.get(VMContext)};
+ return DIExpression(MDNode::get(VMContext, Elts));
+}
+
+DIExpression DIBuilder::createPieceExpression(unsigned OffsetInBytes,
+ unsigned SizeInBytes) {
+ int64_t Addr[] = {dwarf::DW_OP_piece, OffsetInBytes, SizeInBytes};
+ return createExpression(Addr);
}
-/// createFunction - Create a new descriptor for the specified function.
-/// FIXME: this is added for dragonegg. Once we update dragonegg
-/// to call resolve function, this will be removed.
DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
StringRef LinkageName, DIFile File,
unsigned LineNo, DICompositeType Ty,
@@ -1109,7 +932,39 @@ DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
Flags, isOptimized, Fn, TParams, Decl);
}
-/// createFunction - Create a new descriptor for the specified function.
+static DISubprogram
+createFunctionHelper(LLVMContext &VMContext, DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile File, unsigned LineNo,
+ DICompositeType Ty, bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine, unsigned Flags, bool isOptimized,
+ Function *Fn, MDNode *TParams, MDNode *Decl, MDNode *Vars,
+ std::function<MDNode *(ArrayRef<Value *>)> CreateFunc) {
+ assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
+ "function types should be subroutines");
+ Value *Elts[] = {
+ HeaderBuilder::get(dwarf::DW_TAG_subprogram)
+ .concat(Name)
+ .concat(Name)
+ .concat(LinkageName)
+ .concat(LineNo)
+ .concat(isLocalToUnit)
+ .concat(isDefinition)
+ .concat(0)
+ .concat(0)
+ .concat(Flags)
+ .concat(isOptimized)
+ .concat(ScopeLine)
+ .get(VMContext),
+ File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(), Ty,
+ nullptr, Fn, TParams, Decl, Vars};
+
+ DISubprogram S(CreateFunc(Elts));
+ assert(S.isSubprogram() &&
+ "createFunction should return a valid DISubprogram");
+ return S;
+}
+
+
DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
StringRef LinkageName, DIFile File,
unsigned LineNo, DICompositeType Ty,
@@ -1117,43 +972,36 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
unsigned ScopeLine, unsigned Flags,
bool isOptimized, Function *Fn,
MDNode *TParams, MDNode *Decl) {
- assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
- "function types should be subroutines");
- Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, LinkageName),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- Ty,
- ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- nullptr,
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
- Fn,
- TParams,
- Decl,
- MDNode::getTemporary(VMContext, TElts),
- ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine)
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
+ return createFunctionHelper(VMContext, Context, Name, LinkageName, File,
+ LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine,
+ Flags, isOptimized, Fn, TParams, Decl,
+ MDNode::getTemporary(VMContext, None),
+ [&] (ArrayRef<Value *> Elts) -> MDNode *{
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ // Create a named metadata so that we
+ // do not lose this mdnode.
+ if (isDefinition)
+ AllSubprograms.push_back(Node);
+ return Node;
+ });
+}
- // Create a named metadata so that we do not lose this mdnode.
- if (isDefinition)
- AllSubprograms.push_back(Node);
- DISubprogram S(Node);
- assert(S.isSubprogram() &&
- "createFunction should return a valid DISubprogram");
- return S;
+DISubprogram
+DIBuilder::createTempFunctionFwdDecl(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile File,
+ unsigned LineNo, DICompositeType Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine, unsigned Flags,
+ bool isOptimized, Function *Fn,
+ MDNode *TParams, MDNode *Decl) {
+ return createFunctionHelper(VMContext, Context, Name, LinkageName, File,
+ LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine,
+ Flags, isOptimized, Fn, TParams, Decl, nullptr,
+ [&] (ArrayRef<Value *> Elts) {
+ return MDNode::getTemporary(VMContext, Elts);
+ });
}
-/// createMethod - Create a new descriptor for the specified C++ method.
DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
StringRef LinkageName, DIFile F,
unsigned LineNo, DICompositeType Ty,
@@ -1167,29 +1015,22 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
- F.getFileNode(),
- DIScope(Context).getRef(),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, LinkageName),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- Ty,
- ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
- ConstantInt::get(Type::getInt32Ty(VMContext), VK),
- ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
- VTableHolder.getRef(),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
- Fn,
- TParam,
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- nullptr,
- // FIXME: Do we want to use different scope/lines?
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subprogram)
+ .concat(Name)
+ .concat(Name)
+ .concat(LinkageName)
+ .concat(LineNo)
+ .concat(isLocalToUnit)
+ .concat(isDefinition)
+ .concat(VK)
+ .concat(VIndex)
+ .concat(Flags)
+ .concat(isOptimized)
+ .concat(LineNo)
+ // FIXME: Do we want to use different scope/lines?
+ .get(VMContext),
+ F.getFileNode(), DIScope(Context).getRef(), Ty,
+ VTableHolder.getRef(), Fn, TParam, nullptr, nullptr};
MDNode *Node = MDNode::get(VMContext, Elts);
if (isDefinition)
AllSubprograms.push_back(Node);
@@ -1198,32 +1039,26 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
return S;
}
-/// createNameSpace - This creates new descriptor for a namespace
-/// with the specified parent scope.
DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNo) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
- File.getFileNode(),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_namespace)
+ .concat(Name)
+ .concat(LineNo)
+ .get(VMContext),
+ File.getFileNode(), getNonCompileUnitScope(Scope)};
DINameSpace R(MDNode::get(VMContext, Elts));
assert(R.Verify() &&
"createNameSpace should return a verifiable DINameSpace");
return R;
}
-/// createLexicalBlockFile - This creates a new MDNode that encapsulates
-/// an existing scope with a new filename.
DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
- DIFile File) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
- File.getFileNode(),
- Scope
- };
+ DIFile File,
+ unsigned Discriminator) {
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_lexical_block)
+ .concat(Discriminator)
+ .get(VMContext),
+ File.getFileNode(), Scope};
DILexicalBlockFile R(MDNode::get(VMContext, Elts));
assert(
R.Verify() &&
@@ -1232,8 +1067,7 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
}
DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
- unsigned Line, unsigned Col,
- unsigned Discriminator) {
+ unsigned Line, unsigned Col) {
// FIXME: This isn't thread safe nor the right way to defeat MDNode uniquing.
// I believe the right way is to have a self-referential element in the node.
// Also: why do we bother with line/column - they're not used and the
@@ -1243,23 +1077,20 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
// Defeat MDNode uniquing for lexical blocks by using unique id.
static unsigned int unique_id = 0;
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
- File.getFileNode(),
- getNonCompileUnitScope(Scope),
- ConstantInt::get(Type::getInt32Ty(VMContext), Line),
- ConstantInt::get(Type::getInt32Ty(VMContext), Col),
- ConstantInt::get(Type::getInt32Ty(VMContext), Discriminator),
- ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
- };
+ Value *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_lexical_block)
+ .concat(Line)
+ .concat(Col)
+ .concat(unique_id++)
+ .get(VMContext),
+ File.getFileNode(), getNonCompileUnitScope(Scope)};
DILexicalBlock R(MDNode::get(VMContext, Elts));
assert(R.Verify() &&
"createLexicalBlock should return a verifiable DILexicalBlock");
return R;
}
-/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ DIExpression Expr,
Instruction *InsertBefore) {
assert(Storage && "no storage passed to dbg.declare");
assert(VarInfo.isVariable() &&
@@ -1267,12 +1098,12 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
- Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+ Value *Args[] = {MDNode::get(Storage->getContext(), Storage), VarInfo, Expr};
return CallInst::Create(DeclareFn, Args, "", InsertBefore);
}
-/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ DIExpression Expr,
BasicBlock *InsertAtEnd) {
assert(Storage && "no storage passed to dbg.declare");
assert(VarInfo.isVariable() &&
@@ -1280,7 +1111,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
- Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+ Value *Args[] = {MDNode::get(Storage->getContext(), Storage), VarInfo, Expr};
// If this block already has a terminator then insert this intrinsic
// before the terminator.
@@ -1290,9 +1121,9 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
}
-/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
DIVariable VarInfo,
+ DIExpression Expr,
Instruction *InsertBefore) {
assert(V && "no value passed to dbg.value");
assert(VarInfo.isVariable() &&
@@ -1300,15 +1131,15 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
- Value *Args[] = { MDNode::get(V->getContext(), V),
- ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
- VarInfo };
+ Value *Args[] = {MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo, Expr};
return CallInst::Create(ValueFn, Args, "", InsertBefore);
}
-/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
DIVariable VarInfo,
+ DIExpression Expr,
BasicBlock *InsertAtEnd) {
assert(V && "no value passed to dbg.value");
assert(VarInfo.isVariable() &&
@@ -1316,8 +1147,8 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
- Value *Args[] = { MDNode::get(V->getContext(), V),
- ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
- VarInfo };
+ Value *Args[] = {MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo, Expr};
return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
}
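
The DIBuilder changes above thread an explicit DIExpression through the debug intrinsics. A minimal sketch of how a frontend built against this revision might use the new overloads; the names DIB, Alloca, Var and InsertPt are hypothetical placeholders, not part of this patch:

void emitPieceDeclare(DIBuilder &DIB, Value *Alloca, DIVariable Var,
                      Instruction *InsertPt) {
  // Describe the first 8 bytes of the variable: DW_OP_piece, offset 0, size 8.
  DIExpression Piece = DIB.createPieceExpression(/*OffsetInBytes=*/0,
                                                 /*SizeInBytes=*/8);
  // llvm.dbg.declare now carries the expression as its third operand.
  DIB.insertDeclare(Alloca, Var, Piece, InsertPt);
}
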
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index dea05fb..8a057f5 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -55,7 +55,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
// Add padding if necessary to align the data element properly.
if ((StructSize & (TyAlign-1)) != 0)
- StructSize = DataLayout::RoundUpAlignment(StructSize, TyAlign);
+ StructSize = RoundUpToAlignment(StructSize, TyAlign);
// Keep track of maximum alignment constraint.
StructAlignment = std::max(TyAlign, StructAlignment);
@@ -70,7 +70,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
// Add padding to the end of the struct so that it could be put in an array
// and all array elements would be aligned correctly.
if ((StructSize & (StructAlignment-1)) != 0)
- StructSize = DataLayout::RoundUpAlignment(StructSize, StructAlignment);
+ StructSize = RoundUpToAlignment(StructSize, StructAlignment);
}
@@ -179,7 +179,7 @@ void DataLayout::reset(StringRef Desc) {
clear();
LayoutMap = nullptr;
- LittleEndian = false;
+ BigEndian = false;
StackNaturalAlign = 0;
ManglingMode = MM_None;
@@ -239,10 +239,10 @@ void DataLayout::parseSpecifier(StringRef Desc) {
// FIXME: remove this on LLVM 4.0.
break;
case 'E':
- LittleEndian = false;
+ BigEndian = true;
break;
case 'e':
- LittleEndian = true;
+ BigEndian = false;
break;
case 'p': {
// Address space.
@@ -345,6 +345,10 @@ void DataLayout::parseSpecifier(StringRef Desc) {
}
DataLayout::DataLayout(const Module *M) : LayoutMap(nullptr) {
+ init(M);
+}
+
+void DataLayout::init(const Module *M) {
const DataLayout *Other = M->getDataLayout();
if (Other)
*this = *Other;
@@ -353,7 +357,7 @@ DataLayout::DataLayout(const Module *M) : LayoutMap(nullptr) {
}
bool DataLayout::operator==(const DataLayout &Other) const {
- bool Ret = LittleEndian == Other.LittleEndian &&
+ bool Ret = BigEndian == Other.BigEndian &&
StackNaturalAlign == Other.StackNaturalAlign &&
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths &&
@@ -522,7 +526,7 @@ std::string DataLayout::getStringRepresentation() const {
std::string Result;
raw_string_ostream OS(Result);
- OS << (LittleEndian ? "e" : "E");
+ OS << (BigEndian ? "E" : "e");
switch (ManglingMode) {
case MM_None:
@@ -637,7 +641,7 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
? getPointerABIAlignment(0)
: getPointerPrefAlignment(0));
case Type::PointerTyID: {
- unsigned AS = dyn_cast<PointerType>(Ty)->getAddressSpace();
+ unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
return (abi_or_pref
? getPointerABIAlignment(AS)
: getPointerPrefAlignment(AS));
@@ -796,17 +800,17 @@ unsigned DataLayout::getPreferredAlignmentLog(const GlobalVariable *GV) const {
}
DataLayoutPass::DataLayoutPass() : ImmutablePass(ID), DL("") {
- report_fatal_error("Bad DataLayoutPass ctor used. Tool did not specify a "
- "DataLayout to use?");
+ initializeDataLayoutPassPass(*PassRegistry::getPassRegistry());
}
DataLayoutPass::~DataLayoutPass() {}
-DataLayoutPass::DataLayoutPass(const DataLayout &DL)
- : ImmutablePass(ID), DL(DL) {
- initializeDataLayoutPassPass(*PassRegistry::getPassRegistry());
+bool DataLayoutPass::doInitialization(Module &M) {
+ DL.init(&M);
+ return false;
}
-DataLayoutPass::DataLayoutPass(const Module *M) : ImmutablePass(ID), DL(M) {
- initializeDataLayoutPassPass(*PassRegistry::getPassRegistry());
+bool DataLayoutPass::doFinalization(Module &M) {
+ DL.reset("");
+ return false;
}
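
With the constructor changes above, DataLayoutPass is default-constructed and fills in its DataLayout from the module when the pass manager calls doInitialization. A rough sketch under the legacy pass-manager interface assumed for this era; M is a hypothetical Module:

PassManager PM;                // assumed legacy pass manager setup
PM.add(new DataLayoutPass());  // no longer takes a DataLayout or Module argument
PM.run(M);                     // doInitialization calls DL.init(&M); doFinalization resets it
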
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 5e39b24..bb5161d 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -46,10 +47,9 @@ bool DIDescriptor::Verify() const {
DILexicalBlockFile(DbgNode).Verify() ||
DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() ||
DIObjCProperty(DbgNode).Verify() ||
- DIUnspecifiedParameter(DbgNode).Verify() ||
DITemplateTypeParameter(DbgNode).Verify() ||
DITemplateValueParameter(DbgNode).Verify() ||
- DIImportedEntity(DbgNode).Verify());
+ DIImportedEntity(DbgNode).Verify() || DIExpression(DbgNode).Verify());
}
static Value *getField(const MDNode *DbgNode, unsigned Elt) {
@@ -138,25 +138,53 @@ void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
}
}
-uint64_t DIVariable::getAddrElement(unsigned Idx) const {
- DIDescriptor ComplexExpr = getDescriptorField(8);
- if (Idx < ComplexExpr->getNumOperands())
- if (auto *CI = dyn_cast_or_null<ConstantInt>(ComplexExpr->getOperand(Idx)))
- return CI->getZExtValue();
+static unsigned DIVariableInlinedAtIndex = 4;
+MDNode *DIVariable::getInlinedAt() const {
+ return getNodeField(DbgNode, DIVariableInlinedAtIndex);
+}
- assert(false && "non-existing complex address element requested");
- return 0;
+/// \brief Return the size reported by the variable's type.
+unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) {
+ DIType Ty = getType().resolve(Map);
+ // Follow derived types until we reach a type that
+ // reports back a size.
+ while (Ty.isDerivedType() && !Ty.getSizeInBits()) {
+ DIDerivedType DT(&*Ty);
+ Ty = DT.getTypeDerivedFrom().resolve(Map);
+ }
+ assert(Ty.getSizeInBits() && "type with size 0");
+ return Ty.getSizeInBits();
+}
+
+uint64_t DIExpression::getElement(unsigned Idx) const {
+ unsigned I = Idx + 1;
+ assert(I < getNumHeaderFields() &&
+ "non-existing complex address element requested");
+ return getHeaderFieldAs<int64_t>(I);
}
-/// getInlinedAt - If this variable is inlined then return inline location.
-MDNode *DIVariable::getInlinedAt() const { return getNodeField(DbgNode, 7); }
+bool DIExpression::isVariablePiece() const {
+ return getNumElements() && getElement(0) == dwarf::DW_OP_piece;
+}
+
+uint64_t DIExpression::getPieceOffset() const {
+ assert(isVariablePiece());
+ return getElement(1);
+}
+
+uint64_t DIExpression::getPieceSize() const {
+ assert(isVariablePiece());
+ return getElement(2);
+}
//===----------------------------------------------------------------------===//
// Predicates
//===----------------------------------------------------------------------===//
-/// isBasicType - Return true if the specified tag is legal for
-/// DIBasicType.
+bool DIDescriptor::isSubroutineType() const {
+ return isCompositeType() && getTag() == dwarf::DW_TAG_subroutine_type;
+}
+
bool DIDescriptor::isBasicType() const {
if (!DbgNode)
return false;
@@ -169,7 +197,6 @@ bool DIDescriptor::isBasicType() const {
}
}
-/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
bool DIDescriptor::isDerivedType() const {
if (!DbgNode)
return false;
@@ -192,8 +219,6 @@ bool DIDescriptor::isDerivedType() const {
}
}
-/// isCompositeType - Return true if the specified tag is legal for
-/// DICompositeType.
bool DIDescriptor::isCompositeType() const {
if (!DbgNode)
return false;
@@ -210,7 +235,6 @@ bool DIDescriptor::isCompositeType() const {
}
}
-/// isVariable - Return true if the specified tag is legal for DIVariable.
bool DIDescriptor::isVariable() const {
if (!DbgNode)
return false;
@@ -223,32 +247,19 @@ bool DIDescriptor::isVariable() const {
}
}
-/// isType - Return true if the specified tag is legal for DIType.
bool DIDescriptor::isType() const {
return isBasicType() || isCompositeType() || isDerivedType();
}
-/// isSubprogram - Return true if the specified tag is legal for
-/// DISubprogram.
bool DIDescriptor::isSubprogram() const {
return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
}
-/// isGlobalVariable - Return true if the specified tag is legal for
-/// DIGlobalVariable.
bool DIDescriptor::isGlobalVariable() const {
return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
getTag() == dwarf::DW_TAG_constant);
}
-/// isUnspecifiedParmeter - Return true if the specified tag is
-/// DW_TAG_unspecified_parameters.
-bool DIDescriptor::isUnspecifiedParameter() const {
- return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
-}
-
-/// isScope - Return true if the specified tag is one of the scope
-/// related tag.
bool DIDescriptor::isScope() const {
if (!DbgNode)
return false;
@@ -265,83 +276,67 @@ bool DIDescriptor::isScope() const {
return isType();
}
-/// isTemplateTypeParameter - Return true if the specified tag is
-/// DW_TAG_template_type_parameter.
bool DIDescriptor::isTemplateTypeParameter() const {
return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
}
-/// isTemplateValueParameter - Return true if the specified tag is
-/// DW_TAG_template_value_parameter.
bool DIDescriptor::isTemplateValueParameter() const {
return DbgNode && (getTag() == dwarf::DW_TAG_template_value_parameter ||
getTag() == dwarf::DW_TAG_GNU_template_template_param ||
getTag() == dwarf::DW_TAG_GNU_template_parameter_pack);
}
-/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
bool DIDescriptor::isCompileUnit() const {
return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
}
-/// isFile - Return true if the specified tag is DW_TAG_file_type.
bool DIDescriptor::isFile() const {
return DbgNode && getTag() == dwarf::DW_TAG_file_type;
}
-/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
bool DIDescriptor::isNameSpace() const {
return DbgNode && getTag() == dwarf::DW_TAG_namespace;
}
-/// isLexicalBlockFile - Return true if the specified descriptor is a
-/// lexical block with an extra file.
bool DIDescriptor::isLexicalBlockFile() const {
return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- (DbgNode->getNumOperands() == 3);
+ DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 2;
}
-/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
bool DIDescriptor::isLexicalBlock() const {
+ // FIXME: There are always exactly 4 header fields in DILexicalBlock, but
+ // something relies on this returning true for DILexicalBlockFile.
return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- (DbgNode->getNumOperands() > 3);
+ DbgNode->getNumOperands() == 3 &&
+ (getNumHeaderFields() == 2 || getNumHeaderFields() == 4);
}
-/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
bool DIDescriptor::isSubrange() const {
return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
}
-/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
bool DIDescriptor::isEnumerator() const {
return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
}
-/// isObjCProperty - Return true if the specified tag is DW_TAG_APPLE_property.
bool DIDescriptor::isObjCProperty() const {
return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
}
-/// \brief Return true if the specified tag is DW_TAG_imported_module or
-/// DW_TAG_imported_declaration.
bool DIDescriptor::isImportedEntity() const {
return DbgNode && (getTag() == dwarf::DW_TAG_imported_module ||
getTag() == dwarf::DW_TAG_imported_declaration);
}
+bool DIDescriptor::isExpression() const {
+ return DbgNode && (getTag() == dwarf::DW_TAG_expression);
+}
+
//===----------------------------------------------------------------------===//
// Simple Descriptor Constructors and other Methods
//===----------------------------------------------------------------------===//
-unsigned DIArray::getNumElements() const {
- if (!DbgNode)
- return 0;
- return DbgNode->getNumOperands();
-}
-
-/// replaceAllUsesWith - Replace all uses of the MDNode used by this
-/// type with the one in the passed descriptor.
-void DIType::replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D) {
+void DIDescriptor::replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D) {
assert(DbgNode && "Trying to replace an unverified type!");
@@ -362,12 +357,10 @@ void DIType::replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D) {
const Value *V = cast_or_null<Value>(DN);
Node->replaceAllUsesWith(const_cast<Value *>(V));
MDNode::deleteTemporary(Node);
- DbgNode = D;
+ DbgNode = DN;
}
-/// replaceAllUsesWith - Replace all uses of the MDNode used by this
-/// type with the one in D.
-void DIType::replaceAllUsesWith(MDNode *D) {
+void DIDescriptor::replaceAllUsesWith(MDNode *D) {
assert(DbgNode && "Trying to replace an unverified type!");
assert(DbgNode != D && "This replacement should always happen");
@@ -378,7 +371,6 @@ void DIType::replaceAllUsesWith(MDNode *D) {
MDNode::deleteTemporary(Node);
}
-/// Verify - Verify that a compile unit is well formed.
bool DICompileUnit::Verify() const {
if (!isCompileUnit())
return false;
@@ -388,19 +380,19 @@ bool DICompileUnit::Verify() const {
if (getFilename().empty())
return false;
- return DbgNode->getNumOperands() == 14;
+ return DbgNode->getNumOperands() == 7 && getNumHeaderFields() == 8;
}
-/// Verify - Verify that an ObjC property is well formed.
bool DIObjCProperty::Verify() const {
if (!isObjCProperty())
return false;
// Don't worry about the rest of the strings for now.
- return DbgNode->getNumOperands() == 8;
+ return DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 6;
}
-/// Check if a field at position Elt of a MDNode is a MDNode.
+/// \brief Check if a field at position Elt of a MDNode is a MDNode.
+///
/// We currently allow an empty string and an integer.
/// But we don't allow a non-empty string in a MDNode field.
static bool fieldIsMDNode(const MDNode *DbgNode, unsigned Elt) {
@@ -412,41 +404,42 @@ static bool fieldIsMDNode(const MDNode *DbgNode, unsigned Elt) {
return true;
}
-/// Check if a field at position Elt of a MDNode is a MDString.
+/// \brief Check if a field at position Elt of a MDNode is a MDString.
static bool fieldIsMDString(const MDNode *DbgNode, unsigned Elt) {
Value *Fld = getField(DbgNode, Elt);
return !Fld || isa<MDString>(Fld);
}
-/// Check if a value can be a reference to a type.
-static bool isTypeRef(const Value *Val) {
- return !Val ||
- (isa<MDString>(Val) && !cast<MDString>(Val)->getString().empty()) ||
- (isa<MDNode>(Val) && DIType(cast<MDNode>(Val)).isType());
+/// \brief Check if a value can be a reference to a type.
+static bool isTypeRef(const Metadata *MD) {
+ if (!MD)
+ return true;
+ if (auto *S = dyn_cast<MDString>(MD))
+ return !S->getString().empty();
+ if (auto *N = dyn_cast<MDNode>(MD))
+ return DIType(N).isType();
+ return false;
}
-/// Check if a field at position Elt of a MDNode can be a reference to a type.
+/// \brief Check if referenced field might be a type.
static bool fieldIsTypeRef(const MDNode *DbgNode, unsigned Elt) {
- Value *Fld = getField(DbgNode, Elt);
- return isTypeRef(Fld);
+ return isTypeRef(dyn_cast_or_null<Metadata>(getField(DbgNode, Elt)));
}
-/// Check if a value can be a ScopeRef.
-static bool isScopeRef(const Value *Val) {
- return !Val ||
- (isa<MDString>(Val) && !cast<MDString>(Val)->getString().empty()) ||
- // Not checking for Val->isScope() here, because it would work
- // only for lexical scopes and not all subclasses of DIScope.
- isa<MDNode>(Val);
+/// \brief Check if a value can be a ScopeRef.
+static bool isScopeRef(const Metadata *MD) {
+ if (!MD)
+ return true;
+ if (auto *S = dyn_cast<MDString>(MD))
+ return !S->getString().empty();
+ return isa<MDNode>(MD);
}
-/// Check if a field at position Elt of a MDNode can be a ScopeRef.
+/// \brief Check if a field at position Elt of a MDNode can be a ScopeRef.
static bool fieldIsScopeRef(const MDNode *DbgNode, unsigned Elt) {
- Value *Fld = getField(DbgNode, Elt);
- return isScopeRef(Fld);
+ return isScopeRef(dyn_cast_or_null<Metadata>(getField(DbgNode, Elt)));
}
-/// Verify - Verify that a type descriptor is well formed.
bool DIType::Verify() const {
if (!isType())
return false;
@@ -467,6 +460,7 @@ bool DIType::Verify() const {
Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend &&
getFilename().empty())
return false;
+
// DIType is abstract, it should be a BasicType, a DerivedType or
// a CompositeType.
if (isBasicType())
@@ -479,89 +473,113 @@ bool DIType::Verify() const {
return false;
}
-/// Verify - Verify that a basic type descriptor is well formed.
bool DIBasicType::Verify() const {
- return isBasicType() && DbgNode->getNumOperands() == 10;
+ return isBasicType() && DbgNode->getNumOperands() == 3 &&
+ getNumHeaderFields() == 8;
}
-/// Verify - Verify that a derived type descriptor is well formed.
bool DIDerivedType::Verify() const {
- // Make sure DerivedFrom @ field 9 is TypeRef.
- if (!fieldIsTypeRef(DbgNode, 9))
+ // Make sure DerivedFrom @ field 3 is TypeRef.
+ if (!fieldIsTypeRef(DbgNode, 3))
return false;
if (getTag() == dwarf::DW_TAG_ptr_to_member_type)
- // Make sure ClassType @ field 10 is a TypeRef.
- if (!fieldIsTypeRef(DbgNode, 10))
+ // Make sure ClassType @ field 4 is a TypeRef.
+ if (!fieldIsTypeRef(DbgNode, 4))
return false;
- return isDerivedType() && DbgNode->getNumOperands() >= 10 &&
- DbgNode->getNumOperands() <= 14;
+ return isDerivedType() && DbgNode->getNumOperands() >= 4 &&
+ DbgNode->getNumOperands() <= 8 && getNumHeaderFields() >= 7 &&
+ getNumHeaderFields() <= 8;
}
-/// Verify - Verify that a composite type descriptor is well formed.
bool DICompositeType::Verify() const {
if (!isCompositeType())
return false;
- // Make sure DerivedFrom @ field 9 and ContainingType @ field 12 are TypeRef.
- if (!fieldIsTypeRef(DbgNode, 9))
+ // Make sure DerivedFrom @ field 3 and ContainingType @ field 5 are TypeRef.
+ if (!fieldIsTypeRef(DbgNode, 3))
return false;
- if (!fieldIsTypeRef(DbgNode, 12))
+ if (!fieldIsTypeRef(DbgNode, 5))
return false;
- // Make sure the type identifier at field 14 is MDString, it can be null.
- if (!fieldIsMDString(DbgNode, 14))
+ // Make sure the type identifier at field 7 is MDString, it can be null.
+ if (!fieldIsMDString(DbgNode, 7))
return false;
// A subroutine type can't be both & and &&.
if (isLValueReference() && isRValueReference())
return false;
- return DbgNode->getNumOperands() == 15;
+ return DbgNode->getNumOperands() == 8 && getNumHeaderFields() == 8;
}
-/// Verify - Verify that a subprogram descriptor is well formed.
bool DISubprogram::Verify() const {
if (!isSubprogram())
return false;
- // Make sure context @ field 2 is a ScopeRef and type @ field 7 is a MDNode.
+ // Make sure context @ field 2 is a ScopeRef and type @ field 3 is a MDNode.
if (!fieldIsScopeRef(DbgNode, 2))
return false;
- if (!fieldIsMDNode(DbgNode, 7))
+ if (!fieldIsMDNode(DbgNode, 3))
return false;
- // Containing type @ field 12.
- if (!fieldIsTypeRef(DbgNode, 12))
+ // Containing type @ field 4.
+ if (!fieldIsTypeRef(DbgNode, 4))
return false;
// A subprogram can't be both & and &&.
if (isLValueReference() && isRValueReference())
return false;
- return DbgNode->getNumOperands() == 20;
+ // If a DISubprogram has an llvm::Function*, then scope chains from all
+ // instructions within the function should lead to this DISubprogram.
+ if (auto *F = getFunction()) {
+ LLVMContext &Ctxt = F->getContext();
+ for (auto &BB : *F) {
+ for (auto &I : BB) {
+ DebugLoc DL = I.getDebugLoc();
+ if (DL.isUnknown())
+ continue;
+
+ MDNode *Scope = nullptr;
+ MDNode *IA = nullptr;
+ // walk the inlined-at scopes
+ while (DL.getScopeAndInlinedAt(Scope, IA, F->getContext()), IA)
+ DL = DebugLoc::getFromDILocation(IA);
+ DL.getScopeAndInlinedAt(Scope, IA, Ctxt);
+ assert(!IA);
+ while (!DIDescriptor(Scope).isSubprogram()) {
+ DILexicalBlockFile D(Scope);
+ Scope = D.isLexicalBlockFile()
+ ? D.getScope()
+ : DebugLoc::getFromDILexicalBlock(Scope).getScope(Ctxt);
+ }
+ if (!DISubprogram(Scope).describes(F))
+ return false;
+ }
+ }
+ }
+ return DbgNode->getNumOperands() == 9 && getNumHeaderFields() == 12;
}
-/// Verify - Verify that a global variable descriptor is well formed.
bool DIGlobalVariable::Verify() const {
if (!isGlobalVariable())
return false;
if (getDisplayName().empty())
return false;
- // Make sure context @ field 2 is an MDNode.
- if (!fieldIsMDNode(DbgNode, 2))
+ // Make sure context @ field 1 is a ScopeRef.
+ if (!fieldIsScopeRef(DbgNode, 1))
return false;
- // Make sure that type @ field 8 is a DITypeRef.
- if (!fieldIsTypeRef(DbgNode, 8))
+ // Make sure that type @ field 3 is a DITypeRef.
+ if (!fieldIsTypeRef(DbgNode, 3))
return false;
- // Make sure StaticDataMemberDeclaration @ field 12 is MDNode.
- if (!fieldIsMDNode(DbgNode, 12))
+ // Make sure StaticDataMemberDeclaration @ field 5 is MDNode.
+ if (!fieldIsMDNode(DbgNode, 5))
return false;
- return DbgNode->getNumOperands() == 13;
+ return DbgNode->getNumOperands() == 6 && getNumHeaderFields() == 7;
}
-/// Verify - Verify that a variable descriptor is well formed.
bool DIVariable::Verify() const {
if (!isVariable())
return false;
@@ -569,19 +587,31 @@ bool DIVariable::Verify() const {
// Make sure context @ field 1 is an MDNode.
if (!fieldIsMDNode(DbgNode, 1))
return false;
- // Make sure that type @ field 5 is a DITypeRef.
- if (!fieldIsTypeRef(DbgNode, 5))
+ // Make sure that type @ field 3 is a DITypeRef.
+ if (!fieldIsTypeRef(DbgNode, 3))
return false;
- // Variable without a complex expression.
- if (DbgNode->getNumOperands() == 8)
+ // Check the number of header fields, which is common between complex and
+ // simple variables.
+ if (getNumHeaderFields() != 4)
+ return false;
+
+ // Variable without an inline location.
+ if (DbgNode->getNumOperands() == 4)
+ return true;
+
+ // Variable with an inline location.
+ return getInlinedAt() != nullptr && DbgNode->getNumOperands() == 5;
+}
+
+bool DIExpression::Verify() const {
+ // Empty DIExpressions may be represented as a nullptr.
+ if (!DbgNode)
return true;
- // Make sure the complex expression is an MDNode.
- return (DbgNode->getNumOperands() == 9 && fieldIsMDNode(DbgNode, 8));
+ return isExpression() && DbgNode->getNumOperands() == 1;
}
-/// Verify - Verify that a location descriptor is well formed.
bool DILocation::Verify() const {
if (!DbgNode)
return false;
@@ -589,69 +619,59 @@ bool DILocation::Verify() const {
return DbgNode->getNumOperands() == 4;
}
-/// Verify - Verify that a namespace descriptor is well formed.
bool DINameSpace::Verify() const {
if (!isNameSpace())
return false;
- return DbgNode->getNumOperands() == 5;
+ return DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 3;
}
-/// \brief Retrieve the MDNode for the directory/file pair.
MDNode *DIFile::getFileNode() const { return getNodeField(DbgNode, 1); }
-/// \brief Verify that the file descriptor is well formed.
bool DIFile::Verify() const {
return isFile() && DbgNode->getNumOperands() == 2;
}
-/// \brief Verify that the enumerator descriptor is well formed.
bool DIEnumerator::Verify() const {
- return isEnumerator() && DbgNode->getNumOperands() == 3;
+ return isEnumerator() && DbgNode->getNumOperands() == 1 &&
+ getNumHeaderFields() == 3;
}
-/// \brief Verify that the subrange descriptor is well formed.
bool DISubrange::Verify() const {
- return isSubrange() && DbgNode->getNumOperands() == 3;
+ return isSubrange() && DbgNode->getNumOperands() == 1 &&
+ getNumHeaderFields() == 3;
}
-/// \brief Verify that the lexical block descriptor is well formed.
bool DILexicalBlock::Verify() const {
- return isLexicalBlock() && DbgNode->getNumOperands() == 7;
+ return isLexicalBlock() && DbgNode->getNumOperands() == 3 &&
+ getNumHeaderFields() == 4;
}
-/// \brief Verify that the file-scoped lexical block descriptor is well formed.
bool DILexicalBlockFile::Verify() const {
- return isLexicalBlockFile() && DbgNode->getNumOperands() == 3;
-}
-
-/// \brief Verify that an unspecified parameter descriptor is well formed.
-bool DIUnspecifiedParameter::Verify() const {
- return isUnspecifiedParameter() && DbgNode->getNumOperands() == 1;
+ return isLexicalBlockFile() && DbgNode->getNumOperands() == 3 &&
+ getNumHeaderFields() == 2;
}
-/// \brief Verify that the template type parameter descriptor is well formed.
bool DITemplateTypeParameter::Verify() const {
- return isTemplateTypeParameter() && DbgNode->getNumOperands() == 7;
+ return isTemplateTypeParameter() && DbgNode->getNumOperands() == 4 &&
+ getNumHeaderFields() == 4;
}
-/// \brief Verify that the template value parameter descriptor is well formed.
bool DITemplateValueParameter::Verify() const {
- return isTemplateValueParameter() && DbgNode->getNumOperands() == 8;
+ return isTemplateValueParameter() && DbgNode->getNumOperands() == 5 &&
+ getNumHeaderFields() == 4;
}
-/// \brief Verify that the imported module descriptor is well formed.
bool DIImportedEntity::Verify() const {
- return isImportedEntity() &&
- (DbgNode->getNumOperands() == 4 || DbgNode->getNumOperands() == 5);
+ return isImportedEntity() && DbgNode->getNumOperands() == 3 &&
+ getNumHeaderFields() == 3;
}
-/// getObjCProperty - Return property node, if this ivar is associated with one.
MDNode *DIDerivedType::getObjCProperty() const {
- return getNodeField(DbgNode, 10);
+ return getNodeField(DbgNode, 4);
}
MDString *DICompositeType::getIdentifier() const {
- return cast_or_null<MDString>(getField(DbgNode, 14));
+ return cast_or_null<MDString>(getField(DbgNode, 7));
}
#ifndef NDEBUG
@@ -669,27 +689,21 @@ static void VerifySubsetOf(const MDNode *LHS, const MDNode *RHS) {
}
#endif
-/// \brief Set the array of member DITypes.
-void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) {
- assert((!TParams || DbgNode->getNumOperands() == 15) &&
- "If you're setting the template parameters this should include a slot "
- "for that!");
+void DICompositeType::setArraysHelper(MDNode *Elements, MDNode *TParams) {
TrackingVH<MDNode> N(*this);
if (Elements) {
#ifndef NDEBUG
// Check that the new list of members contains all the old members as well.
- if (const MDNode *El = cast_or_null<MDNode>(N->getOperand(10)))
+ if (const MDNode *El = cast_or_null<MDNode>(N->getOperand(4)))
VerifySubsetOf(El, Elements);
#endif
- N->replaceOperandWith(10, Elements);
+ N->replaceOperandWith(4, Elements);
}
if (TParams)
- N->replaceOperandWith(13, TParams);
+ N->replaceOperandWith(6, TParams);
DbgNode = N;
}
-/// Generate a reference to this DIType. Uses the type identifier instead
-/// of the actual MDNode if possible, to help type uniquing.
DIScopeRef DIScope::getRef() const {
if (!isCompositeType())
return DIScopeRef(*this);
@@ -699,15 +713,12 @@ DIScopeRef DIScope::getRef() const {
return DIScopeRef(DTy.getIdentifier());
}
-/// \brief Set the containing type.
void DICompositeType::setContainingType(DICompositeType ContainingType) {
TrackingVH<MDNode> N(*this);
- N->replaceOperandWith(12, ContainingType.getRef());
+ N->replaceOperandWith(5, ContainingType.getRef());
DbgNode = N;
}
-/// isInlinedFnArgument - Return true if this variable provides debugging
-/// information for an inlined function arguments.
bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
assert(CurFn && "Invalid function");
if (!getContext().isSubprogram())
@@ -717,8 +728,6 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
return !DISubprogram(getContext()).describes(CurFn);
}
-/// describes - Return true if this subprogram provides debugging
-/// information for the function F.
bool DISubprogram::describes(const Function *F) {
assert(F && "Invalid function");
if (F == getFunction())
@@ -731,27 +740,18 @@ bool DISubprogram::describes(const Function *F) {
return false;
}
-unsigned DISubprogram::isOptimized() const {
- assert(DbgNode && "Invalid subprogram descriptor!");
- if (DbgNode->getNumOperands() == 15)
- return getUnsignedField(14);
- return 0;
-}
-
MDNode *DISubprogram::getVariablesNodes() const {
- return getNodeField(DbgNode, 18);
+ return getNodeField(DbgNode, 8);
}
DIArray DISubprogram::getVariables() const {
- return DIArray(getNodeField(DbgNode, 18));
+ return DIArray(getNodeField(DbgNode, 8));
}
Value *DITemplateValueParameter::getValue() const {
- return getField(DbgNode, 4);
+ return getField(DbgNode, 3);
}
-// If the current node has a parent scope then return that,
-// else return an empty scope.
DIScopeRef DIScope::getContext() const {
if (isType())
@@ -773,7 +773,6 @@ DIScopeRef DIScope::getContext() const {
return DIScopeRef(nullptr);
}
-// If the scope node has a name, return that, else return an empty string.
StringRef DIScope::getName() const {
if (isType())
return DIType(DbgNode).getName();
@@ -800,44 +799,58 @@ StringRef DIScope::getDirectory() const {
}
DIArray DICompileUnit::getEnumTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 13)
+ if (!DbgNode || DbgNode->getNumOperands() < 7)
return DIArray();
- return DIArray(getNodeField(DbgNode, 7));
+ return DIArray(getNodeField(DbgNode, 2));
}
DIArray DICompileUnit::getRetainedTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 13)
+ if (!DbgNode || DbgNode->getNumOperands() < 7)
return DIArray();
- return DIArray(getNodeField(DbgNode, 8));
+ return DIArray(getNodeField(DbgNode, 3));
}
DIArray DICompileUnit::getSubprograms() const {
- if (!DbgNode || DbgNode->getNumOperands() < 13)
+ if (!DbgNode || DbgNode->getNumOperands() < 7)
return DIArray();
- return DIArray(getNodeField(DbgNode, 9));
+ return DIArray(getNodeField(DbgNode, 4));
}
DIArray DICompileUnit::getGlobalVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() < 13)
+ if (!DbgNode || DbgNode->getNumOperands() < 7)
return DIArray();
- return DIArray(getNodeField(DbgNode, 10));
+ return DIArray(getNodeField(DbgNode, 5));
}
DIArray DICompileUnit::getImportedEntities() const {
- if (!DbgNode || DbgNode->getNumOperands() < 13)
+ if (!DbgNode || DbgNode->getNumOperands() < 7)
return DIArray();
- return DIArray(getNodeField(DbgNode, 11));
+ return DIArray(getNodeField(DbgNode, 6));
+}
+
+void DICompileUnit::replaceSubprograms(DIArray Subprograms) {
+ assert(Verify() && "Expected compile unit");
+ if (Subprograms == getSubprograms())
+ return;
+
+ const_cast<MDNode *>(DbgNode)->replaceOperandWith(4, Subprograms);
+}
+
+void DICompileUnit::replaceGlobalVariables(DIArray GlobalVariables) {
+ assert(Verify() && "Expected compile unit");
+ if (GlobalVariables == getGlobalVariables())
+ return;
+
+ const_cast<MDNode *>(DbgNode)->replaceOperandWith(5, GlobalVariables);
}
-/// copyWithNewScope - Return a copy of this location, replacing the
-/// current scope with the given one.
DILocation DILocation::copyWithNewScope(LLVMContext &Ctx,
- DILexicalBlock NewScope) {
+ DILexicalBlockFile NewScope) {
SmallVector<Value *, 10> Elts;
assert(Verify());
for (unsigned I = 0; I < DbgNode->getNumOperands(); ++I) {
@@ -850,78 +863,43 @@ DILocation DILocation::copyWithNewScope(LLVMContext &Ctx,
return DILocation(NewDIL);
}
-/// computeNewDiscriminator - Generate a new discriminator value for this
-/// file and line location.
unsigned DILocation::computeNewDiscriminator(LLVMContext &Ctx) {
std::pair<const char *, unsigned> Key(getFilename().data(), getLineNumber());
return ++Ctx.pImpl->DiscriminatorTable[Key];
}
-/// fixupSubprogramName - Replace contains special characters used
-/// in a typical Objective-C names with '.' in a given string.
-static void fixupSubprogramName(DISubprogram Fn, SmallVectorImpl<char> &Out) {
- StringRef FName =
- Fn.getFunction() ? Fn.getFunction()->getName() : Fn.getName();
- FName = Function::getRealLinkageName(FName);
-
- StringRef Prefix("llvm.dbg.lv.");
- Out.reserve(FName.size() + Prefix.size());
- Out.append(Prefix.begin(), Prefix.end());
-
- bool isObjCLike = false;
- for (size_t i = 0, e = FName.size(); i < e; ++i) {
- char C = FName[i];
- if (C == '[')
- isObjCLike = true;
+DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
+ LLVMContext &VMContext) {
+ assert(DIVariable(DV).Verify() && "Expected a DIVariable");
+ if (!InlinedScope)
+ return cleanseInlinedVariable(DV, VMContext);
- if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
- C == '+' || C == '(' || C == ')'))
- Out.push_back('.');
- else
- Out.push_back(C);
- }
-}
+ // Insert inlined scope.
+ SmallVector<Value *, 8> Elts;
+ for (unsigned I = 0, E = DIVariableInlinedAtIndex; I != E; ++I)
+ Elts.push_back(DV->getOperand(I));
+ Elts.push_back(InlinedScope);
-/// getFnSpecificMDNode - Return a NameMDNode, if available, that is
-/// suitable to hold function specific information.
-NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
- SmallString<32> Name;
- fixupSubprogramName(Fn, Name);
- return M.getNamedMetadata(Name.str());
+ DIVariable Inlined(MDNode::get(VMContext, Elts));
+ assert(Inlined.Verify() && "Expected to create a DIVariable");
+ return Inlined;
}
-/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
-/// to hold function specific information.
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
- SmallString<32> Name;
- fixupSubprogramName(Fn, Name);
- return M.getOrInsertNamedMetadata(Name.str());
-}
+DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
+ assert(DIVariable(DV).Verify() && "Expected a DIVariable");
+ if (!DIVariable(DV).getInlinedAt())
+ return DIVariable(DV);
-/// createInlinedVariable - Create a new inlined variable based on current
-/// variable.
-/// @param DV Current Variable.
-/// @param InlinedScope Location at current variable is inlined.
-DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
- LLVMContext &VMContext) {
- SmallVector<Value *, 16> Elts;
- // Insert inlined scope as 7th element.
- for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
- i == 7 ? Elts.push_back(InlinedScope) : Elts.push_back(DV->getOperand(i));
- return DIVariable(MDNode::get(VMContext, Elts));
-}
+ // Remove inlined scope.
+ SmallVector<Value *, 8> Elts;
+ for (unsigned I = 0, E = DIVariableInlinedAtIndex; I != E; ++I)
+ Elts.push_back(DV->getOperand(I));
-/// cleanseInlinedVariable - Remove inlined scope from the variable.
-DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
- SmallVector<Value *, 16> Elts;
- // Insert inlined scope as 7th element.
- for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
- i == 7 ? Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)))
- : Elts.push_back(DV->getOperand(i));
- return DIVariable(MDNode::get(VMContext, Elts));
+ DIVariable Cleansed(MDNode::get(VMContext, Elts));
+ assert(Cleansed.Verify() && "Expected to create a DIVariable");
+ return Cleansed;
}
-/// getDISubprogram - Find subprogram that is enclosing this scope.
DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
DIDescriptor D(Scope);
if (D.isSubprogram())
@@ -936,7 +914,23 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
return DISubprogram();
}
-/// getDICompositeType - Find underlying composite type.
+DISubprogram llvm::getDISubprogram(const Function *F) {
+ // We look for the first instr that has a debug annotation leading back to F.
+ for (auto &BB : *F) {
+ auto Inst = std::find_if(BB.begin(), BB.end(), [](const Instruction &Inst) {
+ return !Inst.getDebugLoc().isUnknown();
+ });
+ if (Inst == BB.end())
+ continue;
+ DebugLoc DLoc = Inst->getDebugLoc();
+ const MDNode *Scope = DLoc.getScopeNode(F->getParent()->getContext());
+ DISubprogram Subprogram = getDISubprogram(Scope);
+ return Subprogram.describes(F) ? Subprogram : DISubprogram();
+ }
+
+ return DISubprogram();
+}
+
DICompositeType llvm::getDICompositeType(DIType T) {
if (T.isCompositeType())
return DICompositeType(T);
@@ -953,7 +947,6 @@ DICompositeType llvm::getDICompositeType(DIType T) {
return DICompositeType();
}
-/// Update DITypeIdentifierMap by going through retained types of each CU.
DITypeIdentifierMap
llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) {
DITypeIdentifierMap Map;
@@ -1002,7 +995,6 @@ void DebugInfoFinder::InitializeTypeMap(const Module &M) {
}
}
-/// processModule - Process entire module and collect debug info.
void DebugInfoFinder::processModule(const Module &M) {
InitializeTypeMap(M);
if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
@@ -1013,7 +1005,7 @@ void DebugInfoFinder::processModule(const Module &M) {
for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
DIGlobalVariable DIG(GVs.getElement(i));
if (addGlobalVariable(DIG)) {
- processScope(DIG.getContext());
+ processScope(DIG.getContext().resolve(TypeIdentifierMap));
processType(DIG.getType().resolve(TypeIdentifierMap));
}
}
@@ -1041,7 +1033,6 @@ void DebugInfoFinder::processModule(const Module &M) {
}
}
-/// processLocation - Process DILocation.
void DebugInfoFinder::processLocation(const Module &M, DILocation Loc) {
if (!Loc)
return;
@@ -1050,7 +1041,6 @@ void DebugInfoFinder::processLocation(const Module &M, DILocation Loc) {
processLocation(M, Loc.getOrigLocation());
}
-/// processType - Process DIType.
void DebugInfoFinder::processType(DIType DT) {
if (!addType(DT))
return;
@@ -1058,7 +1048,13 @@ void DebugInfoFinder::processType(DIType DT) {
if (DT.isCompositeType()) {
DICompositeType DCT(DT);
processType(DCT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
- DIArray DA = DCT.getTypeArray();
+ if (DT.isSubroutineType()) {
+ DITypeArray DTA = DISubroutineType(DT).getTypeArray();
+ for (unsigned i = 0, e = DTA.getNumElements(); i != e; ++i)
+ processType(DTA.getElement(i).resolve(TypeIdentifierMap));
+ return;
+ }
+ DIArray DA = DCT.getElements();
for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
DIDescriptor D = DA.getElement(i);
if (D.isType())
@@ -1100,7 +1096,6 @@ void DebugInfoFinder::processScope(DIScope Scope) {
}
}
-/// processSubprogram - Process DISubprogram.
void DebugInfoFinder::processSubprogram(DISubprogram SP) {
if (!addSubprogram(SP))
return;
@@ -1121,7 +1116,6 @@ void DebugInfoFinder::processSubprogram(DISubprogram SP) {
}
}
-/// processDeclare - Process DbgDeclareInst.
void DebugInfoFinder::processDeclare(const Module &M,
const DbgDeclareInst *DDI) {
MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
@@ -1133,7 +1127,7 @@ void DebugInfoFinder::processDeclare(const Module &M,
if (!DV.isVariable())
return;
- if (!NodesSeen.insert(DV))
+ if (!NodesSeen.insert(DV).second)
return;
processScope(DIVariable(N).getContext());
processType(DIVariable(N).getType().resolve(TypeIdentifierMap));
@@ -1149,53 +1143,49 @@ void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
if (!DV.isVariable())
return;
- if (!NodesSeen.insert(DV))
+ if (!NodesSeen.insert(DV).second)
return;
processScope(DIVariable(N).getContext());
processType(DIVariable(N).getType().resolve(TypeIdentifierMap));
}
-/// addType - Add type into Tys.
bool DebugInfoFinder::addType(DIType DT) {
if (!DT)
return false;
- if (!NodesSeen.insert(DT))
+ if (!NodesSeen.insert(DT).second)
return false;
TYs.push_back(DT);
return true;
}
-/// addCompileUnit - Add compile unit into CUs.
bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
if (!CU)
return false;
- if (!NodesSeen.insert(CU))
+ if (!NodesSeen.insert(CU).second)
return false;
CUs.push_back(CU);
return true;
}
-/// addGlobalVariable - Add global variable into GVs.
bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
if (!DIG)
return false;
- if (!NodesSeen.insert(DIG))
+ if (!NodesSeen.insert(DIG).second)
return false;
GVs.push_back(DIG);
return true;
}
-// addSubprogram - Add subprgoram into SPs.
bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
if (!SP)
return false;
- if (!NodesSeen.insert(SP))
+ if (!NodesSeen.insert(SP).second)
return false;
SPs.push_back(SP);
@@ -1209,7 +1199,7 @@ bool DebugInfoFinder::addScope(DIScope Scope) {
// as null for now.
if (Scope->getNumOperands() == 0)
return false;
- if (!NodesSeen.insert(Scope))
+ if (!NodesSeen.insert(Scope).second)
return false;
Scopes.push_back(Scope);
return true;
@@ -1219,13 +1209,11 @@ bool DebugInfoFinder::addScope(DIScope Scope) {
// DIDescriptor: dump routines for all descriptors.
//===----------------------------------------------------------------------===//
-/// dump - Print descriptor to dbgs() with a newline.
void DIDescriptor::dump() const {
print(dbgs());
dbgs() << '\n';
}
-/// print - Print descriptor.
void DIDescriptor::print(raw_ostream &OS) const {
if (!DbgNode)
return;
@@ -1259,6 +1247,8 @@ void DIDescriptor::print(raw_ostream &OS) const {
DINameSpace(DbgNode).printInternal(OS);
} else if (this->isScope()) {
DIScope(DbgNode).printInternal(OS);
+ } else if (this->isExpression()) {
+ DIExpression(DbgNode).printInternal(OS);
}
}
@@ -1311,6 +1301,8 @@ void DIType::printInternal(raw_ostream &OS) const {
OS << " [private]";
else if (isProtected())
OS << " [protected]";
+ else if (isPublic())
+ OS << " [public]";
if (isArtificial())
OS << " [artificial]";
@@ -1341,7 +1333,7 @@ void DIDerivedType::printInternal(raw_ostream &OS) const {
void DICompositeType::printInternal(raw_ostream &OS) const {
DIType::printInternal(OS);
- DIArray A = getTypeArray();
+ DIArray A = getElements();
OS << " [" << A.getNumElements() << " elements]";
}
@@ -1370,6 +1362,8 @@ void DISubprogram::printInternal(raw_ostream &OS) const {
OS << " [private]";
else if (isProtected())
OS << " [protected]";
+ else if (isPublic())
+ OS << " [public]";
if (isLValueReference())
OS << " [reference]";
@@ -1406,6 +1400,30 @@ void DIVariable::printInternal(raw_ostream &OS) const {
OS << " [line " << getLineNumber() << ']';
}
+void DIExpression::printInternal(raw_ostream &OS) const {
+ for (unsigned I = 0; I < getNumElements(); ++I) {
+ uint64_t OpCode = getElement(I);
+ OS << " [" << OperationEncodingString(OpCode);
+ switch (OpCode) {
+ case DW_OP_plus: {
+ OS << " " << getElement(++I);
+ break;
+ }
+ case DW_OP_piece: {
+ unsigned Offset = getElement(++I);
+ unsigned Size = getElement(++I);
+ OS << " offset=" << Offset << ", size=" << Size;
+ break;
+ }
+ default:
+ // Else bail out early. This may be a line table entry.
+ OS << "Unknown]";
+ return;
+ }
+ OS << "]";
+ }
+}
+
void DIObjCProperty::printInternal(raw_ostream &OS) const {
StringRef Name = getObjCPropertyName();
if (!Name.empty())
@@ -1449,30 +1467,22 @@ void DIVariable::printExtendedName(raw_ostream &OS) const {
}
}
-/// Specialize constructor to make sure it has the correct type.
-template <> DIRef<DIScope>::DIRef(const Value *V) : Val(V) {
+template <> DIRef<DIScope>::DIRef(const Metadata *V) : Val(V) {
assert(isScopeRef(V) && "DIScopeRef should be a MDString or MDNode");
}
-template <> DIRef<DIType>::DIRef(const Value *V) : Val(V) {
+template <> DIRef<DIType>::DIRef(const Metadata *V) : Val(V) {
assert(isTypeRef(V) && "DITypeRef should be a MDString or MDNode");
}
-/// Specialize getFieldAs to handle fields that are references to DIScopes.
template <>
DIScopeRef DIDescriptor::getFieldAs<DIScopeRef>(unsigned Elt) const {
- return DIScopeRef(getField(DbgNode, Elt));
+ return DIScopeRef(cast_or_null<Metadata>(getField(DbgNode, Elt)));
}
-/// Specialize getFieldAs to handle fields that are references to DITypes.
template <> DITypeRef DIDescriptor::getFieldAs<DITypeRef>(unsigned Elt) const {
- return DITypeRef(getField(DbgNode, Elt));
+ return DITypeRef(cast_or_null<Metadata>(getField(DbgNode, Elt)));
}
-/// Strip debug info in the module if it exists.
-/// To do this, we remove all calls to the debugger intrinsics and any named
-/// metadata for debugging. We also remove debug locations for instructions.
-/// Return true if module is modified.
bool llvm::StripDebugInfo(Module &M) {
-
bool Changed = false;
// Remove all of the calls to the debugger intrinsics, and remove them from
@@ -1519,7 +1529,6 @@ bool llvm::StripDebugInfo(Module &M) {
return Changed;
}
-/// Return Debug Info Metadata Version by checking module flags.
unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
Value *Val = M.getModuleFlag("Debug Info Version");
if (!Val)
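
A sketch of querying the new DIExpression accessors defined above; Expr is a hypothetical DIExpression (for instance one produced by DIBuilder::createPieceExpression):

if (Expr.isVariablePiece()) {
  // The expression starts with DW_OP_piece; elements 1 and 2 are its operands.
  uint64_t OffsetInBytes = Expr.getPieceOffset();
  uint64_t SizeInBytes = Expr.getPieceSize();
  (void)OffsetInBytes;
  (void)SizeInBytes;
}
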
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index e8bdcce..718da85 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -79,14 +79,8 @@ MDNode *DebugLoc::getScopeNode(const LLVMContext &Ctx) const {
DebugLoc DebugLoc::getFnDebugLoc(const LLVMContext &Ctx) const {
const MDNode *Scope = getScopeNode(Ctx);
DISubprogram SP = getDISubprogram(Scope);
- if (SP.isSubprogram()) {
- // Check for number of operands since the compatibility is
- // cheap here. FIXME: Name the magic constant.
- if (SP->getNumOperands() > 19)
- return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
- else
- return DebugLoc::get(SP.getLineNumber(), 0, SP);
- }
+ if (SP.isSubprogram())
+ return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
return DebugLoc();
}
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index 2727063..37cce2b 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -127,20 +127,20 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const {
DP << getMsg();
}
-bool DiagnosticInfoOptimizationRemarkBase::isLocationAvailable() const {
+bool DiagnosticInfoOptimizationBase::isLocationAvailable() const {
return getDebugLoc().isUnknown() == false;
}
-void DiagnosticInfoOptimizationRemarkBase::getLocation(StringRef *Filename,
- unsigned *Line,
- unsigned *Column) const {
+void DiagnosticInfoOptimizationBase::getLocation(StringRef *Filename,
+ unsigned *Line,
+ unsigned *Column) const {
DILocation DIL(getDebugLoc().getAsMDNode(getFunction().getContext()));
*Filename = DIL.getFilename();
*Line = DIL.getLineNumber();
*Column = DIL.getColumnNumber();
}
-const std::string DiagnosticInfoOptimizationRemarkBase::getLocationStr() const {
+const std::string DiagnosticInfoOptimizationBase::getLocationStr() const {
StringRef Filename("<unknown>");
unsigned Line = 0;
unsigned Column = 0;
@@ -149,7 +149,7 @@ const std::string DiagnosticInfoOptimizationRemarkBase::getLocationStr() const {
return Twine(Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
}
-void DiagnosticInfoOptimizationRemarkBase::print(DiagnosticPrinter &DP) const {
+void DiagnosticInfoOptimizationBase::print(DiagnosticPrinter &DP) const {
DP << getLocationStr() << ": " << getMsg();
}
@@ -189,3 +189,20 @@ void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx,
Ctx.diagnose(
DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg));
}
+
+bool DiagnosticInfoOptimizationFailure::isEnabled() const {
+ // Only print warnings.
+ return getSeverity() == DS_Warning;
+}
+
+void llvm::emitLoopVectorizeWarning(LLVMContext &Ctx, const Function &Fn,
+ const DebugLoc &DLoc, const Twine &Msg) {
+ Ctx.diagnose(DiagnosticInfoOptimizationFailure(
+ Fn, DLoc, Twine("loop not vectorized: " + Msg)));
+}
+
+void llvm::emitLoopInterleaveWarning(LLVMContext &Ctx, const Function &Fn,
+ const DebugLoc &DLoc, const Twine &Msg) {
+ Ctx.diagnose(DiagnosticInfoOptimizationFailure(
+ Fn, DLoc, Twine("loop not interleaved: " + Msg)));
+}
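
As an illustrative aside (not part of the patch): a minimal sketch of how a loop transform might report a failure through the new emitLoopVectorizeWarning helper; the wrapper function and its arguments are hypothetical.

#include "llvm/ADT/Twine.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Emits a DS_Warning DiagnosticInfoOptimizationFailure whose message reads
// "loop not vectorized: <reason>".
static void reportNotVectorized(Function &F, const DebugLoc &DL,
                                const Twine &Reason) {
  emitLoopVectorizeWarning(F.getContext(), F, DL, Reason);
}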
diff --git a/lib/IR/DiagnosticPrinter.cpp b/lib/IR/DiagnosticPrinter.cpp
index 5e16026..f25fc20 100644
--- a/lib/IR/DiagnosticPrinter.cpp
+++ b/lib/IR/DiagnosticPrinter.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the a diagnostic printer relying on raw_ostream.
+// This file defines a diagnostic printer relying on raw_ostream.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 1443571..32b2ec5 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -77,11 +77,17 @@ unsigned Argument::getArgNo() const {
}
/// hasNonNullAttr - Return true if this argument has the nonnull attribute on
-/// it in its containing function.
+/// it in its containing function. Also returns true if at least one byte is
+/// known to be dereferenceable and the pointer is in addrspace(0).
bool Argument::hasNonNullAttr() const {
if (!getType()->isPointerTy()) return false;
- return getParent()->getAttributes().
- hasAttribute(getArgNo()+1, Attribute::NonNull);
+ if (getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::NonNull))
+ return true;
+ else if (getDereferenceableBytes() > 0 &&
+ getType()->getPointerAddressSpace() == 0)
+ return true;
+ return false;
}
/// hasByValAttr - Return true if this argument has the byval attribute on it
@@ -113,6 +119,12 @@ unsigned Argument::getParamAlignment() const {
}
+uint64_t Argument::getDereferenceableBytes() const {
+ assert(getType()->isPointerTy() &&
+ "Only pointers have dereferenceable bytes");
+ return getParent()->getDereferenceableBytes(getArgNo()+1);
+}
+
/// hasNestAttr - Return true if this argument has the nest attribute on
/// it in its containing function.
bool Argument::hasNestAttr() const {
@@ -154,6 +166,20 @@ bool Argument::hasReturnedAttr() const {
hasAttribute(getArgNo()+1, Attribute::Returned);
}
+/// hasZExtAttr - Return true if this argument has the zext attribute on it in
+/// its containing function.
+bool Argument::hasZExtAttr() const {
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::ZExt);
+}
+
+/// hasSExtAttr - Return true if this argument has the sext attribute on it in its
+/// containing function.
+bool Argument::hasSExtAttr() const {
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::SExt);
+}
+
/// Return true if this argument has the readonly or readnone attribute on it
/// in its containing function.
bool Argument::onlyReadsMemory() const {
@@ -187,6 +213,12 @@ void Argument::removeAttr(AttributeSet AS) {
// Helper Methods in Function
//===----------------------------------------------------------------------===//
+bool Function::isMaterializable() const {
+ return getGlobalObjectSubClassData();
+}
+
+void Function::setIsMaterializable(bool V) { setGlobalObjectSubClassData(V); }
+
LLVMContext &Function::getContext() const {
return getType()->getContext();
}
@@ -215,12 +247,13 @@ void Function::eraseFromParent() {
// Function Implementation
//===----------------------------------------------------------------------===//
-Function::Function(FunctionType *Ty, LinkageTypes Linkage,
- const Twine &name, Module *ParentModule)
- : GlobalObject(PointerType::getUnqual(Ty),
- Value::FunctionVal, nullptr, 0, Linkage, name) {
+Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
+ Module *ParentModule)
+ : GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal, nullptr, 0,
+ Linkage, name) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
+ setIsMaterializable(false);
SymTab = new ValueSymbolTable();
// If the function has arguments, mark them as lazily built.
@@ -292,6 +325,8 @@ void Function::setParent(Module *parent) {
// delete.
//
void Function::dropAllReferences() {
+ setIsMaterializable(false);
+
for (iterator I = begin(), E = end(); I != E; ++I)
I->dropAllReferences();
@@ -420,6 +455,33 @@ unsigned Function::lookupIntrinsicID() const {
return 0;
}
+/// Returns a stable mangling for the type specified for use in the name
+/// mangling scheme used by 'any' types in intrinsic signatures.
+static std::string getMangledTypeStr(Type* Ty) {
+ std::string Result;
+ if (PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
+ Result += "p" + llvm::utostr(PTyp->getAddressSpace()) +
+ getMangledTypeStr(PTyp->getElementType());
+ } else if (ArrayType* ATyp = dyn_cast<ArrayType>(Ty)) {
+ Result += "a" + llvm::utostr(ATyp->getNumElements()) +
+ getMangledTypeStr(ATyp->getElementType());
+ } else if (StructType* STyp = dyn_cast<StructType>(Ty)) {
+ if (!STyp->isLiteral())
+ Result += STyp->getName();
+ else
+ llvm_unreachable("TODO: implement literal types");
+ } else if (FunctionType* FT = dyn_cast<FunctionType>(Ty)) {
+ Result += "f_" + getMangledTypeStr(FT->getReturnType());
+ for (size_t i = 0; i < FT->getNumParams(); i++)
+ Result += getMangledTypeStr(FT->getParamType(i));
+ if (FT->isVarArg())
+ Result += "vararg";
+ Result += "f"; //ensure distinguishable
+ } else if (Ty)
+ Result += EVT::getEVT(Ty).getEVTString();
+ return Result;
+}
+
std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
assert(id < num_intrinsics && "Invalid intrinsic ID!");
static const char * const Table[] = {
@@ -432,12 +494,7 @@ std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
return Table[id];
std::string Result(Table[id]);
for (unsigned i = 0; i < Tys.size(); ++i) {
- if (PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
- Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
- EVT::getEVT(PTyp->getElementType()).getEVTString();
- }
- else if (Tys[i])
- Result += "." + EVT::getEVT(Tys[i]).getEVTString();
+ Result += "." + getMangledTypeStr(Tys[i]);
}
return Result;
}
@@ -467,19 +524,20 @@ enum IIT_Info {
IIT_ARG = 15,
// Values from 16+ are only encodable with the inefficient encoding.
- IIT_MMX = 16,
- IIT_METADATA = 17,
- IIT_EMPTYSTRUCT = 18,
- IIT_STRUCT2 = 19,
- IIT_STRUCT3 = 20,
- IIT_STRUCT4 = 21,
- IIT_STRUCT5 = 22,
- IIT_EXTEND_ARG = 23,
- IIT_TRUNC_ARG = 24,
- IIT_ANYPTR = 25,
- IIT_V1 = 26,
- IIT_VARARG = 27,
- IIT_HALF_VEC_ARG = 28
+ IIT_V64 = 16,
+ IIT_MMX = 17,
+ IIT_METADATA = 18,
+ IIT_EMPTYSTRUCT = 19,
+ IIT_STRUCT2 = 20,
+ IIT_STRUCT3 = 21,
+ IIT_STRUCT4 = 22,
+ IIT_STRUCT5 = 23,
+ IIT_EXTEND_ARG = 24,
+ IIT_TRUNC_ARG = 25,
+ IIT_ANYPTR = 26,
+ IIT_V1 = 27,
+ IIT_VARARG = 28,
+ IIT_HALF_VEC_ARG = 29
};
@@ -550,6 +608,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 32));
DecodeIITType(NextElt, Infos, OutputTable);
return;
+ case IIT_V64:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 64));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
case IIT_PTR:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
DecodeIITType(NextElt, Infos, OutputTable);
@@ -666,7 +728,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
assert(D.Struct_NumElements <= 5 && "Can't handle this yet");
for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
Elts[i] = DecodeFixedType(Infos, Tys, Context);
- return StructType::get(Context, ArrayRef<Type*>(Elts,D.Struct_NumElements));
+ return StructType::get(Context, makeArrayRef(Elts,D.Struct_NumElements));
}
case IITDescriptor::Argument:
@@ -708,6 +770,12 @@ FunctionType *Intrinsic::getType(LLVMContext &Context,
while (!TableRef.empty())
ArgTys.push_back(DecodeFixedType(TableRef, Tys, Context));
+  // DecodeFixedType returns Void for IITDescriptor::Void and IITDescriptor::VarArg.
+  // If the last argument decodes to void, this is a vararg intrinsic.
+ if (!ArgTys.empty() && ArgTys.back()->isVoidTy()) {
+ ArgTys.pop_back();
+ return FunctionType::get(ResultTy, ArgTys, true);
+ }
return FunctionType::get(ResultTy, ArgTys, false);
}
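
For concreteness, a small sketch of what the new mangling produces for an overloaded intrinsic; the wrapper function is made up, and the string in the comment is what the scheme above should yield, so treat it as an expectation rather than a guarantee.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include <string>

using namespace llvm;

std::string memcpyName(LLVMContext &Ctx) {
  Type *I8Ptr = Type::getInt8PtrTy(Ctx); // mangles as "p0i8"
  Type *I64 = Type::getInt64Ty(Ctx);     // mangles as "i64"
  // Expected: "llvm.memcpy.p0i8.p0i8.i64"
  return Intrinsic::getName(Intrinsic::memcpy, {I8Ptr, I8Ptr, I64});
}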
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index 1667401..245c500 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -298,7 +298,8 @@ uint64_t GCOVFunction::getExitCount() const {
/// dump - Dump GCOVFunction content to dbgs() for debugging purposes.
void GCOVFunction::dump() const {
- dbgs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n";
+ dbgs() << "===== " << Name << " (" << Ident << ") @ " << Filename << ":"
+ << LineNumber << "\n";
for (const auto &Block : Blocks)
Block->dump();
}
@@ -517,11 +518,11 @@ FileInfo::openCoveragePath(StringRef CoveragePath) {
if (Options.NoOutput)
return llvm::make_unique<raw_null_ostream>();
- std::string ErrorInfo;
- auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath.str().c_str(),
- ErrorInfo, sys::fs::F_Text);
- if (!ErrorInfo.empty()) {
- errs() << ErrorInfo << "\n";
+ std::error_code EC;
+ auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath.str(), EC,
+ sys::fs::F_Text);
+ if (EC) {
+ errs() << EC.message() << "\n";
return llvm::make_unique<raw_null_ostream>();
}
return std::move(OS);
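
The openCoveragePath change above moves to the std::error_code-based raw_fd_ostream constructor; a self-contained sketch of the same open-and-check pattern, with an arbitrary file name and an illustrative wrapper.

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

using namespace llvm;

bool writeReport() {
  std::error_code EC;
  raw_fd_ostream OS("report.txt", EC, sys::fs::F_Text);
  if (EC) {
    // Mirrors the fallback above, which switches to a raw_null_ostream.
    errs() << EC.message() << "\n";
    return false;
  }
  OS << "ok\n";
  return true;
}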
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index 244e3e4..e181d62 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -29,13 +29,15 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
bool GlobalValue::isMaterializable() const {
- return getParent() && getParent()->isMaterializable(this);
+ if (const Function *F = dyn_cast<Function>(this))
+ return F->isMaterializable();
+ return false;
}
bool GlobalValue::isDematerializable() const {
return getParent() && getParent()->isDematerializable(this);
}
-bool GlobalValue::Materialize(std::string *ErrInfo) {
- return getParent()->Materialize(this, ErrInfo);
+std::error_code GlobalValue::materialize() {
+ return getParent()->materialize(this);
}
void GlobalValue::Dematerialize() {
getParent()->Dematerialize(this);
@@ -77,10 +79,24 @@ void GlobalObject::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
"Alignment is greater than MaximumAlignment!");
- setGlobalValueSubClassData(Log2_32(Align) + 1);
+ unsigned AlignmentData = Log2_32(Align) + 1;
+ unsigned OldData = getGlobalValueSubClassData();
+ setGlobalValueSubClassData((OldData & ~AlignmentMask) | AlignmentData);
assert(getAlignment() == Align && "Alignment representation error!");
}
+unsigned GlobalObject::getGlobalObjectSubClassData() const {
+ unsigned ValueData = getGlobalValueSubClassData();
+ return ValueData >> AlignmentBits;
+}
+
+void GlobalObject::setGlobalObjectSubClassData(unsigned Val) {
+ unsigned OldData = getGlobalValueSubClassData();
+ setGlobalValueSubClassData((OldData & AlignmentMask) |
+ (Val << AlignmentBits));
+ assert(getGlobalObjectSubClassData() == Val && "representation error");
+}
+
void GlobalObject::copyAttributesFrom(const GlobalValue *Src) {
const auto *GV = cast<GlobalObject>(Src);
GlobalValue::copyAttributesFrom(GV);
@@ -117,7 +133,7 @@ bool GlobalValue::isDeclaration() const {
// Functions are definitions if they have a body.
if (const Function *F = dyn_cast<Function>(this))
- return F->empty();
+ return F->empty() && !F->isMaterializable();
// Aliases are always definitions.
assert(isa<GlobalAlias>(this));
@@ -230,6 +246,7 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
GlobalObject::copyAttributesFrom(Src);
const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
setThreadLocalMode(SrcVar->getThreadLocalMode());
+ setExternallyInitialized(SrcVar->isExternallyInitialized());
}
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index 435e54f..a4c5d97 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -62,7 +62,8 @@ static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
CallInst *IRBuilderBase::
CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag) {
+ bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
+ MDNode *NoAliasTag) {
Ptr = getCastedInt8PtrValue(Ptr);
Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
Type *Tys[] = { Ptr->getType(), Size->getType() };
@@ -74,13 +75,20 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
// Set the TBAA info if present.
if (TBAATag)
CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+
+ if (ScopeTag)
+ CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
+
+ if (NoAliasTag)
+ CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+
return CI;
}
CallInst *IRBuilderBase::
CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag) {
+ bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag,
+ MDNode *ScopeTag, MDNode *NoAliasTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
@@ -98,13 +106,20 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
// Set the TBAA Struct info if present.
if (TBAAStructTag)
CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
-
+
+ if (ScopeTag)
+ CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
+
+ if (NoAliasTag)
+ CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+
return CI;
}
CallInst *IRBuilderBase::
CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag) {
+ bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
+ MDNode *NoAliasTag) {
Dst = getCastedInt8PtrValue(Dst);
Src = getCastedInt8PtrValue(Src);
@@ -118,7 +133,13 @@ CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
// Set the TBAA info if present.
if (TBAATag)
CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+
+ if (ScopeTag)
+ CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
+
+ if (NoAliasTag)
+ CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+
return CI;
}
@@ -151,3 +172,14 @@ CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
return createCallHelper(TheFn, Ops, this);
}
+
+CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
+ assert(Cond->getType() == getInt1Ty() &&
+ "an assumption condition must be of type i1");
+
+ Value *Ops[] = { Cond };
+ Module *M = BB->getParent()->getParent();
+ Value *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
+ return createCallHelper(FnAssume, Ops, this);
+}
+
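A minimal sketch of using the new CreateAssumption helper to record a pointer-is-not-null fact, assuming a builder already positioned at the desired insertion point; the helper name assumeNonNull is made up.

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Emits: %nonnull = icmp ne <ty> %P, null
//        call void @llvm.assume(i1 %nonnull)
void assumeNonNull(IRBuilder<> &B, Value *P) {
  Value *NonNull =
      B.CreateICmpNE(P, Constant::getNullValue(P->getType()), "nonnull");
  B.CreateAssumption(NonNull); // the condition must be i1, as asserted above
}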
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index a3e1da3b1..16d874f 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -91,6 +91,10 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
if (*I == '~') {
Type = isClobber;
++I;
+
+ // '{' must immediately follow '~'.
+ if (I != E && *I != '{')
+ return true;
} else if (*I == '=') {
++I;
Type = isOutput;
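
To illustrate the effect of the new check: a '~' clobber must now open a brace group, so a well-formed clobber string still parses while a bare name after '~' is rejected. A small sketch, assuming ParseConstraints' usual behavior of returning an empty vector when a constraint string is malformed; the wrapper is illustrative.

#include "llvm/IR/InlineAsm.h"

using namespace llvm;

bool clobberStringsBehaveAsExpected() {
  // "~{memory}" is a valid clobber; "~memory" now fails to parse.
  return !InlineAsm::ParseConstraints("~{memory}").empty() &&
         InlineAsm::ParseConstraints("~memory").empty();
}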
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 86421c4..3ee66f5 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -143,6 +143,11 @@ void Instruction::setFastMathFlags(FastMathFlags FMF) {
cast<FPMathOperator>(this)->setFastMathFlags(FMF);
}
+void Instruction::copyFastMathFlags(FastMathFlags FMF) {
+ assert(isa<FPMathOperator>(this) && "copying fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->copyFastMathFlags(FMF);
+}
+
/// Determine whether the unsafe-algebra flag is set.
bool Instruction::hasUnsafeAlgebra() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
@@ -175,7 +180,7 @@ bool Instruction::hasAllowReciprocal() const {
/// Convenience function for getting all the fast-math flags, which must be an
/// operator which supports these flags. See LangRef.html for the meaning of
-/// these flats.
+/// these flags.
FastMathFlags Instruction::getFastMathFlags() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->getFastMathFlags();
@@ -183,7 +188,7 @@ FastMathFlags Instruction::getFastMathFlags() const {
/// Copy I's fast-math flags
void Instruction::copyFastMathFlags(const Instruction *I) {
- setFastMathFlags(I->getFastMathFlags());
+ copyFastMathFlags(I->getFastMathFlags());
}
@@ -438,6 +443,21 @@ bool Instruction::mayWriteToMemory() const {
}
}
+bool Instruction::isAtomic() const {
+ switch (getOpcode()) {
+ default:
+ return false;
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
+ case Instruction::Fence:
+ return true;
+ case Instruction::Load:
+ return cast<LoadInst>(this)->getOrdering() != NotAtomic;
+ case Instruction::Store:
+ return cast<StoreInst>(this)->getOrdering() != NotAtomic;
+ }
+}
+
bool Instruction::mayThrow() const {
if (const CallInst *CI = dyn_cast<CallInst>(this))
return !CI->doesNotThrow();
@@ -528,7 +548,7 @@ Instruction *Instruction::clone() const {
// Otherwise, enumerate and copy over metadata from the old instruction to the
// new one.
- SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
+ SmallVector<std::pair<unsigned, MDNode *>, 4> TheMDs;
getAllMetadataOtherThanDebugLoc(TheMDs);
for (const auto &MD : TheMDs)
New->setMetadata(MD.first, MD.second);
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index a5ceacb..57a4f0b 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -364,8 +364,9 @@ bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
/// IsConstantOne - Return true only if val is constant int 1
static bool IsConstantOne(Value *val) {
- assert(val && "IsConstantOne does not work with NULL val");
- return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+ assert(val && "IsConstantOne does not work with nullptr val");
+ const ConstantInt *CVal = dyn_cast<ConstantInt>(val);
+ return CVal && CVal->isOne();
}
static Instruction *createMalloc(Instruction *InsertBefore,
@@ -418,7 +419,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
Value *MallocFunc = MallocF;
if (!MallocFunc)
// prototype malloc as "void *malloc(size_t)"
- MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, nullptr);
PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
CallInst *MCall = nullptr;
Instruction *Result = nullptr;
@@ -491,7 +492,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
Type *VoidTy = Type::getVoidTy(M->getContext());
Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
// prototype free as "void free(void*)"
- Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
+ Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, nullptr);
CallInst* Result = nullptr;
Value *PtrCast = Source;
if (InsertBefore) {
@@ -2030,6 +2031,39 @@ bool BinaryOperator::isExact() const {
return cast<PossiblyExactOperator>(this)->isExact();
}
+void BinaryOperator::copyIRFlags(const Value *V) {
+ // Copy the wrapping flags.
+ if (auto *OB = dyn_cast<OverflowingBinaryOperator>(V)) {
+ setHasNoSignedWrap(OB->hasNoSignedWrap());
+ setHasNoUnsignedWrap(OB->hasNoUnsignedWrap());
+ }
+
+ // Copy the exact flag.
+ if (auto *PE = dyn_cast<PossiblyExactOperator>(V))
+ setIsExact(PE->isExact());
+
+ // Copy the fast-math flags.
+ if (auto *FP = dyn_cast<FPMathOperator>(V))
+ copyFastMathFlags(FP->getFastMathFlags());
+}
+
+void BinaryOperator::andIRFlags(const Value *V) {
+ if (auto *OB = dyn_cast<OverflowingBinaryOperator>(V)) {
+ setHasNoSignedWrap(hasNoSignedWrap() & OB->hasNoSignedWrap());
+ setHasNoUnsignedWrap(hasNoUnsignedWrap() & OB->hasNoUnsignedWrap());
+ }
+
+ if (auto *PE = dyn_cast<PossiblyExactOperator>(V))
+ setIsExact(isExact() & PE->isExact());
+
+ if (auto *FP = dyn_cast<FPMathOperator>(V)) {
+ FastMathFlags FM = getFastMathFlags();
+ FM &= FP->getFastMathFlags();
+ copyFastMathFlags(FM);
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// FPMathOperator Class
//===----------------------------------------------------------------------===//
@@ -2039,7 +2073,7 @@ bool BinaryOperator::isExact() const {
/// default precision.
float FPMathOperator::getFPAccuracy() const {
const MDNode *MD =
- cast<Instruction>(this)->getMetadata(LLVMContext::MD_fpmath);
+ cast<Instruction>(this)->getMetadata(LLVMContext::MD_fpmath);
if (!MD)
return 0.0;
ConstantFP *Accuracy = cast<ConstantFP>(MD->getOperand(0));
@@ -2478,11 +2512,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
if (Ty->isIntOrIntVectorTy())
return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
- Type *STy = S->getType();
- if (STy->getPointerAddressSpace() != Ty->getPointerAddressSpace())
- return Create(Instruction::AddrSpaceCast, S, Ty, Name, InsertAtEnd);
-
- return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return CreatePointerBitCastOrAddrSpaceCast(S, Ty, Name, InsertAtEnd);
}
/// @brief Create a BitCast or a PtrToInt cast instruction
@@ -2500,14 +2530,36 @@ CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
if (Ty->isIntOrIntVectorTy())
return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
- Type *STy = S->getType();
- if (STy->getPointerAddressSpace() != Ty->getPointerAddressSpace())
+ return CreatePointerBitCastOrAddrSpaceCast(S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreatePointerBitCastOrAddrSpaceCast(
+ Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
+ assert(Ty->isPtrOrPtrVectorTy() && "Invalid cast");
+
+ if (S->getType()->getPointerAddressSpace() != Ty->getPointerAddressSpace())
+ return Create(Instruction::AddrSpaceCast, S, Ty, Name, InsertAtEnd);
+
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreatePointerBitCastOrAddrSpaceCast(
+ Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
+ assert(Ty->isPtrOrPtrVectorTy() && "Invalid cast");
+
+ if (S->getType()->getPointerAddressSpace() != Ty->getPointerAddressSpace())
return Create(Instruction::AddrSpaceCast, S, Ty, Name, InsertBefore);
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
}
-CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
bool isSigned, const Twine &Name,
Instruction *InsertBefore) {
assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
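
An illustrative sketch of how the new copyIRFlags/andIRFlags helpers are meant to be combined when an optimization folds two binary operators into one and may only keep the flags both inputs carried; the folding itself is hypothetical.

#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Builds A0 op B1 and keeps only the nsw/nuw/exact/fast-math flags that are
// present on *both* A and B.
BinaryOperator *foldPair(BinaryOperator *A, BinaryOperator *B) {
  BinaryOperator *New = BinaryOperator::Create(
      A->getOpcode(), A->getOperand(0), B->getOperand(1), "folded");
  New->copyIRFlags(A); // start from A's flags
  New->andIRFlags(B);  // intersect with B's flags
  return New;
}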
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index de825f0..c62bc09 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -66,6 +66,33 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
unsigned InvariantLdId = getMDKindID("invariant.load");
assert(InvariantLdId == MD_invariant_load && "invariant.load kind id drifted");
(void)InvariantLdId;
+
+ // Create the 'alias.scope' metadata kind.
+ unsigned AliasScopeID = getMDKindID("alias.scope");
+ assert(AliasScopeID == MD_alias_scope && "alias.scope kind id drifted");
+ (void)AliasScopeID;
+
+ // Create the 'noalias' metadata kind.
+ unsigned NoAliasID = getMDKindID("noalias");
+ assert(NoAliasID == MD_noalias && "noalias kind id drifted");
+ (void)NoAliasID;
+
+ // Create the 'nontemporal' metadata kind.
+ unsigned NonTemporalID = getMDKindID("nontemporal");
+ assert(NonTemporalID == MD_nontemporal && "nontemporal kind id drifted");
+ (void)NonTemporalID;
+
+ // Create the 'llvm.mem.parallel_loop_access' metadata kind.
+ unsigned MemParallelLoopAccessID = getMDKindID("llvm.mem.parallel_loop_access");
+ assert(MemParallelLoopAccessID == MD_mem_parallel_loop_access &&
+ "mem_parallel_loop_access kind id drifted");
+ (void)MemParallelLoopAccessID;
+
+
+ // Create the 'nonnull' metadata kind.
+ unsigned NonNullID = getMDKindID("nonnull");
+ assert(NonNullID == MD_nonnull && "nonnull kind id drifted");
+ (void)NonNullID;
}
LLVMContext::~LLVMContext() { delete pImpl; }
@@ -102,9 +129,11 @@ void *LLVMContext::getInlineAsmDiagnosticContext() const {
}
void LLVMContext::setDiagnosticHandler(DiagnosticHandlerTy DiagnosticHandler,
- void *DiagnosticContext) {
+ void *DiagnosticContext,
+ bool RespectFilters) {
pImpl->DiagnosticHandler = DiagnosticHandler;
pImpl->DiagnosticContext = DiagnosticContext;
+ pImpl->RespectDiagnosticFilters = RespectFilters;
}
LLVMContext::DiagnosticHandlerTy LLVMContext::getDiagnosticHandler() const {
@@ -135,13 +164,7 @@ void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) {
diagnose(DiagnosticInfoInlineAsm(*I, ErrorStr));
}
-void LLVMContext::diagnose(const DiagnosticInfo &DI) {
- // If there is a report handler, use it.
- if (pImpl->DiagnosticHandler) {
- pImpl->DiagnosticHandler(DI, pImpl->DiagnosticContext);
- return;
- }
-
+static bool isDiagnosticEnabled(const DiagnosticInfo &DI) {
// Optimization remarks are selective. They need to check whether the regexp
// pattern, passed via one of the -pass-remarks* flags, matches the name of
// the pass that is emitting the diagnostic. If there is no match, ignore the
@@ -149,19 +172,32 @@ void LLVMContext::diagnose(const DiagnosticInfo &DI) {
switch (DI.getKind()) {
case llvm::DK_OptimizationRemark:
if (!cast<DiagnosticInfoOptimizationRemark>(DI).isEnabled())
- return;
+ return false;
break;
case llvm::DK_OptimizationRemarkMissed:
if (!cast<DiagnosticInfoOptimizationRemarkMissed>(DI).isEnabled())
- return;
+ return false;
break;
case llvm::DK_OptimizationRemarkAnalysis:
if (!cast<DiagnosticInfoOptimizationRemarkAnalysis>(DI).isEnabled())
- return;
+ return false;
break;
default:
break;
}
+ return true;
+}
+
+void LLVMContext::diagnose(const DiagnosticInfo &DI) {
+ // If there is a report handler, use it.
+ if (pImpl->DiagnosticHandler) {
+ if (!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI))
+ pImpl->DiagnosticHandler(DI, pImpl->DiagnosticContext);
+ return;
+ }
+
+ if (!isDiagnosticEnabled(DI))
+ return;
// Otherwise, print the message with a prefix based on the severity.
std::string MsgStorage;
@@ -217,9 +253,10 @@ unsigned LLVMContext::getMDKindID(StringRef Name) const {
assert(isValidName(Name) && "Invalid MDNode name");
// If this is new, assign it its ID.
- return
- pImpl->CustomMDKindNames.GetOrCreateValue(
- Name, pImpl->CustomMDKindNames.size()).second;
+ return pImpl->CustomMDKindNames.insert(std::make_pair(
+ Name,
+ pImpl->CustomMDKindNames.size()))
+ .first->second;
}
/// getHandlerNames - Populate client supplied smallvector using custome
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index 4c2791f..3fd0bb3 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -40,6 +40,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
InlineAsmDiagContext = nullptr;
DiagnosticHandler = nullptr;
DiagnosticContext = nullptr;
+ RespectDiagnosticFilters = false;
YieldCallback = nullptr;
YieldOpaqueHandle = nullptr;
NamedStructTypesUniqueID = 0;
@@ -75,7 +76,7 @@ LLVMContextImpl::~LLVMContextImpl() {
// Free the constants. This is important to do here to ensure that they are
// freed before the LeakDetector is torn down.
std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
- DropReferences());
+ DropFirst());
std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
DropFirst());
std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
@@ -121,20 +122,19 @@ LLVMContextImpl::~LLVMContextImpl() {
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
- SmallVector<MDNode*, 8> MDNodes;
+ SmallVector<GenericMDNode *, 8> MDNodes;
MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
- for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
- I != E; ++I)
- MDNodes.push_back(&*I);
+ MDNodes.append(MDNodeSet.begin(), MDNodeSet.end());
MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
- for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
- E = MDNodes.end(); I != E; ++I)
- (*I)->destroy();
+ for (GenericMDNode *I : MDNodes)
+ I->dropAllReferences();
+ for (GenericMDNode *I : MDNodes)
+ delete I;
assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
"Destroying all MDNodes didn't empty the Context's sets.");
// Destroy MDStrings.
- DeleteContainerSeconds(MDStringCache);
+ MDStringCache.clear();
}
// ConstantsContext anchors
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 808c239..e743ec3 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LLVMCONTEXT_IMPL_H
-#define LLVM_LLVMCONTEXT_IMPL_H
+#ifndef LLVM_LIB_IR_LLVMCONTEXTIMPL_H
+#define LLVM_LIB_IR_LLVMCONTEXTIMPL_H
#include "AttributeImpl.h"
#include "ConstantsContext.h"
@@ -22,6 +22,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -150,7 +151,7 @@ struct FunctionTypeKeyInfo {
ReturnType(R), Params(P), isVarArg(V) {}
KeyTy(const FunctionType* FT) :
ReturnType(FT->getReturnType()),
- Params(ArrayRef<Type*>(FT->param_begin(), FT->param_end())),
+ Params(makeArrayRef(FT->param_begin(), FT->param_end())),
isVarArg(FT->isVarArg()) {}
bool operator==(const KeyTy& that) const {
if (ReturnType != that.ReturnType)
@@ -190,23 +191,52 @@ struct FunctionTypeKeyInfo {
}
};
-// Provide a FoldingSetTrait::Equals specialization for MDNode that can use a
-// shortcut to avoid comparing all operands.
-template<> struct FoldingSetTrait<MDNode> : DefaultFoldingSetTrait<MDNode> {
- static bool Equals(const MDNode &X, const FoldingSetNodeID &ID,
- unsigned IDHash, FoldingSetNodeID &TempID) {
- assert(!X.isNotUniqued() && "Non-uniqued MDNode in FoldingSet?");
- // First, check if the cached hashes match. If they don't we can skip the
- // expensive operand walk.
- if (X.Hash != IDHash)
- return false;
+/// \brief DenseMapInfo for GenericMDNode.
+///
+/// Note that we don't need the is-function-local bit, since that's implicit in
+/// the operands.
+struct GenericMDNodeInfo {
+ struct KeyTy {
+ ArrayRef<Value *> Ops;
+ unsigned Hash;
+
+ KeyTy(ArrayRef<Value *> Ops)
+ : Ops(Ops), Hash(hash_combine_range(Ops.begin(), Ops.end())) {}
+
+ KeyTy(GenericMDNode *N, SmallVectorImpl<Value *> &Storage) {
+ Storage.resize(N->getNumOperands());
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Storage[I] = N->getOperand(I);
+ Ops = Storage;
+ Hash = hash_combine_range(Ops.begin(), Ops.end());
+ }
- // If they match we have to compare the operands.
- X.Profile(TempID);
- return TempID == ID;
+ bool operator==(const GenericMDNode *RHS) const {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ if (Hash != RHS->getHash() || Ops.size() != RHS->getNumOperands())
+ return false;
+ for (unsigned I = 0, E = Ops.size(); I != E; ++I)
+ if (Ops[I] != RHS->getOperand(I))
+ return false;
+ return true;
+ }
+ };
+ static inline GenericMDNode *getEmptyKey() {
+ return DenseMapInfo<GenericMDNode *>::getEmptyKey();
+ }
+ static inline GenericMDNode *getTombstoneKey() {
+ return DenseMapInfo<GenericMDNode *>::getTombstoneKey();
}
- static unsigned ComputeHash(const MDNode &X, FoldingSetNodeID &) {
- return X.Hash; // Return cached hash.
+ static unsigned getHashValue(const KeyTy &Key) { return Key.Hash; }
+ static unsigned getHashValue(const GenericMDNode *U) {
+ return U->getHash();
+ }
+ static bool isEqual(const KeyTy &LHS, const GenericMDNode *RHS) {
+ return LHS == RHS;
+ }
+ static bool isEqual(const GenericMDNode *LHS, const GenericMDNode *RHS) {
+ return LHS == RHS;
}
};
@@ -244,6 +274,7 @@ public:
LLVMContext::DiagnosticHandlerTy DiagnosticHandler;
void *DiagnosticContext;
+ bool RespectDiagnosticFilters;
LLVMContext::YieldCallbackTy YieldCallback;
void *YieldOpaqueHandle;
@@ -260,25 +291,25 @@ public:
FoldingSet<AttributeSetImpl> AttrsLists;
FoldingSet<AttributeSetNode> AttrsSetNodes;
- StringMap<Value*> MDStringCache;
+ StringMap<MDString> MDStringCache;
- FoldingSet<MDNode> MDNodeSet;
+ DenseSet<GenericMDNode *, GenericMDNodeInfo> MDNodeSet;
// MDNodes may be uniqued or not uniqued. When they're not uniqued, they
// aren't in the MDNodeSet, but they're still shared between objects, so no
// one object can destroy them. This set allows us to at least destroy them
// on Context destruction.
- SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
-
+ SmallPtrSet<GenericMDNode *, 1> NonUniquedMDNodes;
+
DenseMap<Type*, ConstantAggregateZero*> CAZConstants;
- typedef ConstantAggrUniqueMap<ArrayType, ConstantArray> ArrayConstantsTy;
+ typedef ConstantUniqueMap<ConstantArray> ArrayConstantsTy;
ArrayConstantsTy ArrayConstants;
- typedef ConstantAggrUniqueMap<StructType, ConstantStruct> StructConstantsTy;
+ typedef ConstantUniqueMap<ConstantStruct> StructConstantsTy;
StructConstantsTy StructConstants;
- typedef ConstantAggrUniqueMap<VectorType, ConstantVector> VectorConstantsTy;
+ typedef ConstantUniqueMap<ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
DenseMap<PointerType*, ConstantPointerNull*> CPNConstants;
@@ -289,12 +320,10 @@ public:
DenseMap<std::pair<const Function *, const BasicBlock *>, BlockAddress *>
BlockAddresses;
- ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
- ExprConstants;
+ ConstantUniqueMap<ConstantExpr> ExprConstants;
+
+ ConstantUniqueMap<InlineAsm> InlineAsms;
- ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
- InlineAsm> InlineAsms;
-
ConstantInt *TheTrueVal;
ConstantInt *TheFalseVal;
diff --git a/lib/IR/LeaksContext.h b/lib/IR/LeaksContext.h
index 52ac170..3e485ab 100644
--- a/lib/IR/LeaksContext.h
+++ b/lib/IR/LeaksContext.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_IR_LEAKSCONTEXT_H
-#define LLVM_IR_LEAKSCONTEXT_H
+#ifndef LLVM_LIB_IR_LEAKSCONTEXT_H
+#define LLVM_LIB_IR_LEAKSCONTEXT_H
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Value.h"
@@ -95,4 +95,4 @@ private:
}
-#endif // LLVM_IR_LEAKSCONTEXT_H
+#endif
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index d3f3482..28fa74c 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -573,9 +573,8 @@ void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
return;
SmallPtrSet<Pass *, 8> &LU = DMI->second;
- for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
- E = LU.end(); I != E; ++I) {
- LastUses.push_back(*I);
+ for (Pass *LUP : LU) {
+ LastUses.push_back(LUP);
}
}
@@ -1404,11 +1403,8 @@ void FunctionPassManager::add(Pass *P) {
/// so, return true.
///
bool FunctionPassManager::run(Function &F) {
- if (F.isMaterializable()) {
- std::string errstr;
- if (F.Materialize(&errstr))
- report_fatal_error("Error reading bitcode file: " + Twine(errstr));
- }
+ if (std::error_code EC = F.materialize())
+ report_fatal_error("Error reading bitcode file: " + EC.message());
return FPM->run(F);
}
@@ -1684,7 +1680,7 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
if (!FoundPass) {
FoundPass = RequiredPass;
// This should be guaranteed to add RequiredPass to the passmanager given
- // that we checked for an avaiable analysis above.
+ // that we checked for an available analysis above.
FPP->add(RequiredPass);
}
// Register P as the last user of FoundPass or RequiredPass.
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
index 65cdf38..3ec613c 100644
--- a/lib/IR/MDBuilder.cpp
+++ b/lib/IR/MDBuilder.cpp
@@ -60,10 +60,17 @@ MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
return MDNode::get(Context, Range);
}
-MDNode *MDBuilder::createAnonymousTBAARoot() {
+MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
// To ensure uniqueness the root node is self-referential.
- MDNode *Dummy = MDNode::getTemporary(Context, ArrayRef<Value *>());
- MDNode *Root = MDNode::get(Context, Dummy);
+ MDNode *Dummy = MDNode::getTemporary(Context, None);
+
+ SmallVector<Value *, 3> Args(1, Dummy);
+ if (Extra)
+ Args.push_back(Extra);
+ if (!Name.empty())
+ Args.push_back(createString(Name));
+ MDNode *Root = MDNode::get(Context, Args);
+
// At this point we have
// !0 = metadata !{} <- dummy
// !1 = metadata !{metadata !0} <- root
@@ -93,6 +100,15 @@ MDNode *MDBuilder::createTBAANode(StringRef Name, MDNode *Parent,
}
}
+MDNode *MDBuilder::createAliasScopeDomain(StringRef Name) {
+ return MDNode::get(Context, createString(Name));
+}
+
+MDNode *MDBuilder::createAliasScope(StringRef Name, MDNode *Domain) {
+ Value *Ops[2] = { createString(Name), Domain };
+ return MDNode::get(Context, Ops);
+}
+
/// \brief Return metadata for a tbaa.struct node with the given
/// struct field descriptions.
MDNode *MDBuilder::createTBAAStructNode(ArrayRef<TBAAStructField> Fields) {
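
A minimal sketch of building an alias-scope domain and one scope inside it with the new helpers; the strings and the wrapper are placeholders, and the returned node would typically be attached to loads and stores via !alias.scope / !noalias metadata.

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

MDNode *makeScope(LLVMContext &Ctx) {
  MDBuilder MDB(Ctx);
  MDNode *Domain = MDB.createAliasScopeDomain("inlined callee");
  // A scope names itself and the domain it belongs to.
  return MDB.createAliasScope("pointer %p", Domain);
}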
diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp
index 27d973b..5eeb797 100644
--- a/lib/IR/Mangler.cpp
+++ b/lib/IR/Mangler.cpp
@@ -22,23 +22,25 @@ using namespace llvm;
static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
Mangler::ManglerPrefixTy PrefixTy,
- const DataLayout &DL, bool UseAt) {
+ const DataLayout &DL, char Prefix) {
SmallString<256> TmpData;
StringRef Name = GVName.toStringRef(TmpData);
assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
+ // No need to do anything special if the global has the special "do not
+ // mangle" flag in the name.
+ if (Name[0] == '\1') {
+ OS << Name.substr(1);
+ return;
+ }
+
if (PrefixTy == Mangler::Private)
OS << DL.getPrivateGlobalPrefix();
else if (PrefixTy == Mangler::LinkerPrivate)
OS << DL.getLinkerPrivateGlobalPrefix();
- if (UseAt) {
- OS << '@';
- } else {
- char Prefix = DL.getGlobalPrefix();
- if (Prefix != '\0')
- OS << Prefix;
- }
+ if (Prefix != '\0')
+ OS << Prefix;
// If this is a simple string that doesn't need escaping, just append it.
OS << Name;
@@ -46,7 +48,8 @@ static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
void Mangler::getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
ManglerPrefixTy PrefixTy) const {
- return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, false);
+ char Prefix = DL->getGlobalPrefix();
+ return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, Prefix);
}
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
@@ -56,11 +59,21 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
return getNameWithPrefix(OS, GVName, PrefixTy);
}
-/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
-/// a suffix on their name indicating the number of words of arguments they
-/// take.
-static void AddFastCallStdCallSuffix(raw_ostream &OS, const Function *F,
- const DataLayout &TD) {
+static bool hasByteCountSuffix(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_VectorCall:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Microsoft fastcall and stdcall functions require a suffix on their name
+/// indicating the number of words of arguments they take.
+static void addByteCountSuffix(raw_ostream &OS, const Function *F,
+ const DataLayout &TD) {
// Calculate arguments size total.
unsigned ArgWords = 0;
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
@@ -69,8 +82,9 @@ static void AddFastCallStdCallSuffix(raw_ostream &OS, const Function *F,
// 'Dereference' type in case of byval or inalloca parameter attribute.
if (AI->hasByValOrInAllocaAttr())
Ty = cast<PointerType>(Ty)->getElementType();
- // Size should be aligned to DWORD boundary
- ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ // Size should be aligned to pointer size.
+ unsigned PtrSize = TD.getPointerSize();
+ ArgWords += RoundUpToAlignment(TD.getTypeAllocSize(Ty), PtrSize);
}
OS << '@' << ArgWords;
@@ -99,41 +113,41 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
}
StringRef Name = GV->getName();
-
- // No need to do anything special if the global has the special "do not
- // mangle" flag in the name.
- if (Name[0] == '\1') {
- OS << Name.substr(1);
- return;
- }
-
- bool UseAt = false;
- const Function *MSFunc = nullptr;
- CallingConv::ID CC;
- if (DL->hasMicrosoftFastStdCallMangling()) {
- if ((MSFunc = dyn_cast<Function>(GV))) {
- CC = MSFunc->getCallingConv();
- // fastcall functions need to start with @ instead of _.
- if (CC == CallingConv::X86_FastCall)
- UseAt = true;
- }
+ char Prefix = DL->getGlobalPrefix();
+
+ // Mangle functions with Microsoft calling conventions specially. Only do
+ // this mangling for x86_64 vectorcall and 32-bit x86.
+ const Function *MSFunc = dyn_cast<Function>(GV);
+ if (Name.startswith("\01"))
+ MSFunc = nullptr; // Don't mangle when \01 is present.
+ CallingConv::ID CC =
+ MSFunc ? MSFunc->getCallingConv() : (unsigned)CallingConv::C;
+ if (!DL->hasMicrosoftFastStdCallMangling() &&
+ CC != CallingConv::X86_VectorCall)
+ MSFunc = nullptr;
+ if (MSFunc) {
+ if (CC == CallingConv::X86_FastCall)
+ Prefix = '@'; // fastcall functions have an @ prefix instead of _.
+ else if (CC == CallingConv::X86_VectorCall)
+ Prefix = '\0'; // vectorcall functions have no prefix.
}
- getNameWithPrefixx(OS, Name, PrefixTy, *DL, UseAt);
+ getNameWithPrefixx(OS, Name, PrefixTy, *DL, Prefix);
if (!MSFunc)
return;
- // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
- // add it.
- // fastcall and stdcall functions usually need @42 at the end to specify
- // the argument info.
+ // If we are supposed to add a microsoft-style suffix for stdcall, fastcall,
+ // or vectorcall, add it. These functions have a suffix of @N where N is the
+ // cumulative byte size of all of the parameters to the function in decimal.
+ if (CC == CallingConv::X86_VectorCall)
+ OS << '@'; // vectorcall functions use a double @ suffix.
FunctionType *FT = MSFunc->getFunctionType();
- if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+ if (hasByteCountSuffix(CC) &&
// "Pure" variadic functions do not receive @0 suffix.
(!FT->isVarArg() || FT->getNumParams() == 0 ||
(FT->getNumParams() == 1 && MSFunc->hasStructRetAttr())))
- AddFastCallStdCallSuffix(OS, MSFunc, *DL);
+ addByteCountSuffix(OS, MSFunc, *DL);
}
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
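
As a worked example of the byte-count suffix (assuming 32-bit x86, where int and pointer arguments are 4 bytes each): a function void f(int, int) accumulates ArgWords = 8, so stdcall mangles it to "_f@8", fastcall to "@f@8" (the '@' prefix replaces '_'), and vectorcall to "f@@8" (no prefix, with the extra '@' emitted before the byte count).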
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 59137e4..27ba9f7 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -25,25 +25,34 @@
#include "llvm/IR/LeakDetector.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
+
using namespace llvm;
+Metadata::Metadata(LLVMContext &Context, unsigned ID)
+ : Value(Type::getMetadataTy(Context), ID) {}
+
//===----------------------------------------------------------------------===//
// MDString implementation.
//
void MDString::anchor() { }
-MDString::MDString(LLVMContext &C)
- : Value(Type::getMetadataTy(C), Value::MDStringVal) {}
-
MDString *MDString::get(LLVMContext &Context, StringRef Str) {
- LLVMContextImpl *pImpl = Context.pImpl;
- StringMapEntry<Value*> &Entry =
- pImpl->MDStringCache.GetOrCreateValue(Str);
- Value *&S = Entry.getValue();
- if (!S) S = new MDString(Context);
- S->setValueName(&Entry);
- return cast<MDString>(S);
+ auto &Store = Context.pImpl->MDStringCache;
+ auto I = Store.find(Str);
+ if (I != Store.end())
+ return &I->second;
+
+ auto *Entry =
+ StringMapEntry<MDString>::Create(Str, Store.getAllocator(), Context);
+ bool WasInserted = Store.insert(Entry);
+ (void)WasInserted;
+ assert(WasInserted && "Expected entry to be inserted");
+ return &Entry->second;
+}
+
+StringRef MDString::getString() const {
+ return StringMapEntry<MDString>::GetStringMapEntryFromValue(*this).first();
}
//===----------------------------------------------------------------------===//
@@ -57,26 +66,25 @@ class MDNodeOperand : public CallbackVH {
MDNodeOperand *Cur = this;
while (Cur->getValPtrInt() != 1)
- --Cur;
+ ++Cur;
assert(Cur->getValPtrInt() == 1 &&
- "Couldn't find the beginning of the operand list!");
- return reinterpret_cast<MDNode*>(Cur) - 1;
+ "Couldn't find the end of the operand list!");
+ return reinterpret_cast<MDNode *>(Cur + 1);
}
public:
- MDNodeOperand(Value *V) : CallbackVH(V) {}
+ MDNodeOperand() {}
virtual ~MDNodeOperand();
void set(Value *V) {
- unsigned IsFirst = this->getValPtrInt();
+ unsigned IsLast = this->getValPtrInt();
this->setValPtr(V);
- this->setAsFirstOperand(IsFirst);
+ this->setAsLastOperand(IsLast);
}
- /// setAsFirstOperand - Accessor method to mark the operand as the first in
- /// the list.
- void setAsFirstOperand(unsigned V) { this->setValPtrInt(V); }
+  /// \brief Accessor method to mark the operand as the last in the list.
+ void setAsLastOperand(unsigned I) { this->setValPtrInt(I); }
void deleted() override;
void allUsesReplacedWith(Value *NV) override;
@@ -98,12 +106,11 @@ void MDNodeOperand::allUsesReplacedWith(Value *NV) {
// MDNode implementation.
//
-/// getOperandPtr - Helper function to get the MDNodeOperand's coallocated on
-/// the end of the MDNode.
+/// \brief Get one of the MDNodeOperands co-allocated with an MDNode.
static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
// Use <= instead of < to permit a one-past-the-end address.
assert(Op <= N->getNumOperands() && "Invalid operand number");
- return reinterpret_cast<MDNodeOperand*>(N + 1) + Op;
+ return reinterpret_cast<MDNodeOperand *>(N) - N->getNumOperands() + Op;
}
void MDNode::replaceOperandWith(unsigned i, Value *Val) {
@@ -111,40 +118,54 @@ void MDNode::replaceOperandWith(unsigned i, Value *Val) {
replaceOperand(Op, Val);
}
-MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
-: Value(Type::getMetadataTy(C), Value::MDNodeVal) {
+void *MDNode::operator new(size_t Size, unsigned NumOps) {
+ void *Ptr = ::operator new(Size + NumOps * sizeof(MDNodeOperand));
+ MDNodeOperand *Op = static_cast<MDNodeOperand *>(Ptr);
+ if (NumOps) {
+ MDNodeOperand *Last = Op + NumOps;
+ for (; Op != Last; ++Op)
+ new (Op) MDNodeOperand();
+ (Op - 1)->setAsLastOperand(1);
+ }
+ return Op;
+}
+
+void MDNode::operator delete(void *Mem) {
+ MDNode *N = static_cast<MDNode *>(Mem);
+ MDNodeOperand *Op = static_cast<MDNodeOperand *>(Mem);
+ for (unsigned I = 0, E = N->NumOperands; I != E; ++I)
+ (--Op)->~MDNodeOperand();
+ ::operator delete(Op);
+}
+
+MDNode::MDNode(LLVMContext &C, unsigned ID, ArrayRef<Value *> Vals,
+ bool isFunctionLocal)
+ : Metadata(C, ID) {
NumOperands = Vals.size();
if (isFunctionLocal)
setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit);
- // Initialize the operand list, which is co-allocated on the end of the node.
+ // Initialize the operand list.
unsigned i = 0;
- for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
- Op != E; ++Op, ++i) {
- new (Op) MDNodeOperand(Vals[i]);
-
- // Mark the first MDNodeOperand as being the first in the list of operands.
- if (i == 0)
- Op->setAsFirstOperand(1);
- }
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op + NumOperands;
+ Op != E; ++Op, ++i)
+ Op->set(Vals[i]);
}
-/// ~MDNode - Destroy MDNode.
-MDNode::~MDNode() {
- assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
- "Not being destroyed through destroy()?");
+GenericMDNode::~GenericMDNode() {
LLVMContextImpl *pImpl = getType()->getContext().pImpl;
if (isNotUniqued()) {
pImpl->NonUniquedMDNodes.erase(this);
} else {
- pImpl->MDNodeSet.RemoveNode(this);
+ pImpl->MDNodeSet.erase(this);
}
+}
- // Destroy the operands.
- for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+void GenericMDNode::dropAllReferences() {
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op + NumOperands;
Op != E; ++Op)
- Op->~MDNodeOperand();
+ Op->set(nullptr);
}
static const Function *getFunctionForValue(Value *V) {
@@ -201,16 +222,7 @@ const Function *MDNode::getFunction() const {
#endif
}
-// destroy - Delete this node. Only when there are no uses.
-void MDNode::destroy() {
- setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
- // Placement delete, then free the memory.
- this->~MDNode();
- free(this);
-}
-
-/// isFunctionLocalValue - Return true if this is a value that would require a
-/// function-local MDNode.
+/// \brief Check if the Value would require a function-local MDNode.
static bool isFunctionLocalValue(Value *V) {
return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
(isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal());
@@ -218,21 +230,14 @@ static bool isFunctionLocalValue(Value *V) {
MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
FunctionLocalness FL, bool Insert) {
- LLVMContextImpl *pImpl = Context.pImpl;
-
- // Add all the operand pointers. Note that we don't have to add the
- // isFunctionLocal bit because that's implied by the operands.
- // Note that if the operands are later nulled out, the node will be
- // removed from the uniquing map.
- FoldingSetNodeID ID;
- for (Value *V : Vals)
- ID.AddPointer(V);
-
- void *InsertPoint;
- MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+ auto &Store = Context.pImpl->MDNodeSet;
- if (N || !Insert)
- return N;
+ GenericMDNodeInfo::KeyTy Key(Vals);
+ auto I = Store.find_as(Key);
+ if (I != Store.end())
+ return *I;
+ if (!Insert)
+ return nullptr;
bool isFunctionLocal = false;
switch (FL) {
@@ -254,15 +259,11 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
}
// Coallocate space for the node and Operands together, then placement new.
- void *Ptr = malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand));
- N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
-
- // Cache the operand hash.
- N->Hash = ID.ComputeHash();
-
- // InsertPoint will have been set by the FindNodeOrInsertPos call.
- pImpl->MDNodeSet.InsertNode(N, InsertPoint);
+ GenericMDNode *N =
+ new (Vals.size()) GenericMDNode(Context, Vals, isFunctionLocal);
+ N->Hash = Key.Hash;
+ Store.insert(N);
return N;
}
@@ -281,48 +282,33 @@ MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals) {
}
MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals) {
- MDNode *N =
- (MDNode *)malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand));
- N = new (N) MDNode(Context, Vals, FL_No);
- N->setValueSubclassData(N->getSubclassDataFromValue() |
- NotUniquedBit);
+ MDNode *N = new (Vals.size()) MDNodeFwdDecl(Context, Vals, FL_No);
+ N->setValueSubclassData(N->getSubclassDataFromValue() | NotUniquedBit);
LeakDetector::addGarbageObject(N);
return N;
}
void MDNode::deleteTemporary(MDNode *N) {
assert(N->use_empty() && "Temporary MDNode has uses!");
- assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) &&
- "Deleting a non-temporary uniqued node!");
- assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) &&
- "Deleting a non-temporary non-uniqued node!");
+ assert(isa<MDNodeFwdDecl>(N) && "Expected forward declaration");
assert((N->getSubclassDataFromValue() & NotUniquedBit) &&
"Temporary MDNode does not have NotUniquedBit set!");
- assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 &&
- "Temporary MDNode has DestroyFlag set!");
LeakDetector::removeGarbageObject(N);
- N->destroy();
+ delete cast<MDNodeFwdDecl>(N);
}
-/// getOperand - Return specified operand.
+/// \brief Return specified operand.
Value *MDNode::getOperand(unsigned i) const {
assert(i < getNumOperands() && "Invalid operand number");
return *getOperandPtr(const_cast<MDNode*>(this), i);
}
-void MDNode::Profile(FoldingSetNodeID &ID) const {
- // Add all the operand pointers. Note that we don't have to add the
- // isFunctionLocal bit because that's implied by the operands.
- // Note that if the operands are later nulled out, the node will be
- // removed from the uniquing map.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- ID.AddPointer(getOperand(i));
-}
-
void MDNode::setIsNotUniqued() {
setValueSubclassData(getSubclassDataFromValue() | NotUniquedBit);
LLVMContextImpl *pImpl = getType()->getContext().pImpl;
- pImpl->NonUniquedMDNodes.insert(this);
+ auto *G = cast<GenericMDNode>(this);
+ G->Hash = 0;
+ pImpl->NonUniquedMDNodes.insert(G);
}
// Replace value from this node's operand list.
@@ -350,44 +336,45 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
if (From == To)
return;
- // Update the operand.
- Op->set(To);
-
// If this node is already not being uniqued (because one of the operands
// already went to null), then there is nothing else to do here.
- if (isNotUniqued()) return;
+ if (isNotUniqued()) {
+ Op->set(To);
+ return;
+ }
- LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ auto &Store = getContext().pImpl->MDNodeSet;
+ auto *N = cast<GenericMDNode>(this);
- // Remove "this" from the context map. FoldingSet doesn't have to reprofile
- // this node to remove it, so we don't care what state the operands are in.
- pImpl->MDNodeSet.RemoveNode(this);
+ // Remove "this" from the context map.
+ Store.erase(N);
+
+ // Update the operand.
+ Op->set(To);
// If we are dropping an argument to null, we choose to not unique the MDNode
// anymore. This commonly occurs during destruction, and uniquing these
// brings little reuse. Also, this means we don't need to include
- // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
+ // isFunctionLocal bits in the hash for MDNodes.
if (!To) {
setIsNotUniqued();
return;
}
- // Now that the node is out of the folding set, get ready to reinsert it.
- // First, check to see if another node with the same operands already exists
- // in the set. If so, then this node is redundant.
- FoldingSetNodeID ID;
- Profile(ID);
- void *InsertPoint;
- if (MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)) {
- replaceAllUsesWith(N);
- destroy();
+ // Now that the node is out of the table, get ready to reinsert it. First,
+ // check to see if another node with the same operands already exists in the
+ // set. If so, then this node is redundant.
+ SmallVector<Value *, 8> Vals;
+ GenericMDNodeInfo::KeyTy Key(N, Vals);
+ auto I = Store.find_as(Key);
+ if (I != Store.end()) {
+ N->replaceAllUsesWith(*I);
+ delete N;
return;
}
- // Cache the operand hash.
- Hash = ID.ComputeHash();
- // InsertPoint will have been set by the FindNodeOrInsertPos call.
- pImpl->MDNodeSet.InsertNode(this, InsertPoint);
+ N->Hash = Key.Hash;
+ Store.insert(N);
// If this MDValue was previously function-local but no longer is, clear
// its function-local flag.
@@ -406,6 +393,41 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
}
}
+MDNode *MDNode::concatenate(MDNode *A, MDNode *B) {
+ if (!A)
+ return B;
+ if (!B)
+ return A;
+
+ SmallVector<Value *, 4> Vals(A->getNumOperands() +
+ B->getNumOperands());
+
+ unsigned j = 0;
+ for (unsigned i = 0, ie = A->getNumOperands(); i != ie; ++i)
+ Vals[j++] = A->getOperand(i);
+ for (unsigned i = 0, ie = B->getNumOperands(); i != ie; ++i)
+ Vals[j++] = B->getOperand(i);
+
+ return MDNode::get(A->getContext(), Vals);
+}
+
+MDNode *MDNode::intersect(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return nullptr;
+
+ SmallVector<Value *, 4> Vals;
+ for (unsigned i = 0, ie = A->getNumOperands(); i != ie; ++i) {
+ Value *V = A->getOperand(i);
+ for (unsigned j = 0, je = B->getNumOperands(); j != je; ++j)
+ if (V == B->getOperand(j)) {
+ Vals.push_back(V);
+ break;
+ }
+ }
+
+ return MDNode::get(A->getContext(), Vals);
+}
+
MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
if (!A || !B)
return nullptr;
@@ -524,49 +546,41 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
//
static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
- return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands;
+ return *(SmallVector<TrackingVH<MDNode>, 4> *)Operands;
}
NamedMDNode::NamedMDNode(const Twine &N)
- : Name(N.str()), Parent(nullptr),
- Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
-}
+ : Name(N.str()), Parent(nullptr),
+ Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {}
NamedMDNode::~NamedMDNode() {
dropAllReferences();
delete &getNMDOps(Operands);
}
-/// getNumOperands - Return number of NamedMDNode operands.
unsigned NamedMDNode::getNumOperands() const {
return (unsigned)getNMDOps(Operands).size();
}
-/// getOperand - Return specified operand.
MDNode *NamedMDNode::getOperand(unsigned i) const {
assert(i < getNumOperands() && "Invalid Operand number!");
- return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]);
+ return &*getNMDOps(Operands)[i];
}
-/// addOperand - Add metadata Operand.
void NamedMDNode::addOperand(MDNode *M) {
assert(!M->isFunctionLocal() &&
"NamedMDNode operands must not be function-local!");
getNMDOps(Operands).push_back(TrackingVH<MDNode>(M));
}
-/// eraseFromParent - Drop all references and remove the node from parent
-/// module.
void NamedMDNode::eraseFromParent() {
getParent()->eraseNamedMetadata(this);
}
-/// dropAllReferences - Remove all uses and clear node vector.
void NamedMDNode::dropAllReferences() {
getNMDOps(Operands).clear();
}
-/// getName - Return a constant reference to this named metadata's name.
StringRef NamedMDNode::getName() const {
return StringRef(Name);
}
@@ -576,7 +590,8 @@ StringRef NamedMDNode::getName() const {
//
void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
- if (!Node && !hasMetadata()) return;
+ if (!Node && !hasMetadata())
+ return;
setMetadata(getContext().getMDKindID(Kind), Node);
}
@@ -632,7 +647,8 @@ void Instruction::dropUnknownMetadata(ArrayRef<unsigned> KnownIDs) {
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
- if (!Node && !hasMetadata()) return;
+ if (!Node && !hasMetadata())
+ return;
// Handle 'dbg' as a special case since it is not stored in the hash table.
if (KindID == LLVMContext::MD_dbg) {
@@ -687,6 +703,12 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
// Otherwise, removing an entry that doesn't exist on the instruction.
}
+void Instruction::setAAMetadata(const AAMDNodes &N) {
+ setMetadata(LLVMContext::MD_tbaa, N.TBAA);
+ setMetadata(LLVMContext::MD_alias_scope, N.Scope);
+ setMetadata(LLVMContext::MD_noalias, N.NoAlias);
+}
+
MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
// Handle 'dbg' as a special case since it is not stored in the hash table.
if (KindID == LLVMContext::MD_dbg)
@@ -703,8 +725,8 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
return nullptr;
}
-void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
- MDNode*> > &Result) const {
+void Instruction::getAllMetadataImpl(
+ SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
Result.clear();
// Handle 'dbg' as a special case since it is not stored in the hash table.
@@ -728,9 +750,8 @@ void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
array_pod_sort(Result.begin(), Result.end());
}
-void Instruction::
-getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
- MDNode*> > &Result) const {
+void Instruction::getAllMetadataOtherThanDebugLocImpl(
+ SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
Result.clear();
assert(hasMetadataHashEntry() &&
getContext().pImpl->MetadataStore.count(this) &&
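(Aside, not part of the patch: the new MDNode::concatenate and MDNode::intersect helpers above simply merge operand lists. A minimal standalone sketch of the same semantics on plain std::vector values, with ints standing in for operand pointers, is shown below; it is illustrative only and does not use the LLVM API.)

#include <algorithm>
#include <cassert>
#include <vector>

// concatenate: all elements of A followed by all elements of B.
static std::vector<int> concatenate(const std::vector<int> &A,
                                    const std::vector<int> &B) {
  std::vector<int> Out(A);
  Out.insert(Out.end(), B.begin(), B.end());
  return Out;
}

// intersect: keep each element of A that also occurs somewhere in B,
// preserving A's order (mirrors the nested-loop scan in the patch).
static std::vector<int> intersect(const std::vector<int> &A,
                                  const std::vector<int> &B) {
  std::vector<int> Out;
  for (int V : A)
    if (std::find(B.begin(), B.end(), V) != B.end())
      Out.push_back(V);
  return Out;
}

int main() {
  assert(concatenate({1, 2}, {2, 3}) == (std::vector<int>{1, 2, 2, 3}));
  assert(intersect({1, 2, 4}, {2, 3, 4}) == (std::vector<int>{2, 4}));
  return 0;
}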
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index f1b1f9a..14e534b 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -259,6 +259,17 @@ void Module::eraseNamedMetadata(NamedMDNode *NMD) {
NamedMDList.erase(NMD);
}
+bool Module::isValidModFlagBehavior(Value *V, ModFlagBehavior &MFB) {
+ if (ConstantInt *Behavior = dyn_cast<ConstantInt>(V)) {
+ uint64_t Val = Behavior->getLimitedValue();
+ if (Val >= ModFlagBehaviorFirstVal && Val <= ModFlagBehaviorLastVal) {
+ MFB = static_cast<ModFlagBehavior>(Val);
+ return true;
+ }
+ }
+ return false;
+}
+
/// getModuleFlagsMetadata - Returns the module flags in the provided vector.
void Module::
getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
@@ -266,15 +277,15 @@ getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
if (!ModFlags) return;
for (const MDNode *Flag : ModFlags->operands()) {
- if (Flag->getNumOperands() >= 3 && isa<ConstantInt>(Flag->getOperand(0)) &&
+ ModFlagBehavior MFB;
+ if (Flag->getNumOperands() >= 3 &&
+ isValidModFlagBehavior(Flag->getOperand(0), MFB) &&
isa<MDString>(Flag->getOperand(1))) {
// Check the operands of the MDNode before accessing the operands.
// The verifier will actually catch these failures.
- ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
MDString *Key = cast<MDString>(Flag->getOperand(1));
Value *Val = Flag->getOperand(2);
- Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
- Key, Val));
+ Flags.push_back(ModuleFlagEntry(MFB, Key, Val));
}
}
}
@@ -378,28 +389,17 @@ void Module::setMaterializer(GVMaterializer *GVM) {
Materializer.reset(GVM);
}
-bool Module::isMaterializable(const GlobalValue *GV) const {
- if (Materializer)
- return Materializer->isMaterializable(GV);
- return false;
-}
-
bool Module::isDematerializable(const GlobalValue *GV) const {
if (Materializer)
return Materializer->isDematerializable(GV);
return false;
}
-bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+std::error_code Module::materialize(GlobalValue *GV) {
if (!Materializer)
- return false;
+ return std::error_code();
- std::error_code EC = Materializer->Materialize(GV);
- if (!EC)
- return false;
- if (ErrInfo)
- *ErrInfo = EC.message();
- return true;
+ return Materializer->materialize(GV);
}
void Module::Dematerialize(GlobalValue *GV) {
@@ -413,13 +413,10 @@ std::error_code Module::materializeAll() {
return Materializer->MaterializeModule(this);
}
-std::error_code Module::materializeAllPermanently(bool ReleaseBuffer) {
+std::error_code Module::materializeAllPermanently() {
if (std::error_code EC = materializeAll())
return EC;
- if (ReleaseBuffer)
- Materializer->releaseBuffer();
-
Materializer.reset();
return std::error_code();
}
@@ -455,9 +452,20 @@ unsigned Module::getDwarfVersion() const {
}
Comdat *Module::getOrInsertComdat(StringRef Name) {
- Comdat C;
- StringMapEntry<Comdat> &Entry =
- ComdatSymTab.GetOrCreateValue(Name, std::move(C));
+ auto &Entry = *ComdatSymTab.insert(std::make_pair(Name, Comdat())).first;
Entry.second.Name = &Entry;
return &Entry.second;
}
+
+PICLevel::Level Module::getPICLevel() const {
+ Value *Val = getModuleFlag("PIC Level");
+
+ if (Val == NULL)
+ return PICLevel::Default;
+
+ return static_cast<PICLevel::Level>(cast<ConstantInt>(Val)->getZExtValue());
+}
+
+void Module::setPICLevel(PICLevel::Level PL) {
+ addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL);
+}
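(Aside: Module::isValidModFlagBehavior above range-checks the integer before casting it to the ModFlagBehavior enum, so malformed module flags are rejected rather than silently becoming an out-of-range enum value. A minimal sketch of that validate-then-cast idiom with an illustrative enum follows; the names and values are made up, the real ones live in llvm/IR/Module.h.)

#include <cstdint>
#include <iostream>

// Illustrative enum only; not the LLVM definition.
enum Behavior : uint64_t { FirstVal = 1, Error = 1, Warning, Require, LastVal = Require };

static bool isValidBehavior(uint64_t Val, Behavior &Out) {
  if (Val >= FirstVal && Val <= LastVal) {
    Out = static_cast<Behavior>(Val);
    return true;
  }
  return false;
}

int main() {
  Behavior B;
  std::cout << isValidBehavior(2, B) << '\n';  // 1: maps to Warning
  std::cout << isValidBehavior(99, B) << '\n'; // 0: rejected instead of cast
  return 0;
}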
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
index 0defb6a..2e2a7cb 100644
--- a/lib/IR/PassManager.cpp
+++ b/lib/IR/PassManager.cpp
@@ -53,7 +53,7 @@ ModuleAnalysisManager::getResultImpl(void *PassID, Module *M) {
// If we don't have a cached result for this module, look up the pass and run
// it to produce a result, which we then add to the cache.
if (Inserted)
- RI->second = std::move(lookupPass(PassID).run(M, this));
+ RI->second = lookupPass(PassID).run(M, this);
return *RI->second;
}
diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp
index 91940a9..b879fef 100644
--- a/lib/IR/PassRegistry.cpp
+++ b/lib/IR/PassRegistry.cpp
@@ -36,8 +36,7 @@ PassRegistry *PassRegistry::getPassRegistry() {
// Accessors
//
-PassRegistry::~PassRegistry() {
-}
+PassRegistry::~PassRegistry() {}
const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
sys::SmartScopedReader<true> Guard(Lock);
@@ -58,77 +57,62 @@ const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
sys::SmartScopedWriter<true> Guard(Lock);
bool Inserted =
- PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ PassInfoMap.insert(std::make_pair(PI.getTypeInfo(), &PI)).second;
assert(Inserted && "Pass registered multiple times!");
(void)Inserted;
PassInfoStringMap[PI.getPassArgument()] = &PI;
-
+
// Notify any listeners.
- for (std::vector<PassRegistrationListener*>::iterator
- I = Listeners.begin(), E = Listeners.end(); I != E; ++I)
- (*I)->passRegistered(&PI);
-
- if (ShouldFree) ToFree.push_back(std::unique_ptr<const PassInfo>(&PI));
-}
+ for (auto *Listener : Listeners)
+ Listener->passRegistered(&PI);
-void PassRegistry::unregisterPass(const PassInfo &PI) {
- sys::SmartScopedWriter<true> Guard(Lock);
- MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
- assert(I != PassInfoMap.end() && "Pass registered but not in map!");
-
- // Remove pass from the map.
- PassInfoMap.erase(I);
- PassInfoStringMap.erase(PI.getPassArgument());
+ if (ShouldFree)
+ ToFree.push_back(std::unique_ptr<const PassInfo>(&PI));
}
void PassRegistry::enumerateWith(PassRegistrationListener *L) {
sys::SmartScopedReader<true> Guard(Lock);
- for (auto I = PassInfoMap.begin(), E = PassInfoMap.end(); I != E; ++I)
- L->passEnumerate(I->second);
+ for (auto PassInfoPair : PassInfoMap)
+ L->passEnumerate(PassInfoPair.second);
}
-
/// Analysis Group Mechanisms.
-void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
+void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
const void *PassID,
- PassInfo& Registeree,
- bool isDefault,
+ PassInfo &Registeree, bool isDefault,
bool ShouldFree) {
- PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
+ PassInfo *InterfaceInfo = const_cast<PassInfo *>(getPassInfo(InterfaceID));
if (!InterfaceInfo) {
// First reference to Interface, register it now.
registerPass(Registeree);
InterfaceInfo = &Registeree;
}
- assert(Registeree.isAnalysisGroup() &&
+ assert(Registeree.isAnalysisGroup() &&
"Trying to join an analysis group that is a normal pass!");
if (PassID) {
- PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
+ PassInfo *ImplementationInfo = const_cast<PassInfo *>(getPassInfo(PassID));
assert(ImplementationInfo &&
"Must register pass before adding to AnalysisGroup!");
sys::SmartScopedWriter<true> Guard(Lock);
-
+
// Make sure we keep track of the fact that the implementation implements
// the interface.
ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
- AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
- assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
- "Cannot add a pass to the same analysis group more than once!");
- AGI.Implementations.insert(ImplementationInfo);
if (isDefault) {
assert(InterfaceInfo->getNormalCtor() == nullptr &&
"Default implementation for analysis group already specified!");
- assert(ImplementationInfo->getNormalCtor() &&
- "Cannot specify pass as default if it does not have a default ctor");
+ assert(
+ ImplementationInfo->getNormalCtor() &&
+ "Cannot specify pass as default if it does not have a default ctor");
InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
InterfaceInfo->setTargetMachineCtor(
ImplementationInfo->getTargetMachineCtor());
}
}
-
+
if (ShouldFree)
ToFree.push_back(std::unique_ptr<const PassInfo>(&Registeree));
}
@@ -140,7 +124,7 @@ void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
sys::SmartScopedWriter<true> Guard(Lock);
-
+
auto I = std::find(Listeners.begin(), Listeners.end(), L);
Listeners.erase(I);
}
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index 8302597..a18f982 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
-#define LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+#ifndef LLVM_LIB_IR_SYMBOLTABLELISTTRAITSIMPL_H
+#define LLVM_LIB_IR_SYMBOLTABLELISTTRAITSIMPL_H
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/IR/ValueSymbolTable.h"
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 1efde47..0458b5f 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -89,9 +89,13 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const {
// At this point we have only various mismatches of the first class types
// remaining and ptr->ptr. Just select the lossless conversions. Everything
- // else is not lossless.
- if (this->isPointerTy())
- return Ty->isPointerTy();
+ // else is not lossless. Conservatively assume we can't losslessly convert
+ // between pointers with different address spaces.
+ if (const PointerType *PTy = dyn_cast<PointerType>(this)) {
+ if (const PointerType *OtherPTy = dyn_cast<PointerType>(Ty))
+ return PTy->getAddressSpace() == OtherPTy->getAddressSpace();
+ return false;
+ }
return false; // Other types have no identity values
}
@@ -155,7 +159,7 @@ int Type::getFPMantissaWidth() const {
/// isSizedDerivedType - Derived types like structures and arrays are sized
/// iff all of the members of the type are sized as well. Since asking for
/// their size is relatively uncommon, move this operation out of line.
-bool Type::isSizedDerivedType(SmallPtrSet<const Type*, 4> *Visited) const {
+bool Type::isSizedDerivedType(SmallPtrSetImpl<const Type*> *Visited) const {
if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
return ATy->getElementType()->isSized(Visited);
@@ -454,10 +458,11 @@ void StructType::setName(StringRef Name) {
}
// Look up the entry for the name.
- EntryTy *Entry = &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
-
+ auto IterBool =
+ getContext().pImpl->NamedStructTypes.insert(std::make_pair(Name, this));
+
// While we have a name collision, try a random rename.
- if (Entry->getValue()) {
+ if (!IterBool.second) {
SmallString<64> TempStr(Name);
TempStr.push_back('.');
raw_svector_ostream TmpStream(TempStr);
@@ -467,19 +472,16 @@ void StructType::setName(StringRef Name) {
TempStr.resize(NameSize + 1);
TmpStream.resync();
TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
-
- Entry = &getContext().pImpl->
- NamedStructTypes.GetOrCreateValue(TmpStream.str());
- } while (Entry->getValue());
- }
- // Okay, we found an entry that isn't used. It's us!
- Entry->setValue(this);
+ IterBool = getContext().pImpl->NamedStructTypes.insert(
+ std::make_pair(TmpStream.str(), this));
+ } while (!IterBool.second);
+ }
// Delete the old string data.
if (SymbolTableEntry)
((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
- SymbolTableEntry = Entry;
+ SymbolTableEntry = &*IterBool.first;
}
//===----------------------------------------------------------------------===//
@@ -506,7 +508,9 @@ StructType *StructType::get(Type *type, ...) {
StructFields.push_back(type);
type = va_arg(ap, llvm::Type*);
}
- return llvm::StructType::get(Ctx, StructFields);
+ auto *Ret = llvm::StructType::get(Ctx, StructFields);
+ va_end(ap);
+ return Ret;
}
StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements,
@@ -547,16 +551,18 @@ StructType *StructType::create(StringRef Name, Type *type, ...) {
StructFields.push_back(type);
type = va_arg(ap, llvm::Type*);
}
- return llvm::StructType::create(Ctx, StructFields, Name);
+ auto *Ret = llvm::StructType::create(Ctx, StructFields, Name);
+ va_end(ap);
+ return Ret;
}
-bool StructType::isSized(SmallPtrSet<const Type*, 4> *Visited) const {
+bool StructType::isSized(SmallPtrSetImpl<const Type*> *Visited) const {
if ((getSubclassData() & SCDB_IsSized) != 0)
return true;
if (isOpaque())
return false;
- if (Visited && !Visited->insert(this))
+ if (Visited && !Visited->insert(this).second)
return false;
// Okay, our struct is sized if all of the elements are, but if one of the
@@ -591,6 +597,7 @@ void StructType::setBody(Type *type, ...) {
type = va_arg(ap, llvm::Type*);
}
setBody(StructFields);
+ va_end(ap);
}
bool StructType::isValidElementType(Type *ElemTy) {
diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp
index 689b903..6796075 100644
--- a/lib/IR/TypeFinder.cpp
+++ b/lib/IR/TypeFinder.cpp
@@ -40,7 +40,7 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
}
// Get types from functions.
- SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDForInst;
for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
incorporateType(FI->getType());
diff --git a/lib/IR/Use.cpp b/lib/IR/Use.cpp
index 047861c..cae845d 100644
--- a/lib/IR/Use.cpp
+++ b/lib/IR/Use.cpp
@@ -52,7 +52,7 @@ unsigned Use::getOperandNo() const {
// Sets up the waymarking algorithm's tags for a series of Uses. See the
// algorithm details here:
//
-// http://www.llvm.org/docs/ProgrammersManual.html#UserLayout
+// http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm
//
Use *Use::initTags(Use *const Start, Use *Stop) {
ptrdiff_t Done = 0;
diff --git a/lib/IR/UseListOrder.cpp b/lib/IR/UseListOrder.cpp
new file mode 100644
index 0000000..d064e67
--- /dev/null
+++ b/lib/IR/UseListOrder.cpp
@@ -0,0 +1,43 @@
+//===- UseListOrder.cpp - Implement Use List Order ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement structures and command-line options for preserving use-list order.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/UseListOrder.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> PreserveBitcodeUseListOrder(
+ "preserve-bc-use-list-order",
+ cl::desc("Experimental support to preserve bitcode use-list order."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> PreserveAssemblyUseListOrder(
+ "preserve-ll-use-list-order",
+ cl::desc("Experimental support to preserve assembly use-list order."),
+ cl::init(false), cl::Hidden);
+
+bool llvm::shouldPreserveBitcodeUseListOrder() {
+ return PreserveBitcodeUseListOrder;
+}
+
+bool llvm::shouldPreserveAssemblyUseListOrder() {
+ return PreserveAssemblyUseListOrder;
+}
+
+void llvm::setPreserveBitcodeUseListOrder(bool ShouldPreserve) {
+ PreserveBitcodeUseListOrder = ShouldPreserve;
+}
+
+void llvm::setPreserveAssemblyUseListOrder(bool ShouldPreserve) {
+ PreserveAssemblyUseListOrder = ShouldPreserve;
+}
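(Aside: the new file above only wraps two hidden command-line flags behind getter/setter functions, so a tool can opt into the experimental round-trip behaviour programmatically instead of via -preserve-bc-use-list-order / -preserve-ll-use-list-order. A sketch of such a caller follows, assuming the declarations live in llvm/IR/UseListOrder.h as the #include above suggests.)

#include "llvm/IR/UseListOrder.h"
#include <cstdio>

int main() {
  // Equivalent to passing both hidden flags on the command line.
  llvm::setPreserveBitcodeUseListOrder(true);
  llvm::setPreserveAssemblyUseListOrder(true);
  std::printf("%d %d\n", llvm::shouldPreserveBitcodeUseListOrder(),
              llvm::shouldPreserveAssemblyUseListOrder());
  return 0;
}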
diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp
index 9406828..ee83eac 100644
--- a/lib/IR/User.cpp
+++ b/lib/IR/User.cpp
@@ -20,9 +20,6 @@ namespace llvm {
void User::anchor() {}
-// replaceUsesOfWith - Replaces all references to the "From" definition with
-// references to the "To" definition.
-//
void User::replaceUsesOfWith(Value *From, Value *To) {
if (From == To) return; // Duh what?
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 35c241a..4e0c11f1 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -15,6 +15,7 @@
#include "LLVMContextImpl.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -44,7 +45,8 @@ static inline Type *checkType(Type *Ty) {
Value::Value(Type *ty, unsigned scid)
: VTy(checkType(ty)), UseList(nullptr), Name(nullptr), SubclassID(scid),
- HasValueHandle(0), SubclassOptionalData(0), SubclassData(0) {
+ HasValueHandle(0), SubclassOptionalData(0), SubclassData(0),
+ NumOperands(0) {
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
// constructed.
@@ -87,8 +89,6 @@ Value::~Value() {
LeakDetector::removeGarbageObject(this);
}
-/// hasNUses - Return true if this Value has exactly N users.
-///
bool Value::hasNUses(unsigned N) const {
const_use_iterator UI = use_begin(), E = use_end();
@@ -97,9 +97,6 @@ bool Value::hasNUses(unsigned N) const {
return UI == E;
}
-/// hasNUsesOrMore - Return true if this value has N users or more. This is
-/// logically equivalent to getNumUses() >= N.
-///
bool Value::hasNUsesOrMore(unsigned N) const {
const_use_iterator UI = use_begin(), E = use_end();
@@ -109,8 +106,6 @@ bool Value::hasNUsesOrMore(unsigned N) const {
return true;
}
-/// isUsedInBasicBlock - Return true if this value is used in the specified
-/// basic block.
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
// This can be computed either by scanning the instructions in BB, or by
// scanning the use list of this Value. Both lists can be very long, but
@@ -132,10 +127,6 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
return false;
}
-
-/// getNumUses - This method computes the number of uses of this Value. This
-/// is a linear time operation. Use hasOneUse or hasNUses to check for specific
-/// values.
unsigned Value::getNumUses() const {
return (unsigned)std::distance(use_begin(), use_end());
}
@@ -235,9 +226,6 @@ void Value::setName(const Twine &NewName) {
Name = ST->createValueName(NameRef, this);
}
-
-/// takeName - transfer the name from V to this value, setting V's name to
-/// empty. It is an error to call V->takeName(V).
void Value::takeName(Value *V) {
assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!");
@@ -302,9 +290,9 @@ void Value::takeName(Value *V) {
}
#ifndef NDEBUG
-static bool contains(SmallPtrSet<ConstantExpr *, 4> &Cache, ConstantExpr *Expr,
+static bool contains(SmallPtrSetImpl<ConstantExpr *> &Cache, ConstantExpr *Expr,
Constant *C) {
- if (!Cache.insert(Expr))
+ if (!Cache.insert(Expr).second)
return false;
for (auto &O : Expr->operands()) {
@@ -413,7 +401,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
return V;
}
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- } while (Visited.insert(V));
+ } while (Visited.insert(V).second);
return V;
}
@@ -454,7 +442,8 @@ Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
return V;
Offset = GEPOffset;
V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+ Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
V = cast<Operator>(V)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
V = GA->getAliasee();
@@ -462,7 +451,7 @@ Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
return V;
}
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- } while (Visited.insert(V));
+ } while (Visited.insert(V).second);
return V;
}
@@ -471,10 +460,12 @@ Value *Value::stripInBoundsOffsets() {
return stripPointerCastsAndOffsets<PSK_InBounds>(this);
}
-/// isDereferenceablePointer - Test if this value is always a pointer to
-/// allocated and suitably aligned memory for a simple load or store.
+/// \brief Check if Value is always a dereferenceable pointer.
+///
+/// Test if V is always a pointer to allocated and suitably aligned memory for
+/// a simple load or store.
static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
- SmallPtrSet<const Value *, 32> &Visited) {
+ SmallPtrSetImpl<const Value *> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
@@ -504,14 +495,34 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return !GV->hasExternalWeakLinkage();
- // byval arguments are ok.
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
+ // byval arguments are okay. Arguments specifically marked as
+ // dereferenceable are okay too.
+ if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (A->hasByValAttr())
+ return true;
+ else if (uint64_t Bytes = A->getDereferenceableBytes()) {
+ Type *Ty = V->getType()->getPointerElementType();
+ if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes)
+ return true;
+ }
+
+ return false;
+ }
+
+ // Return values from call sites specifically marked as dereferenceable are
+ // also okay.
+ if (ImmutableCallSite CS = V) {
+ if (uint64_t Bytes = CS.getDereferenceableBytes(0)) {
+ Type *Ty = V->getType()->getPointerElementType();
+ if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes)
+ return true;
+ }
+ }
// For GEPs, determine if the indexing lands within the allocated object.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Conservatively require that the base pointer be fully dereferenceable.
- if (!Visited.insert(GEP->getOperand(0)))
+ if (!Visited.insert(GEP->getOperand(0)).second)
return false;
if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited))
return false;
@@ -543,21 +554,39 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
return true;
}
+ if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
+ return isDereferenceablePointer(ASC->getOperand(0), DL, Visited);
+
// If we don't know, assume the worst.
return false;
}
-/// isDereferenceablePointer - Test if this value is always a pointer to
-/// allocated and suitably aligned memory for a simple load or store.
bool Value::isDereferenceablePointer(const DataLayout *DL) const {
+ // When dereferenceability information is provided by a dereferenceable
+ // attribute, we know exactly how many bytes are dereferenceable. If we can
+ // determine the exact offset to the attributed variable, we can use that
+ // information here.
+ Type *Ty = getType()->getPointerElementType();
+ if (Ty->isSized() && DL) {
+ APInt Offset(DL->getTypeStoreSizeInBits(getType()), 0);
+ const Value *BV = stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
+
+ APInt DerefBytes(Offset.getBitWidth(), 0);
+ if (const Argument *A = dyn_cast<Argument>(BV))
+ DerefBytes = A->getDereferenceableBytes();
+ else if (ImmutableCallSite CS = BV)
+ DerefBytes = CS.getDereferenceableBytes(0);
+
+ if (DerefBytes.getBoolValue() && Offset.isNonNegative()) {
+ if (DerefBytes.uge(Offset + DL->getTypeStoreSize(Ty)))
+ return true;
+ }
+ }
+
SmallPtrSet<const Value *, 32> Visited;
return ::isDereferenceablePointer(this, DL, Visited);
}
-/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
-/// return the value in the PHI node corresponding to PredBB. If not, return
-/// ourself. This is useful if you want to know the value something has in a
-/// predecessor block.
Value *Value::DoPHITranslation(const BasicBlock *CurBB,
const BasicBlock *PredBB) {
PHINode *PN = dyn_cast<PHINode>(this);
@@ -568,12 +597,29 @@ Value *Value::DoPHITranslation(const BasicBlock *CurBB,
LLVMContext &Value::getContext() const { return VTy->getContext(); }
+void Value::reverseUseList() {
+ if (!UseList || !UseList->Next)
+ // No need to reverse 0 or 1 uses.
+ return;
+
+ Use *Head = UseList;
+ Use *Current = UseList->Next;
+ Head->Next = nullptr;
+ while (Current) {
+ Use *Next = Current->Next;
+ Current->Next = Head;
+ Head->setPrev(&Current->Next);
+ Head = Current;
+ Current = Next;
+ }
+ UseList = Head;
+ Head->setPrev(&UseList);
+}
+
//===----------------------------------------------------------------------===//
// ValueHandleBase Class
//===----------------------------------------------------------------------===//
-/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
-/// List is known to point into the existing use list.
void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
assert(List && "Handle list is null?");
@@ -597,7 +643,6 @@ void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
Next->setPrevPtr(&Next);
}
-/// AddToUseList - Add this ValueHandle to the use list for VP.
void ValueHandleBase::AddToUseList() {
assert(VP.getPointer() && "Null pointer doesn't have a use list!");
@@ -641,7 +686,6 @@ void ValueHandleBase::AddToUseList() {
}
}
-/// RemoveFromUseList - Remove this ValueHandle from its current use list.
void ValueHandleBase::RemoveFromUseList() {
assert(VP.getPointer() && VP.getPointer()->HasValueHandle &&
"Pointer doesn't have a use list!");
@@ -729,6 +773,8 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
assert(Old->HasValueHandle &&"Should only be called if ValueHandles present");
assert(Old != New && "Changing value into itself!");
+ assert(Old->getType() == New->getType() &&
+ "replaceAllUses of value with new value of different type!");
// Get the linked list base, which is guaranteed to exist since the
// HasValueHandle flag is set.
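(Aside: Value::reverseUseList above is a textbook in-place reversal of the intrusive use list, plus fix-up of the Prev back-pointers. The same algorithm on a minimal singly linked list, without the back-pointers, is sketched below; it is illustrative only and not LLVM code.)

#include <cassert>

struct Node { int Val; Node *Next; };

// Reverse the list in place and return the new head.
static Node *reverseList(Node *Head) {
  Node *Prev = nullptr;
  while (Head) {
    Node *Next = Head->Next; // remember the rest of the list
    Head->Next = Prev;       // point the current node backwards
    Prev = Head;             // advance
    Head = Next;
  }
  return Prev;
}

int main() {
  Node C{3, nullptr}, B{2, &C}, A{1, &B};
  Node *NewHead = reverseList(&A);
  assert(NewHead == &C && C.Next == &B && B.Next == &A && A.Next == nullptr);
  return 0;
}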
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
index e9e979a..2b23f6d 100644
--- a/lib/IR/ValueSymbolTable.cpp
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -56,11 +56,10 @@ void ValueSymbolTable::reinsertValue(Value* V) {
raw_svector_ostream(UniqueName) << ++LastUnique;
// Try insert the vmap entry with this suffix.
- ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
- if (!NewName.getValue()) {
+ auto IterBool = vmap.insert(std::make_pair(UniqueName, V));
+ if (IterBool.second) {
// Newly inserted name. Success!
- NewName.setValue(V);
- V->Name = &NewName;
+ V->Name = &*IterBool.first;
//DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
return;
}
@@ -78,12 +77,11 @@ void ValueSymbolTable::removeValueName(ValueName *V) {
/// auto-renames the name and returns that instead.
ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// In the common case, the name is not already in the symbol table.
- ValueName &Entry = vmap.GetOrCreateValue(Name);
- if (!Entry.getValue()) {
- Entry.setValue(V);
+ auto IterBool = vmap.insert(std::make_pair(Name, V));
+ if (IterBool.second) {
//DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
// << *V << "\n");
- return &Entry;
+ return &*IterBool.first;
}
// Otherwise, there is a naming conflict. Rename this value.
@@ -95,12 +93,11 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
raw_svector_ostream(UniqueName) << ++LastUnique;
// Try insert the vmap entry with this suffix.
- ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
- if (!NewName.getValue()) {
- // Newly inserted name. Success!
- NewName.setValue(V);
- //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
- return &NewName;
+ auto IterBool = vmap.insert(std::make_pair(UniqueName, V));
+ if (IterBool.second) {
+ // DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V <<
+ // "\n");
+ return &*IterBool.first;
}
}
}
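(Aside: the GetOrCreateValue-to-insert migration in this file, and in Type.cpp above, relies on insert returning an (iterator, inserted) pair, so the "did we create it?" check and the lookup happen in one step. The same idiom with std::map is sketched below for reference.)

#include <cassert>
#include <map>
#include <string>

int main() {
  std::map<std::string, int> Table;

  // First insertion succeeds: .second is true, .first points at the entry.
  auto IterBool = Table.insert(std::make_pair("foo", 1));
  assert(IterBool.second && IterBool.first->second == 1);

  // Inserting the same key again is a no-op: .second is false and the
  // iterator points at the existing entry, which is how the renaming
  // loops in the patch detect a name collision.
  auto Again = Table.insert(std::make_pair("foo", 2));
  assert(!Again.second && Again.first->second == 1);
  return 0;
}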
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 314bad3..9698dbd 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -257,7 +257,7 @@ private:
void visitGlobalVariable(const GlobalVariable &GV);
void visitGlobalAlias(const GlobalAlias &GA);
void visitAliaseeSubExpr(const GlobalAlias &A, const Constant &C);
- void visitAliaseeSubExpr(SmallPtrSet<const GlobalAlias *, 4> &Visited,
+ void visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias *> &Visited,
const GlobalAlias &A, const Constant &C);
void visitNamedMDNode(const NamedMDNode &NMD);
void visitMDNode(MDNode &MD, Function *F);
@@ -269,6 +269,8 @@ private:
SmallVectorImpl<const MDNode *> &Requirements);
void visitFunction(const Function &F);
void visitBasicBlock(BasicBlock &BB);
+ void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty);
+
// InstVisitor overrides...
using InstVisitor<Verifier>::visit;
@@ -375,11 +377,13 @@ void Verifier::visit(Instruction &I) {
void Verifier::visitGlobalValue(const GlobalValue &GV) {
- Assert1(!GV.isDeclaration() || GV.isMaterializable() ||
- GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(),
+ Assert1(!GV.isDeclaration() || GV.hasExternalLinkage() ||
+ GV.hasExternalWeakLinkage(),
"Global is external, but doesn't have external or weak linkage!",
&GV);
+ Assert1(GV.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &GV);
Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
"Only global variables can have appending linkage!", &GV);
@@ -476,7 +480,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
while (!WorkStack.empty()) {
const Value *V = WorkStack.pop_back_val();
- if (!Visited.insert(V))
+ if (!Visited.insert(V).second)
continue;
if (const User *U = dyn_cast<User>(V)) {
@@ -500,13 +504,13 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) {
visitAliaseeSubExpr(Visited, GA, C);
}
-void Verifier::visitAliaseeSubExpr(SmallPtrSet<const GlobalAlias *, 4> &Visited,
+void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
const GlobalAlias &GA, const Constant &C) {
if (const auto *GV = dyn_cast<GlobalValue>(&C)) {
Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA);
if (const auto *GA2 = dyn_cast<GlobalAlias>(GV)) {
- Assert1(Visited.insert(GA2), "Aliases cannot form a cycle", &GA);
+ Assert1(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
Assert1(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias",
&GA);
@@ -564,7 +568,7 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
void Verifier::visitMDNode(MDNode &MD, Function *F) {
// Only visit each node once. Metadata can be mutually recursive, so this
// avoids infinite recursion here, as well as being an optimization.
- if (!MDNodes.insert(&MD))
+ if (!MDNodes.insert(&MD).second)
return;
for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) {
@@ -605,11 +609,10 @@ void Verifier::visitComdat(const Comdat &C) {
Assert1(GV,
"comdat selection kind requires a global value with the same name",
&C);
- // The Module is invalid if the GlobalValue has local linkage. Allowing
- // otherwise opens us up to seeing the underling global value get renamed if
- // collisions occur.
+ // The Module is invalid if the GlobalValue has private linkage. Entities
+ // with private linkage don't have entries in the symbol table.
if (GV)
- Assert1(!GV->hasLocalLinkage(), "comdat global value has local linkage",
+ Assert1(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
GV);
}
@@ -672,24 +675,23 @@ Verifier::visitModuleFlag(const MDNode *Op,
// constant int), the flag ID (an MDString), and the value.
Assert1(Op->getNumOperands() == 3,
"incorrect number of operands in module flag", Op);
- ConstantInt *Behavior = dyn_cast<ConstantInt>(Op->getOperand(0));
+ Module::ModFlagBehavior MFB;
+ if (!Module::isValidModFlagBehavior(Op->getOperand(0), MFB)) {
+ Assert1(
+ dyn_cast<ConstantInt>(Op->getOperand(0)),
+ "invalid behavior operand in module flag (expected constant integer)",
+ Op->getOperand(0));
+ Assert1(false,
+ "invalid behavior operand in module flag (unexpected constant)",
+ Op->getOperand(0));
+ }
MDString *ID = dyn_cast<MDString>(Op->getOperand(1));
- Assert1(Behavior,
- "invalid behavior operand in module flag (expected constant integer)",
- Op->getOperand(0));
- unsigned BehaviorValue = Behavior->getZExtValue();
Assert1(ID,
"invalid ID operand in module flag (expected metadata string)",
Op->getOperand(1));
// Sanity check the values for behaviors with additional requirements.
- switch (BehaviorValue) {
- default:
- Assert1(false,
- "invalid behavior operand in module flag (unexpected constant)",
- Op->getOperand(0));
- break;
-
+ switch (MFB) {
case Module::Error:
case Module::Warning:
case Module::Override:
@@ -725,7 +727,7 @@ Verifier::visitModuleFlag(const MDNode *Op,
}
// Unless this is a "requires" flag, check the ID is unique.
- if (BehaviorValue != Module::Require) {
+ if (MFB != Module::Require) {
bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second;
Assert1(Inserted,
"module flag identifiers must be unique (or of 'require' type)",
@@ -1054,20 +1056,19 @@ void Verifier::visitFunction(const Function &F) {
"Attribute 'builtin' can only be applied to a callsite.", &F);
// Check that this function meets the restrictions on this calling convention.
+ // Sometimes varargs is used for perfectly forwarding thunks, so some of these
+ // restrictions can be lifted.
switch (F.getCallingConv()) {
default:
- break;
case CallingConv::C:
break;
case CallingConv::Fast:
case CallingConv::Cold:
- case CallingConv::X86_FastCall:
- case CallingConv::X86_ThisCall:
case CallingConv::Intel_OCL_BI:
case CallingConv::PTX_Kernel:
case CallingConv::PTX_Device:
- Assert1(!F.isVarArg(),
- "Varargs functions must have C calling conventions!", &F);
+ Assert1(!F.isVarArg(), "Calling convention does not support varargs or "
+ "perfect forwarding!", &F);
break;
}
@@ -1175,6 +1176,12 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
}
}
}
+
+ // Check that all instructions have their parent pointers set up correctly.
+ for (auto &I : BB)
+ {
+ Assert(I.getParent() == &BB, "Instruction has bogus parent pointer!");
+ }
}
void Verifier::visitTerminatorInst(TerminatorInst &I) {
@@ -1217,7 +1224,7 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
Assert1(i.getCaseValue()->getType() == SwitchTy,
"Switch constants must all be same type as switch value!", &SI);
- Assert2(Constants.insert(i.getCaseValue()),
+ Assert2(Constants.insert(i.getCaseValue()).second,
"Duplicate integer as switch case", &SI, i.getCaseValue());
}
@@ -1886,12 +1893,63 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
}
+void Verifier::visitRangeMetadata(Instruction& I,
+ MDNode* Range, Type* Ty) {
+ assert(Range &&
+ Range == I.getMetadata(LLVMContext::MD_range) &&
+ "precondition violation");
+
+ unsigned NumOperands = Range->getNumOperands();
+ Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
+ unsigned NumRanges = NumOperands / 2;
+ Assert1(NumRanges >= 1, "It should have at least one range!", Range);
+
+ ConstantRange LastRange(1); // Dummy initial value
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Low = dyn_cast<ConstantInt>(Range->getOperand(2*i));
+ Assert1(Low, "The lower limit must be an integer!", Low);
+ ConstantInt *High = dyn_cast<ConstantInt>(Range->getOperand(2*i + 1));
+ Assert1(High, "The upper limit must be an integer!", High);
+ Assert1(High->getType() == Low->getType() &&
+ High->getType() == Ty, "Range types must match instruction type!",
+ &I);
+
+ APInt HighV = High->getValue();
+ APInt LowV = Low->getValue();
+ ConstantRange CurRange(LowV, HighV);
+ Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(),
+ "Range must not be empty!", Range);
+ if (i != 0) {
+ Assert1(CurRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
+ Range);
+ Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
+ Range);
+ }
+ LastRange = ConstantRange(LowV, HighV);
+ }
+ if (NumRanges > 2) {
+ APInt FirstLow =
+ dyn_cast<ConstantInt>(Range->getOperand(0))->getValue();
+ APInt FirstHigh =
+ dyn_cast<ConstantInt>(Range->getOperand(1))->getValue();
+ ConstantRange FirstRange(FirstLow, FirstHigh);
+ Assert1(FirstRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
+ Range);
+ }
+}
+
void Verifier::visitLoadInst(LoadInst &LI) {
PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
Assert1(PTy, "Load operand must be a pointer.", &LI);
Type *ElTy = PTy->getElementType();
Assert2(ElTy == LI.getType(),
"Load result type does not match pointer operand type!", &LI, ElTy);
+ Assert1(LI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &LI);
if (LI.isAtomic()) {
Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
"Load cannot have Release ordering", &LI);
@@ -1911,52 +1969,6 @@ void Verifier::visitLoadInst(LoadInst &LI) {
"Non-atomic load cannot have SynchronizationScope specified", &LI);
}
- if (MDNode *Range = LI.getMetadata(LLVMContext::MD_range)) {
- unsigned NumOperands = Range->getNumOperands();
- Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
- unsigned NumRanges = NumOperands / 2;
- Assert1(NumRanges >= 1, "It should have at least one range!", Range);
-
- ConstantRange LastRange(1); // Dummy initial value
- for (unsigned i = 0; i < NumRanges; ++i) {
- ConstantInt *Low = dyn_cast<ConstantInt>(Range->getOperand(2*i));
- Assert1(Low, "The lower limit must be an integer!", Low);
- ConstantInt *High = dyn_cast<ConstantInt>(Range->getOperand(2*i + 1));
- Assert1(High, "The upper limit must be an integer!", High);
- Assert1(High->getType() == Low->getType() &&
- High->getType() == ElTy, "Range types must match load type!",
- &LI);
-
- APInt HighV = High->getValue();
- APInt LowV = Low->getValue();
- ConstantRange CurRange(LowV, HighV);
- Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(),
- "Range must not be empty!", Range);
- if (i != 0) {
- Assert1(CurRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
- Range);
- Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
- Range);
- }
- LastRange = ConstantRange(LowV, HighV);
- }
- if (NumRanges > 2) {
- APInt FirstLow =
- dyn_cast<ConstantInt>(Range->getOperand(0))->getValue();
- APInt FirstHigh =
- dyn_cast<ConstantInt>(Range->getOperand(1))->getValue();
- ConstantRange FirstRange(FirstLow, FirstHigh);
- Assert1(FirstRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
- Range);
- }
-
-
- }
-
visitInstruction(LI);
}
@@ -1967,6 +1979,8 @@ void Verifier::visitStoreInst(StoreInst &SI) {
Assert2(ElTy == SI.getOperand(0)->getType(),
"Stored value type does not match pointer operand type!",
&SI, ElTy);
+ Assert1(SI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &SI);
if (SI.isAtomic()) {
Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
"Store cannot have Acquire ordering", &SI);
@@ -1998,6 +2012,8 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
&AI);
Assert1(AI.getArraySize()->getType()->isIntegerTy(),
"Alloca array size must have integer type", &AI);
+ Assert1(AI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &AI);
visitInstruction(AI);
}
@@ -2207,11 +2223,15 @@ void Verifier::visitInstruction(Instruction &I) {
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
// Check to make sure that the "address of" an intrinsic function is never
// taken.
- Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 : 0),
+ Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 :
+ isa<InvokeInst>(I) ? e-3 : 0),
"Cannot take the address of an intrinsic!", &I);
Assert1(!F->isIntrinsic() || isa<CallInst>(I) ||
- F->getIntrinsicID() == Intrinsic::donothing,
- "Cannot invoke an intrinsinc other than donothing", &I);
+ F->getIntrinsicID() == Intrinsic::donothing ||
+ F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
+ F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64,
+ "Cannot invoke an intrinsinc other than"
+ " donothing or patchpoint", &I);
Assert1(F->getParent() == M, "Referencing function in another module!",
&I);
} else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
@@ -2239,7 +2259,7 @@ void Verifier::visitInstruction(Instruction &I) {
while (!Stack.empty()) {
const ConstantExpr *V = Stack.pop_back_val();
- if (!Visited.insert(V))
+ if (!Visited.insert(V).second)
continue;
VerifyConstantExprBitcastType(V);
@@ -2267,9 +2287,19 @@ void Verifier::visitInstruction(Instruction &I) {
}
}
- MDNode *MD = I.getMetadata(LLVMContext::MD_range);
- Assert1(!MD || isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
- "Ranges are only for loads, calls and invokes!", &I);
+ if (MDNode *Range = I.getMetadata(LLVMContext::MD_range)) {
+ Assert1(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Ranges are only for loads, calls and invokes!", &I);
+ visitRangeMetadata(I, Range, I.getType());
+ }
+
+ if (I.getMetadata(LLVMContext::MD_nonnull)) {
+ Assert1(I.getType()->isPointerTy(),
+ "nonnull applies only to pointer types", &I);
+ Assert1(isa<LoadInst>(I),
+ "nonnull applies only to load instructions, use attributes"
+ " for calls or invokes", &I);
+ }
InstsInThisBlock.insert(&I);
}
@@ -2608,7 +2638,7 @@ bool llvm::verifyModule(const Module &M, raw_ostream *OS) {
bool Broken = false;
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration())
+ if (!I->isDeclaration() && !I->isMaterializable())
Broken |= !V.verify(*I);
// Note that this function's return value is inverted from what you would
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
index f8d2f5a..7bc6f07 100644
--- a/lib/IRReader/IRReader.cpp
+++ b/lib/IRReader/IRReader.cpp
@@ -29,28 +29,27 @@ namespace llvm {
static const char *const TimeIRParsingGroupName = "LLVM IR Parsing";
static const char *const TimeIRParsingName = "Parse IR";
-static Module *getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
- LLVMContext &Context) {
+static std::unique_ptr<Module>
+getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
(const unsigned char *)Buffer->getBufferEnd())) {
- std::string ErrMsg;
- ErrorOr<Module *> ModuleOrErr = getLazyBitcodeModule(Buffer, Context);
+ ErrorOr<Module *> ModuleOrErr =
+ getLazyBitcodeModule(std::move(Buffer), Context);
if (std::error_code EC = ModuleOrErr.getError()) {
Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
- // getLazyBitcodeModule does not take ownership of the Buffer in the
- // case of an error.
- delete Buffer;
return nullptr;
}
- return ModuleOrErr.get();
+ return std::unique_ptr<Module>(ModuleOrErr.get());
}
- return ParseAssembly(Buffer, nullptr, Err, Context);
+ return parseAssembly(Buffer->getMemBufferRef(), Err, Context);
}
-Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
- LLVMContext &Context) {
+std::unique_ptr<Module> llvm::getLazyIRFileModule(StringRef Filename,
+ SMDiagnostic &Err,
+ LLVMContext &Context) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = FileOrErr.getError()) {
@@ -59,33 +58,29 @@ Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err
return nullptr;
}
- return getLazyIRModule(FileOrErr.get().release(), Err, Context);
+ return getLazyIRModule(std::move(FileOrErr.get()), Err, Context);
}
-Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
- LLVMContext &Context) {
+std::unique_ptr<Module> llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
NamedRegionTimer T(TimeIRParsingName, TimeIRParsingGroupName,
TimePassesIsEnabled);
- if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
- (const unsigned char *)Buffer->getBufferEnd())) {
+ if (isBitcode((const unsigned char *)Buffer.getBufferStart(),
+ (const unsigned char *)Buffer.getBufferEnd())) {
ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(Buffer, Context);
- Module *M = nullptr;
- if (std::error_code EC = ModuleOrErr.getError())
- Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ if (std::error_code EC = ModuleOrErr.getError()) {
+ Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
- else
- M = ModuleOrErr.get();
- // parseBitcodeFile does not take ownership of the Buffer.
- return M;
+ return nullptr;
+ }
+ return std::unique_ptr<Module>(ModuleOrErr.get());
}
- return ParseAssembly(MemoryBuffer::getMemBuffer(
- Buffer->getBuffer(), Buffer->getBufferIdentifier()),
- nullptr, Err, Context);
+ return parseAssembly(Buffer, Err, Context);
}
-Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
- LLVMContext &Context) {
+std::unique_ptr<Module> llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = FileOrErr.getError()) {
@@ -94,7 +89,7 @@ Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
return nullptr;
}
- return ParseIR(FileOrErr.get().get(), Err, Context);
+ return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context);
}
//===----------------------------------------------------------------------===//
@@ -107,7 +102,8 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
SMDiagnostic Diag;
std::unique_ptr<MemoryBuffer> MB(unwrap(MemBuf));
- *OutM = wrap(ParseIR(MB.get(), Diag, *unwrap(ContextRef)));
+ *OutM =
+ wrap(parseIR(MB->getMemBufferRef(), Diag, *unwrap(ContextRef)).release());
if(!*OutM) {
if (OutMessage) {
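(Aside: with this change the IRReader entry points return std::unique_ptr<Module> and take StringRef/MemoryBufferRef instead of owning raw pointers. A minimal caller of the new parseIRFile might look like the sketch below; the headers and the Err.print call are assumptions based on the usual LLVM APIs of this era, not part of the patch.)

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

int main(int argc, char **argv) {
  if (argc < 2)
    return 1;

  llvm::LLVMContext Context;
  llvm::SMDiagnostic Err;

  // parseIRFile now hands back ownership directly; no manual delete needed.
  std::unique_ptr<llvm::Module> M = llvm::parseIRFile(argv[1], Err, Context);
  if (!M) {
    Err.print(argv[0], llvm::errs());
    return 1;
  }

  llvm::errs() << "Parsed module: " << M->getModuleIdentifier() << "\n";
  return 0;
}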
diff --git a/lib/LTO/LLVMBuild.txt b/lib/LTO/LLVMBuild.txt
index 29ed92c..b9178e9 100644
--- a/lib/LTO/LLVMBuild.txt
+++ b/lib/LTO/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = LTO
parent = Libraries
-required_libraries = BitReader BitWriter Core IPA IPO InstCombine Linker MC MCParser ObjCARC Object Scalar Support Target TransformUtils
+required_libraries = BitReader BitWriter Core IPA IPO InstCombine Linker MC ObjCARC Object Scalar Support Target TransformUtils CodeGen
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 335197a..c663d43 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -48,6 +48,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/ObjCARC.h"
@@ -63,18 +64,30 @@ const char* LTOCodeGenerator::getVersionString() {
}
LTOCodeGenerator::LTOCodeGenerator()
- : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)),
- TargetMach(nullptr), EmitDwarfDebugInfo(false),
- ScopeRestrictionsDone(false), CodeModel(LTO_CODEGEN_PIC_MODEL_DEFAULT),
- NativeObjectFile(nullptr), DiagHandler(nullptr), DiagContext(nullptr) {
+ : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)) {
+ initialize();
+}
+
+LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context)
+ : OwnedContext(std::move(Context)), Context(*OwnedContext),
+ IRLinker(new Module("ld-temp.o", *OwnedContext)) {
+ initialize();
+}
+
+void LTOCodeGenerator::initialize() {
+ TargetMach = nullptr;
+ EmitDwarfDebugInfo = false;
+ ScopeRestrictionsDone = false;
+ CodeModel = LTO_CODEGEN_PIC_MODEL_DEFAULT;
+ DiagHandler = nullptr;
+ DiagContext = nullptr;
+
initializeLTOPasses();
}
LTOCodeGenerator::~LTOCodeGenerator() {
delete TargetMach;
- delete NativeObjectFile;
TargetMach = nullptr;
- NativeObjectFile = nullptr;
IRLinker.deleteModule();
@@ -107,14 +120,18 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeFunctionAttrsPass(R);
initializeGlobalsModRefPass(R);
initializeLICMPass(R);
+ initializeMergedLoadStoreMotionPass(R);
initializeGVNPass(R);
initializeMemCpyOptPass(R);
initializeDCEPass(R);
initializeCFGSimplifyPassPass(R);
}
-bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
- bool ret = IRLinker.linkInModule(&mod->getModule(), &errMsg);
+bool LTOCodeGenerator::addModule(LTOModule *mod) {
+ assert(&mod->getModule().getContext() == &Context &&
+ "Expected module in same context");
+
+ bool ret = IRLinker.linkInModule(&mod->getModule());
const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs();
for (int i = 0, e = undefs.size(); i != e; ++i)
@@ -161,9 +178,9 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
applyScopeRestrictions();
// create output file
- std::string ErrInfo;
- tool_output_file Out(path, ErrInfo, sys::fs::F_None);
- if (!ErrInfo.empty()) {
+ std::error_code EC;
+ tool_output_file Out(path, EC, sys::fs::F_None);
+ if (EC) {
errMsg = "could not open bitcode file for writing: ";
errMsg += path;
return false;
@@ -188,6 +205,7 @@ bool LTOCodeGenerator::compile_to_file(const char** name,
bool disableOpt,
bool disableInline,
bool disableGVNLoadPRE,
+ bool disableVectorization,
std::string& errMsg) {
// make unique temp .o file to put generated object file
SmallString<128> Filename;
@@ -202,8 +220,9 @@ bool LTOCodeGenerator::compile_to_file(const char** name,
// generate object file
tool_output_file objFile(Filename.c_str(), FD);
- bool genResult = generateObjectFile(objFile.os(), disableOpt, disableInline,
- disableGVNLoadPRE, errMsg);
+ bool genResult =
+ generateObjectFile(objFile.os(), disableOpt, disableInline,
+ disableGVNLoadPRE, disableVectorization, errMsg);
objFile.os().close();
if (objFile.os().has_error()) {
objFile.os().clear_error();
@@ -226,15 +245,13 @@ const void* LTOCodeGenerator::compile(size_t* length,
bool disableOpt,
bool disableInline,
bool disableGVNLoadPRE,
+ bool disableVectorization,
std::string& errMsg) {
const char *name;
if (!compile_to_file(&name, disableOpt, disableInline, disableGVNLoadPRE,
- errMsg))
+ disableVectorization, errMsg))
return nullptr;
- // remove old buffer if compile() called twice
- delete NativeObjectFile;
-
// read .o file into memory buffer
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFile(name, -1, false);
@@ -243,7 +260,7 @@ const void* LTOCodeGenerator::compile(size_t* length,
sys::fs::remove(NativeObjectPath);
return nullptr;
}
- NativeObjectFile = BufferOrErr.get().release();
+ NativeObjectFile = std::move(*BufferOrErr);
// remove temp files
sys::fs::remove(NativeObjectPath);
@@ -298,8 +315,7 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
MCpu = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
MCpu = "yonah";
- else if (Triple.getArch() == llvm::Triple::arm64 ||
- Triple.getArch() == llvm::Triple::aarch64)
+ else if (Triple.getArch() == llvm::Triple::aarch64)
MCpu = "cyclone";
}
@@ -311,9 +327,9 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
void LTOCodeGenerator::
applyRestriction(GlobalValue &GV,
- const ArrayRef<StringRef> &Libcalls,
+ ArrayRef<StringRef> Libcalls,
std::vector<const char*> &MustPreserveList,
- SmallPtrSet<GlobalValue*, 8> &AsmUsed,
+ SmallPtrSetImpl<GlobalValue*> &AsmUsed,
Mangler &Mangler) {
// There are no restrictions to apply to declarations.
if (GV.isDeclaration())
@@ -342,7 +358,7 @@ applyRestriction(GlobalValue &GV,
}
static void findUsedValues(GlobalVariable *LLVMUsed,
- SmallPtrSet<GlobalValue*, 8> &UsedValues) {
+ SmallPtrSetImpl<GlobalValue*> &UsedValues) {
if (!LLVMUsed) return;
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
@@ -390,12 +406,13 @@ void LTOCodeGenerator::applyScopeRestrictions() {
passes.add(createDebugInfoVerifierPass());
// mark which symbols can not be internalized
- Mangler Mangler(TargetMach->getDataLayout());
+ Mangler Mangler(TargetMach->getSubtargetImpl()->getDataLayout());
std::vector<const char*> MustPreserveList;
SmallPtrSet<GlobalValue*, 8> AsmUsed;
std::vector<StringRef> Libcalls;
TargetLibraryInfo TLI(Triple(TargetMach->getTargetTriple()));
- accumulateAndSortLibcalls(Libcalls, TLI, TargetMach->getTargetLowering());
+ accumulateAndSortLibcalls(
+ Libcalls, TLI, TargetMach->getSubtargetImpl()->getTargetLowering());
for (Module::iterator f = mergedModule->begin(),
e = mergedModule->end(); f != e; ++f)
@@ -444,6 +461,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
bool DisableOpt,
bool DisableInline,
bool DisableGVNLoadPRE,
+ bool DisableVectorization,
std::string &errMsg) {
if (!this->determineTarget(errMsg))
return false;
@@ -456,35 +474,27 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
// Instantiate the pass manager to organize the passes.
PassManager passes;
- // Start off with a verification pass.
- passes.add(createVerifierPass());
- passes.add(createDebugInfoVerifierPass());
-
// Add an appropriate DataLayout instance for this module...
- mergedModule->setDataLayout(TargetMach->getDataLayout());
- passes.add(new DataLayoutPass(mergedModule));
-
- // Add appropriate TargetLibraryInfo for this module.
- passes.add(new TargetLibraryInfo(Triple(TargetMach->getTargetTriple())));
-
- TargetMach->addAnalysisPasses(passes);
-
- // Enabling internalize here would use its AllButMain variant. It
- // keeps only main if it exists and does nothing for libraries. Instead
- // we create the pass ourselves with the symbol list provided by the linker.
- if (!DisableOpt)
- PassManagerBuilder().populateLTOPassManager(passes,
- /*Internalize=*/false,
- !DisableInline,
- DisableGVNLoadPRE);
-
- // Make sure everything is still good.
- passes.add(createVerifierPass());
- passes.add(createDebugInfoVerifierPass());
+ mergedModule->setDataLayout(TargetMach->getSubtargetImpl()->getDataLayout());
+
+ Triple TargetTriple(TargetMach->getTargetTriple());
+ PassManagerBuilder PMB;
+ PMB.DisableGVNLoadPRE = DisableGVNLoadPRE;
+ PMB.LoopVectorize = !DisableVectorization;
+ PMB.SLPVectorize = !DisableVectorization;
+ if (!DisableInline)
+ PMB.Inliner = createFunctionInliningPass();
+ PMB.LibraryInfo = new TargetLibraryInfo(TargetTriple);
+ if (DisableOpt)
+ PMB.OptLevel = 0;
+ PMB.VerifyInput = true;
+ PMB.VerifyOutput = true;
+
+ PMB.populateLTOPassManager(passes, TargetMach);
PassManager codeGenPasses;
- codeGenPasses.add(new DataLayoutPass(mergedModule));
+ codeGenPasses.add(new DataLayoutPass());
formatted_raw_ostream Out(out);
@@ -571,5 +581,6 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler,
return Context.setDiagnosticHandler(nullptr, nullptr);
// Register the LTOCodeGenerator stub in the LLVMContext to forward the
// diagnostic to the external DiagHandler.
- Context.setDiagnosticHandler(LTOCodeGenerator::DiagnosticHandler, this);
+ Context.setDiagnosticHandler(LTOCodeGenerator::DiagnosticHandler, this,
+ /* RespectFilters */ true);
}
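[Editorial note, not part of the patch] The generateObjectFile() hunk above drops the hand-rolled verifier/DataLayout/TargetLibraryInfo setup and drives the LTO pipeline through a PassManagerBuilder instead. A minimal sketch of that configuration, using the same knobs the patch threads through generateObjectFile(); header paths are those of this tree and may differ in later revisions:

#include "llvm/ADT/Triple.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

static void populateLTOPasses(PassManager &PM, TargetMachine *TM,
                              bool DisableOpt, bool DisableInline,
                              bool DisableGVNLoadPRE,
                              bool DisableVectorization) {
  PassManagerBuilder PMB;
  PMB.DisableGVNLoadPRE = DisableGVNLoadPRE;
  PMB.LoopVectorize = !DisableVectorization;  // new knob added by this patch
  PMB.SLPVectorize = !DisableVectorization;   // ditto
  if (!DisableInline)
    PMB.Inliner = createFunctionInliningPass();
  PMB.LibraryInfo = new TargetLibraryInfo(Triple(TM->getTargetTriple()));
  if (DisableOpt)
    PMB.OptLevel = 0;
  // VerifyInput/VerifyOutput replace the explicit verifier and debug-info
  // verifier passes that the hunk above removes.
  PMB.VerifyInput = true;
  PMB.VerifyOutput = true;
  PMB.populateLTOPassManager(PM, TM);
}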
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 844c0f2..4108ef2 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -15,6 +15,7 @@
#include "llvm/LTO/LTOModule.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -28,6 +29,8 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -39,32 +42,51 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include <system_error>
using namespace llvm;
+using namespace llvm::object;
LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
llvm::TargetMachine *TM)
: IRFile(std::move(Obj)), _target(TM) {}
+LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
+ llvm::TargetMachine *TM,
+ std::unique_ptr<LLVMContext> Context)
+ : OwnedContext(std::move(Context)), IRFile(std::move(Obj)), _target(TM) {}
+
+LTOModule::~LTOModule() {}
+
/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
-bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
- return sys::fs::identify_magic(StringRef((const char *)mem, length)) ==
- sys::fs::file_magic::bitcode;
+bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) {
+ ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
+ MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>"));
+ return bool(BCData);
}
-bool LTOModule::isBitcodeFile(const char *path) {
- sys::fs::file_magic type;
- if (sys::fs::identify_magic(path, type))
+bool LTOModule::isBitcodeFile(const char *Path) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(Path);
+ if (!BufferOrErr)
return false;
- return type == sys::fs::file_magic::bitcode;
+
+ ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
+ BufferOrErr.get()->getMemBufferRef());
+ return bool(BCData);
}
-bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer,
- StringRef triplePrefix) {
- std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext());
- return StringRef(Triple).startswith(triplePrefix);
+bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
+ StringRef TriplePrefix) {
+ ErrorOr<MemoryBufferRef> BCOrErr =
+ IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
+ if (!BCOrErr)
+ return false;
+ LLVMContext Context;
+ std::string Triple = getBitcodeTargetTriple(*BCOrErr, Context);
+ return StringRef(Triple).startswith(TriplePrefix);
}
LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
@@ -75,7 +97,9 @@ LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
errMsg = EC.message();
return nullptr;
}
- return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg);
+ std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
+ return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg,
+ &getGlobalContext());
}
LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size,
@@ -94,23 +118,50 @@ LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path,
errMsg = EC.message();
return nullptr;
}
- return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg);
+ std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
+ return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg,
+ &getGlobalContext());
}
LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
TargetOptions options,
std::string &errMsg, StringRef path) {
- std::unique_ptr<MemoryBuffer> buffer(makeBuffer(mem, length, path));
- if (!buffer)
- return nullptr;
- return makeLTOModule(std::move(buffer), options, errMsg);
+ return createInContext(mem, length, options, errMsg, path,
+ &getGlobalContext());
+}
+
+LTOModule *LTOModule::createInLocalContext(const void *mem, size_t length,
+ TargetOptions options,
+ std::string &errMsg,
+ StringRef path) {
+ return createInContext(mem, length, options, errMsg, path, nullptr);
}
-LTOModule *LTOModule::makeLTOModule(std::unique_ptr<MemoryBuffer> Buffer,
- TargetOptions options,
- std::string &errMsg) {
- ErrorOr<Module *> MOrErr =
- getLazyBitcodeModule(Buffer.get(), getGlobalContext());
+LTOModule *LTOModule::createInContext(const void *mem, size_t length,
+ TargetOptions options,
+ std::string &errMsg, StringRef path,
+ LLVMContext *Context) {
+ StringRef Data((const char *)mem, length);
+ MemoryBufferRef Buffer(Data, path);
+ return makeLTOModule(Buffer, options, errMsg, Context);
+}
+
+LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
+ TargetOptions options, std::string &errMsg,
+ LLVMContext *Context) {
+ std::unique_ptr<LLVMContext> OwnedContext;
+ if (!Context) {
+ OwnedContext = llvm::make_unique<LLVMContext>();
+ Context = OwnedContext.get();
+ }
+
+ ErrorOr<MemoryBufferRef> MBOrErr =
+ IRObjectFile::findBitcodeInMemBuffer(Buffer);
+ if (std::error_code EC = MBOrErr.getError()) {
+ errMsg = EC.message();
+ return nullptr;
+ }
+ ErrorOr<Module *> MOrErr = parseBitcodeFile(*MBOrErr, *Context);
if (std::error_code EC = MOrErr.getError()) {
errMsg = EC.message();
return nullptr;
@@ -138,20 +189,22 @@ LTOModule *LTOModule::makeLTOModule(std::unique_ptr<MemoryBuffer> Buffer,
CPU = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
CPU = "yonah";
- else if (Triple.getArch() == llvm::Triple::arm64 ||
- Triple.getArch() == llvm::Triple::aarch64)
+ else if (Triple.getArch() == llvm::Triple::aarch64)
CPU = "cyclone";
}
TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
options);
- M->materializeAllPermanently(true);
- M->setDataLayout(target->getDataLayout());
+ M->setDataLayout(target->getSubtargetImpl()->getDataLayout());
std::unique_ptr<object::IRObjectFile> IRObj(
- new object::IRObjectFile(std::move(Buffer), std::move(M)));
+ new object::IRObjectFile(Buffer, std::move(M)));
- LTOModule *Ret = new LTOModule(std::move(IRObj), target);
+ LTOModule *Ret;
+ if (OwnedContext)
+ Ret = new LTOModule(std::move(IRObj), target, std::move(OwnedContext));
+ else
+ Ret = new LTOModule(std::move(IRObj), target);
if (Ret->parseSymbols(errMsg)) {
delete Ret;
@@ -164,8 +217,8 @@ LTOModule *LTOModule::makeLTOModule(std::unique_ptr<MemoryBuffer> Buffer,
}
/// Create a MemoryBuffer from a memory range with an optional name.
-MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length,
- StringRef name) {
+std::unique_ptr<MemoryBuffer>
+LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) {
const char *startPtr = (const char*)mem;
return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false);
}
@@ -196,27 +249,24 @@ void LTOModule::addObjCClass(const GlobalVariable *clgv) {
// second slot in __OBJC,__class is pointer to superclass name
std::string superclassName;
if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
- NameAndAttributes info;
- StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(superclassName);
- if (!entry.getValue().name) {
- const char *symbolName = entry.getKey().data();
- info.name = symbolName;
+ auto IterBool =
+ _undefines.insert(std::make_pair(superclassName, NameAndAttributes()));
+ if (IterBool.second) {
+ NameAndAttributes &info = IterBool.first->second;
+ info.name = IterBool.first->first().data();
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
info.isFunction = false;
info.symbol = clgv;
- entry.setValue(info);
}
}
// third slot in __OBJC,__class is pointer to class name
std::string className;
if (objcClassNameFromExpression(c->getOperand(2), className)) {
- StringSet::value_type &entry = _defines.GetOrCreateValue(className);
- entry.setValue(1);
+ auto Iter = _defines.insert(className).first;
NameAndAttributes info;
- info.name = entry.getKey().data();
+ info.name = Iter->first().data();
info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
info.isFunction = false;
@@ -235,19 +285,17 @@ void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
return;
- NameAndAttributes info;
- StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(targetclassName);
+ auto IterBool =
+ _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
- if (entry.getValue().name)
+ if (!IterBool.second)
return;
- const char *symbolName = entry.getKey().data();
- info.name = symbolName;
+ NameAndAttributes &info = IterBool.first->second;
+ info.name = IterBool.first->first().data();
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
info.isFunction = false;
info.symbol = clgv;
- entry.setValue(info);
}
/// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
@@ -256,18 +304,17 @@ void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
return;
- NameAndAttributes info;
- StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(targetclassName);
- if (entry.getValue().name)
+ auto IterBool =
+ _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
+
+ if (!IterBool.second)
return;
- const char *symbolName = entry.getKey().data();
- info.name = symbolName;
+ NameAndAttributes &info = IterBool.first->second;
+ info.name = IterBool.first->first().data();
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
info.isFunction = false;
info.symbol = clgv;
- entry.setValue(info);
}
void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
@@ -348,30 +395,6 @@ void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
addDefinedSymbol(Name, F, true);
}
-static bool canBeHidden(const GlobalValue *GV) {
- // FIXME: this is duplicated with another static function in AsmPrinter.cpp
- GlobalValue::LinkageTypes L = GV->getLinkage();
-
- if (L != GlobalValue::LinkOnceODRLinkage)
- return false;
-
- if (GV->hasUnnamedAddr())
- return true;
-
- // If it is a non constant variable, it needs to be uniqued across shared
- // objects.
- if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
- if (!Var->isConstant())
- return false;
- }
-
- GlobalStatus GS;
- if (GlobalStatus::analyzeGlobal(GV, GS))
- return false;
-
- return !GS.IsCompared;
-}
-
void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
bool isFunction) {
// set alignment part log2() can have rounding errors
@@ -405,17 +428,16 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
attr |= LTO_SYMBOL_SCOPE_HIDDEN;
else if (def->hasProtectedVisibility())
attr |= LTO_SYMBOL_SCOPE_PROTECTED;
- else if (canBeHidden(def))
+ else if (canBeOmittedFromSymbolTable(def))
attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
else
attr |= LTO_SYMBOL_SCOPE_DEFAULT;
- StringSet::value_type &entry = _defines.GetOrCreateValue(Name);
- entry.setValue(1);
+ auto Iter = _defines.insert(Name).first;
// fill information structure
NameAndAttributes info;
- StringRef NameRef = entry.getKey();
+ StringRef NameRef = Iter->first();
info.name = NameRef.data();
assert(info.name[NameRef.size()] == '\0');
info.attributes = attr;
@@ -430,15 +452,13 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
/// defined list.
void LTOModule::addAsmGlobalSymbol(const char *name,
lto_symbol_attributes scope) {
- StringSet::value_type &entry = _defines.GetOrCreateValue(name);
+ auto IterBool = _defines.insert(name);
// only add new define if not already defined
- if (entry.getValue())
+ if (!IterBool.second)
return;
- entry.setValue(1);
-
- NameAndAttributes &info = _undefines[entry.getKey().data()];
+ NameAndAttributes &info = _undefines[IterBool.first->first().data()];
if (info.symbol == nullptr) {
// FIXME: This is trying to take care of module ASM like this:
@@ -450,7 +470,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
// much.
// fill information structure
- info.name = entry.getKey().data();
+ info.name = IterBool.first->first().data();
info.attributes =
LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
info.isFunction = false;
@@ -473,24 +493,21 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
/// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
/// undefined list.
void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
- StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(name);
+ auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
- _asm_undefines.push_back(entry.getKey().data());
+ _asm_undefines.push_back(IterBool.first->first().data());
// we already have the symbol
- if (entry.getValue().name)
+ if (!IterBool.second)
return;
uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
attr |= LTO_SYMBOL_SCOPE_DEFAULT;
- NameAndAttributes info;
- info.name = entry.getKey().data();
+ NameAndAttributes &info = IterBool.first->second;
+ info.name = IterBool.first->first().data();
info.attributes = attr;
info.isFunction = false;
info.symbol = nullptr;
-
- entry.setValue(info);
}
/// Add a symbol which isn't defined just yet to a list to be resolved later.
@@ -502,16 +519,15 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
Sym.printName(OS);
}
- StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(name);
+ auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
// we already have the symbol
- if (entry.getValue().name)
+ if (!IterBool.second)
return;
- NameAndAttributes info;
+ NameAndAttributes &info = IterBool.first->second;
- info.name = entry.getKey().data();
+ info.name = IterBool.first->first().data();
const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
@@ -522,8 +538,6 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
info.isFunction = isFunc;
info.symbol = decl;
-
- entry.setValue(info);
}
/// parseSymbols - Parse the symbols from the module and model-level ASM and add
(Note: the comment above presumably means module-level ASM.)
@@ -596,10 +610,15 @@ void LTOModule::parseMetadata() {
MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
- StringRef Op = _linkeropt_strings.
- GetOrCreateValue(MDOption->getString()).getKey();
- StringRef DepLibName = _target->getTargetLowering()->
- getObjFileLowering().getDepLibFromLinkerOpt(Op);
+ // FIXME: Make StringSet::insert match Self-Associative Container
+ // requirements, returning <iter,bool> rather than bool, and use that
+ // here.
+ StringRef Op =
+ _linkeropt_strings.insert(MDOption->getString()).first->first();
+ StringRef DepLibName = _target->getSubtargetImpl()
+ ->getTargetLowering()
+ ->getObjFileLowering()
+ .getDepLibFromLinkerOpt(Op);
if (!DepLibName.empty())
_deplibs.push_back(DepLibName.data());
else if (!Op.empty())
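[Editorial note, not part of the patch] Most of the LTOModule.cpp hunks above are a mechanical migration from StringMap::GetOrCreateValue() to insert(), which returns an <iterator, bool> pair, so "already present" checks no longer need a sentinel value in the mapped struct. A minimal sketch of the idiom, with a hypothetical two-field stand-in for the real NameAndAttributes struct:

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <utility>

// Hypothetical stand-in for the NameAndAttributes struct used by LTOModule.
struct NameAndAttributes {
  const char *name = nullptr;
  unsigned attributes = 0;
};

static void recordUndefined(llvm::StringMap<NameAndAttributes> &Undefines,
                            llvm::StringRef Sym, unsigned Attr) {
  // insert() returns <iterator, bool>; the bool replaces the old check on
  // entry.getValue().name being non-null.
  auto IterBool = Undefines.insert(std::make_pair(Sym, NameAndAttributes()));
  if (!IterBool.second)
    return; // already recorded, nothing to update
  NameAndAttributes &Info = IterBool.first->second;
  // StringMap keys are stored null-terminated, so handing out .data() is
  // safe, which is what the hunks above rely on.
  Info.name = IterBool.first->first().data();
  Info.attributes = Attr;
}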
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 5bb2862..8321bcf 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -17,6 +17,9 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/CommandLine.h"
@@ -33,61 +36,58 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
namespace {
- typedef SmallPtrSet<StructType*, 32> TypeSet;
+typedef SmallPtrSet<StructType *, 32> TypeSet;
class TypeMapTy : public ValueMapTypeRemapper {
- /// MappedTypes - This is a mapping from a source type to a destination type
- /// to use.
+ /// This is a mapping from a source type to a destination type to use.
DenseMap<Type*, Type*> MappedTypes;
- /// SpeculativeTypes - When checking to see if two subgraphs are isomorphic,
- /// we speculatively add types to MappedTypes, but keep track of them here in
- /// case we need to roll back.
+ /// When checking to see if two subgraphs are isomorphic, we speculatively
+ /// add types to MappedTypes, but keep track of them here in case we need to
+ /// roll back.
SmallVector<Type*, 16> SpeculativeTypes;
- /// SrcDefinitionsToResolve - This is a list of non-opaque structs in the
- /// source module that are mapped to an opaque struct in the destination
- /// module.
+ /// This is a list of non-opaque structs in the source module that are mapped
+ /// to an opaque struct in the destination module.
SmallVector<StructType*, 16> SrcDefinitionsToResolve;
- /// DstResolvedOpaqueTypes - This is the set of opaque types in the
- /// destination modules who are getting a body from the source module.
+ /// This is the set of opaque types in the destination modules who are
+ /// getting a body from the source module.
SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
public:
TypeMapTy(TypeSet &Set) : DstStructTypesSet(Set) {}
TypeSet &DstStructTypesSet;
- /// addTypeMapping - Indicate that the specified type in the destination
- /// module is conceptually equivalent to the specified type in the source
- /// module.
+ /// Indicate that the specified type in the destination module is conceptually
+ /// equivalent to the specified type in the source module.
void addTypeMapping(Type *DstTy, Type *SrcTy);
/// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
/// module from a type definition in the source module.
void linkDefinedTypeBodies();
- /// get - Return the mapped type to use for the specified input type from the
+ /// Return the mapped type to use for the specified input type from the
/// source module.
Type *get(Type *SrcTy);
FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));}
- /// dump - Dump out the type map for debugging purposes.
+ /// Dump out the type map for debugging purposes.
void dump() const {
for (DenseMap<Type*, Type*>::const_iterator
I = MappedTypes.begin(), E = MappedTypes.end(); I != E; ++I) {
dbgs() << "TypeMap: ";
- I->first->dump();
+ I->first->print(dbgs());
dbgs() << " => ";
- I->second->dump();
+ I->second->print(dbgs());
dbgs() << '\n';
}
}
private:
Type *getImpl(Type *T);
- /// remapType - Implement the ValueMapTypeRemapper interface.
+ /// Implement the ValueMapTypeRemapper interface.
Type *remapType(Type *SrcTy) override {
return get(SrcTy);
}
@@ -116,8 +116,8 @@ void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
SpeculativeTypes.clear();
}
-/// areTypesIsomorphic - Recursively walk this pair of types, returning true
-/// if they are isomorphic, false if they are not.
+/// Recursively walk this pair of types, returning true if they are isomorphic,
+/// false if they are not.
bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
// Two types with differing kinds are clearly not isomorphic.
if (DstTy->getTypeID() != SrcTy->getTypeID()) return false;
@@ -152,7 +152,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
// same opaque type then we fail.
if (cast<StructType>(DstTy)->isOpaque()) {
// We can only map one source type onto the opaque destination type.
- if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)))
+ if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)).second)
return false;
SrcDefinitionsToResolve.push_back(SSTy);
Entry = DstTy;
@@ -201,8 +201,8 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
return true;
}
-/// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
-/// module from a type definition in the source module.
+/// Produce a body for an opaque type in the dest module from a type definition
+/// in the source module.
void TypeMapTy::linkDefinedTypeBodies() {
SmallVector<Type*, 16> Elements;
SmallString<16> TmpName;
@@ -242,8 +242,6 @@ void TypeMapTy::linkDefinedTypeBodies() {
DstResolvedOpaqueTypes.clear();
}
-/// get - Return the mapped type to use for the specified input type from the
-/// source module.
Type *TypeMapTy::get(Type *Ty) {
Type *Result = getImpl(Ty);
@@ -253,7 +251,7 @@ Type *TypeMapTy::get(Type *Ty) {
return Result;
}
-/// getImpl - This is the recursive version of get().
+/// This is the recursive version of get().
Type *TypeMapTy::getImpl(Type *Ty) {
// If we already have an entry for this type, return it.
Type **Entry = &MappedTypes[Ty];
@@ -359,9 +357,9 @@ Type *TypeMapTy::getImpl(Type *Ty) {
namespace {
class ModuleLinker;
- /// ValueMaterializerTy - Creates prototypes for functions that are lazily
- /// linked on the fly. This speeds up linking for modules with many
- /// lazily linked functions of which few get used.
+ /// Creates prototypes for functions that are lazily linked on the fly. This
+ /// speeds up linking for modules with many lazily linked functions of which
+ /// few get used.
class ValueMaterializerTy : public ValueMaterializer {
TypeMapTy &TypeMap;
Module *DstM;
@@ -376,57 +374,73 @@ namespace {
Value *materializeValueFor(Value *V) override;
};
- /// ModuleLinker - This is an implementation class for the LinkModules
- /// function, which is the entrypoint for this file.
+ namespace {
+ class LinkDiagnosticInfo : public DiagnosticInfo {
+ const Twine &Msg;
+
+ public:
+ LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg);
+ void print(DiagnosticPrinter &DP) const override;
+ };
+ LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity,
+ const Twine &Msg)
+ : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {}
+ void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; }
+ }
+
+ /// This is an implementation class for the LinkModules function, which is the
+ /// entrypoint for this file.
class ModuleLinker {
Module *DstM, *SrcM;
TypeMapTy TypeMap;
ValueMaterializerTy ValMaterializer;
- /// ValueMap - Mapping of values from what they used to be in Src, to what
- /// they are now in DstM. ValueToValueMapTy is a ValueMap, which involves
- /// some overhead due to the use of Value handles which the Linker doesn't
- /// actually need, but this allows us to reuse the ValueMapper code.
+ /// Mapping of values from what they used to be in Src, to what they are now
+ /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead
+ /// due to the use of Value handles which the Linker doesn't actually need,
+ /// but this allows us to reuse the ValueMapper code.
ValueToValueMapTy ValueMap;
struct AppendingVarInfo {
- GlobalVariable *NewGV; // New aggregate global in dest module.
- Constant *DstInit; // Old initializer from dest module.
- Constant *SrcInit; // Old initializer from src module.
+ GlobalVariable *NewGV; // New aggregate global in dest module.
+ const Constant *DstInit; // Old initializer from dest module.
+ const Constant *SrcInit; // Old initializer from src module.
};
std::vector<AppendingVarInfo> AppendingVars;
- unsigned Mode; // Mode to treat source module.
-
// Set of items not to link in from source.
SmallPtrSet<const Value*, 16> DoNotLinkFromSource;
// Vector of functions to lazily link in.
std::vector<Function*> LazilyLinkFunctions;
- bool SuppressWarnings;
+ Linker::DiagnosticHandlerFunction DiagnosticHandler;
public:
- std::string ErrorMsg;
-
- ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM, unsigned mode,
- bool SuppressWarnings=false)
+ ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM,
+ Linker::DiagnosticHandlerFunction DiagnosticHandler)
: DstM(dstM), SrcM(srcM), TypeMap(Set),
- ValMaterializer(TypeMap, DstM, LazilyLinkFunctions), Mode(mode),
- SuppressWarnings(SuppressWarnings) {}
+ ValMaterializer(TypeMap, DstM, LazilyLinkFunctions),
+ DiagnosticHandler(DiagnosticHandler) {}
bool run();
private:
- /// emitError - Helper method for setting a message and returning an error
- /// code.
+ bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
+ const GlobalValue &Src);
+
+ /// Helper method for setting a message and returning an error code.
bool emitError(const Twine &Message) {
- ErrorMsg = Message.str();
+ DiagnosticHandler(LinkDiagnosticInfo(DS_Error, Message));
return true;
}
+ void emitWarning(const Twine &Message) {
+ DiagnosticHandler(LinkDiagnosticInfo(DS_Warning, Message));
+ }
+
bool getComdatLeader(Module *M, StringRef ComdatName,
const GlobalVariable *&GVar);
bool computeResultingSelectionKind(StringRef ComdatName,
@@ -439,16 +453,9 @@ namespace {
bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
bool &LinkFromSrc);
- /// getLinkageResult - This analyzes the two global values and determines
- /// what the result will look like in the destination module.
- bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
- GlobalValue::LinkageTypes &LT,
- GlobalValue::VisibilityTypes &Vis,
- bool &LinkFromSrc);
-
- /// getLinkedToGlobal - Given a global in the source module, return the
- /// global in the destination module that is being linked to, if any.
- GlobalValue *getLinkedToGlobal(GlobalValue *SrcGV) {
+ /// Given a global in the source module, return the global in the
+ /// destination module that is being linked to, if any.
+ GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
@@ -469,10 +476,20 @@ namespace {
void computeTypeMapping();
- bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
- bool linkGlobalProto(GlobalVariable *SrcGV);
- bool linkFunctionProto(Function *SrcF);
- bool linkAliasProto(GlobalAlias *SrcA);
+ void upgradeMismatchedGlobalArray(StringRef Name);
+ void upgradeMismatchedGlobals();
+
+ bool linkAppendingVarProto(GlobalVariable *DstGV,
+ const GlobalVariable *SrcGV);
+
+ bool linkGlobalValueProto(GlobalValue *GV);
+ GlobalValue *linkGlobalVariableProto(const GlobalVariable *SGVar,
+ GlobalValue *DGV, bool LinkFromSrc);
+ GlobalValue *linkFunctionProto(const Function *SF, GlobalValue *DGV,
+ bool LinkFromSrc);
+ GlobalValue *linkGlobalAliasProto(const GlobalAlias *SGA, GlobalValue *DGV,
+ bool LinkFromSrc);
+
bool linkModuleFlagsMetadata();
void linkAppendingVarInit(const AppendingVarInfo &AVI);
@@ -483,9 +500,9 @@ namespace {
};
}
-/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
-/// in the symbol table. This is good for all clients except for us. Go
-/// through the trouble to force this back.
+/// The LLVM SymbolTable class autorenames globals that conflict in the symbol
+/// table. This is good for all clients except for us. Go through the trouble
+/// to force this back.
static void forceRenaming(GlobalValue *GV, StringRef Name) {
// If the global doesn't force its name or if it already has the right name,
// there is nothing for us to do.
@@ -504,8 +521,8 @@ static void forceRenaming(GlobalValue *GV, StringRef Name) {
}
}
-/// copyGVAttributes - copy additional attributes (those not needed to construct
-/// a GlobalValue) from the SrcGV to the DestGV.
+/// copy additional attributes (those not needed to construct a GlobalValue)
+/// from the SrcGV to the DestGV.
static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
// Use the maximum alignment, rather than just copying the alignment of SrcGV.
auto *DestGO = dyn_cast<GlobalObject>(DestGV);
@@ -543,6 +560,11 @@ Value *ValueMaterializerTy::materializeValueFor(Value *V) {
SF->getLinkage(), SF->getName(), DstM);
copyGVAttributes(DF, SF);
+ if (Comdat *SC = SF->getComdat()) {
+ Comdat *DC = DstM->getOrInsertComdat(SC->getName());
+ DF->setComdat(DC);
+ }
+
LazilyLinkFunctions.push_back(SF);
return DF;
}
@@ -644,99 +666,106 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
bool ModuleLinker::getComdatResult(const Comdat *SrcC,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
+ Comdat::SelectionKind SSK = SrcC->getSelectionKind();
StringRef ComdatName = SrcC->getName();
Module::ComdatSymTabType &ComdatSymTab = DstM->getComdatSymbolTable();
Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
- if (DstCI != ComdatSymTab.end()) {
- const Comdat *DstC = &DstCI->second;
- Comdat::SelectionKind SSK = SrcC->getSelectionKind();
- Comdat::SelectionKind DSK = DstC->getSelectionKind();
- if (computeResultingSelectionKind(ComdatName, SSK, DSK, Result, LinkFromSrc))
- return true;
+
+ if (DstCI == ComdatSymTab.end()) {
+ // Use the comdat if it is only available in one of the modules.
+ LinkFromSrc = true;
+ Result = SSK;
+ return false;
}
- return false;
+
+ const Comdat *DstC = &DstCI->second;
+ Comdat::SelectionKind DSK = DstC->getSelectionKind();
+ return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
+ LinkFromSrc);
}
-/// getLinkageResult - This analyzes the two global values and determines what
-/// the result will look like in the destination module. In particular, it
-/// computes the resultant linkage type and visibility, computes whether the
-/// global in the source should be copied over to the destination (replacing
-/// the existing one), and computes whether this linkage is an error or not.
-bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
- GlobalValue::LinkageTypes &LT,
- GlobalValue::VisibilityTypes &Vis,
- bool &LinkFromSrc) {
- assert(Dest && "Must have two globals being queried");
- assert(!Src->hasLocalLinkage() &&
- "If Src has internal linkage, Dest shouldn't be set!");
-
- bool SrcIsDeclaration = Src->isDeclaration() && !Src->isMaterializable();
- bool DestIsDeclaration = Dest->isDeclaration();
+bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
+ const GlobalValue &Dest,
+ const GlobalValue &Src) {
+ // We always have to add Src if it has appending linkage.
+ if (Src.hasAppendingLinkage()) {
+ LinkFromSrc = true;
+ return false;
+ }
+
+ bool SrcIsDeclaration = Src.isDeclarationForLinker();
+ bool DestIsDeclaration = Dest.isDeclarationForLinker();
if (SrcIsDeclaration) {
// If Src is external or if both Src & Dest are external.. Just link the
// external globals, we aren't adding anything.
- if (Src->hasDLLImportStorageClass()) {
+ if (Src.hasDLLImportStorageClass()) {
// If one of GVs is marked as DLLImport, result should be dllimport'ed.
- if (DestIsDeclaration) {
- LinkFromSrc = true;
- LT = Src->getLinkage();
- }
- } else if (Dest->hasExternalWeakLinkage()) {
- // If the Dest is weak, use the source linkage.
- LinkFromSrc = true;
- LT = Src->getLinkage();
- } else {
- LinkFromSrc = false;
- LT = Dest->getLinkage();
+ LinkFromSrc = DestIsDeclaration;
+ return false;
}
- } else if (DestIsDeclaration && !Dest->hasDLLImportStorageClass()) {
+ // If the Dest is weak, use the source linkage.
+ LinkFromSrc = Dest.hasExternalWeakLinkage();
+ return false;
+ }
+
+ if (DestIsDeclaration) {
// If Dest is external but Src is not:
LinkFromSrc = true;
- LT = Src->getLinkage();
- } else if (Src->isWeakForLinker()) {
- // At this point we know that Dest has LinkOnce, External*, Weak, Common,
- // or DLL* linkage.
- if (Dest->hasExternalWeakLinkage() ||
- Dest->hasAvailableExternallyLinkage() ||
- (Dest->hasLinkOnceLinkage() &&
- (Src->hasWeakLinkage() || Src->hasCommonLinkage()))) {
+ return false;
+ }
+
+ if (Src.hasCommonLinkage()) {
+ if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) {
LinkFromSrc = true;
- LT = Src->getLinkage();
- } else {
- LinkFromSrc = false;
- LT = Dest->getLinkage();
+ return false;
}
- } else if (Dest->isWeakForLinker()) {
- // At this point we know that Src has External* or DLL* linkage.
- if (Src->hasExternalWeakLinkage()) {
+
+ if (!Dest.hasCommonLinkage()) {
LinkFromSrc = false;
- LT = Dest->getLinkage();
- } else {
+ return false;
+ }
+
+ // FIXME: Make datalayout mandatory and just use getDataLayout().
+ DataLayout DL(Dest.getParent());
+
+ uint64_t DestSize = DL.getTypeAllocSize(Dest.getType()->getElementType());
+ uint64_t SrcSize = DL.getTypeAllocSize(Src.getType()->getElementType());
+ LinkFromSrc = SrcSize > DestSize;
+ return false;
+ }
+
+ if (Src.isWeakForLinker()) {
+ assert(!Dest.hasExternalWeakLinkage());
+ assert(!Dest.hasAvailableExternallyLinkage());
+
+ if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) {
LinkFromSrc = true;
- LT = GlobalValue::ExternalLinkage;
+ return false;
}
- } else {
- assert((Dest->hasExternalLinkage() || Dest->hasExternalWeakLinkage()) &&
- (Src->hasExternalLinkage() || Src->hasExternalWeakLinkage()) &&
- "Unexpected linkage type!");
- return emitError("Linking globals named '" + Src->getName() +
- "': symbol multiply defined!");
+
+ LinkFromSrc = false;
+ return false;
}
- // Compute the visibility. We follow the rules in the System V Application
- // Binary Interface.
- assert(!GlobalValue::isLocalLinkage(LT) &&
- "Symbols with local linkage should not be merged");
- Vis = isLessConstraining(Src->getVisibility(), Dest->getVisibility()) ?
- Dest->getVisibility() : Src->getVisibility();
- return false;
+ if (Dest.isWeakForLinker()) {
+ assert(Src.hasExternalLinkage());
+ LinkFromSrc = true;
+ return false;
+ }
+
+ assert(!Src.hasExternalWeakLinkage());
+ assert(!Dest.hasExternalWeakLinkage());
+ assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() &&
+ "Unexpected linkage type!");
+ return emitError("Linking globals named '" + Src.getName() +
+ "': symbol multiply defined!");
}
-/// computeTypeMapping - Loop over all of the linked values to compute type
-/// mappings. For example, if we link "extern Foo *x" and "Foo *x = NULL", then
-/// we have two struct types 'Foo' but one got renamed when the module was
-/// loaded into the same LLVMContext.
+/// Loop over all of the linked values to compute type mappings. For example,
+/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct
+/// types 'Foo' but one got renamed when the module was loaded into the same
+/// LLVMContext.
void ModuleLinker::computeTypeMapping() {
// Incorporate globals.
for (Module::global_iterator I = SrcM->global_begin(),
@@ -811,10 +840,87 @@ void ModuleLinker::computeTypeMapping() {
TypeMap.linkDefinedTypeBodies();
}
-/// linkAppendingVarProto - If there were any appending global variables, link
-/// them together now. Return true on error.
+static void upgradeGlobalArray(GlobalVariable *GV) {
+ ArrayType *ATy = cast<ArrayType>(GV->getType()->getElementType());
+ StructType *OldTy = cast<StructType>(ATy->getElementType());
+ assert(OldTy->getNumElements() == 2 && "Expected to upgrade from 2 elements");
+
+ // Get the upgraded 3 element type.
+ PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo();
+ Type *Tys[3] = {OldTy->getElementType(0), OldTy->getElementType(1),
+ VoidPtrTy};
+ StructType *NewTy = StructType::get(GV->getContext(), Tys, false);
+
+ // Build new constants with a null third field filled in.
+ Constant *OldInitC = GV->getInitializer();
+ ConstantArray *OldInit = dyn_cast<ConstantArray>(OldInitC);
+ if (!OldInit && !isa<ConstantAggregateZero>(OldInitC))
+ // Invalid initializer; give up.
+ return;
+ std::vector<Constant *> Initializers;
+ if (OldInit && OldInit->getNumOperands()) {
+ Value *Null = Constant::getNullValue(VoidPtrTy);
+ for (Use &U : OldInit->operands()) {
+ ConstantStruct *Init = cast<ConstantStruct>(U.get());
+ Initializers.push_back(ConstantStruct::get(
+ NewTy, Init->getOperand(0), Init->getOperand(1), Null, nullptr));
+ }
+ }
+ assert(Initializers.size() == ATy->getNumElements() &&
+ "Failed to copy all array elements");
+
+ // Replace the old GV with a new one.
+ ATy = ArrayType::get(NewTy, Initializers.size());
+ Constant *NewInit = ConstantArray::get(ATy, Initializers);
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "",
+ GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(),
+ GV->isExternallyInitialized());
+ NewGV->copyAttributesFrom(GV);
+ NewGV->takeName(GV);
+ assert(GV->use_empty() && "program cannot use initializer list");
+ GV->eraseFromParent();
+}
+
+void ModuleLinker::upgradeMismatchedGlobalArray(StringRef Name) {
+ // Look for the global arrays.
+ auto *DstGV = dyn_cast_or_null<GlobalVariable>(DstM->getNamedValue(Name));
+ if (!DstGV)
+ return;
+ auto *SrcGV = dyn_cast_or_null<GlobalVariable>(SrcM->getNamedValue(Name));
+ if (!SrcGV)
+ return;
+
+ // Check if the types already match.
+ auto *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+ auto *SrcTy =
+ cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
+ if (DstTy == SrcTy)
+ return;
+
+ // Grab the element types. We can only upgrade an array of a two-field
+ // struct. Only bother if the other one has three fields.
+ auto *DstEltTy = cast<StructType>(DstTy->getElementType());
+ auto *SrcEltTy = cast<StructType>(SrcTy->getElementType());
+ if (DstEltTy->getNumElements() == 2 && SrcEltTy->getNumElements() == 3) {
+ upgradeGlobalArray(DstGV);
+ return;
+ }
+ if (DstEltTy->getNumElements() == 3 && SrcEltTy->getNumElements() == 2)
+ upgradeGlobalArray(SrcGV);
+
+ // We can't upgrade any other differences.
+}
+
+void ModuleLinker::upgradeMismatchedGlobals() {
+ upgradeMismatchedGlobalArray("llvm.global_ctors");
+ upgradeMismatchedGlobalArray("llvm.global_dtors");
+}
+
+/// If there were any appending global variables, link them together now.
+/// Return true on error.
bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
- GlobalVariable *SrcGV) {
+ const GlobalVariable *SrcGV) {
if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
return emitError("Linking globals named '" + SrcGV->getName() +
@@ -879,252 +985,167 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
return false;
}
-/// linkGlobalProto - Loop through the global variables in the src module and
-/// merge them into the dest module.
-bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
+bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) {
GlobalValue *DGV = getLinkedToGlobal(SGV);
- llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+
+ // Handle the ultra special appending linkage case first.
+ if (DGV && DGV->hasAppendingLinkage())
+ return linkAppendingVarProto(cast<GlobalVariable>(DGV),
+ cast<GlobalVariable>(SGV));
+
+ bool LinkFromSrc = true;
+ Comdat *C = nullptr;
+ GlobalValue::VisibilityTypes Visibility = SGV->getVisibility();
bool HasUnnamedAddr = SGV->hasUnnamedAddr();
- bool LinkFromSrc = false;
- Comdat *DC = nullptr;
if (const Comdat *SC = SGV->getComdat()) {
Comdat::SelectionKind SK;
std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
- DC = DstM->getOrInsertComdat(SC->getName());
- DC->setSelectionKind(SK);
+ C = DstM->getOrInsertComdat(SC->getName());
+ C->setSelectionKind(SK);
+ } else if (DGV) {
+ if (shouldLinkFromSource(LinkFromSrc, *DGV, *SGV))
+ return true;
}
- if (DGV) {
- if (!DC) {
- // Concatenation of appending linkage variables is magic and handled later.
- if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
- return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
-
- // Determine whether linkage of these two globals follows the source
- // module's definition or the destination module's definition.
- GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
- GlobalValue::VisibilityTypes NV;
- if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc))
- return true;
- NewVisibility = NV;
- HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
-
- // If we're not linking from the source, then keep the definition that we
- // have.
- if (!LinkFromSrc) {
- // Special case for const propagation.
- if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
- if (DGVar->isDeclaration() && SGV->isConstant() &&
- !DGVar->isConstant())
- DGVar->setConstant(true);
-
- // Set calculated linkage, visibility and unnamed_addr.
- DGV->setLinkage(NewLinkage);
- DGV->setVisibility(*NewVisibility);
- DGV->setUnnamedAddr(HasUnnamedAddr);
- }
- }
+ if (!LinkFromSrc) {
+ // Track the source global so that we don't attempt to copy it over when
+ // processing global initializers.
+ DoNotLinkFromSource.insert(SGV);
- if (!LinkFromSrc) {
+ if (DGV)
// Make sure to remember this mapping.
- ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
-
- // Track the source global so that we don't attempt to copy it over when
- // processing global initializers.
- DoNotLinkFromSource.insert(SGV);
+ ValueMap[SGV] =
+ ConstantExpr::getBitCast(DGV, TypeMap.get(SGV->getType()));
+ }
- return false;
- }
+ if (DGV) {
+ Visibility = isLessConstraining(Visibility, DGV->getVisibility())
+ ? DGV->getVisibility()
+ : Visibility;
+ HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
}
- // If the Comdat this variable was inside of wasn't selected, skip it.
- if (DC && !DGV && !LinkFromSrc) {
- DoNotLinkFromSource.insert(SGV);
+ if (!LinkFromSrc && !DGV)
return false;
+
+ GlobalValue *NewGV;
+ if (auto *SGVar = dyn_cast<GlobalVariable>(SGV)) {
+ NewGV = linkGlobalVariableProto(SGVar, DGV, LinkFromSrc);
+ if (!NewGV)
+ return true;
+ } else if (auto *SF = dyn_cast<Function>(SGV)) {
+ NewGV = linkFunctionProto(SF, DGV, LinkFromSrc);
+ } else {
+ NewGV = linkGlobalAliasProto(cast<GlobalAlias>(SGV), DGV, LinkFromSrc);
}
- // No linking to be performed or linking from the source: simply create an
- // identical version of the symbol over in the dest module... the
- // initializer will be filled in later by LinkGlobalInits.
- GlobalVariable *NewDGV =
- new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()),
- SGV->isConstant(), SGV->getLinkage(), /*init*/nullptr,
- SGV->getName(), /*insertbefore*/nullptr,
- SGV->getThreadLocalMode(),
- SGV->getType()->getAddressSpace());
- // Propagate alignment, visibility and section info.
- copyGVAttributes(NewDGV, SGV);
- if (NewVisibility)
- NewDGV->setVisibility(*NewVisibility);
- NewDGV->setUnnamedAddr(HasUnnamedAddr);
+ if (NewGV) {
+ if (NewGV != DGV)
+ copyGVAttributes(NewGV, SGV);
- if (DC)
- NewDGV->setComdat(DC);
+ NewGV->setUnnamedAddr(HasUnnamedAddr);
+ NewGV->setVisibility(Visibility);
- if (DGV) {
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
- DGV->eraseFromParent();
+ if (auto *NewGO = dyn_cast<GlobalObject>(NewGV)) {
+ if (C)
+ NewGO->setComdat(C);
+ }
+
+ // Make sure to remember this mapping.
+ if (NewGV != DGV) {
+ if (DGV) {
+ DGV->replaceAllUsesWith(
+ ConstantExpr::getBitCast(NewGV, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+ ValueMap[SGV] = NewGV;
+ }
}
- // Make sure to remember this mapping.
- ValueMap[SGV] = NewDGV;
return false;
}
-/// linkFunctionProto - Link the function in the source module into the
-/// destination module if needed, setting up mapping information.
-bool ModuleLinker::linkFunctionProto(Function *SF) {
- GlobalValue *DGV = getLinkedToGlobal(SF);
- llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
- bool HasUnnamedAddr = SF->hasUnnamedAddr();
+/// Loop through the global variables in the src module and merge them into the
+/// dest module.
+GlobalValue *ModuleLinker::linkGlobalVariableProto(const GlobalVariable *SGVar,
+ GlobalValue *DGV,
+ bool LinkFromSrc) {
+ unsigned Alignment = 0;
+ bool ClearConstant = false;
- bool LinkFromSrc = false;
- Comdat *DC = nullptr;
- if (const Comdat *SC = SF->getComdat()) {
- Comdat::SelectionKind SK;
- std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
- DC = DstM->getOrInsertComdat(SC->getName());
- DC->setSelectionKind(SK);
+ if (DGV) {
+ if (DGV->hasCommonLinkage() && SGVar->hasCommonLinkage())
+ Alignment = std::max(SGVar->getAlignment(), DGV->getAlignment());
+
+ auto *DGVar = dyn_cast<GlobalVariable>(DGV);
+ if (!SGVar->isConstant() || (DGVar && !DGVar->isConstant()))
+ ClearConstant = true;
}
- if (DGV) {
- if (!DC) {
- GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
- GlobalValue::VisibilityTypes NV;
- if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc))
- return true;
- NewVisibility = NV;
- HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
-
- if (!LinkFromSrc) {
- // Set calculated linkage
- DGV->setLinkage(NewLinkage);
- DGV->setVisibility(*NewVisibility);
- DGV->setUnnamedAddr(HasUnnamedAddr);
- }
+ if (!LinkFromSrc) {
+ if (auto *NewGVar = dyn_cast<GlobalVariable>(DGV)) {
+ if (Alignment)
+ NewGVar->setAlignment(Alignment);
+ if (NewGVar->isDeclaration() && ClearConstant)
+ NewGVar->setConstant(false);
}
+ return DGV;
+ }
- if (!LinkFromSrc) {
- // Make sure to remember this mapping.
- ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
+ // No linking to be performed or linking from the source: simply create an
+ // identical version of the symbol over in the dest module... the
+ // initializer will be filled in later by LinkGlobalInits.
+ GlobalVariable *NewDGV = new GlobalVariable(
+ *DstM, TypeMap.get(SGVar->getType()->getElementType()),
+ SGVar->isConstant(), SGVar->getLinkage(), /*init*/ nullptr,
+ SGVar->getName(), /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(),
+ SGVar->getType()->getAddressSpace());
- // Track the function from the source module so we don't attempt to remap
- // it.
- DoNotLinkFromSource.insert(SF);
+ if (Alignment)
+ NewDGV->setAlignment(Alignment);
- return false;
- }
- }
+ return NewDGV;
+}
+
+/// Link the function in the source module into the destination module if
+/// needed, setting up mapping information.
+GlobalValue *ModuleLinker::linkFunctionProto(const Function *SF,
+ GlobalValue *DGV,
+ bool LinkFromSrc) {
+ if (!LinkFromSrc)
+ return DGV;
// If the function is to be lazily linked, don't create it just yet.
// The ValueMaterializerTy will deal with creating it if it's used.
if (!DGV && (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() ||
SF->hasAvailableExternallyLinkage())) {
DoNotLinkFromSource.insert(SF);
- return false;
- }
-
- // If the Comdat this function was inside of wasn't selected, skip it.
- if (DC && !DGV && !LinkFromSrc) {
- DoNotLinkFromSource.insert(SF);
- return false;
+ return nullptr;
}
// If there is no linkage to be performed or we are linking from the source,
// bring SF over.
- Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
- SF->getLinkage(), SF->getName(), DstM);
- copyGVAttributes(NewDF, SF);
- if (NewVisibility)
- NewDF->setVisibility(*NewVisibility);
- NewDF->setUnnamedAddr(HasUnnamedAddr);
-
- if (DC)
- NewDF->setComdat(DC);
-
- if (DGV) {
- // Any uses of DF need to change to NewDF, with cast.
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
- DGV->eraseFromParent();
- }
-
- ValueMap[SF] = NewDF;
- return false;
+ return Function::Create(TypeMap.get(SF->getFunctionType()), SF->getLinkage(),
+ SF->getName(), DstM);
}
-/// LinkAliasProto - Set up prototypes for any aliases that come over from the
-/// source module.
-bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
- GlobalValue *DGV = getLinkedToGlobal(SGA);
- llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
- bool HasUnnamedAddr = SGA->hasUnnamedAddr();
-
- bool LinkFromSrc = false;
- Comdat *DC = nullptr;
- if (const Comdat *SC = SGA->getComdat()) {
- Comdat::SelectionKind SK;
- std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
- DC = DstM->getOrInsertComdat(SC->getName());
- DC->setSelectionKind(SK);
- }
-
- if (DGV) {
- if (!DC) {
- GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
- GlobalValue::VisibilityTypes NV;
- if (getLinkageResult(DGV, SGA, NewLinkage, NV, LinkFromSrc))
- return true;
- NewVisibility = NV;
- HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
-
- if (!LinkFromSrc) {
- // Set calculated linkage.
- DGV->setLinkage(NewLinkage);
- DGV->setVisibility(*NewVisibility);
- DGV->setUnnamedAddr(HasUnnamedAddr);
- }
- }
-
- if (!LinkFromSrc) {
- // Make sure to remember this mapping.
- ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
-
- // Track the alias from the source module so we don't attempt to remap it.
- DoNotLinkFromSource.insert(SGA);
-
- return false;
- }
- }
-
- // If the Comdat this alias was inside of wasn't selected, skip it.
- if (DC && !DGV && !LinkFromSrc) {
- DoNotLinkFromSource.insert(SGA);
- return false;
- }
+/// Set up prototypes for any aliases that come over from the source module.
+GlobalValue *ModuleLinker::linkGlobalAliasProto(const GlobalAlias *SGA,
+ GlobalValue *DGV,
+ bool LinkFromSrc) {
+ if (!LinkFromSrc)
+ return DGV;
// If there is no linkage to be performed or we're linking from the source,
// bring over SGA.
auto *PTy = cast<PointerType>(TypeMap.get(SGA->getType()));
- auto *NewDA =
- GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
- SGA->getLinkage(), SGA->getName(), DstM);
- copyGVAttributes(NewDA, SGA);
- if (NewVisibility)
- NewDA->setVisibility(*NewVisibility);
- NewDA->setUnnamedAddr(HasUnnamedAddr);
-
- if (DGV) {
- // Any uses of DGV need to change to NewDA, with cast.
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
- DGV->eraseFromParent();
- }
-
- ValueMap[SGA] = NewDA;
- return false;
+ return GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ SGA->getLinkage(), SGA->getName(), DstM);
}
-static void getArrayElements(Constant *C, SmallVectorImpl<Constant*> &Dest) {
+static void getArrayElements(const Constant *C,
+ SmallVectorImpl<Constant *> &Dest) {
unsigned NumElements = cast<ArrayType>(C->getType())->getNumElements();
for (unsigned i = 0; i != NumElements; ++i)
@@ -1133,18 +1154,38 @@ static void getArrayElements(Constant *C, SmallVectorImpl<Constant*> &Dest) {
void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
// Merge the initializer.
- SmallVector<Constant*, 16> Elements;
- getArrayElements(AVI.DstInit, Elements);
+ SmallVector<Constant *, 16> DstElements;
+ getArrayElements(AVI.DstInit, DstElements);
- Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap, &ValMaterializer);
- getArrayElements(SrcInit, Elements);
+ SmallVector<Constant *, 16> SrcElements;
+ getArrayElements(AVI.SrcInit, SrcElements);
ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
- AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
+
+ StringRef Name = AVI.NewGV->getName();
+ bool IsNewStructor =
+ (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") &&
+ cast<StructType>(NewType->getElementType())->getNumElements() == 3;
+
+ for (auto *V : SrcElements) {
+ if (IsNewStructor) {
+ Constant *Key = V->getAggregateElement(2);
+ if (DoNotLinkFromSource.count(Key))
+ continue;
+ }
+ DstElements.push_back(
+ MapValue(V, ValueMap, RF_None, &TypeMap, &ValMaterializer));
+ }
+ if (IsNewStructor) {
+ NewType = ArrayType::get(NewType->getElementType(), DstElements.size());
+ AVI.NewGV->mutateType(PointerType::get(NewType, 0));
+ }
+
+ AVI.NewGV->setInitializer(ConstantArray::get(NewType, DstElements));
}
-/// linkGlobalInits - Update the initializers in the Dest module now that all
-/// globals that may be referenced are in Dest.
+/// Update the initializers in the Dest module now that all globals that may be
+/// referenced are in Dest.
void ModuleLinker::linkGlobalInits() {
// Loop over all of the globals in the src module, mapping them over as we go
for (Module::const_global_iterator I = SrcM->global_begin(),
@@ -1161,9 +1202,9 @@ void ModuleLinker::linkGlobalInits() {
}
}
-/// linkFunctionBody - Copy the source function over into the dest function and
-/// fix up references to values. At this point we know that Dest is an external
-/// function, and that Src is not.
+/// Copy the source function over into the dest function and fix up references
+/// to values. At this point we know that Dest is an external function, and
+/// that Src is not.
void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration());
@@ -1177,25 +1218,17 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
ValueMap[I] = DI;
}
- if (Mode == Linker::DestroySource) {
- // Splice the body of the source function into the dest function.
- Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
+ // Splice the body of the source function into the dest function.
+ Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
- // At this point, all of the instructions and values of the function are now
- // copied over. The only problem is that they are still referencing values in
- // the Source function as operands. Loop through all of the operands of the
- // functions and patch them up to point to the local versions.
- for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries,
- &TypeMap, &ValMaterializer);
-
- } else {
- // Clone the body of the function into the dest function.
- SmallVector<ReturnInst*, 8> Returns; // Ignore returns.
- CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", nullptr,
- &TypeMap, &ValMaterializer);
- }
+ // At this point, all of the instructions and values of the function are now
+ // copied over. The only problem is that they are still referencing values in
+ // the Source function as operands. Loop through all of the operands of the
+ // functions and patch them up to point to the local versions.
+ for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap,
+ &ValMaterializer);
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
@@ -1204,7 +1237,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
}
-/// linkAliasBodies - Insert all of the aliases in Src into the Dest module.
+/// Insert all of the aliases in Src into the Dest module.
void ModuleLinker::linkAliasBodies() {
for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
I != E; ++I) {
@@ -1219,8 +1252,7 @@ void ModuleLinker::linkAliasBodies() {
}
}
-/// linkNamedMDNodes - Insert all of the named MDNodes in Src into the Dest
-/// module.
+/// Insert all of the named MDNodes in Src into the Dest module.
void ModuleLinker::linkNamedMDNodes() {
const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
@@ -1235,8 +1267,7 @@ void ModuleLinker::linkNamedMDNodes() {
}
}
-/// linkModuleFlagsMetadata - Merge the linker flags in Src into the Dest
-/// module.
+/// Merge the linker flags in Src into the Dest module.
bool ModuleLinker::linkModuleFlagsMetadata() {
// If the source module has no module flags, we are done.
const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
@@ -1336,10 +1367,8 @@ bool ModuleLinker::linkModuleFlagsMetadata() {
case Module::Warning: {
// Emit a warning if the values differ.
if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
- if (!SuppressWarnings) {
- errs() << "WARNING: linking module flags '" << ID->getString()
- << "': IDs have conflicting values";
- }
+ emitWarning("linking module flags '" + ID->getString() +
+ "': IDs have conflicting values");
}
continue;
}
@@ -1406,23 +1435,19 @@ bool ModuleLinker::run() {
if (SrcM->getDataLayout() && DstM->getDataLayout() &&
*SrcM->getDataLayout() != *DstM->getDataLayout()) {
- if (!SuppressWarnings) {
- errs() << "WARNING: Linking two modules of different data layouts: '"
- << SrcM->getModuleIdentifier() << "' is '"
- << SrcM->getDataLayoutStr() << "' whereas '"
- << DstM->getModuleIdentifier() << "' is '"
- << DstM->getDataLayoutStr() << "'\n";
- }
+ emitWarning("Linking two modules of different data layouts: '" +
+ SrcM->getModuleIdentifier() + "' is '" +
+ SrcM->getDataLayoutStr() + "' whereas '" +
+ DstM->getModuleIdentifier() + "' is '" +
+ DstM->getDataLayoutStr() + "'\n");
}
if (!SrcM->getTargetTriple().empty() &&
DstM->getTargetTriple() != SrcM->getTargetTriple()) {
- if (!SuppressWarnings) {
- errs() << "WARNING: Linking two modules of different target triples: "
- << SrcM->getModuleIdentifier() << "' is '"
- << SrcM->getTargetTriple() << "' whereas '"
- << DstM->getModuleIdentifier() << "' is '"
- << DstM->getTargetTriple() << "'\n";
- }
+ emitWarning("Linking two modules of different target triples: " +
+ SrcM->getModuleIdentifier() + "' is '" +
+ SrcM->getTargetTriple() + "' whereas '" +
+ DstM->getModuleIdentifier() + "' is '" +
+ DstM->getTargetTriple() + "'\n");
}
// Append the module inline asm string.
@@ -1438,7 +1463,7 @@ bool ModuleLinker::run() {
computeTypeMapping();
ComdatsChosen.clear();
- for (const StringMapEntry<llvm::Comdat> &SMEC : SrcM->getComdatSymbolTable()) {
+ for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
const Comdat &C = SMEC.getValue();
if (ComdatsChosen.count(&C))
continue;
@@ -1449,11 +1474,14 @@ bool ModuleLinker::run() {
ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
}
+ // Upgrade mismatched global arrays.
+ upgradeMismatchedGlobals();
+
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
for (Module::global_iterator I = SrcM->global_begin(),
E = SrcM->global_end(); I != E; ++I)
- if (linkGlobalProto(I))
+ if (linkGlobalValueProto(I))
return true;
// Link the functions together between the two modules, without doing function
@@ -1462,13 +1490,13 @@ bool ModuleLinker::run() {
// all of the global values that may be referenced are available in our
// ValueMap.
for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I)
- if (linkFunctionProto(I))
+ if (linkGlobalValueProto(I))
return true;
// If there were any aliases, link them now.
for (Module::alias_iterator I = SrcM->alias_begin(),
E = SrcM->alias_end(); I != E; ++I)
- if (linkAliasProto(I))
+ if (linkGlobalValueProto(I))
return true;
for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
@@ -1487,13 +1515,13 @@ bool ModuleLinker::run() {
SF->getPrefixData(), ValueMap, RF_None, &TypeMap, &ValMaterializer));
}
- // Skip if no body (function is external) or materialize.
- if (SF->isDeclaration()) {
- if (!SF->isMaterializable())
- continue;
- if (SF->Materialize(&ErrorMsg))
- return true;
- }
+ // Materialize if needed.
+ if (std::error_code EC = SF->materialize())
+ return emitError(EC.message());
+
+ // Skip if no body (function is external).
+ if (SF->isDeclaration())
+ continue;
linkFunctionBody(DF, SF);
SF->Dematerialize();
@@ -1536,13 +1564,13 @@ bool ModuleLinker::run() {
&ValMaterializer));
}
- // Materialize if necessary.
- if (SF->isDeclaration()) {
- if (!SF->isMaterializable())
- continue;
- if (SF->Materialize(&ErrorMsg))
- return true;
- }
+ // Materialize if needed.
+ if (std::error_code EC = SF->materialize())
+ return emitError(EC.message());
+
+ // Skip if no body (function is external).
+ if (SF->isDeclaration())
+ continue;
// Erase from vector *before* the function body is linked - linkFunctionBody could
// invalidate I.
@@ -1566,13 +1594,25 @@ bool ModuleLinker::run() {
return false;
}
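
The two hunks above replace the old Materialize(&ErrorMsg) dance with std::error_code: materialize first, abort the whole link on failure, and only then skip bodiless declarations. A self-contained sketch of that control flow (materializeBody, emitError, and linkOneFunction are illustrative stand-ins, not the LLVM API):

#include <iostream>
#include <string>
#include <system_error>

// Stand-in for Function::materialize(); a default-constructed error_code
// means success and converts to false.
static std::error_code materializeBody(bool Available) {
  if (!Available)
    return std::make_error_code(std::errc::io_error);
  return std::error_code();
}

// Stand-in for ModuleLinker::emitError(): report and signal failure (true).
static bool emitError(const std::string &Message) {
  std::cerr << "error: " << Message << '\n';
  return true;
}

static bool linkOneFunction(bool BodyAvailable, bool IsDeclaration) {
  // Materialize if needed; any error aborts the link.
  if (std::error_code EC = materializeBody(BodyAvailable))
    return emitError(EC.message());

  // Skip if no body (function is external).
  if (IsDeclaration)
    return false;

  std::cout << "linking function body\n";
  return false;
}

int main() {
  linkOneFunction(/*BodyAvailable=*/true, /*IsDeclaration=*/false);
  linkOneFunction(/*BodyAvailable=*/false, /*IsDeclaration=*/true);
}
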
-Linker::Linker(Module *M, bool SuppressWarnings)
- : Composite(M), SuppressWarnings(SuppressWarnings) {
+void Linker::init(Module *M, DiagnosticHandlerFunction DiagnosticHandler) {
+ this->Composite = M;
+ this->DiagnosticHandler = DiagnosticHandler;
+
TypeFinder StructTypes;
StructTypes.run(*M, true);
IdentifiedStructTypes.insert(StructTypes.begin(), StructTypes.end());
}
+Linker::Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler) {
+ init(M, DiagnosticHandler);
+}
+
+Linker::Linker(Module *M) {
+ init(M, [this](const DiagnosticInfo &DI) {
+ Composite->getContext().diagnose(DI);
+ });
+}
+
Linker::~Linker() {
}
@@ -1581,30 +1621,30 @@ void Linker::deleteModule() {
Composite = nullptr;
}
-bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) {
- ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, Mode,
- SuppressWarnings);
- if (TheLinker.run()) {
- if (ErrorMsg)
- *ErrorMsg = TheLinker.ErrorMsg;
- return true;
- }
- return false;
+bool Linker::linkInModule(Module *Src) {
+ ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src,
+ DiagnosticHandler);
+ return TheLinker.run();
}
//===----------------------------------------------------------------------===//
// LinkModules entrypoint.
//===----------------------------------------------------------------------===//
-/// LinkModules - This function links two modules together, with the resulting
-/// Dest module modified to be the composite of the two input modules. If an
-/// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
-/// the problem. Upon failure, the Dest module could be in a modified state,
-/// and shouldn't be relied on to be consistent.
-bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
- std::string *ErrorMsg) {
+/// This function links two modules together, with the resulting Dest module
+/// modified to be the composite of the two input modules. If an error occurs,
+/// true is returned and the problem is reported through the diagnostic
+/// handler. Upon failure, the Dest module could be in a modified state, and
+/// shouldn't be relied on to be consistent.
+bool Linker::LinkModules(Module *Dest, Module *Src,
+ DiagnosticHandlerFunction DiagnosticHandler) {
+ Linker L(Dest, DiagnosticHandler);
+ return L.linkInModule(Src);
+}
+
+bool Linker::LinkModules(Module *Dest, Module *Src) {
Linker L(Dest);
- return L.linkInModule(Src, Mode, ErrorMsg);
+ return L.linkInModule(Src);
}
//===----------------------------------------------------------------------===//
@@ -1613,10 +1653,15 @@ bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
LLVMLinkerMode Mode, char **OutMessages) {
- std::string Messages;
- LLVMBool Result = Linker::LinkModules(unwrap(Dest), unwrap(Src),
- Mode, OutMessages? &Messages : nullptr);
- if (OutMessages)
- *OutMessages = strdup(Messages.c_str());
+ Module *D = unwrap(Dest);
+ std::string Message;
+ raw_string_ostream Stream(Message);
+ DiagnosticPrinterRawOStream DP(Stream);
+
+ LLVMBool Result = Linker::LinkModules(
+ D, unwrap(Src), [&](const DiagnosticInfo &DI) { DI.print(DP); });
+
+ if (OutMessages && Result)
+ *OutMessages = strdup(Message.c_str());
return Result;
}
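
The rewritten C wrapper no longer threads a std::string out-parameter through the linker; it installs a handler lambda that prints each diagnostic into a local buffer and copies the text out only when the link actually failed. A standalone sketch of that capture pattern (linkModules, linkModulesCAPI, and duplicateString are illustrative names, not the real C API):

#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <sstream>
#include <string>

using DiagnosticHandler = std::function<void(const std::string &)>;

// Stand-in for Linker::LinkModules(): pretend the link fails and explain why
// through the supplied handler. Returns true on error, as in the patch.
static bool linkModules(const DiagnosticHandler &Handler) {
  Handler("linking module flags 'foo': IDs have conflicting values");
  return true;
}

// Portable strdup replacement so the caller can free() the message.
static char *duplicateString(const std::string &S) {
  char *Copy = static_cast<char *>(std::malloc(S.size() + 1));
  std::memcpy(Copy, S.c_str(), S.size() + 1);
  return Copy;
}

// Stand-in for LLVMLinkModules(): capture handler output in a string and hand
// a heap copy back to the caller only when Result signals failure.
static bool linkModulesCAPI(char **OutMessages) {
  std::ostringstream Stream;
  bool Result = linkModules(
      [&](const std::string &Message) { Stream << Message << '\n'; });
  if (OutMessages && Result)
    *OutMessages = duplicateString(Stream.str());
  return Result;
}

int main() {
  char *Messages = nullptr;
  if (linkModulesCAPI(&Messages)) {
    std::cerr << Messages;
    std::free(Messages);
  }
}
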
diff --git a/lib/MC/Android.mk b/lib/MC/Android.mk
index fd587c4..c7c5c1f 100644
--- a/lib/MC/Android.mk
+++ b/lib/MC/Android.mk
@@ -13,13 +13,11 @@ mc_SRC_FILES := \
MCCodeEmitter.cpp \
MCCodeGenInfo.cpp \
MCContext.cpp \
- MCDisassembler.cpp \
MCDwarf.cpp \
MCELF.cpp \
MCELFObjectTargetWriter.cpp \
MCELFStreamer.cpp \
MCExpr.cpp \
- MCExternalSymbolizer.cpp \
MCInst.cpp \
MCInstPrinter.cpp \
MCInstrAnalysis.cpp \
@@ -32,7 +30,6 @@ mc_SRC_FILES := \
MCObjectStreamer.cpp \
MCObjectWriter.cpp \
MCRegisterInfo.cpp \
- MCRelocationInfo.cpp \
MCSection.cpp \
MCSectionCOFF.cpp \
MCSectionELF.cpp \
@@ -44,6 +41,7 @@ mc_SRC_FILES := \
MCTargetOptions.cpp \
MCValue.cpp \
MCWin64EH.cpp \
+ MCWinEH.cpp \
MachObjectWriter.cpp \
StringTableBuilder.cpp \
SubtargetFeature.cpp \
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 330519e..7181bdc 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -11,13 +11,11 @@ add_llvm_library(LLVMMC
MCCodeEmitter.cpp
MCCodeGenInfo.cpp
MCContext.cpp
- MCDisassembler.cpp
MCDwarf.cpp
MCELF.cpp
MCELFObjectTargetWriter.cpp
MCELFStreamer.cpp
MCExpr.cpp
- MCExternalSymbolizer.cpp
MCInst.cpp
MCInstPrinter.cpp
MCInstrAnalysis.cpp
@@ -30,7 +28,6 @@ add_llvm_library(LLVMMC
MCObjectStreamer.cpp
MCObjectWriter.cpp
MCRegisterInfo.cpp
- MCRelocationInfo.cpp
MCSection.cpp
MCSectionCOFF.cpp
MCSectionELF.cpp
@@ -42,6 +39,7 @@ add_llvm_library(LLVMMC
MCTargetOptions.cpp
MCValue.cpp
MCWin64EH.cpp
+ MCWinEH.cpp
MachObjectWriter.cpp
StringTableBuilder.cpp
SubtargetFeature.cpp
@@ -50,6 +48,5 @@ add_llvm_library(LLVMMC
YAML.cpp
)
-add_subdirectory(MCAnalysis)
add_subdirectory(MCParser)
add_subdirectory(MCDisassembler)
diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp
index f979dad..c4cea60 100644
--- a/lib/MC/ConstantPools.cpp
+++ b/lib/MC/ConstantPools.cpp
@@ -24,21 +24,22 @@ using namespace llvm;
void ConstantPool::emitEntries(MCStreamer &Streamer) {
if (Entries.empty())
return;
- Streamer.EmitCodeAlignment(4); // align to 4-byte address
Streamer.EmitDataRegion(MCDR_DataRegion);
for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end();
I != E; ++I) {
- Streamer.EmitLabel(I->first);
- Streamer.EmitValue(I->second, 4);
+ Streamer.EmitCodeAlignment(I->Size); // align naturally
+ Streamer.EmitLabel(I->Label);
+ Streamer.EmitValue(I->Value, I->Size);
}
Streamer.EmitDataRegion(MCDR_DataRegionEnd);
Entries.clear();
}
-const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) {
+const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context,
+ unsigned Size) {
MCSymbol *CPEntryLabel = Context.CreateTempSymbol();
- Entries.push_back(std::make_pair(CPEntryLabel, Value));
+ Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size));
return MCSymbolRefExpr::Create(CPEntryLabel, Context);
}
@@ -89,7 +90,9 @@ void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
}
const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
- const MCExpr *Expr) {
+ const MCExpr *Expr,
+ unsigned Size) {
const MCSection *Section = Streamer.getCurrentSection().first;
- return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext());
+ return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext(),
+ Size);
}
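
ConstantPoolEntry now records the entry's size, and emitEntries aligns each entry to its own size instead of forcing 4-byte alignment on the whole pool, so 8-byte literals land on 8-byte boundaries. A small sketch of that natural-alignment layout (PoolEntry and alignTo are illustrative; the real code emits labels and MCExprs through the streamer):

#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative constant-pool entry: only the size matters here; the real
// entry also carries a label and an MCExpr value.
struct PoolEntry {
  const char *Name;
  std::uint64_t Size;
};

// Round Offset up to the next multiple of Align (Align must be a power of 2).
static std::uint64_t alignTo(std::uint64_t Offset, std::uint64_t Align) {
  return (Offset + Align - 1) & ~(Align - 1);
}

int main() {
  // A mix of 4-byte and 8-byte pool literals.
  std::vector<PoolEntry> Entries = {{"a", 4}, {"b", 8}, {"c", 4}};

  std::uint64_t Offset = 0;
  for (const PoolEntry &E : Entries) {
    Offset = alignTo(Offset, E.Size);   // EmitCodeAlignment(I->Size)
    std::cout << E.Name << " at offset " << Offset << " (size " << E.Size
              << ")\n";
    Offset += E.Size;                   // EmitValue(I->Value, I->Size)
  }
}
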
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 7fb9fae..e4442e1 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -81,23 +81,13 @@ public:
struct ELFRelocationEntry {
uint64_t Offset; // Where is the relocation.
- bool UseSymbol; // Relocate with a symbol, not the section.
- union {
- const MCSymbol *Symbol; // The symbol to relocate with.
- const MCSectionData *Section; // The section to relocate with.
- };
+ const MCSymbol *Symbol; // The symbol to relocate with.
unsigned Type; // The type of the relocation.
uint64_t Addend; // The addend to use.
ELFRelocationEntry(uint64_t Offset, const MCSymbol *Symbol, unsigned Type,
uint64_t Addend)
- : Offset(Offset), UseSymbol(true), Symbol(Symbol), Type(Type),
- Addend(Addend) {}
-
- ELFRelocationEntry(uint64_t Offset, const MCSectionData *Section,
- unsigned Type, uint64_t Addend)
- : Offset(Offset), UseSymbol(false), Section(Section), Type(Type),
- Addend(Addend) {}
+ : Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend) {}
};
class ELFObjectWriter : public MCObjectWriter {
@@ -137,6 +127,14 @@ class ELFObjectWriter : public MCObjectWriter {
// Support lexicographic sorting.
bool operator<(const ELFSymbolData &RHS) const {
+ unsigned LHSType = MCELF::GetType(*SymbolData);
+ unsigned RHSType = MCELF::GetType(*RHS.SymbolData);
+ if (LHSType == ELF::STT_SECTION && RHSType != ELF::STT_SECTION)
+ return false;
+ if (LHSType != ELF::STT_SECTION && RHSType == ELF::STT_SECTION)
+ return true;
+ if (LHSType == ELF::STT_SECTION && RHSType == ELF::STT_SECTION)
+ return SectionIndex < RHS.SectionIndex;
return Name < RHS.Name;
}
};
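
Read carefully, the new comparator sorts named symbols (by name) ahead of section symbols, and orders the section symbols among themselves by section index, presumably to keep the local part of the symbol table deterministic now that section symbols are created on demand (see the RecordRelocation hunk further down). A minimal sketch of the ordering (SymData and symbolLess are illustrative stand-ins for ELFSymbolData and its operator<):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Only the fields the comparator actually looks at.
struct SymData {
  bool IsSection;        // MCELF::GetType(...) == ELF::STT_SECTION
  unsigned SectionIndex; // orders section symbols
  std::string Name;      // orders everything else
};

static bool symbolLess(const SymData &L, const SymData &R) {
  // A section symbol is never "less than" a named symbol...
  if (L.IsSection && !R.IsSection)
    return false;
  // ...and a named symbol always sorts before a section symbol.
  if (!L.IsSection && R.IsSection)
    return true;
  // Section symbols sort by section index, named symbols by name.
  if (L.IsSection && R.IsSection)
    return L.SectionIndex < R.SectionIndex;
  return L.Name < R.Name;
}

int main() {
  std::vector<SymData> Symbols = {{false, 0, "main"},
                                  {true, 3, ".data"},
                                  {false, 0, "bar"},
                                  {true, 1, ".text"}};
  std::sort(Symbols.begin(), Symbols.end(), symbolLess);
  for (const SymData &S : Symbols)
    std::cout << (S.IsSection ? "[section] " : "[named]   ") << S.Name << '\n';
}
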
@@ -246,7 +244,7 @@ class ELFObjectWriter : public MCObjectWriter {
/// \param NumRegularSections - Number of non-relocation sections.
void computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
const SectionIndexMapTy &SectionIndexMap,
- RevGroupMapTy RevGroupMap,
+ const RevGroupMapTy &RevGroupMap,
unsigned NumRegularSections);
void ComputeIndexMap(MCAssembler &Asm,
@@ -651,22 +649,6 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
WriteSymbol(Writer, MSD, Layout);
}
- // Write out a symbol table entry for each regular section.
- for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e;
- ++i) {
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(i->getSection());
- if (Section.getType() == ELF::SHT_RELA ||
- Section.getType() == ELF::SHT_REL ||
- Section.getType() == ELF::SHT_STRTAB ||
- Section.getType() == ELF::SHT_SYMTAB ||
- Section.getType() == ELF::SHT_SYMTAB_SHNDX)
- continue;
- Writer.writeSymbol(0, ELF::STT_SECTION, 0, 0, ELF::STV_DEFAULT,
- SectionIndexMap.lookup(&Section), false);
- LastLocalSymbolIndex++;
- }
-
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
ELFSymbolData &MSD = ExternalSymbolData[i];
MCSymbolData &Data = *MSD.SymbolData;
@@ -770,8 +752,9 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
}
// Most TLS relocations use a got, so they need the symbol. Even those that
- // are just an offset (@tpoff), require a symbol in some linkers (gold,
- // but not bfd ld).
+ // are just an offset (@tpoff), require a symbol in gold versions before
+ // 5efeedf61e4fe720fd3e9a08e6c91c10abb66d42 (2014-09-26) which fixed
+ // http://sourceware.org/PR16773.
if (Flags & ELF::SHF_TLS)
return true;
@@ -782,7 +765,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
if (Asm.isThumbFunc(&Sym))
return true;
- if (TargetObjectWriter->needsRelocateWithSymbol(Type))
+ if (TargetObjectWriter->needsRelocateWithSymbol(*SD, Type))
return true;
return false;
}
@@ -881,8 +864,11 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
if (!RelocateWithSymbol) {
const MCSection *SecA =
(SymA && !SymA->isUndefined()) ? &SymA->getSection() : nullptr;
- const MCSectionData *SecAD = SecA ? &Asm.getSectionData(*SecA) : nullptr;
- ELFRelocationEntry Rec(FixupOffset, SecAD, Type, Addend);
+ auto *ELFSec = cast_or_null<MCSectionELF>(SecA);
+ MCSymbol *SectionSymbol =
+ ELFSec ? Asm.getContext().getOrCreateSectionSymbol(*ELFSec)
+ : nullptr;
+ ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend);
Relocations[FixupSection].push_back(Rec);
return;
}
@@ -991,7 +977,7 @@ void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
void
ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
const SectionIndexMapTy &SectionIndexMap,
- RevGroupMapTy RevGroupMap,
+ const RevGroupMapTy &RevGroupMap,
unsigned NumRegularSections) {
// FIXME: Is this the correct place to do this?
// FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
@@ -1037,7 +1023,7 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
MSD.SectionIndex = ELF::SHN_COMMON;
} else if (BaseSymbol->isUndefined()) {
if (isSignature && !Used)
- MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap[&Symbol]);
+ MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap.lookup(&Symbol));
else
MSD.SectionIndex = ELF::SHN_UNDEF;
if (!Used && WeakrefUsed)
@@ -1060,7 +1046,10 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
Buf += Name.substr(Pos + Skip);
Name = Buf;
}
- MSD.Name = StrTabBuilder.add(Name);
+
+ // Sections have their own string table
+ if (MCELF::GetType(SD) != ELF::STT_SECTION)
+ MSD.Name = StrTabBuilder.add(Name);
if (MSD.SectionIndex == ELF::SHN_UNDEF)
UndefinedSymbolData.push_back(MSD);
@@ -1073,14 +1062,16 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
StrTabBuilder.add(*i);
- StrTabBuilder.finalize();
+ StrTabBuilder.finalize(StringTableBuilder::ELF);
for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
FileSymbolData.push_back(StrTabBuilder.getOffset(*i));
- for (ELFSymbolData& MSD : LocalSymbolData)
- MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
- for (ELFSymbolData& MSD : ExternalSymbolData)
+ for (ELFSymbolData &MSD : LocalSymbolData)
+ MSD.StringIndex = MCELF::GetType(*MSD.SymbolData) == ELF::STT_SECTION
+ ? 0
+ : StrTabBuilder.getOffset(MSD.Name);
+ for (ELFSymbolData &MSD : ExternalSymbolData)
MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
for (ELFSymbolData& MSD : UndefinedSymbolData)
MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
@@ -1096,8 +1087,6 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
LocalSymbolData[i].SymbolData->setIndex(Index++);
- Index += NumRegularSections;
-
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
ExternalSymbolData[i].SymbolData->setIndex(Index++);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
@@ -1353,18 +1342,8 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
const ELFRelocationEntry &Entry = Relocs[e - i - 1];
-
- unsigned Index;
- if (Entry.UseSymbol) {
- Index = getSymbolIndexInSymbolTable(Asm, Entry.Symbol);
- } else {
- const MCSectionData *Sec = Entry.Section;
- if (Sec)
- Index = Sec->getOrdinal() + FileSymbolData.size() +
- LocalSymbolData.size() + 1;
- else
- Index = 0;
- }
+ unsigned Index =
+ Entry.Symbol ? getSymbolIndexInSymbolTable(Asm, Entry.Symbol) : 0;
if (is64Bit()) {
write(*F, Entry.Offset);
@@ -1446,7 +1425,7 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
static_cast<const MCSectionELF&>(it->getSection());
ShStrTabBuilder.add(Section.getSectionName());
}
- ShStrTabBuilder.finalize();
+ ShStrTabBuilder.finalize(StringTableBuilder::ELF);
F->getContents().append(ShStrTabBuilder.data().begin(),
ShStrTabBuilder.data().end());
}
@@ -1457,14 +1436,7 @@ void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
RevGroupMapTy &RevGroupMap,
SectionIndexMapTy &SectionIndexMap,
const RelMapTy &RelMap) {
- // Create the .note.GNU-stack section if needed.
MCContext &Ctx = Asm.getContext();
- if (Asm.getNoExecStack()) {
- const MCSectionELF *GnuStackSection =
- Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0,
- SectionKind::getReadOnly());
- Asm.getOrCreateSectionData(*GnuStackSection);
- }
// Build the groups
for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt
index 3fcb50b..f06be45 100644
--- a/lib/MC/LLVMBuild.txt
+++ b/lib/MC/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = MCAnalysis MCDisassembler MCParser
+subdirectories = MCDisassembler MCParser
[component_0]
type = Library
diff --git a/lib/MC/MCAnalysis/Android.mk b/lib/MC/MCAnalysis/Android.mk
deleted file mode 100644
index 27f848a..0000000
--- a/lib/MC/MCAnalysis/Android.mk
+++ /dev/null
@@ -1,37 +0,0 @@
-LOCAL_PATH:= $(call my-dir)
-
-mc_analysis_SRC_FILES := \
- MCAtom.cpp \
- MCFunction.cpp \
- MCModule.cpp \
- MCModuleYAML.cpp \
- MCObjectDisassembler.cpp \
- MCObjectSymbolizer.cpp
-
-# For the host
-# =====================================================
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES)
-
-LOCAL_MODULE:= libLLVMMCAnalysis
-
-LOCAL_MODULE_TAGS := optional
-
-include $(LLVM_HOST_BUILD_MK)
-include $(BUILD_HOST_STATIC_LIBRARY)
-
-# For the device
-# =====================================================
-include $(CLEAR_VARS)
-ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
-
-LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES)
-
-LOCAL_MODULE:= libLLVMMCAnalysis
-
-LOCAL_MODULE_TAGS := optional
-
-include $(LLVM_DEVICE_BUILD_MK)
-include $(BUILD_STATIC_LIBRARY)
-endif
diff --git a/lib/MC/MCAnalysis/CMakeLists.txt b/lib/MC/MCAnalysis/CMakeLists.txt
deleted file mode 100644
index 81eae2d..0000000
--- a/lib/MC/MCAnalysis/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-add_llvm_library(LLVMMCAnalysis
- MCAtom.cpp
- MCFunction.cpp
- MCModule.cpp
- MCModuleYAML.cpp
- MCObjectDisassembler.cpp
- MCObjectSymbolizer.cpp
-)
diff --git a/lib/MC/MCAnalysis/LLVMBuild.txt b/lib/MC/MCAnalysis/LLVMBuild.txt
deleted file mode 100644
index 1b58fec..0000000
--- a/lib/MC/MCAnalysis/LLVMBuild.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-[component_0]
-type = Library
-name = MCAnalysis
-parent = Libraries
-required_libraries = MC Object Support
diff --git a/lib/MC/MCAnalysis/MCAtom.cpp b/lib/MC/MCAnalysis/MCAtom.cpp
deleted file mode 100644
index 82056ee..0000000
--- a/lib/MC/MCAnalysis/MCAtom.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCAnalysis/MCAtom.h"
-#include "llvm/MC/MCAnalysis/MCModule.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <iterator>
-
-using namespace llvm;
-
-// Pin the vtable to this file.
-void MCAtom::anchor() {}
-
-void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
- Parent->remap(this, NewBegin, NewEnd);
-}
-
-void MCAtom::remapForTruncate(uint64_t TruncPt) {
- assert((TruncPt >= Begin && TruncPt < End) &&
- "Truncation point not contained in atom!");
- remap(Begin, TruncPt);
-}
-
-void MCAtom::remapForSplit(uint64_t SplitPt,
- uint64_t &LBegin, uint64_t &LEnd,
- uint64_t &RBegin, uint64_t &REnd) {
- assert((SplitPt > Begin && SplitPt <= End) &&
- "Splitting at point not contained in atom!");
-
- // Compute the new begin/end points.
- LBegin = Begin;
- LEnd = SplitPt - 1;
- RBegin = SplitPt;
- REnd = End;
-
- // Remap this atom to become the lower of the two new ones.
- remap(LBegin, LEnd);
-}
-
-// MCDataAtom
-
-void MCDataAtom::addData(const MCData &D) {
- Data.push_back(D);
- if (Data.size() > End + 1 - Begin)
- remap(Begin, End + 1);
-}
-
-void MCDataAtom::truncate(uint64_t TruncPt) {
- remapForTruncate(TruncPt);
-
- Data.resize(TruncPt - Begin + 1);
-}
-
-MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
- uint64_t LBegin, LEnd, RBegin, REnd;
- remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
-
- MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd);
- RightAtom->setName(getName());
-
- std::vector<MCData>::iterator I = Data.begin() + (RBegin - LBegin);
- assert(I != Data.end() && "Split point not found in range!");
-
- std::copy(I, Data.end(), std::back_inserter(RightAtom->Data));
- Data.erase(I, Data.end());
- return RightAtom;
-}
-
-// MCTextAtom
-
-void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
- if (NextInstAddress + Size - 1 > End)
- remap(Begin, NextInstAddress + Size - 1);
- Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
- NextInstAddress += Size;
-}
-
-void MCTextAtom::truncate(uint64_t TruncPt) {
- remapForTruncate(TruncPt);
-
- InstListTy::iterator I = Insts.begin();
- while (I != Insts.end() && I->Address <= TruncPt) ++I;
-
- assert(I != Insts.end() && "Truncation point not found in disassembly!");
- assert(I->Address == TruncPt + 1 &&
- "Truncation point does not fall on instruction boundary");
-
- Insts.erase(I, Insts.end());
-}
-
-MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
- uint64_t LBegin, LEnd, RBegin, REnd;
- remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);
-
- MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd);
- RightAtom->setName(getName());
-
- InstListTy::iterator I = Insts.begin();
- while (I != Insts.end() && I->Address < SplitPt) ++I;
- assert(I != Insts.end() && "Split point not found in disassembly!");
- assert(I->Address == SplitPt &&
- "Split point does not fall on instruction boundary!");
-
- std::copy(I, Insts.end(), std::back_inserter(RightAtom->Insts));
- Insts.erase(I, Insts.end());
- Parent->splitBasicBlocksForAtom(this, RightAtom);
- return RightAtom;
-}
diff --git a/lib/MC/MCAnalysis/MCFunction.cpp b/lib/MC/MCAnalysis/MCFunction.cpp
deleted file mode 100644
index 4e09d1a..0000000
--- a/lib/MC/MCAnalysis/MCFunction.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCAnalysis/MCFunction.h"
-#include "llvm/MC/MCAnalysis/MCAtom.h"
-#include "llvm/MC/MCAnalysis/MCModule.h"
-#include <algorithm>
-
-using namespace llvm;
-
-// MCFunction
-
-MCFunction::MCFunction(StringRef Name, MCModule *Parent)
- : Name(Name), ParentModule(Parent)
-{}
-
-MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
- std::unique_ptr<MCBasicBlock> MCBB(new MCBasicBlock(TA, this));
- Blocks.push_back(std::move(MCBB));
- return *Blocks.back();
-}
-
-MCBasicBlock *MCFunction::find(uint64_t StartAddr) {
- for (const_iterator I = begin(), E = end(); I != E; ++I)
- if ((*I)->getInsts()->getBeginAddr() == StartAddr)
- return I->get();
- return nullptr;
-}
-
-const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const {
- return const_cast<MCFunction *>(this)->find(StartAddr);
-}
-
-// MCBasicBlock
-
-MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent)
- : Insts(&Insts), Parent(Parent) {
- getParent()->getParent()->trackBBForAtom(&Insts, this);
-}
-
-void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
- if (!isSuccessor(MCBB))
- Successors.push_back(MCBB);
-}
-
-bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
- return std::find(Successors.begin(), Successors.end(),
- MCBB) != Successors.end();
-}
-
-void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
- if (!isPredecessor(MCBB))
- Predecessors.push_back(MCBB);
-}
-
-bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
- return std::find(Predecessors.begin(), Predecessors.end(),
- MCBB) != Predecessors.end();
-}
-
-void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) {
- assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() &&
- "Splitting unrelated basic blocks!");
- SplitBB->addPredecessor(this);
- assert(SplitBB->Successors.empty() &&
- "Split basic block shouldn't already have successors!");
- SplitBB->Successors = Successors;
- Successors.clear();
- addSuccessor(SplitBB);
-}
diff --git a/lib/MC/MCAnalysis/MCModule.cpp b/lib/MC/MCAnalysis/MCModule.cpp
deleted file mode 100644
index 7512299..0000000
--- a/lib/MC/MCAnalysis/MCModule.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCAnalysis/MCModule.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCAnalysis/MCAtom.h"
-#include "llvm/MC/MCAnalysis/MCFunction.h"
-#include <algorithm>
-
-using namespace llvm;
-
-static bool AtomComp(const MCAtom *L, uint64_t Addr) {
- return L->getEndAddr() < Addr;
-}
-
-static bool AtomCompInv(uint64_t Addr, const MCAtom *R) {
- return Addr < R->getEndAddr();
-}
-
-void MCModule::map(MCAtom *NewAtom) {
- uint64_t Begin = NewAtom->Begin;
-
- assert(Begin <= NewAtom->End && "Creating MCAtom with endpoints reversed?");
-
- // Check for atoms already covering this range.
- AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
- Begin, AtomComp);
- assert((I == atom_end() || (*I)->getBeginAddr() > NewAtom->End)
- && "Offset range already occupied!");
-
- // Insert the new atom to the list.
- Atoms.insert(I, NewAtom);
-}
-
-MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) {
- MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End);
- map(NewAtom);
- return NewAtom;
-}
-
-MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) {
- MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End);
- map(NewAtom);
- return NewAtom;
-}
-
-// remap - Update the interval mapping for an atom.
-void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
- // Find and erase the old mapping.
- AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
- Atom->Begin, AtomComp);
- assert(I != atom_end() && "Atom offset not found in module!");
- assert(*I == Atom && "Previous atom mapping was invalid!");
- Atoms.erase(I);
-
- // FIXME: special case NewBegin == Atom->Begin
-
- // Insert the new mapping.
- AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
- NewBegin, AtomComp);
- assert((NewI == atom_end() || (*NewI)->getBeginAddr() > Atom->End)
- && "Offset range already occupied!");
- Atoms.insert(NewI, Atom);
-
- // Update the atom internal bounds.
- Atom->Begin = NewBegin;
- Atom->End = NewEnd;
-}
-
-const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
- AtomListTy::const_iterator I = std::lower_bound(atom_begin(), atom_end(),
- Addr, AtomComp);
- if (I != atom_end() && (*I)->getBeginAddr() <= Addr)
- return *I;
- return nullptr;
-}
-
-MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
- return const_cast<MCAtom*>(
- const_cast<const MCModule *>(this)->findAtomContaining(Addr));
-}
-
-const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const {
- AtomListTy::const_iterator I = std::upper_bound(atom_begin(), atom_end(),
- Addr, AtomCompInv);
- if (I != atom_end())
- return *I;
- return nullptr;
-}
-
-MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) {
- return const_cast<MCAtom*>(
- const_cast<const MCModule *>(this)->findFirstAtomAfter(Addr));
-}
-
-MCFunction *MCModule::createFunction(StringRef Name) {
- std::unique_ptr<MCFunction> MCF(new MCFunction(Name, this));
- Functions.push_back(std::move(MCF));
- return Functions.back().get();
-}
-
-static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) {
- return BB->getInsts() < Atom;
-}
-
-void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA,
- const MCTextAtom *NewTA) {
- BBsByAtomTy::iterator
- I = std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(),
- TA, CompBBToAtom);
- for (; I != BBsByAtom.end() && (*I)->getInsts() == TA; ++I) {
- MCBasicBlock *BB = *I;
- MCBasicBlock *NewBB = &BB->getParent()->createBlock(*NewTA);
- BB->splitBasicBlock(NewBB);
- }
-}
-
-void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) {
- assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!");
- BBsByAtomTy::iterator I = std::lower_bound(BBsByAtom.begin(),
- BBsByAtom.end(),
- Atom, CompBBToAtom);
- for (; I != BBsByAtom.end() && (*I)->getInsts() == Atom; ++I)
- if (*I == BB)
- return;
- BBsByAtom.insert(I, BB);
-}
-
-MCModule::MCModule() : Entrypoint(0) { }
-
-MCModule::~MCModule() {
- for (AtomListTy::iterator AI = atom_begin(),
- AE = atom_end();
- AI != AE; ++AI)
- delete *AI;
-}
diff --git a/lib/MC/MCAnalysis/MCModuleYAML.cpp b/lib/MC/MCAnalysis/MCModuleYAML.cpp
deleted file mode 100644
index 876b06d..0000000
--- a/lib/MC/MCAnalysis/MCModuleYAML.cpp
+++ /dev/null
@@ -1,464 +0,0 @@
-//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines classes for handling the YAML representation of MCModule.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCAnalysis/MCModuleYAML.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/MC/MCAnalysis/MCAtom.h"
-#include "llvm/MC/MCAnalysis/MCFunction.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/YAML.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/YAMLTraits.h"
-#include <vector>
-
-namespace llvm {
-
-namespace {
-
-// This class is used to map opcode and register names to enum values.
-//
-// There are at least 3 obvious ways to do this:
-// 1- Generate an MII/MRI method using a tablegen StringMatcher
-// 2- Write an MII/MRI method using std::lower_bound and the assumption that
-// the enums are sorted (starting at a fixed value).
-// 3- Do the matching manually as is done here.
-//
-// Why 3?
-// 1- A StringMatcher function for thousands of entries would incur
-// a non-negligible binary size overhead.
-// 2- The lower_bound comparators would be somewhat involved and aren't
-// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h)
-// 3- This isn't actually something useful outside tests (but the same argument
-// can be made against having {MII,MRI}::getName).
-//
-// If this becomes useful outside this specific situation, feel free to do
-// the Right Thing (tm) and move the functionality to MII/MRI.
-//
-class InstrRegInfoHolder {
- typedef StringMap<unsigned, BumpPtrAllocator> EnumValByNameTy;
- EnumValByNameTy InstEnumValueByName;
- EnumValByNameTy RegEnumValueByName;
-
-public:
- const MCInstrInfo &MII;
- const MCRegisterInfo &MRI;
- InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI)
- : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())),
- RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) {
- for (int i = 0, e = MII.getNumOpcodes(); i != e; ++i)
- InstEnumValueByName[MII.getName(i)] = i;
- for (int i = 0, e = MRI.getNumRegs(); i != e; ++i)
- RegEnumValueByName[MRI.getName(i)] = i;
- }
-
- bool matchRegister(StringRef Name, unsigned &Reg) {
- EnumValByNameTy::const_iterator It = RegEnumValueByName.find(Name);
- if (It == RegEnumValueByName.end())
- return false;
- Reg = It->getValue();
- return true;
- }
- bool matchOpcode(StringRef Name, unsigned &Opc) {
- EnumValByNameTy::const_iterator It = InstEnumValueByName.find(Name);
- if (It == InstEnumValueByName.end())
- return false;
- Opc = It->getValue();
- return true;
- }
-};
-
-} // end unnamed namespace
-
-namespace MCModuleYAML {
-
-LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum)
-
-struct Operand {
- MCOperand MCOp;
-};
-
-struct Inst {
- OpcodeEnum Opcode;
- std::vector<Operand> Operands;
- uint64_t Size;
-};
-
-struct Atom {
- MCAtom::AtomKind Type;
- yaml::Hex64 StartAddress;
- uint64_t Size;
-
- std::vector<Inst> Insts;
- yaml::BinaryRef Data;
-};
-
-struct BasicBlock {
- yaml::Hex64 Address;
- std::vector<yaml::Hex64> Preds;
- std::vector<yaml::Hex64> Succs;
-};
-
-struct Function {
- StringRef Name;
- std::vector<BasicBlock> BasicBlocks;
-};
-
-struct Module {
- std::vector<Atom> Atoms;
- std::vector<Function> Functions;
-};
-
-} // end namespace MCModuleYAML
-} // end namespace llvm
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function)
-
-namespace llvm {
-
-namespace yaml {
-
-template <> struct ScalarEnumerationTraits<MCAtom::AtomKind> {
- static void enumeration(IO &IO, MCAtom::AtomKind &Kind);
-};
-
-template <> struct MappingTraits<MCModuleYAML::Atom> {
- static void mapping(IO &IO, MCModuleYAML::Atom &A);
-};
-
-template <> struct MappingTraits<MCModuleYAML::Inst> {
- static void mapping(IO &IO, MCModuleYAML::Inst &I);
-};
-
-template <> struct MappingTraits<MCModuleYAML::BasicBlock> {
- static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB);
-};
-
-template <> struct MappingTraits<MCModuleYAML::Function> {
- static void mapping(IO &IO, MCModuleYAML::Function &Fn);
-};
-
-template <> struct MappingTraits<MCModuleYAML::Module> {
- static void mapping(IO &IO, MCModuleYAML::Module &M);
-};
-
-template <> struct ScalarTraits<MCModuleYAML::Operand> {
- static void output(const MCModuleYAML::Operand &, void *,
- llvm::raw_ostream &);
- static StringRef input(StringRef, void *, MCModuleYAML::Operand &);
- static bool mustQuote(StringRef) { return false; }
-};
-
-template <> struct ScalarTraits<MCModuleYAML::OpcodeEnum> {
- static void output(const MCModuleYAML::OpcodeEnum &, void *,
- llvm::raw_ostream &);
- static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &);
- static bool mustQuote(StringRef) { return false; }
-};
-
-void ScalarEnumerationTraits<MCAtom::AtomKind>::enumeration(
- IO &IO, MCAtom::AtomKind &Value) {
- IO.enumCase(Value, "Text", MCAtom::TextAtom);
- IO.enumCase(Value, "Data", MCAtom::DataAtom);
-}
-
-void MappingTraits<MCModuleYAML::Atom>::mapping(IO &IO, MCModuleYAML::Atom &A) {
- IO.mapRequired("StartAddress", A.StartAddress);
- IO.mapRequired("Size", A.Size);
- IO.mapRequired("Type", A.Type);
- if (A.Type == MCAtom::TextAtom)
- IO.mapRequired("Content", A.Insts);
- else if (A.Type == MCAtom::DataAtom)
- IO.mapRequired("Content", A.Data);
-}
-
-void MappingTraits<MCModuleYAML::Inst>::mapping(IO &IO, MCModuleYAML::Inst &I) {
- IO.mapRequired("Inst", I.Opcode);
- IO.mapRequired("Size", I.Size);
- IO.mapRequired("Ops", I.Operands);
-}
-
-void
-MappingTraits<MCModuleYAML::BasicBlock>::mapping(IO &IO,
- MCModuleYAML::BasicBlock &BB) {
- IO.mapRequired("Address", BB.Address);
- IO.mapRequired("Preds", BB.Preds);
- IO.mapRequired("Succs", BB.Succs);
-}
-
-void MappingTraits<MCModuleYAML::Function>::mapping(IO &IO,
- MCModuleYAML::Function &F) {
- IO.mapRequired("Name", F.Name);
- IO.mapRequired("BasicBlocks", F.BasicBlocks);
-}
-
-void MappingTraits<MCModuleYAML::Module>::mapping(IO &IO,
- MCModuleYAML::Module &M) {
- IO.mapRequired("Atoms", M.Atoms);
- IO.mapOptional("Functions", M.Functions);
-}
-
-void
-ScalarTraits<MCModuleYAML::Operand>::output(const MCModuleYAML::Operand &Val,
- void *Ctx, raw_ostream &Out) {
- InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
-
- // FIXME: Doesn't support FPImm and expr/inst, but do these make sense?
- if (Val.MCOp.isImm())
- Out << "I" << Val.MCOp.getImm();
- else if (Val.MCOp.isReg())
- Out << "R" << IRI->MRI.getName(Val.MCOp.getReg());
- else
- llvm_unreachable("Trying to output invalid MCOperand!");
-}
-
-StringRef
-ScalarTraits<MCModuleYAML::Operand>::input(StringRef Scalar, void *Ctx,
- MCModuleYAML::Operand &Val) {
- InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
- char Type = 0;
- if (Scalar.size() >= 1)
- Type = Scalar.front();
- if (Type != 'R' && Type != 'I')
- return "Operand must start with 'R' (register) or 'I' (immediate).";
- if (Type == 'R') {
- unsigned Reg;
- if (!IRI->matchRegister(Scalar.substr(1), Reg))
- return "Invalid register name.";
- Val.MCOp = MCOperand::CreateReg(Reg);
- } else if (Type == 'I') {
- int64_t RIVal;
- if (Scalar.substr(1).getAsInteger(10, RIVal))
- return "Invalid immediate value.";
- Val.MCOp = MCOperand::CreateImm(RIVal);
- } else {
- Val.MCOp = MCOperand();
- }
- return StringRef();
-}
-
-void ScalarTraits<MCModuleYAML::OpcodeEnum>::output(
- const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) {
- InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
- Out << IRI->MII.getName(Val);
-}
-
-StringRef
-ScalarTraits<MCModuleYAML::OpcodeEnum>::input(StringRef Scalar, void *Ctx,
- MCModuleYAML::OpcodeEnum &Val) {
- InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx;
- unsigned Opc;
- if (!IRI->matchOpcode(Scalar, Opc))
- return "Invalid instruction opcode.";
- Val = Opc;
- return "";
-}
-
-} // end namespace yaml
-
-namespace {
-
-class MCModule2YAML {
- const MCModule &MCM;
- MCModuleYAML::Module YAMLModule;
- void dumpAtom(const MCAtom *MCA);
- void dumpFunction(const MCFunction &MCF);
- void dumpBasicBlock(const MCBasicBlock *MCBB);
-
-public:
- MCModule2YAML(const MCModule &MCM);
- MCModuleYAML::Module &getYAMLModule();
-};
-
-class YAML2MCModule {
- MCModule &MCM;
-
-public:
- YAML2MCModule(MCModule &MCM);
- StringRef parse(const MCModuleYAML::Module &YAMLModule);
-};
-
-} // end unnamed namespace
-
-MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() {
- for (MCModule::const_atom_iterator AI = MCM.atom_begin(), AE = MCM.atom_end();
- AI != AE; ++AI)
- dumpAtom(*AI);
- for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end();
- FI != FE; ++FI)
- dumpFunction(**FI);
-}
-
-void MCModule2YAML::dumpAtom(const MCAtom *MCA) {
- YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1);
- MCModuleYAML::Atom &A = YAMLModule.Atoms.back();
- A.Type = MCA->getKind();
- A.StartAddress = MCA->getBeginAddr();
- A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1;
- if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(MCA)) {
- const size_t InstCount = TA->size();
- A.Insts.resize(InstCount);
- for (size_t i = 0; i != InstCount; ++i) {
- const MCDecodedInst &MCDI = TA->at(i);
- A.Insts[i].Opcode = MCDI.Inst.getOpcode();
- A.Insts[i].Size = MCDI.Size;
- const unsigned OpCount = MCDI.Inst.getNumOperands();
- A.Insts[i].Operands.resize(OpCount);
- for (unsigned oi = 0; oi != OpCount; ++oi)
- A.Insts[i].Operands[oi].MCOp = MCDI.Inst.getOperand(oi);
- }
- } else if (const MCDataAtom *DA = dyn_cast<MCDataAtom>(MCA)) {
- A.Data = DA->getData();
- } else {
- llvm_unreachable("Unknown atom type.");
- }
-}
-
-void MCModule2YAML::dumpFunction(const MCFunction &MCF) {
- YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1);
- MCModuleYAML::Function &F = YAMLModule.Functions.back();
- F.Name = MCF.getName();
- for (MCFunction::const_iterator BBI = MCF.begin(), BBE = MCF.end();
- BBI != BBE; ++BBI) {
- const MCBasicBlock &MCBB = **BBI;
- F.BasicBlocks.resize(F.BasicBlocks.size() + 1);
- MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back();
- BB.Address = MCBB.getInsts()->getBeginAddr();
- for (MCBasicBlock::pred_const_iterator PI = MCBB.pred_begin(),
- PE = MCBB.pred_end();
- PI != PE; ++PI)
- BB.Preds.push_back((*PI)->getInsts()->getBeginAddr());
- for (MCBasicBlock::succ_const_iterator SI = MCBB.succ_begin(),
- SE = MCBB.succ_end();
- SI != SE; ++SI)
- BB.Succs.push_back((*SI)->getInsts()->getBeginAddr());
- }
-}
-
-MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; }
-
-YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {}
-
-StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) {
- typedef std::vector<MCModuleYAML::Atom>::const_iterator AtomIt;
- typedef std::vector<MCModuleYAML::Inst>::const_iterator InstIt;
- typedef std::vector<MCModuleYAML::Operand>::const_iterator OpIt;
-
- typedef DenseMap<uint64_t, MCTextAtom *> AddrToTextAtomTy;
- AddrToTextAtomTy TAByAddr;
-
- for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end();
- AI != AE; ++AI) {
- uint64_t StartAddress = AI->StartAddress;
- if (AI->Size == 0)
- return "Atoms can't be empty!";
- uint64_t EndAddress = StartAddress + AI->Size - 1;
- switch (AI->Type) {
- case MCAtom::TextAtom: {
- MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress);
- TAByAddr[StartAddress] = TA;
- for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE;
- ++II) {
- MCInst MI;
- MI.setOpcode(II->Opcode);
- for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE;
- ++OI)
- MI.addOperand(OI->MCOp);
- TA->addInst(MI, II->Size);
- }
- break;
- }
- case MCAtom::DataAtom: {
- MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress);
- SmallVector<char, 64> Data;
- raw_svector_ostream OS(Data);
- AI->Data.writeAsBinary(OS);
- OS.flush();
- for (size_t i = 0, e = Data.size(); i != e; ++i)
- DA->addData((uint8_t)Data[i]);
- break;
- }
- }
- }
-
- typedef std::vector<MCModuleYAML::Function>::const_iterator FuncIt;
- typedef std::vector<MCModuleYAML::BasicBlock>::const_iterator BBIt;
- typedef std::vector<yaml::Hex64>::const_iterator AddrIt;
- for (FuncIt FI = YAMLModule.Functions.begin(),
- FE = YAMLModule.Functions.end();
- FI != FE; ++FI) {
- MCFunction *MCFN = MCM.createFunction(FI->Name);
- for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
- BBI != BBE; ++BBI) {
- AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address);
- if (It == TAByAddr.end())
- return "Basic block start address doesn't match any text atom!";
- MCFN->createBlock(*It->second);
- }
- for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
- BBI != BBE; ++BBI) {
- MCBasicBlock *MCBB = MCFN->find(BBI->Address);
- if (!MCBB)
- return "Couldn't find matching basic block in function.";
- for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE;
- ++PI) {
- MCBasicBlock *Pred = MCFN->find(*PI);
- if (!Pred)
- return "Couldn't find predecessor basic block.";
- MCBB->addPredecessor(Pred);
- }
- for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE;
- ++SI) {
- MCBasicBlock *Succ = MCFN->find(*SI);
- if (!Succ)
- return "Couldn't find predecessor basic block.";
- MCBB->addSuccessor(Succ);
- }
- }
- }
- return "";
-}
-
-StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM,
- const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
- MCModule2YAML Dumper(MCM);
- InstrRegInfoHolder IRI(MII, MRI);
- yaml::Output YOut(OS, (void *)&IRI);
- YOut << Dumper.getYAMLModule();
- return "";
-}
-
-StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent,
- const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
- MCM.reset(new MCModule);
- YAML2MCModule Parser(*MCM);
- MCModuleYAML::Module YAMLModule;
- InstrRegInfoHolder IRI(MII, MRI);
- yaml::Input YIn(YamlContent, (void *)&IRI);
- YIn >> YAMLModule;
- if (std::error_code ec = YIn.error())
- return ec.message();
- StringRef err = Parser.parse(YAMLModule);
- if (!err.empty())
- return err;
- return "";
-}
-
-} // end namespace llvm
diff --git a/lib/MC/MCAnalysis/MCObjectDisassembler.cpp b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp
deleted file mode 100644
index 0f789ff..0000000
--- a/lib/MC/MCAnalysis/MCObjectDisassembler.cpp
+++ /dev/null
@@ -1,574 +0,0 @@
-//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCObjectDisassembler.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAnalysis/MCAtom.h"
-#include "llvm/MC/MCAnalysis/MCFunction.h"
-#include "llvm/MC/MCAnalysis/MCModule.h"
-#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCInstrAnalysis.h"
-#include "llvm/MC/MCObjectSymbolizer.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MachO.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/StringRefMemoryObject.h"
-#include "llvm/Support/raw_ostream.h"
-#include <map>
-
-using namespace llvm;
-using namespace object;
-
-#define DEBUG_TYPE "mc"
-
-MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
- const MCDisassembler &Dis,
- const MCInstrAnalysis &MIA)
- : Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {}
-
-uint64_t MCObjectDisassembler::getEntrypoint() {
- for (const SymbolRef &Symbol : Obj.symbols()) {
- StringRef Name;
- Symbol.getName(Name);
- if (Name == "main" || Name == "_main") {
- uint64_t Entrypoint;
- Symbol.getAddress(Entrypoint);
- return getEffectiveLoadAddr(Entrypoint);
- }
- }
- return 0;
-}
-
-ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() {
- return ArrayRef<uint64_t>();
-}
-
-ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
- return ArrayRef<uint64_t>();
-}
-
-MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) {
- // FIXME: Keep track of object sections.
- return FallbackRegion.get();
-}
-
-uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
- return Addr;
-}
-
-uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) {
- return Addr;
-}
-
-MCModule *MCObjectDisassembler::buildEmptyModule() {
- MCModule *Module = new MCModule;
- Module->Entrypoint = getEntrypoint();
- return Module;
-}
-
-MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
- MCModule *Module = buildEmptyModule();
-
- buildSectionAtoms(Module);
- if (withCFG)
- buildCFG(Module);
- return Module;
-}
-
-void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
- for (const SectionRef &Section : Obj.sections()) {
- bool isText;
- Section.isText(isText);
- bool isData;
- Section.isData(isData);
- if (!isData && !isText)
- continue;
-
- uint64_t StartAddr;
- Section.getAddress(StartAddr);
- uint64_t SecSize;
- Section.getSize(SecSize);
- if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
- continue;
- StartAddr = getEffectiveLoadAddr(StartAddr);
-
- StringRef Contents;
- Section.getContents(Contents);
- StringRefMemoryObject memoryObject(Contents, StartAddr);
-
- // We don't care about things like non-file-backed sections yet.
- if (Contents.size() != SecSize || !SecSize)
- continue;
- uint64_t EndAddr = StartAddr + SecSize - 1;
-
- StringRef SecName;
- Section.getName(SecName);
-
- if (isText) {
- MCTextAtom *Text = nullptr;
- MCDataAtom *InvalidData = nullptr;
-
- uint64_t InstSize;
- for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
- const uint64_t CurAddr = StartAddr + Index;
- MCInst Inst;
- if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(),
- nulls())) {
- if (!Text) {
- Text = Module->createTextAtom(CurAddr, CurAddr);
- Text->setName(SecName);
- }
- Text->addInst(Inst, InstSize);
- InvalidData = nullptr;
- } else {
- assert(InstSize && "getInstruction() consumed no bytes");
- if (!InvalidData) {
- Text = nullptr;
- InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1);
- }
- for (uint64_t I = 0; I < InstSize; ++I)
- InvalidData->addData(Contents[Index+I]);
- }
- }
- } else {
- MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
- Data->setName(SecName);
- for (uint64_t Index = 0; Index < SecSize; ++Index)
- Data->addData(Contents[Index]);
- }
- }
-}
-
-namespace {
- struct BBInfo;
- typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy;
-
- struct BBInfo {
- MCTextAtom *Atom;
- MCBasicBlock *BB;
- BBInfoSetTy Succs;
- BBInfoSetTy Preds;
- MCObjectDisassembler::AddressSetTy SuccAddrs;
-
- BBInfo() : Atom(nullptr), BB(nullptr) {}
-
- void addSucc(BBInfo &Succ) {
- Succs.insert(&Succ);
- Succ.Preds.insert(this);
- }
- };
-}
-
-static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) {
- std::sort(V.begin(), V.end());
- V.erase(std::unique(V.begin(), V.end()), V.end());
-}
-
-void MCObjectDisassembler::buildCFG(MCModule *Module) {
- typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
- BBInfoByAddrTy BBInfos;
- AddressSetTy Splits;
- AddressSetTy Calls;
-
- for (const SymbolRef &Symbol : Obj.symbols()) {
- SymbolRef::Type SymType;
- Symbol.getType(SymType);
- if (SymType == SymbolRef::ST_Function) {
- uint64_t SymAddr;
- Symbol.getAddress(SymAddr);
- SymAddr = getEffectiveLoadAddr(SymAddr);
- Calls.push_back(SymAddr);
- Splits.push_back(SymAddr);
- }
- }
-
- assert(Module->func_begin() == Module->func_end()
- && "Module already has a CFG!");
-
- // First, determine the basic block boundaries and call targets.
- for (MCModule::atom_iterator AI = Module->atom_begin(),
- AE = Module->atom_end();
- AI != AE; ++AI) {
- MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
- if (!TA) continue;
- Calls.push_back(TA->getBeginAddr());
- BBInfos[TA->getBeginAddr()].Atom = TA;
- for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
- II != IE; ++II) {
- if (MIA.isTerminator(II->Inst))
- Splits.push_back(II->Address + II->Size);
- uint64_t Target;
- if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
- if (MIA.isCall(II->Inst))
- Calls.push_back(Target);
- Splits.push_back(Target);
- }
- }
- }
-
- RemoveDupsFromAddressVector(Splits);
- RemoveDupsFromAddressVector(Calls);
-
- // Split text atoms into basic block atoms.
- for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
- SI != SE; ++SI) {
- MCAtom *A = Module->findAtomContaining(*SI);
- if (!A) continue;
- MCTextAtom *TA = cast<MCTextAtom>(A);
- if (TA->getBeginAddr() == *SI)
- continue;
- MCTextAtom *NewAtom = TA->split(*SI);
- BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
- StringRef BBName = TA->getName();
- BBName = BBName.substr(0, BBName.find_last_of(':'));
- NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
- }
-
- // Compute succs/preds.
- for (MCModule::atom_iterator AI = Module->atom_begin(),
- AE = Module->atom_end();
- AI != AE; ++AI) {
- MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
- if (!TA) continue;
- BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
- const MCDecodedInst &LI = TA->back();
- if (MIA.isBranch(LI.Inst)) {
- uint64_t Target;
- if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
- CurBB.addSucc(BBInfos[Target]);
- if (MIA.isConditionalBranch(LI.Inst))
- CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
- } else if (!MIA.isTerminator(LI.Inst))
- CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
- }
-
-
- // Create functions and basic blocks.
- for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
- CI != CE; ++CI) {
- BBInfo &BBI = BBInfos[*CI];
- if (!BBI.Atom) continue;
-
- MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());
-
- // Create MCBBs.
- SmallSetVector<BBInfo*, 16> Worklist;
- Worklist.insert(&BBI);
- for (size_t wi = 0; wi < Worklist.size(); ++wi) {
- BBInfo *BBI = Worklist[wi];
- if (!BBI->Atom)
- continue;
- BBI->BB = &MCFN.createBlock(*BBI->Atom);
- // Add all predecessors and successors to the worklist.
- for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
- SI != SE; ++SI)
- Worklist.insert(*SI);
- for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
- PI != PE; ++PI)
- Worklist.insert(*PI);
- }
-
- // Set preds/succs.
- for (size_t wi = 0; wi < Worklist.size(); ++wi) {
- BBInfo *BBI = Worklist[wi];
- MCBasicBlock *MCBB = BBI->BB;
- if (!MCBB)
- continue;
- for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
- SI != SE; ++SI)
- if ((*SI)->BB)
- MCBB->addSuccessor((*SI)->BB);
- for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
- PI != PE; ++PI)
- if ((*PI)->BB)
- MCBB->addPredecessor((*PI)->BB);
- }
- }
-}
-
-// Basic idea of the disassembly + discovery:
-//
-// start with the wanted address, insert it in the worklist
-// while worklist not empty, take next address in the worklist:
-// - check if atom exists there
-// - if middle of atom:
-// - split basic blocks referencing the atom
-// - look for an already encountered BBInfo (using a map<atom, bbinfo>)
-// - if there is, split it (new one, fallthrough, move succs, etc..)
-// - if start of atom: nothing else to do
-// - if no atom: create new atom and new bbinfo
-// - look at the last instruction in the atom, add succs to worklist
-// for all elements in the worklist:
-// - create basic block, update preds/succs, etc..
-//
-MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN,
- uint64_t BBBeginAddr,
- AddressSetTy &CallTargets,
- AddressSetTy &TailCallTargets) {
- typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
- typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;
- BBInfoByAddrTy BBInfos;
- AddrWorklistTy Worklist;
-
- Worklist.insert(BBBeginAddr);
- for (size_t wi = 0; wi < Worklist.size(); ++wi) {
- const uint64_t BeginAddr = Worklist[wi];
- BBInfo *BBI = &BBInfos[BeginAddr];
-
- MCTextAtom *&TA = BBI->Atom;
- assert(!TA && "Discovered basic block already has an associated atom!");
-
- // Look for an atom at BeginAddr.
- if (MCAtom *A = Module->findAtomContaining(BeginAddr)) {
- // FIXME: We don't care about mixed atoms, see above.
- TA = cast<MCTextAtom>(A);
-
- // The found atom doesn't begin at BeginAddr, we have to split it.
- if (TA->getBeginAddr() != BeginAddr) {
- // FIXME: Handle overlapping atoms: middle-starting instructions, etc..
- MCTextAtom *NewTA = TA->split(BeginAddr);
-
- // Look for an already encountered basic block that needs splitting
- BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr());
- if (It != BBInfos.end() && It->second.Atom) {
- BBI->SuccAddrs = It->second.SuccAddrs;
- It->second.SuccAddrs.clear();
- It->second.SuccAddrs.push_back(BeginAddr);
- }
- TA = NewTA;
- }
- BBI->Atom = TA;
- } else {
- // If we didn't find an atom, then we have to disassemble to create one!
-
- MemoryObject *Region = getRegionFor(BeginAddr);
- if (!Region)
- llvm_unreachable(("Couldn't find suitable region for disassembly at " +
- utostr(BeginAddr)).c_str());
-
- uint64_t InstSize;
- uint64_t EndAddr = Region->getBase() + Region->getExtent();
-
- // We want to stop before the next atom and have a fallthrough to it.
- if (MCTextAtom *NextAtom =
- cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr)))
- EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());
-
- for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
- MCInst Inst;
- if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(),
- nulls())) {
- if (!TA)
- TA = Module->createTextAtom(Addr, Addr);
- TA->addInst(Inst, InstSize);
- } else {
- // We don't care about splitting mixed atoms either.
- llvm_unreachable("Couldn't disassemble instruction in atom.");
- }
-
- uint64_t BranchTarget;
- if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) {
- if (MIA.isCall(Inst))
- CallTargets.push_back(BranchTarget);
- }
-
- if (MIA.isTerminator(Inst))
- break;
- }
- BBI->Atom = TA;
- }
-
- assert(TA && "Couldn't disassemble atom, none was created!");
- assert(TA->begin() != TA->end() && "Empty atom!");
-
- MemoryObject *Region = getRegionFor(TA->getBeginAddr());
- assert(Region && "Couldn't find region for already disassembled code!");
- uint64_t EndRegion = Region->getBase() + Region->getExtent();
-
- // Now we have a basic block atom, add successors.
- // Add the fallthrough block.
- if ((MIA.isConditionalBranch(TA->back().Inst) ||
- !MIA.isTerminator(TA->back().Inst)) &&
- (TA->getEndAddr() + 1 < EndRegion)) {
- BBI->SuccAddrs.push_back(TA->getEndAddr() + 1);
- Worklist.insert(TA->getEndAddr() + 1);
- }
-
- // If the terminator is a branch, add the target block.
- if (MIA.isBranch(TA->back().Inst)) {
- uint64_t BranchTarget;
- if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address,
- TA->back().Size, BranchTarget)) {
- StringRef ExtFnName;
- if (MOS)
- ExtFnName =
- MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget));
- if (!ExtFnName.empty()) {
- TailCallTargets.push_back(BranchTarget);
- CallTargets.push_back(BranchTarget);
- } else {
- BBI->SuccAddrs.push_back(BranchTarget);
- Worklist.insert(BranchTarget);
- }
- }
- }
- }
-
- for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
- const uint64_t BeginAddr = Worklist[wi];
- BBInfo *BBI = &BBInfos[BeginAddr];
-
- assert(BBI->Atom && "Found a basic block without an associated atom!");
-
- // Look for a basic block at BeginAddr.
- BBI->BB = MCFN->find(BeginAddr);
- if (BBI->BB) {
- // FIXME: check that the succs/preds are the same
- continue;
- }
- // If there was none, we have to create one from the atom.
- BBI->BB = &MCFN->createBlock(*BBI->Atom);
- }
-
- for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
- const uint64_t BeginAddr = Worklist[wi];
- BBInfo *BBI = &BBInfos[BeginAddr];
- MCBasicBlock *BB = BBI->BB;
-
- RemoveDupsFromAddressVector(BBI->SuccAddrs);
- for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
- SE = BBI->SuccAddrs.end();
- SI != SE; ++SI) {
- MCBasicBlock *Succ = BBInfos[*SI].BB;
- BB->addSuccessor(Succ);
- Succ->addPredecessor(BB);
- }
- }
-
- assert(BBInfos[Worklist[0]].BB &&
- "No basic block created at requested address?");
-
- return BBInfos[Worklist[0]].BB;
-}
-
-MCFunction *
-MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr,
- AddressSetTy &CallTargets,
- AddressSetTy &TailCallTargets) {
- // First, check if this is an external function.
- StringRef ExtFnName;
- if (MOS)
- ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr));
- if (!ExtFnName.empty())
- return Module->createFunction(ExtFnName);
-
- // If it's not, look for an existing function.
- for (MCModule::func_iterator FI = Module->func_begin(),
- FE = Module->func_end();
- FI != FE; ++FI) {
- if ((*FI)->empty())
- continue;
- // FIXME: MCModule should provide a findFunctionByAddr()
- if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr)
- return FI->get();
- }
-
- // Finally, just create a new one.
- MCFunction *MCFN = Module->createFunction("");
- getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets);
- return MCFN;
-}
-
-// MachO MCObjectDisassembler implementation.
-
-MCMachOObjectDisassembler::MCMachOObjectDisassembler(
- const MachOObjectFile &MOOF, const MCDisassembler &Dis,
- const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
- uint64_t HeaderLoadAddress)
- : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF),
- VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {
-
- for (const SectionRef &Section : MOOF.sections()) {
- StringRef Name;
- Section.getName(Name);
- // FIXME: We should use the S_ section type instead of the name.
- if (Name == "__mod_init_func") {
- DEBUG(dbgs() << "Found __mod_init_func section!\n");
- Section.getContents(ModInitContents);
- } else if (Name == "__mod_exit_func") {
- DEBUG(dbgs() << "Found __mod_exit_func section!\n");
- Section.getContents(ModExitContents);
- }
- }
-}
-
-// FIXME: Only do the translations for addresses actually inside the object.
-uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
- return Addr + VMAddrSlide;
-}
-
-uint64_t
-MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) {
- return EffectiveAddr - VMAddrSlide;
-}
-
-uint64_t MCMachOObjectDisassembler::getEntrypoint() {
- uint64_t EntryFileOffset = 0;
-
- // Look for LC_MAIN.
- {
- uint32_t LoadCommandCount = MOOF.getHeader().ncmds;
- MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo();
- for (unsigned I = 0;; ++I) {
- if (Load.C.cmd == MachO::LC_MAIN) {
- EntryFileOffset =
- ((const MachO::entry_point_command *)Load.Ptr)->entryoff;
- break;
- }
-
- if (I == LoadCommandCount - 1)
- break;
- else
- Load = MOOF.getNextLoadCommandInfo(Load);
- }
- }
-
- // If we didn't find anything, default to the common implementation.
- // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends?
- if (EntryFileOffset == 0)
- return MCObjectDisassembler::getEntrypoint();
-
- return EntryFileOffset + HeaderLoadAddress;
-}
-
-ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() {
- // FIXME: We only handle 64bit mach-o
- assert(MOOF.is64Bit());
-
- size_t EntrySize = 8;
- size_t EntryCount = ModInitContents.size() / EntrySize;
- return ArrayRef<uint64_t>(
- reinterpret_cast<const uint64_t *>(ModInitContents.data()), EntryCount);
-}
-
-ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() {
- // FIXME: We only handle 64bit mach-o
- assert(MOOF.is64Bit());
-
- size_t EntrySize = 8;
- size_t EntryCount = ModExitContents.size() / EntrySize;
- return ArrayRef<uint64_t>(
- reinterpret_cast<const uint64_t *>(ModExitContents.data()), EntryCount);
-}
diff --git a/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp
deleted file mode 100644
index b149596..0000000
--- a/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp
+++ /dev/null
@@ -1,268 +0,0 @@
-//===-- lib/MC/MCObjectSymbolizer.cpp -------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCObjectSymbolizer.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRelocationInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-
-using namespace llvm;
-using namespace object;
-
-//===- MCMachObjectSymbolizer ---------------------------------------------===//
-
-namespace {
-class MCMachObjectSymbolizer : public MCObjectSymbolizer {
- const MachOObjectFile *MOOF;
- // __TEXT;__stubs support.
- uint64_t StubsStart;
- uint64_t StubsCount;
- uint64_t StubSize;
- uint64_t StubsIndSymIndex;
-
-public:
- MCMachObjectSymbolizer(MCContext &Ctx,
- std::unique_ptr<MCRelocationInfo> RelInfo,
- const MachOObjectFile *MOOF);
-
- StringRef findExternalFunctionAt(uint64_t Addr) override;
-
- void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
- uint64_t Address) override;
-};
-} // End unnamed namespace
-
-MCMachObjectSymbolizer::MCMachObjectSymbolizer(
- MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
- const MachOObjectFile *MOOF)
- : MCObjectSymbolizer(Ctx, std::move(RelInfo), MOOF), MOOF(MOOF),
- StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) {
-
- for (const SectionRef &Section : MOOF->sections()) {
- StringRef Name;
- Section.getName(Name);
- if (Name == "__stubs") {
- SectionRef StubsSec = Section;
- if (MOOF->is64Bit()) {
- MachO::section_64 S = MOOF->getSection64(StubsSec.getRawDataRefImpl());
- StubsIndSymIndex = S.reserved1;
- StubSize = S.reserved2;
- } else {
- MachO::section S = MOOF->getSection(StubsSec.getRawDataRefImpl());
- StubsIndSymIndex = S.reserved1;
- StubSize = S.reserved2;
- }
- assert(StubSize && "Mach-O stub entry size can't be zero!");
- StubsSec.getAddress(StubsStart);
- StubsSec.getSize(StubsCount);
- StubsCount /= StubSize;
- }
- }
-}
-
-StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
- // FIXME: also, this can all be done at the very beginning, by iterating over
- // all stubs and creating the calls to outside functions. Is it worth it
- // though?
- if (!StubSize)
- return StringRef();
- uint64_t StubIdx = (Addr - StubsStart) / StubSize;
- if (StubIdx >= StubsCount)
- return StringRef();
-
- uint32_t SymtabIdx =
- MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx);
-
- StringRef SymName;
- symbol_iterator SI = MOOF->symbol_begin();
- for (uint32_t i = 0; i != SymtabIdx; ++i)
- ++SI;
- SI->getName(SymName);
- assert(SI != MOOF->symbol_end() && "Stub wasn't found in the symbol table!");
- assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!");
- return SymName.substr(1);
-}
-
-void MCMachObjectSymbolizer::
-tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
- uint64_t Address) {
- if (const RelocationRef *R = findRelocationAt(Address)) {
- const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R);
- if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false)
- return;
- }
- uint64_t Addr = Value;
- if (const SectionRef *S = findSectionContaining(Addr)) {
- StringRef Name; S->getName(Name);
- uint64_t SAddr; S->getAddress(SAddr);
- if (Name == "__cstring") {
- StringRef Contents;
- S->getContents(Contents);
- Contents = Contents.substr(Addr - SAddr);
- cStream << " ## literal pool for: "
- << Contents.substr(0, Contents.find_first_of(0));
- }
- }
-}
-
-//===- MCObjectSymbolizer -------------------------------------------------===//
-
-MCObjectSymbolizer::MCObjectSymbolizer(
- MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
- const ObjectFile *Obj)
- : MCSymbolizer(Ctx, std::move(RelInfo)), Obj(Obj), SortedSections(),
- AddrToReloc() {}
-
-bool MCObjectSymbolizer::
-tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream,
- int64_t Value, uint64_t Address, bool IsBranch,
- uint64_t Offset, uint64_t InstSize) {
- if (IsBranch) {
- StringRef ExtFnName = findExternalFunctionAt((uint64_t)Value);
- if (!ExtFnName.empty()) {
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(ExtFnName);
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
- MI.addOperand(MCOperand::CreateExpr(Expr));
- return true;
- }
- }
-
- if (const RelocationRef *R = findRelocationAt(Address + Offset)) {
- if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R)) {
- MI.addOperand(MCOperand::CreateExpr(RelExpr));
- return true;
- }
- // Only try to create a symbol+offset expression if there is no relocation.
- return false;
- }
-
- // Interpret Value as a branch target.
- if (IsBranch == false)
- return false;
- uint64_t UValue = Value;
- // FIXME: map instead of looping each time?
- for (const SymbolRef &Symbol : Obj->symbols()) {
- uint64_t SymAddr;
- Symbol.getAddress(SymAddr);
- uint64_t SymSize;
- Symbol.getSize(SymSize);
- StringRef SymName;
- Symbol.getName(SymName);
- SymbolRef::Type SymType;
- Symbol.getType(SymType);
- if (SymAddr == UnknownAddressOrSize || SymSize == UnknownAddressOrSize ||
- SymName.empty() || SymType != SymbolRef::ST_Function)
- continue;
-
- if ( SymAddr == UValue ||
- (SymAddr <= UValue && SymAddr + SymSize > UValue)) {
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
- if (SymAddr != UValue) {
- const MCExpr *Off = MCConstantExpr::Create(UValue - SymAddr, Ctx);
- Expr = MCBinaryExpr::CreateAdd(Expr, Off, Ctx);
- }
- MI.addOperand(MCOperand::CreateExpr(Expr));
- return true;
- }
- }
- return false;
-}
-
-void MCObjectSymbolizer::
-tryAddingPcLoadReferenceComment(raw_ostream &cStream,
- int64_t Value, uint64_t Address) {
-}
-
-StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
- return StringRef();
-}
-
-MCObjectSymbolizer *MCObjectSymbolizer::createObjectSymbolizer(
- MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
- const ObjectFile *Obj) {
- if (const MachOObjectFile *MOOF = dyn_cast<MachOObjectFile>(Obj))
- return new MCMachObjectSymbolizer(Ctx, std::move(RelInfo), MOOF);
- return new MCObjectSymbolizer(Ctx, std::move(RelInfo), Obj);
-}
-
-// SortedSections implementation.
-
-static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) {
- uint64_t SAddr; S.getAddress(SAddr);
- return SAddr < Addr;
-}
-
-const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) {
- if (SortedSections.empty())
- buildSectionList();
-
- SortedSectionList::iterator
- EndIt = SortedSections.end(),
- It = std::lower_bound(SortedSections.begin(), EndIt,
- Addr, SectionStartsBefore);
- if (It == EndIt)
- return nullptr;
- uint64_t SAddr; It->getAddress(SAddr);
- uint64_t SSize; It->getSize(SSize);
- if (Addr >= SAddr + SSize)
- return nullptr;
- return &*It;
-}
-
-const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) {
- if (AddrToReloc.empty())
- buildRelocationByAddrMap();
-
- AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr);
- if (RI == AddrToReloc.end())
- return nullptr;
- return &RI->second;
-}
-
-void MCObjectSymbolizer::buildSectionList() {
- for (const SectionRef &Section : Obj->sections()) {
- bool RequiredForExec;
- Section.isRequiredForExecution(RequiredForExec);
- if (RequiredForExec == false)
- continue;
- uint64_t SAddr;
- Section.getAddress(SAddr);
- uint64_t SSize;
- Section.getSize(SSize);
- SortedSectionList::iterator It =
- std::lower_bound(SortedSections.begin(), SortedSections.end(), SAddr,
- SectionStartsBefore);
- if (It != SortedSections.end()) {
- uint64_t FoundSAddr; It->getAddress(FoundSAddr);
- if (FoundSAddr < SAddr + SSize)
- llvm_unreachable("Inserting overlapping sections");
- }
- SortedSections.insert(It, Section);
- }
-}
-
-void MCObjectSymbolizer::buildRelocationByAddrMap() {
- for (const SectionRef &Section : Obj->sections()) {
- for (const RelocationRef &Reloc : Section.relocations()) {
- uint64_t Address;
- Reloc.getAddress(Address);
- // At a specific address, only keep the first relocation.
- if (AddrToReloc.find(Address) == AddrToReloc.end())
- AddrToReloc[Address] = Reloc;
- }
- }
-}
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index f8081ef..2fb558f 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -32,7 +32,6 @@ MCAsmInfo::MCAsmInfo() {
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
HasStaticCtorDtorReferenceInStaticMode = false;
- LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
MinInstAlignment = 1;
DollarIsPC = false;
@@ -64,7 +63,7 @@ MCAsmInfo::MCAsmInfo() {
GPRel64Directive = nullptr;
GPRel32Directive = nullptr;
GlobalDirective = "\t.globl\t";
- HasSetDirective = true;
+ SetDirectiveSuppressesReloc = false;
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = true;
LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;
@@ -79,10 +78,9 @@ MCAsmInfo::MCAsmInfo() {
HiddenVisibilityAttr = MCSA_Hidden;
HiddenDeclarationVisibilityAttr = MCSA_Hidden;
ProtectedVisibilityAttr = MCSA_Protected;
- HasLEB128 = false;
SupportsDebugInformation = false;
ExceptionsType = ExceptionHandling::None;
- WinEHEncodingType = WinEH::EncodingType::ET_Invalid;
+ WinEHEncodingType = WinEH::EncodingType::Invalid;
DwarfUsesRelocationsAcrossSections = true;
DwarfFDESymbolsUseAbsDiff = false;
DwarfRegNumForCFI = false;
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 9945637..bb3f0d3 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -32,7 +32,6 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
ProtectedVisibilityAttr = MCSA_Invalid;
// Set up DWARF directives
- HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
SupportsDebugInformation = true;
NeedsDwarfSectionOffsetDirective = true;
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index eaf28dd..66a138b 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -42,9 +42,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HasMachoTBSSDirective = true; // Uses .tbss
HasStaticCtorDtorReferenceInStaticMode = true;
- // FIXME: Darwin 10 and newer don't need this.
- LinkerRequiresNonEmptyDwarfLines = true;
-
// FIXME: Change this once MC is the system assembler.
HasAggressiveSymbolFolding = false;
@@ -60,4 +57,5 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
DwarfUsesRelocationsAcrossSections = false;
UseIntegratedAssembler = true;
+ SetDirectiveSuppressesReloc = true;
}
diff --git a/lib/MC/MCAsmInfoELF.cpp b/lib/MC/MCAsmInfoELF.cpp
index ccb3dc3..9f70d8d 100644
--- a/lib/MC/MCAsmInfoELF.cpp
+++ b/lib/MC/MCAsmInfoELF.cpp
@@ -13,10 +13,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
void MCAsmInfoELF::anchor() { }
+const MCSection *
+MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
+
MCAsmInfoELF::MCAsmInfoELF() {
HasIdentDirective = true;
WeakRefDirective = "\t.weak\t";
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 6973bbb..f60c7fc 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -700,7 +700,6 @@ void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
EmitULEB128IntValue(IntValue);
return;
}
- assert(MAI->hasLEB128() && "Cannot print a .uleb");
OS << ".uleb128 " << *Value;
EmitEOL();
}
@@ -711,7 +710,6 @@ void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
EmitSLEB128IntValue(IntValue);
return;
}
- assert(MAI->hasLEB128() && "Cannot print a .sleb");
OS << ".sleb128 " << *Value;
EmitEOL();
}
@@ -1089,19 +1087,6 @@ void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind,
EmitEOL();
}
-static const MCSection *getWin64EHTableSection(StringRef suffix,
- MCContext &context) {
- // FIXME: This doesn't belong in MCObjectFileInfo. However,
- // it duplicates code in MCWin64EH.cpp.
- if (suffix == "")
- return context.getObjectFileInfo()->getXDataSection();
- return context.getCOFFSection((".xdata"+suffix).str(),
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-}
-
void MCAsmStreamer::EmitWinEHHandlerData() {
MCStreamer::EmitWinEHHandlerData();
@@ -1109,11 +1094,10 @@ void MCAsmStreamer::EmitWinEHHandlerData() {
// cause the section switch to be visible in the emitted assembly.
// We only do this so the section switch that terminates the handler
// data block is visible.
- MCWin64EHUnwindInfo *CurFrame = getCurrentW64UnwindInfo();
- StringRef suffix=MCWin64EHUnwindEmitter::GetSectionSuffix(CurFrame->Function);
- const MCSection *xdataSect = getWin64EHTableSection(suffix, getContext());
- if (xdataSect)
- SwitchSectionNoChange(xdataSect);
+ WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo();
+ if (const MCSection *XData = WinEH::UnwindEmitter::getXDataSection(
+ CurFrame->Function, getContext()))
+ SwitchSectionNoChange(XData);
OS << "\t.seh_handlerdata";
EmitEOL();
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index a8aad71..85d0c13 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -141,7 +141,7 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout,
// If SD is a variable, evaluate it.
MCValue Target;
- if (!S.getVariableValue()->EvaluateAsValue(Target, &Layout))
+ if (!S.getVariableValue()->EvaluateAsValue(Target, &Layout, nullptr))
report_fatal_error("unable to evaluate offset for variable '" +
S.getName() + "'");
@@ -187,7 +187,7 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
const MCExpr *Expr = Symbol.getVariableValue();
MCValue Value;
- if (!Expr->EvaluateAsValue(Value, this))
+ if (!Expr->EvaluateAsValue(Value, this, nullptr))
llvm_unreachable("Invalid Expression");
const MCSymbolRefExpr *RefB = Value.getSymB();
@@ -291,7 +291,9 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
: Section(&_Section),
Ordinal(~UINT32_C(0)),
Alignment(1),
- BundleLockState(NotBundleLocked), BundleGroupBeforeFirstInst(false),
+ BundleLockState(NotBundleLocked),
+ BundleLockNestingDepth(0),
+ BundleGroupBeforeFirstInst(false),
HasInstructions(false)
{
if (A)
@@ -328,17 +330,33 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
return IP;
}
+void MCSectionData::setBundleLockState(BundleLockStateType NewState) {
+ if (NewState == NotBundleLocked) {
+ if (BundleLockNestingDepth == 0) {
+ report_fatal_error("Mismatched bundle_lock/unlock directives");
+ }
+ if (--BundleLockNestingDepth == 0) {
+ BundleLockState = NotBundleLocked;
+ }
+ return;
+ }
+
+ // If any of the directives is an align_to_end directive, the whole nested
+ // group is align_to_end. So don't downgrade from align_to_end to just locked.
+ if (BundleLockState != BundleLockedAlignToEnd) {
+ BundleLockState = NewState;
+ }
+ ++BundleLockNestingDepth;
+}
+
/* *** */
MCSymbolData::MCSymbolData() : Symbol(nullptr) {}
MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
uint64_t _Offset, MCAssembler *A)
- : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
- IsExternal(false), IsPrivateExtern(false),
- CommonSize(0), SymbolSize(nullptr), CommonAlign(0),
- Flags(0), Index(0)
-{
+ : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
+ SymbolSize(nullptr), CommonAlign(-1U), Flags(0), Index(0) {
if (A)
A->getSymbolList().push_back(this);
}
@@ -348,9 +366,9 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_,
MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
raw_ostream &OS_)
- : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
- OS(OS_), BundleAlignSize(0), RelaxAll(false), NoExecStack(false),
- SubsectionsViaSymbols(false), ELFHeaderEFlags(0) {
+ : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
+ OS(OS_), BundleAlignSize(0), RelaxAll(false),
+ SubsectionsViaSymbols(false), ELFHeaderEFlags(0) {
VersionMinInfo.Major = 0; // Major version == 0 for "none specified"
}
@@ -364,11 +382,15 @@ void MCAssembler::reset() {
SymbolMap.clear();
IndirectSymbols.clear();
DataRegions.clear();
+ LinkerOptions.clear();
+ FileNames.clear();
ThumbFuncs.clear();
+ BundleAlignSize = 0;
RelaxAll = false;
- NoExecStack = false;
SubsectionsViaSymbols = false;
ELFHeaderEFlags = 0;
+ LOHContainer.reset();
+ VersionMinInfo.Major = 0;
// reset objects owned by us
getBackend().reset();
@@ -438,11 +460,12 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
// a relocatable expr.
// FIXME: Should this be the behavior of EvaluateAsRelocatable itself?
static bool evaluate(const MCExpr &Expr, const MCAsmLayout &Layout,
- MCValue &Target) {
- if (Expr.EvaluateAsValue(Target, &Layout))
+ const MCFixup &Fixup, MCValue &Target) {
+ if (Expr.EvaluateAsValue(Target, &Layout, &Fixup)) {
if (Target.isAbsolute())
return true;
- return Expr.EvaluateAsRelocatable(Target, &Layout);
+ }
+ return Expr.EvaluateAsRelocatable(Target, &Layout, &Fixup);
}
bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
@@ -454,7 +477,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
// probably merge the two into a single callback that tries to evaluate a
// fixup and records a relocation if one is needed.
const MCExpr *Expr = Fixup.getValue();
- if (!evaluate(*Expr, Layout, Target))
+ if (!evaluate(*Expr, Layout, Fixup, Target))
getContext().FatalError(Fixup.getLoc(), "expected relocatable expression");
bool IsPCRel = Backend.getFixupKindInfo(
@@ -993,11 +1016,8 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
}
bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
- int64_t Value = 0;
uint64_t OldSize = LF.getContents().size();
- bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
- (void)IsAbs;
- assert(IsAbs);
+ int64_t Value = LF.getValue().evaluateKnownAbsolute(Layout);
SmallString<8> &Data = LF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
@@ -1012,11 +1032,8 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
MCDwarfLineAddrFragment &DF) {
MCContext &Context = Layout.getAssembler().getContext();
- int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
- bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
- (void)IsAbs;
- assert(IsAbs);
+ int64_t AddrDelta = DF.getAddrDelta().evaluateKnownAbsolute(Layout);
int64_t LineDelta;
LineDelta = DF.getLineDelta();
SmallString<8> &Data = DF.getContents();
@@ -1030,11 +1047,8 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF) {
MCContext &Context = Layout.getAssembler().getContext();
- int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
- bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
- (void)IsAbs;
- assert(IsAbs);
+ int64_t AddrDelta = DF.getAddrDelta().evaluateKnownAbsolute(Layout);
SmallString<8> &Data = DF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
@@ -1247,8 +1261,10 @@ void MCSymbolData::dump() const {
raw_ostream &OS = llvm::errs();
OS << "<MCSymbolData Symbol:" << getSymbol()
- << " Fragment:" << getFragment() << " Offset:" << getOffset()
- << " Flags:" << getFlags() << " Index:" << getIndex();
+ << " Fragment:" << getFragment();
+ if (!isCommon())
+ OS << " Offset:" << getOffset();
+ OS << " Flags:" << getFlags() << " Index:" << getIndex();
if (isCommon())
OS << " (common, size:" << getCommonSize()
<< " align: " << getCommonAlignment() << ")";
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 960a071..8630b25 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -73,7 +73,10 @@ void MCContext::reset() {
Symbols.clear();
Allocator.Reset();
Instances.clear();
+ CompilationDir.clear();
+ MainFileName.clear();
MCDwarfLineTablesCUMap.clear();
+ SectionStartEndSyms.clear();
MCGenDwarfLabelEntries.clear();
DwarfDebugFlags = StringRef();
DwarfCompileUnitID = 0;
@@ -97,16 +100,33 @@ void MCContext::reset() {
MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
assert(!Name.empty() && "Normal symbols cannot be unnamed!");
- // Do the lookup and get the entire StringMapEntry. We want access to the
- // key if we are creating the entry.
- StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name);
- MCSymbol *Sym = Entry.getValue();
+ MCSymbol *&Sym = Symbols[Name];
+ if (!Sym)
+ Sym = CreateSymbol(Name);
+
+ return Sym;
+}
+
+MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
+ MCSymbol *&Sym = SectionSymbols[&Section];
if (Sym)
return Sym;
- Sym = CreateSymbol(Name);
- Entry.setValue(Sym);
+ StringRef Name = Section.getSectionName();
+
+ MCSymbol *&OldSym = Symbols[Name];
+ if (OldSym && OldSym->isUndefined()) {
+ Sym = OldSym;
+ return OldSym;
+ }
+
+ auto NameIter = UsedNames.insert(std::make_pair(Name, true)).first;
+ Sym = new (*this) MCSymbol(NameIter->getKey(), /*isTemporary*/ false);
+
+ if (!OldSym)
+ OldSym = Sym;
+
return Sym;
}
@@ -116,21 +136,21 @@ MCSymbol *MCContext::CreateSymbol(StringRef Name) {
if (AllowTemporaryLabels)
isTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());
- StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name);
- if (NameEntry->getValue()) {
+ auto NameEntry = UsedNames.insert(std::make_pair(Name, true));
+ if (!NameEntry.second) {
assert(isTemporary && "Cannot rename non-temporary symbols");
SmallString<128> NewName = Name;
do {
NewName.resize(Name.size());
raw_svector_ostream(NewName) << NextUniqueID++;
- NameEntry = &UsedNames.GetOrCreateValue(NewName);
- } while (NameEntry->getValue());
+ NameEntry = UsedNames.insert(std::make_pair(NewName, true));
+ } while (!NameEntry.second);
}
- NameEntry->setValue(true);
// Ok, the entry doesn't already exist. Have the MCSymbol object itself refer
// to the copy of the string that is embedded in the UsedNames entry.
- MCSymbol *Result = new (*this) MCSymbol(NameEntry->getKey(), isTemporary);
+ MCSymbol *Result =
+ new (*this) MCSymbol(NameEntry.first->getKey(), isTemporary);
return Result;
}
@@ -291,7 +311,7 @@ const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
if (!IterBool.second)
return Iter->second;
- const MCSymbol *COMDATSymbol = nullptr;
+ MCSymbol *COMDATSymbol = nullptr;
if (!COMDATSymName.empty())
COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
@@ -317,6 +337,22 @@ const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
return Iter->second;
}
+const MCSectionCOFF *
+MCContext::getAssociativeCOFFSection(const MCSectionCOFF *Sec,
+ const MCSymbol *KeySym) {
+ // Return the normal section if we don't have to be associative.
+ if (!KeySym)
+ return Sec;
+
+ // Make an associative section with the same name and kind as the normal
+ // section.
+ unsigned Characteristics =
+ Sec->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT;
+ return getCOFFSection(Sec->getSectionName(), Characteristics, Sec->getKind(),
+ KeySym->getName(),
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
+}
+
//===----------------------------------------------------------------------===//
// Dwarf Management
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCDisassembler/Android.mk b/lib/MC/MCDisassembler/Android.mk
index 7f73df3..87455e2 100644
--- a/lib/MC/MCDisassembler/Android.mk
+++ b/lib/MC/MCDisassembler/Android.mk
@@ -1,15 +1,37 @@
LOCAL_PATH:= $(call my-dir)
+mc_disassembler_SRC_FILES := \
+ Disassembler.cpp \
+ MCDisassembler.cpp \
+ MCExternalSymbolizer.cpp \
+ MCRelocationInfo.cpp
+
+
# For the host
# =====================================================
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := \
- Disassembler.cpp
+LOCAL_SRC_FILES := $(mc_disassembler_SRC_FILES)
LOCAL_MODULE:= libLLVMMCDisassembler
LOCAL_MODULE_TAGS := optional
+
include $(LLVM_HOST_BUILD_MK)
include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
+
+LOCAL_SRC_FILES := $(mc_disassembler_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMCDisassembler
+
+LOCAL_MODULE_TAGS := optional
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
index 5195b9e..f266f8f 100644
--- a/lib/MC/MCDisassembler/CMakeLists.txt
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -1,3 +1,6 @@
add_llvm_library(LLVMMCDisassembler
Disassembler.cpp
+ MCRelocationInfo.cpp
+ MCExternalSymbolizer.cpp
+ MCDisassembler.cpp
)
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 0530c26..d0d7f30 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -21,7 +21,6 @@
#include "llvm/MC/MCSymbolizer.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -33,10 +32,11 @@ using namespace llvm;
// functions can all be passed as NULL. If successful, this returns a
// disassembler context. If not, it returns NULL.
//
-LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
- void *DisInfo, int TagType,
- LLVMOpInfoCallback GetOpInfo,
- LLVMSymbolLookupCallback SymbolLookUp){
+LLVMDisasmContextRef
+LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU,
+ const char *Features, void *DisInfo, int TagType,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp) {
// Get the target.
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
@@ -56,11 +56,8 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
if (!MII)
return nullptr;
- // Package up features to be passed to target/subtarget
- std::string FeaturesStr;
-
const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
- FeaturesStr);
+ Features);
if (!STI)
return nullptr;
@@ -101,11 +98,19 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
return DC;
}
+LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
+ void *DisInfo, int TagType,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp){
+ return LLVMCreateDisasmCPUFeatures(Triple, CPU, "", DisInfo, TagType,
+ GetOpInfo, SymbolLookUp);
+}
+
LLVMDisasmContextRef LLVMCreateDisasm(const char *Triple, void *DisInfo,
int TagType, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
- return LLVMCreateDisasmCPU(Triple, "", DisInfo, TagType, GetOpInfo,
- SymbolLookUp);
+ return LLVMCreateDisasmCPUFeatures(Triple, "", "", DisInfo, TagType,
+ GetOpInfo, SymbolLookUp);
}
//
@@ -116,30 +121,6 @@ void LLVMDisasmDispose(LLVMDisasmContextRef DCR){
delete DC;
}
-namespace {
-//
-// The memory object created by LLVMDisasmInstruction().
-//
-class DisasmMemoryObject : public MemoryObject {
- uint8_t *Bytes;
- uint64_t Size;
- uint64_t BasePC;
-public:
- DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) :
- Bytes(bytes), Size(size), BasePC(basePC) {}
-
- uint64_t getBase() const override { return BasePC; }
- uint64_t getExtent() const override { return Size; }
-
- int readByte(uint64_t Addr, uint8_t *Byte) const override {
- if (Addr - BasePC >= Size)
- return -1;
- *Byte = Bytes[Addr - BasePC];
- return 0;
- }
-};
-} // end anonymous namespace
-
/// \brief Emits the comments that are stored in \p DC comment stream.
/// Each comment in the comment stream must end with a newline.
static void emitComments(LLVMDisasmContext *DC,
@@ -202,19 +183,19 @@ static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
// Try to compute scheduling information.
const MCSubtargetInfo *STI = DC->getSubtargetInfo();
- const MCSchedModel *SCModel = STI->getSchedModel();
+ const MCSchedModel SCModel = STI->getSchedModel();
const int NoInformationAvailable = -1;
// Check if we have a scheduling model for instructions.
- if (!SCModel || !SCModel->hasInstrSchedModel())
- // Try to fall back to the itinerary model if we do not have a
- // scheduling model.
+ if (!SCModel.hasInstrSchedModel())
+ // Try to fall back to the itinerary model if the scheduling model doesn't
+ // have a scheduling table. Note the default does not have a table.
return getItineraryLatency(DC, Inst);
// Get the scheduling class of the requested instruction.
const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode());
unsigned SCClass = Desc.getSchedClass();
- const MCSchedClassDesc *SCDesc = SCModel->getSchedClassDesc(SCClass);
+ const MCSchedClassDesc *SCDesc = SCModel.getSchedClassDesc(SCClass);
// Resolving the variant SchedClass requires an MI to pass to
// SubTargetInfo::resolveSchedClass.
if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant())
@@ -263,7 +244,7 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
size_t OutStringSize){
LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
// Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject.
- DisasmMemoryObject MemoryObject(Bytes, BytesSize, PC);
+ ArrayRef<uint8_t> Data(Bytes, BytesSize);
uint64_t Size;
MCInst Inst;
@@ -272,7 +253,7 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
MCDisassembler::DecodeStatus S;
SmallVector<char, 64> InsnStr;
raw_svector_ostream Annotations(InsnStr);
- S = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
+ S = DisAsm->getInstruction(Inst, Size, Data, PC,
/*REMOVE*/ nulls(), Annotations);
switch (S) {
case MCDisassembler::Fail:
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index d1d40cd..46d0c4c 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_DISASSEMBLER_H
-#define LLVM_MC_DISASSEMBLER_H
+#ifndef LLVM_LIB_MC_MCDISASSEMBLER_DISASSEMBLER_H
+#define LLVM_LIB_MC_MCDISASSEMBLER_DISASSEMBLER_H
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/SmallString.h"
diff --git a/lib/MC/MCDisassembler.cpp b/lib/MC/MCDisassembler/MCDisassembler.cpp
index 77d9ce1..1084e5e 100644
--- a/lib/MC/MCDisassembler.cpp
+++ b/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -1,4 +1,4 @@
-//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===//
+//===-- MCDisassembler.cpp - Disassembler interface -----------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/MC/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 7c3073a..0145623 100644
--- a/lib/MC/MCExternalSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -1,4 +1,4 @@
-//===-- lib/MC/MCExternalSymbolizer.cpp - External symbolizer ---*- C++ -*-===//
+//===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/MC/MCRelocationInfo.cpp b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index a00c009..ff0c27f 100644
--- a/lib/MC/MCRelocationInfo.cpp
+++ b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -1,4 +1,4 @@
-//==-- lib/MC/MCRelocationInfo.cpp -------------------------------*- C++ -*-==//
+//==-- MCRelocationInfo.cpp ------------------------------------------------==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 0a3fab8..5effb01 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -247,6 +247,22 @@ std::pair<MCSymbol *, MCSymbol *> MCDwarfLineTableHeader::Emit(MCStreamer *MCOS)
return Emit(MCOS, StandardOpcodeLengths);
}
+static const MCExpr *forceExpAbs(MCStreamer &OS, const MCExpr* Expr) {
+ MCContext &Context = OS.getContext();
+ assert(!isa<MCSymbolRefExpr>(Expr));
+ if (Context.getAsmInfo()->hasAggressiveSymbolFolding())
+ return Expr;
+
+ MCSymbol *ABS = Context.CreateTempSymbol();
+ OS.EmitAssignment(ABS, Expr);
+ return MCSymbolRefExpr::Create(ABS, Context);
+}
+
+static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) {
+ const MCExpr *ABS = forceExpAbs(OS, Value);
+ OS.EmitValue(ABS, Size);
+}
+
std::pair<MCSymbol *, MCSymbol *>
MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
ArrayRef<char> StandardOpcodeLengths) const {
@@ -265,8 +281,8 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
// The first 4 bytes is the total length of the information for this
// compilation unit (not including these 4 bytes for the length).
- MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym,4),
- 4);
+ emitAbsValue(*MCOS,
+ MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym, 4), 4);
// Next 2 bytes is the Version, which is Dwarf 2.
MCOS->EmitIntValue(2, 2);
@@ -278,8 +294,9 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
// section to the end of the prologue. Not including the 4 bytes for the
// total length, the 2 bytes for the version, and these 4 bytes for the
// length of the prologue.
- MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym,
- (4 + 2 + 4)), 4);
+ emitAbsValue(
+ *MCOS,
+ MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym, (4 + 2 + 4)), 4);
// Parameters of the state machine are next.
MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1);
@@ -327,18 +344,6 @@ void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS) const {
for (const auto &LineSec : MCLineSections.getMCLineEntries())
EmitDwarfLineTable(MCOS, LineSec.first, LineSec.second);
- if (MCOS->getContext().getAsmInfo()->getLinkerRequiresNonEmptyDwarfLines() &&
- MCLineSections.getMCLineEntries().empty()) {
- // The darwin9 linker has a bug (see PR8715). For 32-bit architectures
- // it requires:
- // total_length >= prologue_length + 10
- // We are 4 bytes short, since we have total_length = 51 and
- // prologue_length = 45
-
- // The regular end_sequence should be sufficient.
- MCDwarfLineAddr::Emit(MCOS, INT64_MAX, 0);
- }
-
// This is the end of the section, so set the value of the symbol at the end
// of this section (that was used in a previous expression).
MCOS->EmitLabel(LineEndSym);
@@ -363,10 +368,10 @@ unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory,
FileNumber = SourceIdMap.size() + 1;
assert((MCDwarfFiles.empty() || FileNumber == MCDwarfFiles.size()) &&
"Don't mix autonumbered and explicit numbered line table usage");
- StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(
- (Directory + Twine('\0') + FileName).str(), FileNumber);
- if (Ent.getValue() != FileNumber)
- return Ent.getValue();
+ auto IterBool = SourceIdMap.insert(
+ std::make_pair((Directory + Twine('\0') + FileName).str(), FileNumber));
+ if (!IterBool.second)
+ return IterBool.first->second;
}
// Make space for this FileNumber in the MCDwarfFiles vector if needed.
MCDwarfFiles.resize(FileNumber + 1);
@@ -519,7 +524,8 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
- if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1) {
+ if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1 &&
+ MCOS->getContext().getDwarfVersion() >= 3) {
EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4);
} else {
EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
@@ -596,7 +602,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
// The 4 byte offset to the compile unit in the .debug_info from the start
// of the .debug_info.
if (InfoSectionSymbol)
- MCOS->EmitSymbolValue(InfoSectionSymbol, 4);
+ MCOS->EmitSymbolValue(InfoSectionSymbol, 4,
+ asmInfo->needsDwarfSectionOffsetDirective());
else
MCOS->EmitIntValue(0, 4);
// The 1 byte size of an address.
@@ -620,7 +627,7 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
*StartSymbol, *EndSymbol, 0);
MCOS->EmitValue(Addr, AddrSize);
- MCOS->EmitAbsValue(Size, AddrSize);
+ emitAbsValue(*MCOS, Size, AddrSize);
}
// And finally the pair of terminating zeros.
@@ -650,18 +657,19 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// The 4 byte total length of the information for this compilation unit, not
// including these 4 bytes.
const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4);
- MCOS->EmitAbsValue(Length, 4);
+ emitAbsValue(*MCOS, Length, 4);
// The 2 byte DWARF version.
MCOS->EmitIntValue(context.getDwarfVersion(), 2);
+ const MCAsmInfo &AsmInfo = *context.getAsmInfo();
// The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev,
// it is at the start of that section so this is zero.
- if (AbbrevSectionSymbol) {
- MCOS->EmitSymbolValue(AbbrevSectionSymbol, 4);
- } else {
+ if (AbbrevSectionSymbol == nullptr)
MCOS->EmitIntValue(0, 4);
- }
+ else
+ MCOS->EmitSymbolValue(AbbrevSectionSymbol, 4,
+ AsmInfo.needsDwarfSectionOffsetDirective());
const MCAsmInfo *asmInfo = context.getAsmInfo();
int AddrSize = asmInfo->getPointerSize();
@@ -675,11 +683,11 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// DW_AT_stmt_list, a 4 byte offset from the start of the .debug_line section,
// which is at the start of that section so this is zero.
- if (LineSectionSymbol) {
- MCOS->EmitSymbolValue(LineSectionSymbol, 4);
- } else {
+ if (LineSectionSymbol)
+ MCOS->EmitSymbolValue(LineSectionSymbol, 4,
+ AsmInfo.needsDwarfSectionOffsetDirective());
+ else
MCOS->EmitIntValue(0, 4);
- }
if (RangesSectionSymbol) {
// There are multiple sections containing code, so we must use the
@@ -740,14 +748,10 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// AT_producer, the version of the assembler tool.
StringRef DwarfDebugProducer = context.getDwarfDebugProducer();
- if (!DwarfDebugProducer.empty()){
+ if (!DwarfDebugProducer.empty())
MCOS->EmitBytes(DwarfDebugProducer);
- }
- else {
- MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "));
- MCOS->EmitBytes(StringRef(PACKAGE_VERSION));
- MCOS->EmitBytes(StringRef(")"));
- }
+ else
+ MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM " PACKAGE_VERSION ")"));
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_language, a 4 byte value. We use DW_LANG_Mips_Assembler as the dwarf2
@@ -824,7 +828,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) {
const MCExpr *SectionSize = MakeStartMinusEndExpr(*MCOS,
*StartSymbol, *EndSymbol, 0);
MCOS->EmitIntValue(0, AddrSize);
- MCOS->EmitAbsValue(SectionSize, AddrSize);
+ emitAbsValue(*MCOS, SectionSize, AddrSize);
}
// Emit end of list entry
@@ -858,10 +862,11 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
if (MCOS->getContext().getGenDwarfSectionSyms().empty())
return;
- // We only need to use the .debug_ranges section if we have multiple
- // code sections.
+ // We only use the .debug_ranges section if we have multiple code sections,
+ // and we are emitting a DWARF version which supports it.
const bool UseRangesSection =
- MCOS->getContext().getGenDwarfSectionSyms().size() > 1;
+ MCOS->getContext().getGenDwarfSectionSyms().size() > 1 &&
+ MCOS->getContext().getDwarfVersion() >= 3;
CreateDwarfSectionSymbols |= UseRangesSection;
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
@@ -974,18 +979,16 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
}
}
-static void EmitFDESymbol(MCStreamer &streamer, const MCSymbol &symbol,
- unsigned symbolEncoding, bool isEH,
- const char *comment = nullptr) {
+static void emitFDESymbol(MCObjectStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding, bool isEH) {
MCContext &context = streamer.getContext();
const MCAsmInfo *asmInfo = context.getAsmInfo();
const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol,
symbolEncoding,
streamer);
unsigned size = getSizeForEncoding(streamer, symbolEncoding);
- if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
if (asmInfo->doDwarfFDESymbolsUseAbsDiff() && isEH)
- streamer.EmitAbsValue(v, size);
+ emitAbsValue(streamer, v, size);
else
streamer.EmitValue(v, size);
}
@@ -1004,17 +1007,16 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
namespace {
class FrameEmitterImpl {
int CFAOffset;
- int CIENum;
bool IsEH;
const MCSymbol *SectionStart;
public:
FrameEmitterImpl(bool isEH)
- : CFAOffset(0), CIENum(0), IsEH(isEH), SectionStart(nullptr) {}
+ : CFAOffset(0), IsEH(isEH), SectionStart(nullptr) {}
void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
- /// EmitCompactUnwind - Emit the unwind information in a compact way.
- void EmitCompactUnwind(MCStreamer &streamer,
+ /// Emit the unwind information in a compact way.
+ void EmitCompactUnwind(MCObjectStreamer &streamer,
const MCDwarfFrameInfo &frame);
const MCSymbol &EmitCIE(MCObjectStreamer &streamer,
@@ -1036,65 +1038,18 @@ namespace {
} // end anonymous namespace
-static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
- StringRef Prefix) {
- if (Streamer.isVerboseAsm()) {
- const char *EncStr;
- switch (Encoding) {
- default: EncStr = "<unknown encoding>"; break;
- case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
- case dwarf::DW_EH_PE_omit: EncStr = "omit"; break;
- case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel"; break;
- case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
- case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
- case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
- case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
- case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
- EncStr = "pcrel udata4";
- break;
- case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
- EncStr = "pcrel sdata4";
- break;
- case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
- EncStr = "pcrel udata8";
- break;
- case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
- EncStr = "screl sdata8";
- break;
- case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
- EncStr = "indirect pcrel udata4";
- break;
- case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
- EncStr = "indirect pcrel sdata4";
- break;
- case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
- EncStr = "indirect pcrel udata8";
- break;
- case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
- EncStr = "indirect pcrel sdata8";
- break;
- }
-
- Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
- }
-
+static void emitEncodingByte(MCObjectStreamer &Streamer, unsigned Encoding) {
Streamer.EmitIntValue(Encoding, 1);
}
void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
const MCCFIInstruction &Instr) {
int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
- bool VerboseAsm = Streamer.isVerboseAsm();
switch (Instr.getOperation()) {
case MCCFIInstruction::OpRegister: {
unsigned Reg1 = Instr.getRegister();
unsigned Reg2 = Instr.getRegister2();
- if (VerboseAsm) {
- Streamer.AddComment("DW_CFA_register");
- Streamer.AddComment(Twine("Reg1 ") + Twine(Reg1));
- Streamer.AddComment(Twine("Reg2 ") + Twine(Reg2));
- }
Streamer.EmitIntValue(dwarf::DW_CFA_register, 1);
Streamer.EmitULEB128IntValue(Reg1);
Streamer.EmitULEB128IntValue(Reg2);
@@ -1106,10 +1061,6 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
}
case MCCFIInstruction::OpUndefined: {
unsigned Reg = Instr.getRegister();
- if (VerboseAsm) {
- Streamer.AddComment("DW_CFA_undefined");
- Streamer.AddComment(Twine("Reg ") + Twine(Reg));
- }
Streamer.EmitIntValue(dwarf::DW_CFA_undefined, 1);
Streamer.EmitULEB128IntValue(Reg);
return;
@@ -1119,8 +1070,6 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
const bool IsRelative =
Instr.getOperation() == MCCFIInstruction::OpAdjustCfaOffset;
- if (VerboseAsm)
- Streamer.AddComment("DW_CFA_def_cfa_offset");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
if (IsRelative)
@@ -1128,37 +1077,21 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
else
CFAOffset = -Instr.getOffset();
- if (VerboseAsm)
- Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
Streamer.EmitULEB128IntValue(CFAOffset);
return;
}
case MCCFIInstruction::OpDefCfa: {
- if (VerboseAsm)
- Streamer.AddComment("DW_CFA_def_cfa");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
-
- if (VerboseAsm)
- Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
Streamer.EmitULEB128IntValue(Instr.getRegister());
-
CFAOffset = -Instr.getOffset();
-
- if (VerboseAsm)
- Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
Streamer.EmitULEB128IntValue(CFAOffset);
return;
}
case MCCFIInstruction::OpDefCfaRegister: {
- if (VerboseAsm)
- Streamer.AddComment("DW_CFA_def_cfa_register");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
-
- if (VerboseAsm)
- Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
Streamer.EmitULEB128IntValue(Instr.getRegister());
return;
@@ -1176,63 +1109,44 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
Offset = Offset / dataAlignmentFactor;
if (Offset < 0) {
- if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf");
Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
- if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitSLEB128IntValue(Offset);
} else if (Reg < 64) {
- if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") +
- Twine(Reg) + ")");
Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitULEB128IntValue(Offset);
} else {
- if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended");
Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
- if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitULEB128IntValue(Offset);
}
return;
}
case MCCFIInstruction::OpRememberState:
- if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
return;
case MCCFIInstruction::OpRestoreState:
- if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
return;
case MCCFIInstruction::OpSameValue: {
unsigned Reg = Instr.getRegister();
- if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
return;
}
case MCCFIInstruction::OpRestore: {
unsigned Reg = Instr.getRegister();
- if (VerboseAsm) {
- Streamer.AddComment("DW_CFA_restore");
- Streamer.AddComment(Twine("Reg ") + Twine(Reg));
- }
Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
return;
}
case MCCFIInstruction::OpEscape:
- if (VerboseAsm) Streamer.AddComment("Escape bytes");
Streamer.EmitBytes(Instr.getValues());
return;
}
llvm_unreachable("Unhandled case in switch");
}
-/// EmitFrameMoves - Emit frame instructions to describe the layout of the
-/// frame.
+/// Emit frame instructions to describe the layout of the frame.
void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
ArrayRef<MCCFIInstruction> Instrs,
MCSymbol *BaseLabel) {
@@ -1246,7 +1160,6 @@ void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
if (BaseLabel && Label) {
MCSymbol *ThisSym = Label;
if (ThisSym != BaseLabel) {
- if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4");
streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
BaseLabel = ThisSym;
}
@@ -1256,12 +1169,11 @@ void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
}
}
-/// EmitCompactUnwind - Emit the unwind information in a compact way.
-void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+/// Emit the unwind information in a compact way.
+void FrameEmitterImpl::EmitCompactUnwind(MCObjectStreamer &Streamer,
const MCDwarfFrameInfo &Frame) {
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
- bool VerboseAsm = Streamer.isVerboseAsm();
// range-start range-length compact-unwind-enc personality-func lsda
// _foo LfooEnd-_foo 0x00000023 0 0
@@ -1296,24 +1208,19 @@ void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
// Range Start
unsigned FDEEncoding = MOFI->getFDEEncoding();
unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
- if (VerboseAsm) Streamer.AddComment("Range Start");
Streamer.EmitSymbolValue(Frame.Begin, Size);
// Range Length
const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
*Frame.End, 0);
- if (VerboseAsm) Streamer.AddComment("Range Length");
- Streamer.EmitAbsValue(Range, 4);
+ emitAbsValue(Streamer, Range, 4);
// Compact Encoding
Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
- if (VerboseAsm) Streamer.AddComment("Compact Unwind Encoding: 0x" +
- Twine::utohexstr(Encoding));
Streamer.EmitIntValue(Encoding, Size);
// Personality Function
Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
- if (VerboseAsm) Streamer.AddComment("Personality Function");
if (!DwarfEHFrameOnly && Frame.Personality)
Streamer.EmitSymbolValue(Frame.Personality, Size);
else
@@ -1321,7 +1228,6 @@ void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
// LSDA
Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
- if (VerboseAsm) Streamer.AddComment("LSDA");
if (!DwarfEHFrameOnly && Frame.Lsda)
Streamer.EmitSymbolValue(Frame.Lsda, Size);
else
@@ -1338,27 +1244,22 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
MCContext &context = streamer.getContext();
const MCRegisterInfo *MRI = context.getRegisterInfo();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
- bool verboseAsm = streamer.isVerboseAsm();
MCSymbol *sectionStart = context.CreateTempSymbol();
streamer.EmitLabel(sectionStart);
- CIENum++;
MCSymbol *sectionEnd = context.CreateTempSymbol();
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
*sectionEnd, 4);
- if (verboseAsm) streamer.AddComment("CIE Length");
- streamer.EmitAbsValue(Length, 4);
+ emitAbsValue(streamer, Length, 4);
// CIE ID
unsigned CIE_ID = IsEH ? 0 : -1;
- if (verboseAsm) streamer.AddComment("CIE ID Tag");
streamer.EmitIntValue(CIE_ID, 4);
// Version
- if (verboseAsm) streamer.AddComment("DW_CIE_VERSION");
// For DWARF2, we use CIE version 1
// For DWARF3+, we use CIE version 3
uint8_t CIEVersion = context.getDwarfVersion() <= 2 ? 1 : 3;
@@ -1367,7 +1268,6 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
// Augmentation String
SmallString<8> Augmentation;
if (IsEH) {
- if (verboseAsm) streamer.AddComment("CIE Augmentation");
Augmentation += "z";
if (personality)
Augmentation += "P";
@@ -1381,15 +1281,12 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
streamer.EmitIntValue(0, 1);
// Code Alignment Factor
- if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor");
streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
// Data Alignment Factor
- if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor");
streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
// Return Address Register
- if (verboseAsm) streamer.AddComment("CIE Return Address Column");
if (CIEVersion == 1) {
assert(MRI->getRARegister() <= 255 &&
"DWARF 2 encodes return_address_register in one byte");
@@ -1414,24 +1311,21 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
// Encoding of the FDE pointers
augmentationLength += 1;
- if (verboseAsm) streamer.AddComment("Augmentation Size");
streamer.EmitULEB128IntValue(augmentationLength);
// Augmentation Data (optional)
if (personality) {
// Personality Encoding
- EmitEncodingByte(streamer, personalityEncoding,
- "Personality Encoding");
+ emitEncodingByte(streamer, personalityEncoding);
// Personality
- if (verboseAsm) streamer.AddComment("Personality");
EmitPersonality(streamer, *personality, personalityEncoding);
}
if (lsda)
- EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
+ emitEncodingByte(streamer, lsdaEncoding);
// Encoding of the FDE pointers
- EmitEncodingByte(streamer, MOFI->getFDEEncoding(), "FDE Encoding");
+ emitEncodingByte(streamer, MOFI->getFDEEncoding());
}
// Initial Instructions
@@ -1457,12 +1351,10 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
MCSymbol *fdeStart = context.CreateTempSymbol();
MCSymbol *fdeEnd = context.CreateTempSymbol();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
- bool verboseAsm = streamer.isVerboseAsm();
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
- if (verboseAsm) streamer.AddComment("FDE Length");
- streamer.EmitAbsValue(Length, 4);
+ emitAbsValue(streamer, Length, 4);
streamer.EmitLabel(fdeStart);
@@ -1471,12 +1363,11 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
if (IsEH) {
const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
0);
- if (verboseAsm) streamer.AddComment("FDE CIE Offset");
- streamer.EmitAbsValue(offset, 4);
+ emitAbsValue(streamer, offset, 4);
} else if (!asmInfo->doesDwarfUseRelocationsAcrossSections()) {
const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
cieStart, 0);
- streamer.EmitAbsValue(offset, 4);
+ emitAbsValue(streamer, offset, 4);
} else {
streamer.EmitSymbolValue(&cieStart, 4);
}
@@ -1485,13 +1376,12 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
unsigned PCEncoding =
IsEH ? MOFI->getFDEEncoding() : (unsigned)dwarf::DW_EH_PE_absptr;
unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
- EmitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH, "FDE initial location");
+ emitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH);
// PC Range
const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
*frame.End, 0);
- if (verboseAsm) streamer.AddComment("FDE address range");
- streamer.EmitAbsValue(Range, PCSize);
+ emitAbsValue(streamer, Range, PCSize);
if (IsEH) {
// Augmentation Data Length
@@ -1500,13 +1390,11 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
if (frame.Lsda)
augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
- if (verboseAsm) streamer.AddComment("Augmentation size");
streamer.EmitULEB128IntValue(augmentationLength);
// Augmentation Data
if (frame.Lsda)
- EmitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true,
- "Language Specific Data Area");
+ emitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true);
}
// Call Frame Instructions
@@ -1574,7 +1462,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
FrameEmitterImpl Emitter(IsEH);
- ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
+ ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getDwarfFrameInfos();
// Emit the compact unwind info if available.
bool NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame();
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
index 0a9cd31..386c209 100644
--- a/lib/MC/MCELF.cpp
+++ b/lib/MC/MCELF.cpp
@@ -61,7 +61,7 @@ void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) {
SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
}
-unsigned MCELF::GetVisibility(MCSymbolData &SD) {
+unsigned MCELF::GetVisibility(const MCSymbolData &SD) {
unsigned Visibility =
(SD.getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
@@ -76,7 +76,7 @@ void MCELF::setOther(MCSymbolData &SD, unsigned Other) {
SD.setFlags(OtherFlags | (Other << ELF_STO_Shift));
}
-unsigned MCELF::getOther(MCSymbolData &SD) {
+unsigned MCELF::getOther(const MCSymbolData &SD) {
unsigned Other =
(SD.getFlags() & (0x3f << ELF_STO_Shift)) >> ELF_STO_Shift;
return Other;
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index 4012c44..84176dc 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -24,6 +24,7 @@ MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
IsN64(IsN64_){
}
-bool MCELFObjectTargetWriter::needsRelocateWithSymbol(unsigned Type) const {
+bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const {
return false;
}
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 7c70540..bdc4a84 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -38,19 +39,23 @@ using namespace llvm;
MCELFStreamer::~MCELFStreamer() {
}
-void MCELFStreamer::InitSections() {
+void MCELFStreamer::InitSections(bool NoExecStack) {
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
- SwitchSection(getContext().getObjectFileInfo()->getTextSection());
+ MCContext &Ctx = getContext();
+ SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
EmitCodeAlignment(4);
- SwitchSection(getContext().getObjectFileInfo()->getDataSection());
+ SwitchSection(Ctx.getObjectFileInfo()->getDataSection());
EmitCodeAlignment(4);
- SwitchSection(getContext().getObjectFileInfo()->getBSSSection());
+ SwitchSection(Ctx.getObjectFileInfo()->getBSSSection());
EmitCodeAlignment(4);
- SwitchSection(getContext().getObjectFileInfo()->getTextSection());
+ SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
+
+ if (NoExecStack)
+ SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
}
void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
@@ -87,10 +92,19 @@ void MCELFStreamer::ChangeSection(const MCSection *Section,
MCSectionData *CurSection = getCurrentSectionData();
if (CurSection && CurSection->isBundleLocked())
report_fatal_error("Unterminated .bundle_lock when changing a section");
- const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
+
+ MCAssembler &Asm = getAssembler();
+ auto *SectionELF = static_cast<const MCSectionELF *>(Section);
+ const MCSymbol *Grp = SectionELF->getGroup();
if (Grp)
- getAssembler().getOrCreateSymbolData(*Grp);
+ Asm.getOrCreateSymbolData(*Grp);
+
this->MCObjectStreamer::ChangeSection(Section, Subsection);
+ MCSymbol *SectionSymbol = getContext().getOrCreateSectionSymbol(*SectionELF);
+ if (SectionSymbol->isUndefined()) {
+ EmitLabel(SectionSymbol);
+ MCELF::SetType(Asm.getSymbolData(*SectionSymbol), ELF::STT_SECTION);
+ }
}
void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
@@ -448,11 +462,13 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
} else {
DF = new MCDataFragment();
insert(DF);
- if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) {
- // If this is a new fragment created for a bundle-locked group, and the
- // group was marked as "align_to_end", set a flag in the fragment.
- DF->setAlignToBundleEnd(true);
- }
+ }
+ if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) {
+ // If this fragment is for a group marked "align_to_end", set a flag
+ // in the fragment. This can happen after the fragment has already been
+ // created if there are nested bundle_align groups and an inner one
+ // is the one marked align_to_end.
+ DF->setAlignToBundleEnd(true);
}
// We're now emitting an instruction in a bundle group, so this flag has
@@ -474,10 +490,11 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
assert(AlignPow2 <= 30 && "Invalid bundle alignment");
MCAssembler &Assembler = getAssembler();
- if (Assembler.getBundleAlignSize() == 0 && AlignPow2 > 0)
- Assembler.setBundleAlignSize(1 << AlignPow2);
+ if (AlignPow2 > 0 && (Assembler.getBundleAlignSize() == 0 ||
+ Assembler.getBundleAlignSize() == 1U << AlignPow2))
+ Assembler.setBundleAlignSize(1U << AlignPow2);
else
- report_fatal_error(".bundle_align_mode should be only set once per file");
+ report_fatal_error(".bundle_align_mode cannot be changed once set");
}
void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
@@ -487,12 +504,12 @@ void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
//
if (!getAssembler().isBundlingEnabled())
report_fatal_error(".bundle_lock forbidden when bundling is disabled");
- else if (SD->isBundleLocked())
- report_fatal_error("Nesting of .bundle_lock is forbidden");
+
+ if (!SD->isBundleLocked())
+ SD->setBundleGroupBeforeFirstInst(true);
SD->setBundleLockState(AlignToEnd ? MCSectionData::BundleLockedAlignToEnd :
MCSectionData::BundleLocked);
- SD->setBundleGroupBeforeFirstInst(true);
}
void MCELFStreamer::EmitBundleUnlock() {
@@ -543,12 +560,10 @@ void MCELFStreamer::FinishImpl() {
MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll, bool NoExecStack) {
+ bool RelaxAll) {
MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
return S;
}
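// --- Illustrative sketch (not part of the patch): the rule that the
// EmitBundleAlignMode change above now enforces. Repeating .bundle_align_mode
// is accepted as long as the requested alignment matches the one already in
// effect; changing it is a fatal error. Standalone C++; BundleAlignSize,
// setBundleAlignMode and reportFatal are stand-ins, not LLVM API.
#include <cstdio>
#include <cstdlib>

static unsigned BundleAlignSize = 0; // 0 means "not set yet"

static void reportFatal(const char *Msg) {
  std::fprintf(stderr, "%s\n", Msg);
  std::exit(1);
}

static void setBundleAlignMode(unsigned AlignPow2) {
  unsigned Requested = 1u << AlignPow2;
  // Accept the first setting, or a repeat of the same value; reject changes.
  if (AlignPow2 > 0 &&
      (BundleAlignSize == 0 || BundleAlignSize == Requested))
    BundleAlignSize = Requested;
  else
    reportFatal(".bundle_align_mode cannot be changed once set");
}

int main() {
  setBundleAlignMode(4); // ok: first use
  setBundleAlignMode(4); // ok: same value repeated
  setBundleAlignMode(5); // fatal: attempts to change the alignment
}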
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index f724716..6e648b2 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -49,12 +49,8 @@ void MCExpr::print(raw_ostream &OS) const {
else
OS << Sym;
- if (SRE.getKind() != MCSymbolRefExpr::VK_None) {
- if (SRE.getMCAsmInfo().useParensForSymbolVariant())
- OS << '(' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()) << ')';
- else
- OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
- }
+ if (SRE.getKind() != MCSymbolRefExpr::VK_None)
+ SRE.printVariantKind(OS);
return;
}
@@ -150,6 +146,15 @@ const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
/* *** */
+MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind,
+ const MCAsmInfo *MAI)
+ : MCExpr(MCExpr::SymbolRef), Kind(Kind),
+ UseParensForSymbolVariant(MAI->useParensForSymbolVariant()),
+ HasSubsectionsViaSymbols(MAI->hasSubsectionsViaSymbols()),
+ Symbol(Symbol) {
+ assert(Symbol);
+}
+
const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
VariantKind Kind,
MCContext &Ctx) {
@@ -247,6 +252,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_GOT_TLSLD_HI: return "got@tlsld@h";
case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha";
case VK_PPC_TLSLD: return "tlsld";
+ case VK_PPC_LOCAL: return "local";
case VK_Mips_GPREL: return "GPREL";
case VK_Mips_GOT_CALL: return "GOT_CALL";
case VK_Mips_GOT16: return "GOT16";
@@ -273,7 +279,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Mips_CALL_LO16: return "CALL_LO16";
case VK_Mips_PCREL_HI16: return "PCREL_HI16";
case VK_Mips_PCREL_LO16: return "PCREL_LO16";
- case VK_COFF_IMGREL32: return "IMGREL32";
+ case VK_COFF_IMGREL32: return "IMGREL";
}
llvm_unreachable("Invalid variant kind");
}
@@ -442,6 +448,13 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Default(VK_Invalid);
}
+void MCSymbolRefExpr::printVariantKind(raw_ostream &OS) const {
+ if (UseParensForSymbolVariant)
+ OS << '(' << MCSymbolRefExpr::getVariantKindName(getKind()) << ')';
+ else
+ OS << '@' << MCSymbolRefExpr::getVariantKindName(getKind());
+}
+
/* *** */
void MCTargetExpr::anchor() {}
@@ -467,9 +480,27 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
return EvaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
}
+int64_t MCExpr::evaluateKnownAbsolute(const MCAsmLayout &Layout) const {
+ int64_t Res;
+ bool Abs =
+ evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, true);
+ (void)Abs;
+ assert(Abs && "Not actually absolute");
+ return Res;
+}
+
bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
const SectionAddrMap *Addrs) const {
+  // FIXME: The use of InSet = Addrs is a hack. Setting InSet causes us to
+  // absolutize differences across sections and that is what the MachO writer
+  // uses Addrs for.
+ return evaluateAsAbsolute(Res, Asm, Layout, Addrs, Addrs);
+}
+
+bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs, bool InSet) const {
MCValue Value;
// Fast path constants.
@@ -478,12 +509,8 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
return true;
}
- // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us
- // absolutize differences across sections and that is what the MachO writer
- // uses Addrs for.
- bool IsRelocatable =
- EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs,
- /*ForceVarExpansion*/ false);
+ bool IsRelocatable = EvaluateAsRelocatableImpl(
+ Value, Asm, Layout, nullptr, Addrs, InSet, /*ForceVarExpansion*/ false);
// Record the current value.
Res = Value.getConstant();
@@ -632,27 +659,31 @@ static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
}
bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
- const MCAsmLayout *Layout) const {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
- return EvaluateAsRelocatableImpl(Res, Assembler, Layout, nullptr, false,
- /*ForceVarExpansion*/ false);
+ return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
+ false, /*ForceVarExpansion*/ false);
}
-bool MCExpr::EvaluateAsValue(MCValue &Res, const MCAsmLayout *Layout) const {
+bool MCExpr::EvaluateAsValue(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
- return EvaluateAsRelocatableImpl(Res, Assembler, Layout, nullptr, false,
- /*ForceVarExpansion*/ true);
+ return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
+ false, /*ForceVarExpansion*/ true);
}
bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
+ const MCFixup *Fixup,
const SectionAddrMap *Addrs, bool InSet,
bool ForceVarExpansion) const {
++stats::MCExprEvaluate;
switch (getKind()) {
case Target:
- return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout);
+ return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout,
+ Fixup);
case Constant:
Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
@@ -661,16 +692,15 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
case SymbolRef: {
const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
const MCSymbol &Sym = SRE->getSymbol();
- const MCAsmInfo &MCAsmInfo = SRE->getMCAsmInfo();
// Evaluate recursively if this is a variable.
if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
if (Sym.getVariableValue()->EvaluateAsRelocatableImpl(
- Res, Asm, Layout, Addrs, true, ForceVarExpansion)) {
+ Res, Asm, Layout, Fixup, Addrs, true, ForceVarExpansion)) {
const MCSymbolRefExpr *A = Res.getSymA();
const MCSymbolRefExpr *B = Res.getSymB();
- if (MCAsmInfo.hasSubsectionsViaSymbols()) {
+ if (SRE->hasSubsectionsViaSymbols()) {
// FIXME: This is small hack. Given
// a = b + 4
// .long a
@@ -697,8 +727,9 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
MCValue Value;
- if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs,
- InSet, ForceVarExpansion))
+ if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
+ Fixup, Addrs, InSet,
+ ForceVarExpansion))
return false;
switch (AUE->getOpcode()) {
@@ -731,10 +762,12 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
MCValue LHSValue, RHSValue;
- if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, Addrs,
- InSet, ForceVarExpansion) ||
- !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, Addrs,
- InSet, ForceVarExpansion))
+ if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
+ Fixup, Addrs, InSet,
+ ForceVarExpansion) ||
+ !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
+ Fixup, Addrs, InSet,
+ ForceVarExpansion))
return false;
// We only support a few operations on non-constant expressions, handle
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 9e8bc94..a147c3d 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -55,6 +55,12 @@ public:
: MCObjectStreamer(Context, MAB, OS, Emitter),
LabelSections(label) {}
+ /// state management
+ void reset() override {
+ HasSectionLabel.clear();
+ MCObjectStreamer::reset();
+ }
+
/// @name MCStreamer Interface
/// @{
@@ -90,8 +96,8 @@ public:
unsigned ByteAlignment) override;
void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override;
- virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0) override;
+ void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment = 0) override;
void EmitFileDirective(StringRef Filename) override {
// FIXME: Just ignore the .file; it isn't important enough to fail the
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index d543402..fc56728 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -29,7 +29,6 @@ namespace {
return true;
}
- void EmitCOFFSecRel32(MCSymbol const *Symbol) override {}
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override {}
void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index d490ef3..1b88462 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -24,7 +24,7 @@ static bool useCompactUnwind(const Triple &T) {
return false;
// aarch64 always has it.
- if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64)
+ if (T.getArch() == Triple::aarch64)
return true;
// Use it on newer version of OS X.
@@ -43,8 +43,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
// MachO
SupportsWeakOmittedEHFrame = false;
- if (T.isOSDarwin() &&
- (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))
+ if (T.isOSDarwin() && T.getArch() == Triple::aarch64)
SupportsCompactUnwindWithoutEHFrame = true;
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
@@ -178,7 +177,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
CompactUnwindDwarfEHFrameOnly = 0x04000000;
- else if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64)
+ else if (T.getArch() == Triple::aarch64)
CompactUnwindDwarfEHFrameOnly = 0x03000000;
}
@@ -287,6 +286,7 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
if (Ctx->getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
break;
// Fallthrough if not using EHABI
+ case Triple::ppc:
case Triple::x86:
PersonalityEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
@@ -321,8 +321,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
break;
case Triple::aarch64:
case Triple::aarch64_be:
- case Triple::arm64:
- case Triple::arm64_be:
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
@@ -340,6 +338,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
break;
case Triple::mips:
case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
// MIPS uses indirect pointer to refer personality functions, so that the
// eh_frame section can be read-only. DW.ref.personality will be generated
// for relocation.
@@ -563,6 +563,9 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfInfoDWOSection =
Ctx->getELFSection(".debug_info.dwo", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
+ DwarfTypesDWOSection =
+ Ctx->getELFSection(".debug_types.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfAbbrevDWOSection =
Ctx->getELFSection(".debug_abbrev.dwo", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
@@ -582,15 +585,19 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfAddrSection =
Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
+
+ StackMapSection =
+ Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getMetadata());
+
}
void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;
- // The object file format cannot represent common symbols with explicit
- // alignments.
- CommDirectiveSupportsAlignment = false;
+ CommDirectiveSupportsAlignment = true;
// COFF
BSSSection =
@@ -738,6 +745,10 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
+ DwarfTypesDWOSection =
+ Ctx->getCOFFSection(".debug_types.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfAbbrevDWOSection =
Ctx->getCOFFSection(".debug_abbrev.dwo",
COFF::IMAGE_SCN_MEM_DISCARDABLE |
@@ -770,6 +781,27 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
+ DwarfAccelNamesSection =
+ Ctx->getCOFFSection(".apple_names",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfAccelNamespaceSection =
+ Ctx->getCOFFSection(".apple_namespaces",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfAccelTypesSection =
+ Ctx->getCOFFSection(".apple_types",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfAccelObjCSection =
+ Ctx->getCOFFSection(".apple_objc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
DrectveSection =
Ctx->getCOFFSection(".drectve",
COFF::IMAGE_SCN_LNK_INFO |
@@ -827,7 +859,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
// cellspu-apple-darwin. Perhaps we should fix in Triple?
if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
Arch == Triple::arm || Arch == Triple::thumb ||
- Arch == Triple::arm64 || Arch == Triple::aarch64 ||
+ Arch == Triple::aarch64 ||
Arch == Triple::ppc || Arch == Triple::ppc64 ||
Arch == Triple::UnknownArch) &&
(TT.isOSDarwin() || TT.isOSBinFormatMachO())) {
@@ -849,13 +881,6 @@ const MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
SectionKind::getMetadata(), 0, utostr(Hash));
}
-const MCSection *
-MCObjectFileInfo::getDwarfTypesDWOSection(uint64_t Hash) const {
- return Ctx->getELFSection(".debug_types.dwo", ELF::SHT_PROGBITS,
- ELF::SHF_GROUP, SectionKind::getMetadata(), 0,
- utostr(Hash));
-}
-
void MCObjectFileInfo::InitEHFrameSection() {
if (Env == IsMachO)
EHFrameSection =
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index a721b59..21e6867 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -42,6 +42,21 @@ MCObjectStreamer::~MCObjectStreamer() {
delete Assembler;
}
+void MCObjectStreamer::flushPendingLabels(MCFragment *F) {
+ if (PendingLabels.size()) {
+ if (!F) {
+ F = new MCDataFragment();
+ CurSectionData->getFragmentList().insert(CurInsertionPoint, F);
+ F->setParent(CurSectionData);
+ }
+ for (MCSymbolData *SD : PendingLabels) {
+ SD->setFragment(F);
+ SD->setOffset(0);
+ }
+ PendingLabels.clear();
+ }
+}
+
void MCObjectStreamer::reset() {
if (Assembler)
Assembler->reset();
@@ -49,6 +64,7 @@ void MCObjectStreamer::reset() {
CurInsertionPoint = MCSectionData::iterator();
EmitEHFrame = true;
EmitDebugFrame = false;
+ PendingLabels.clear();
MCStreamer::reset();
}
@@ -72,7 +88,7 @@ MCFragment *MCObjectStreamer::getCurrentFragment() const {
return nullptr;
}
-MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
+MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() {
MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
// When bundling is enabled, we don't want to add data to a fragment that
// already has instructions (see MCELFStreamer::EmitInstToData for details)
@@ -127,15 +143,17 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
MCStreamer::EmitLabel(Symbol);
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-
- // FIXME: This is wasteful, we don't necessarily need to create a data
- // fragment. Instead, we should mark the symbol as pointing into the data
- // fragment if it exists, otherwise we should just queue the label and set its
- // fragment pointer when we emit the next fragment.
- MCDataFragment *F = getOrCreateDataFragment();
assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
- SD.setFragment(F);
- SD.setOffset(F->getContents().size());
+
+ // If there is a current fragment, mark the symbol as pointing into it.
+ // Otherwise queue the label and set its fragment pointer when we emit the
+ // next fragment.
+ if (auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment())) {
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+ } else {
+ PendingLabels.push_back(&SD);
+ }
}
void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
@@ -144,7 +162,6 @@ void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
EmitULEB128IntValue(IntValue);
return;
}
- Value = ForceExpAbs(Value);
insert(new MCLEBFragment(*Value, false));
}
@@ -154,7 +171,6 @@ void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
EmitSLEB128IntValue(IntValue);
return;
}
- Value = ForceExpAbs(Value);
insert(new MCLEBFragment(*Value, true));
}
@@ -166,6 +182,7 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
void MCObjectStreamer::ChangeSection(const MCSection *Section,
const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
+ flushPendingLabels(nullptr);
CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
@@ -266,33 +283,54 @@ void MCObjectStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
Isa, Discriminator, FileName);
}
+static const MCExpr *buildSymbolDiff(MCObjectStreamer &OS, const MCSymbol *A,
+ const MCSymbol *B) {
+ MCContext &Context = OS.getContext();
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *ARef = MCSymbolRefExpr::Create(A, Variant, Context);
+ const MCExpr *BRef = MCSymbolRefExpr::Create(B, Variant, Context);
+ const MCExpr *AddrDelta =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+ return AddrDelta;
+}
+
+static void emitDwarfSetLineAddr(MCObjectStreamer &OS, int64_t LineDelta,
+ const MCSymbol *Label, int PointerSize) {
+ // emit the sequence to set the address
+ OS.EmitIntValue(dwarf::DW_LNS_extended_op, 1);
+ OS.EmitULEB128IntValue(PointerSize + 1);
+ OS.EmitIntValue(dwarf::DW_LNE_set_address, 1);
+ OS.EmitSymbolValue(Label, PointerSize);
+
+ // emit the sequence for the LineDelta (from 1) and a zero address delta.
+ MCDwarfLineAddr::Emit(&OS, LineDelta, 0);
+}
+
void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
const MCSymbol *Label,
unsigned PointerSize) {
if (!LastLabel) {
- EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+ emitDwarfSetLineAddr(*this, LineDelta, Label, PointerSize);
return;
}
- const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
int64_t Res;
if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
MCDwarfLineAddr::Emit(this, LineDelta, Res);
return;
}
- AddrDelta = ForceExpAbs(AddrDelta);
insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta));
}
void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
const MCSymbol *Label) {
- const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
int64_t Res;
if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
return;
}
- AddrDelta = ForceExpAbs(AddrDelta);
insert(new MCDwarfCallFrameFragment(*AddrDelta));
}
@@ -379,5 +417,6 @@ void MCObjectStreamer::FinishImpl() {
// Dump out the dwarf file & directory tables and line tables.
MCDwarfLineTable::Emit(this);
+ flushPendingLabels(nullptr);
getAssembler().Finish();
}
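// --- Illustrative sketch (not part of the patch): the deferred label binding
// that the new MCObjectStreamer::EmitLabel/flushPendingLabels code above
// implements. If no suitable data fragment exists when a label is emitted, the
// label is queued and bound at offset 0 of the next fragment created, instead
// of eagerly allocating a fragment for it. Standalone C++; MiniStreamer,
// Fragment and Symbol are simplified stand-ins for the MC classes.
#include <cstdio>
#include <vector>

struct Fragment { int Id; };

struct Symbol {
  const char *Name;
  Fragment *Frag = nullptr; // set once the symbol is bound to a fragment
  unsigned Offset = 0;
};

struct MiniStreamer {
  Fragment *Current = nullptr;
  std::vector<Symbol *> PendingLabels;

  // Analogue of EmitLabel: bind immediately if a fragment exists, else queue.
  void emitLabel(Symbol *S) {
    if (Current)
      S->Frag = Current;
    else
      PendingLabels.push_back(S);
  }

  // Analogue of flushPendingLabels: attach all queued labels at offset 0 of
  // the fragment that is eventually created.
  void startFragment(Fragment *F) {
    Current = F;
    for (Symbol *S : PendingLabels) {
      S->Frag = F;
      S->Offset = 0;
    }
    PendingLabels.clear();
  }
};

int main() {
  Symbol A{"a"};
  MiniStreamer S;
  S.emitLabel(&A);   // no fragment yet: label is queued
  Fragment F{1};
  S.startFragment(&F); // queued label now points at fragment 1
  std::printf("%s -> fragment %d\n", A.Name, A.Frag->Id);
}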
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 145ad4a..5c8ec66 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -417,7 +417,7 @@ AsmToken AsmLexer::LexQuote() {
StringRef AsmLexer::LexUntilEndOfStatement() {
TokStart = CurPtr;
- while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
+ while (!isAtStartOfComment(CurPtr) && // Start of line comment.
!isAtStatementSeparator(CurPtr) && // End of statement marker.
*CurPtr != '\n' && *CurPtr != '\r' &&
(*CurPtr != 0 || CurPtr != CurBuf.end())) {
@@ -458,9 +458,17 @@ const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) {
return Token;
}
-bool AsmLexer::isAtStartOfComment(char Char) {
- // FIXME: This won't work for multi-character comment indicators like "//".
- return Char == *MAI.getCommentString();
+bool AsmLexer::isAtStartOfComment(const char *Ptr) {
+ const char *CommentString = MAI.getCommentString();
+
+ if (CommentString[1] == '\0')
+ return CommentString[0] == Ptr[0];
+
+ // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin
+ if (CommentString[1] == '#')
+ return CommentString[0] == Ptr[0];
+
+ return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
}
bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
@@ -473,7 +481,7 @@ AsmToken AsmLexer::LexToken() {
// This always consumes at least one character.
int CurChar = getNextChar();
- if (isAtStartOfComment(CurChar)) {
+ if (isAtStartOfComment(TokStart)) {
// If this comment starts with a '#', then return the Hash token and let
// the assembler parser see if it can be parsed as a cpp line filename
// comment. We do this only if we are at the start of a line.
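// --- Illustrative sketch (not part of the patch): why isAtStartOfComment in
// the AsmLexer change above now takes a buffer pointer instead of a single
// character. With a multi-character comment string such as "//", comparing one
// character cannot tell a comment apart from, say, a division operator, while
// comparing the buffer prefix can. Standalone C++; the CommentString parameter
// stands in for MCAsmInfo::getCommentString().
#include <cassert>
#include <cstring>

static bool isAtStartOfComment(const char *Ptr, const char *CommentString) {
  // Single-character comment markers can still be matched cheaply.
  if (CommentString[1] == '\0')
    return CommentString[0] == Ptr[0];
  // Otherwise compare the whole marker against the buffer prefix.
  return std::strncmp(Ptr, CommentString, std::strlen(CommentString)) == 0;
}

int main() {
  assert(isAtStartOfComment("# comment", "#"));
  assert(isAtStartOfComment("// comment", "//"));
  assert(!isAtStartOfComment("/ divide", "//")); // a single-char check would misfire here
}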
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 62ab4a5..de7d961 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -45,10 +45,6 @@
#include <vector>
using namespace llvm;
-static cl::opt<bool>
-FatalAssemblerWarnings("fatal-assembler-warnings",
- cl::desc("Consider warnings as error"));
-
MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {}
namespace {
@@ -73,19 +69,13 @@ struct MCAsmMacro {
MCAsmMacroParameters Parameters;
public:
- MCAsmMacro(StringRef N, StringRef B, ArrayRef<MCAsmMacroParameter> P) :
- Name(N), Body(B), Parameters(P) {}
+ MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
+ : Name(N), Body(B), Parameters(std::move(P)) {}
};
/// \brief Helper class for storing information about an active macro
/// instantiation.
struct MacroInstantiation {
- /// The macro being instantiated.
- const MCAsmMacro *TheMacro;
-
- /// The macro instantiation with substitutions.
- MemoryBuffer *Instantiation;
-
/// The location of the instantiation.
SMLoc InstantiationLoc;
@@ -95,9 +85,11 @@ struct MacroInstantiation {
/// The location where parsing should resume upon instantiation completion.
SMLoc ExitLoc;
+ /// The depth of TheCondStack at the start of the instantiation.
+ size_t CondStackDepth;
+
public:
- MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB, SMLoc EL,
- MemoryBuffer *I);
+ MacroInstantiation(SMLoc IL, int EB, SMLoc EL, size_t CondStackDepth);
};
struct ParseStatementInfo {
@@ -129,7 +121,7 @@ private:
SourceMgr &SrcMgr;
SourceMgr::DiagHandlerTy SavedDiagHandler;
void *SavedDiagContext;
- MCAsmParserExtension *PlatformParser;
+ std::unique_ptr<MCAsmParserExtension> PlatformParser;
/// This is the current buffer index we're lexing from as managed by the
/// SourceMgr object.
@@ -144,7 +136,7 @@ private:
StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
/// \brief Map of currently defined macros.
- StringMap<MCAsmMacro*> MacroMap;
+ StringMap<MCAsmMacro> MacroMap;
/// \brief Stack of active macro instantiations.
std::vector<MacroInstantiation*> ActiveMacros;
@@ -246,7 +238,8 @@ public:
private:
- bool parseStatement(ParseStatementInfo &Info);
+ bool parseStatement(ParseStatementInfo &Info,
+ MCAsmParserSemaCallback *SI);
void eatToEndOfLine();
bool parseCppHashLineFilenameComment(const SMLoc &L);
@@ -269,7 +262,7 @@ private:
const MCAsmMacro* lookupMacro(StringRef Name);
/// \brief Define a new macro with the given name and information.
- void defineMacro(StringRef Name, const MCAsmMacro& Macro);
+ void defineMacro(StringRef Name, MCAsmMacro Macro);
/// \brief Undefine a macro. If no such macro was defined, it's a no-op.
void undefineMacro(StringRef Name);
@@ -355,9 +348,10 @@ private:
DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE,
DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED,
DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE,
- DK_MACROS_ON, DK_MACROS_OFF, DK_MACRO, DK_ENDM, DK_ENDMACRO, DK_PURGEM,
+ DK_MACROS_ON, DK_MACROS_OFF,
+ DK_MACRO, DK_EXITM, DK_ENDM, DK_ENDMACRO, DK_PURGEM,
DK_SLEB128, DK_ULEB128,
- DK_ERR, DK_ERROR,
+ DK_ERR, DK_ERROR, DK_WARNING,
DK_END
};
@@ -407,6 +401,7 @@ private:
// macro directives
bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
+ bool parseDirectiveExitMacro(StringRef Directive);
bool parseDirectiveEndMacro(StringRef Directive);
bool parseDirectiveMacro(SMLoc DirectiveLoc);
bool parseDirectiveMacrosOnOff(StringRef Directive);
@@ -474,6 +469,9 @@ private:
// ".err" or ".error"
bool parseDirectiveError(SMLoc DirectiveLoc, bool WithMessage);
+ // ".warning"
+ bool parseDirectiveWarning(SMLoc DirectiveLoc);
+
void initializeDirectiveKindMap();
};
}
@@ -504,34 +502,24 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
// Initialize the platform / file format parser.
switch (_Ctx.getObjectFileInfo()->getObjectFileType()) {
case MCObjectFileInfo::IsCOFF:
- PlatformParser = createCOFFAsmParser();
- PlatformParser->Initialize(*this);
- break;
+ PlatformParser.reset(createCOFFAsmParser());
+ break;
case MCObjectFileInfo::IsMachO:
- PlatformParser = createDarwinAsmParser();
- PlatformParser->Initialize(*this);
- IsDarwin = true;
- break;
+ PlatformParser.reset(createDarwinAsmParser());
+ IsDarwin = true;
+ break;
case MCObjectFileInfo::IsELF:
- PlatformParser = createELFAsmParser();
- PlatformParser->Initialize(*this);
- break;
+ PlatformParser.reset(createELFAsmParser());
+ break;
}
+ PlatformParser->Initialize(*this);
initializeDirectiveKindMap();
}
AsmParser::~AsmParser() {
assert((HadError || ActiveMacros.empty()) &&
"Unexpected active macro instantiation!");
-
- // Destroy any macros.
- for (StringMap<MCAsmMacro *>::iterator it = MacroMap.begin(),
- ie = MacroMap.end();
- it != ie; ++it)
- delete it->getValue();
-
- delete PlatformParser;
}
void AsmParser::printMacroInstantiations() {
@@ -550,7 +538,7 @@ void AsmParser::Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
}
bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
- if (FatalAssemblerWarnings)
+ if (getTargetParser().getTargetOptions().MCFatalWarnings)
return Error(L, Msg, Ranges);
printMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
printMacroInstantiations();
@@ -619,7 +607,7 @@ const AsmToken &AsmParser::Lex() {
bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Create the initial section, if requested.
if (!NoInitialTextSection)
- Out.InitSections();
+ Out.InitSections(false);
// Prime the lexer.
Lex();
@@ -643,7 +631,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
ParseStatementInfo Info;
- if (!parseStatement(Info))
+ if (!parseStatement(Info, nullptr))
continue;
// We had an error, validate that one was emitted and recover by skipping to
@@ -702,7 +690,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
void AsmParser::checkForValidSection() {
if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) {
TokError("expected section directive before assembly directive");
- Out.InitSections();
+ Out.InitSections(false);
}
}
@@ -1188,7 +1176,8 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
/// ::= EndOfStatement
/// ::= Label* Directive ...Operands... EndOfStatement
/// ::= Label* Identifier OperandList* EndOfStatement
-bool AsmParser::parseStatement(ParseStatementInfo &Info) {
+bool AsmParser::parseStatement(ParseStatementInfo &Info,
+ MCAsmParserSemaCallback *SI) {
if (Lexer.is(AsmToken::EndOfStatement)) {
Out.AddBlankLine();
Lex();
@@ -1298,9 +1287,16 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) {
// FIXME: This doesn't diagnose assignment to a symbol which has been
// implicitly marked as external.
MCSymbol *Sym;
- if (LocalLabelVal == -1)
+ if (LocalLabelVal == -1) {
+ if (ParsingInlineAsm && SI) {
+ StringRef RewrittenLabel = SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
+ assert(RewrittenLabel.size() && "We should have an internal name here.");
+ Info.AsmRewrites->push_back(AsmRewrite(AOK_Label, IDLoc,
+ IDVal.size(), RewrittenLabel));
+ IDVal = RewrittenLabel;
+ }
Sym = getContext().GetOrCreateSymbol(IDVal);
- else
+ } else
Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
if (!Sym->isUndefined() || Sym->isVariable())
return Error(IDLoc, "invalid symbol redefinition");
@@ -1542,6 +1538,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) {
return parseDirectiveMacrosOnOff(IDVal);
case DK_MACRO:
return parseDirectiveMacro(IDLoc);
+ case DK_EXITM:
+ return parseDirectiveExitMacro(IDVal);
case DK_ENDM:
case DK_ENDMACRO:
return parseDirectiveEndMacro(IDVal);
@@ -1553,6 +1551,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) {
return parseDirectiveError(IDLoc, false);
case DK_ERROR:
return parseDirectiveError(IDLoc, true);
+ case DK_WARNING:
+ return parseDirectiveWarning(IDLoc);
}
return Error(IDLoc, "unknown directive");
@@ -1630,7 +1630,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) {
// If parsing succeeded, match the instruction.
if (!HadError) {
- unsigned ErrorInfo;
+ uint64_t ErrorInfo;
getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode,
Info.ParsedOperands, Out,
ErrorInfo, ParsingInlineAsm);
@@ -1856,10 +1856,10 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
return false;
}
-MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB,
- SMLoc EL, MemoryBuffer *I)
- : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB),
- ExitLoc(EL) {}
+MacroInstantiation::MacroInstantiation(SMLoc IL, int EB, SMLoc EL,
+ size_t CondStackDepth)
+ : InstantiationLoc(IL), ExitBuffer(EB), ExitLoc(EL),
+ CondStackDepth(CondStackDepth) {}
static bool isOperator(AsmToken::TokenKind kind) {
switch (kind) {
@@ -2078,21 +2078,15 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
}
const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) {
- StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name);
- return (I == MacroMap.end()) ? nullptr : I->getValue();
+ StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
+ return (I == MacroMap.end()) ? nullptr : &I->getValue();
}
-void AsmParser::defineMacro(StringRef Name, const MCAsmMacro &Macro) {
- MacroMap[Name] = new MCAsmMacro(Macro);
+void AsmParser::defineMacro(StringRef Name, MCAsmMacro Macro) {
+ MacroMap.insert(std::make_pair(Name, std::move(Macro)));
}
-void AsmParser::undefineMacro(StringRef Name) {
- StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name);
- if (I != MacroMap.end()) {
- delete I->getValue();
- MacroMap.erase(I);
- }
-}
+void AsmParser::undefineMacro(StringRef Name) { MacroMap.erase(Name); }
bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
@@ -2117,17 +2111,17 @@ bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// instantiation.
OS << ".endmacro\n";
- MemoryBuffer *Instantiation =
+ std::unique_ptr<MemoryBuffer> Instantiation =
MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
// Create the macro instantiation object and add to the current macro
// instantiation stack.
MacroInstantiation *MI = new MacroInstantiation(
- M, NameLoc, CurBuffer, getTok().getLoc(), Instantiation);
+ NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size());
ActiveMacros.push_back(MI);
// Jump to the macro instantiation and prime the lexer.
- CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+ CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
Lex();
@@ -2600,12 +2594,14 @@ bool AsmParser::parseDirectiveFill() {
if (!isUInt<32>(FillExpr) && FillSize > 4)
Warning(ExprLoc, "'.fill' directive pattern has been truncated to 32-bits");
- int64_t NonZeroFillSize = FillSize > 4 ? 4 : FillSize;
- FillExpr &= ~0ULL >> (64 - NonZeroFillSize * 8);
-
- for (uint64_t i = 0, e = NumValues; i != e; ++i) {
- getStreamer().EmitIntValue(FillExpr, NonZeroFillSize);
- getStreamer().EmitIntValue(0, FillSize - NonZeroFillSize);
+ if (NumValues > 0) {
+ int64_t NonZeroFillSize = FillSize > 4 ? 4 : FillSize;
+ FillExpr &= ~0ULL >> (64 - NonZeroFillSize * 8);
+ for (uint64_t i = 0, e = NumValues; i != e; ++i) {
+ getStreamer().EmitIntValue(FillExpr, NonZeroFillSize);
+ if (NonZeroFillSize < FillSize)
+ getStreamer().EmitIntValue(0, FillSize - NonZeroFillSize);
+ }
}
return false;
@@ -3292,7 +3288,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
if (Qualifier == "req")
Parameter.Required = true;
- else if (Qualifier == "vararg" && !IsDarwin)
+ else if (Qualifier == "vararg")
Parameter.Vararg = true;
else
return Error(QualLoc, Qualifier + " is not a valid parameter qualifier "
@@ -3313,7 +3309,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
"'" + Parameter.Name + "' in macro '" + Name + "'");
}
- Parameters.push_back(Parameter);
+ Parameters.push_back(std::move(Parameter));
if (getLexer().is(AsmToken::Comma))
Lex();
@@ -3365,7 +3361,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
const char *BodyEnd = EndToken.getLoc().getPointer();
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
checkForBadMacro(DirectiveLoc, Name, Body, Parameters);
- defineMacro(Name, MCAsmMacro(Name, Body, Parameters));
+ defineMacro(Name, MCAsmMacro(Name, Body, std::move(Parameters)));
return false;
}
@@ -3471,6 +3467,26 @@ void AsmParser::checkForBadMacro(SMLoc DirectiveLoc, StringRef Name,
"found in body which will have no effect");
}
+/// parseDirectiveExitMacro
+/// ::= .exitm
+bool AsmParser::parseDirectiveExitMacro(StringRef Directive) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Directive + "' directive");
+
+ if (!isInsideMacroInstantiation())
+ return TokError("unexpected '" + Directive + "' in file, "
+ "no current macro definition");
+
+ // Exit all conditionals that are active in the current macro.
+ while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
+ TheCondState = TheCondStack.back();
+ TheCondStack.pop_back();
+ }
+
+ handleMacroExit();
+ return false;
+}
+
/// parseDirectiveEndMacro
/// ::= .endm
/// ::= .endmacro
@@ -4073,6 +4089,32 @@ bool AsmParser::parseDirectiveError(SMLoc L, bool WithMessage) {
return true;
}
+/// parseDirectiveWarning
+/// ::= .warning [string]
+bool AsmParser::parseDirectiveWarning(SMLoc L) {
+ if (!TheCondStack.empty()) {
+ if (TheCondStack.back().Ignore) {
+ eatToEndOfStatement();
+ return false;
+ }
+ }
+
+ StringRef Message = ".warning directive invoked in source file";
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::String)) {
+ TokError(".warning argument must be a string");
+ eatToEndOfStatement();
+ return true;
+ }
+
+ Message = getTok().getStringContents();
+ Lex();
+ }
+
+ Warning(L, Message);
+ return false;
+}
+
/// parseDirectiveEndIf
/// ::= .endif
bool AsmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
@@ -4200,11 +4242,13 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
DirectiveKindMap[".macro"] = DK_MACRO;
+ DirectiveKindMap[".exitm"] = DK_EXITM;
DirectiveKindMap[".endm"] = DK_ENDM;
DirectiveKindMap[".endmacro"] = DK_ENDMACRO;
DirectiveKindMap[".purgem"] = DK_PURGEM;
DirectiveKindMap[".err"] = DK_ERR;
DirectiveKindMap[".error"] = DK_ERROR;
+ DirectiveKindMap[".warning"] = DK_WARNING;
}
MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
@@ -4246,7 +4290,8 @@ MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
// We Are Anonymous.
- MacroLikeBodies.push_back(MCAsmMacro(StringRef(), Body, None));
+ MacroLikeBodies.push_back(
+ MCAsmMacro(StringRef(), Body, MCAsmMacroParameters()));
return &MacroLikeBodies.back();
}
@@ -4254,17 +4299,17 @@ void AsmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
raw_svector_ostream &OS) {
OS << ".endr\n";
- MemoryBuffer *Instantiation =
+ std::unique_ptr<MemoryBuffer> Instantiation =
MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
// Create the macro instantiation object and add to the current macro
// instantiation stack.
MacroInstantiation *MI = new MacroInstantiation(
- M, DirectiveLoc, CurBuffer, getTok().getLoc(), Instantiation);
+ DirectiveLoc, CurBuffer, getTok().getLoc(), TheCondStack.size());
ActiveMacros.push_back(MI);
// Jump to the macro instantiation and prime the lexer.
- CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+ CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
Lex();
}
@@ -4490,7 +4535,7 @@ bool AsmParser::parseMSInlineAsm(
unsigned OutputIdx = 0;
while (getLexer().isNot(AsmToken::Eof)) {
ParseStatementInfo Info(&AsmStrRewrites);
- if (parseStatement(Info))
+ if (parseStatement(Info, &SI))
return true;
if (Info.ParseError)
@@ -4510,7 +4555,8 @@ bool AsmParser::parseMSInlineAsm(
continue;
// Register operand.
- if (Operand.isReg() && !Operand.needAddressOf()) {
+ if (Operand.isReg() && !Operand.needAddressOf() &&
+ !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
unsigned NumDefs = Desc.getNumDefs();
// Clobber.
if (NumDefs && Operand.getMCOperandNum() < NumDefs)
@@ -4615,6 +4661,9 @@ bool AsmParser::parseMSInlineAsm(
case AOK_ImmPrefix:
OS << "$$";
break;
+ case AOK_Label:
+ OS << Ctx.getAsmInfo()->getPrivateGlobalPrefix() << AR.Label;
+ break;
case AOK_Input:
OS << '$' << InputIdx++;
break;
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 5ecf9e5..6f82e6e 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -364,6 +364,10 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
Flags |= COFF::IMAGE_SCN_LNK_COMDAT;
+ if (!getLexer().is(AsmToken::Identifier))
+ return TokError("expected comdat type such as 'discard' or 'largest' "
+ "after protection bits");
+
if (parseCOMDATType(Type))
return true;
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index b2a6785..3ea745e 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -638,13 +638,13 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
// Open the secure log file if we haven't already.
raw_ostream *OS = getContext().getSecureLog();
if (!OS) {
- std::string Err;
- OS = new raw_fd_ostream(SecureLogFile, Err,
+ std::error_code EC;
+ OS = new raw_fd_ostream(SecureLogFile, EC,
sys::fs::F_Append | sys::fs::F_Text);
- if (!Err.empty()) {
+ if (EC) {
delete OS;
return Error(IDLoc, Twine("can't open secure log file: ") +
- SecureLogFile + " (" + Err + ")");
+ SecureLogFile + " (" + EC.message() + ")");
}
getContext().setSecureLog(OS);
}
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 98b2b3b..e302004 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -555,7 +555,7 @@ EndStmt:
std::make_pair(ELFSection, std::make_pair(nullptr, nullptr)));
if (InsertResult.second) {
if (getContext().getDwarfVersion() <= 2)
- Error(loc, "DWARF2 only supports one section per compilation unit");
+ Warning(loc, "DWARF2 only supports one section per compilation unit");
MCSymbol *SectionStartSymbol = getContext().CreateTempSymbol();
getStreamer().EmitLabel(SectionStartSymbol);
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index 530814b..795cc85 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -30,3 +30,7 @@ SMLoc AsmToken::getLoc() const {
SMLoc AsmToken::getEndLoc() const {
return SMLoc::getFromPointer(Str.data() + Str.size());
}
+
+SMRange AsmToken::getLocRange() const {
+ return SMRange(getLoc(), getEndLoc());
+}
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index e417aa9..290dcb2 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -29,7 +29,7 @@ void MCAsmParser::setTargetParser(MCTargetAsmParser &P) {
TargetParser->Initialize(*this);
}
-const AsmToken &MCAsmParser::getTok() {
+const AsmToken &MCAsmParser::getTok() const {
return getLexer().getTok();
}
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index fc2bd36..e95845f0 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -47,18 +47,22 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
}
OS << "\t.section\t" << getSectionName() << ",\"";
- if (getKind().isText())
+ if (getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE)
OS << 'x';
- else if (getKind().isBSS())
- OS << 'b';
- if (getKind().isWriteable())
+ if (getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE)
OS << 'w';
- else
+ else if (getCharacteristics() & COFF::IMAGE_SCN_MEM_READ)
OS << 'r';
- if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE)
- OS << 'n';
+ else
+ OS << 'y';
if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
OS << 'd';
+ if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ OS << 'b';
+ if (getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE)
+ OS << 'n';
+ if (getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED)
+ OS << 's';
OS << '"';
if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 09eb3e7..a29bb97 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -19,8 +19,8 @@ using namespace llvm;
MCSectionELF::~MCSectionELF() {} // anchor.
-// ShouldOmitSectionDirective - Decides whether a '.section' directive
-// should be printed before the section name
+// Decides whether a '.section' directive
+// should be printed before the section name.
bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
const MCAsmInfo &MAI) const {
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index bdcdb97..f11ee66 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCWin64EH.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,47 +38,26 @@ void MCTargetStreamer::finish() {}
void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
MCStreamer::MCStreamer(MCContext &Ctx)
- : Context(Ctx), CurrentW64UnwindInfo(nullptr) {
+ : Context(Ctx), CurrentWinFrameInfo(nullptr) {
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
MCStreamer::~MCStreamer() {
- for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
- delete W64UnwindInfos[i];
+ for (unsigned i = 0; i < getNumWinFrameInfos(); ++i)
+ delete WinFrameInfos[i];
}
void MCStreamer::reset() {
- for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
- delete W64UnwindInfos[i];
- W64UnwindInfos.clear();
- CurrentW64UnwindInfo = nullptr;
+ DwarfFrameInfos.clear();
+ for (unsigned i = 0; i < getNumWinFrameInfos(); ++i)
+ delete WinFrameInfos[i];
+ WinFrameInfos.clear();
+ CurrentWinFrameInfo = nullptr;
+ SymbolOrdering.clear();
SectionStack.clear();
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
-const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
- const MCSymbol *A,
- const MCSymbol *B) {
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- const MCExpr *ARef =
- MCSymbolRefExpr::Create(A, Variant, Context);
- const MCExpr *BRef =
- MCSymbolRefExpr::Create(B, Variant, Context);
- const MCExpr *AddrDelta =
- MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
- return AddrDelta;
-}
-
-const MCExpr *MCStreamer::ForceExpAbs(const MCExpr* Expr) {
- assert(!isa<MCSymbolRefExpr>(Expr));
- if (Context.getAsmInfo()->hasAggressiveSymbolFolding())
- return Expr;
-
- MCSymbol *ABS = Context.CreateTempSymbol();
- EmitAssignment(ABS, Expr);
- return MCSymbolRefExpr::Create(ABS, Context);
-}
-
raw_ostream &MCStreamer::GetCommentOS() {
// By default, discard comments.
return nulls();
@@ -86,28 +66,15 @@ raw_ostream &MCStreamer::GetCommentOS() {
void MCStreamer::emitRawComment(const Twine &T, bool TabPrefix) {}
void MCStreamer::generateCompactUnwindEncodings(MCAsmBackend *MAB) {
- for (std::vector<MCDwarfFrameInfo>::iterator I = FrameInfos.begin(),
- E = FrameInfos.end(); I != E; ++I)
- I->CompactUnwindEncoding =
- (MAB ? MAB->generateCompactUnwindEncoding(I->Instructions) : 0);
-}
-
-void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta,
- const MCSymbol *Label, int PointerSize) {
- // emit the sequence to set the address
- EmitIntValue(dwarf::DW_LNS_extended_op, 1);
- EmitULEB128IntValue(PointerSize + 1);
- EmitIntValue(dwarf::DW_LNE_set_address, 1);
- EmitSymbolValue(Label, PointerSize);
-
- // emit the sequence for the LineDelta (from 1) and a zero address delta.
- MCDwarfLineAddr::Emit(this, LineDelta, 0);
+ for (auto &FI : DwarfFrameInfos)
+ FI.CompactUnwindEncoding =
+ (MAB ? MAB->generateCompactUnwindEncoding(FI.Instructions) : 0);
}
/// EmitIntValue - Special case of EmitValue that avoids the client having to
/// pass in a MCExpr for constant integers.
void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
- assert(Size <= 8 && "Invalid size");
+ assert(1 <= Size && Size <= 8 && "Invalid size");
assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
"Invalid size");
char buf[8];
@@ -137,19 +104,20 @@ void MCStreamer::EmitSLEB128IntValue(int64_t Value) {
EmitBytes(OSE.str());
}
-void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size) {
- const MCExpr *ABS = ForceExpAbs(Value);
- EmitValue(ABS, Size);
-}
-
-
void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
const SMLoc &Loc) {
EmitValueImpl(Value, Size, Loc);
}
-void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size) {
- EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size);
+void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+ bool IsSectionRelative) {
+ assert((!IsSectionRelative || Size == 4) &&
+ "SectionRelative value requires 4-bytes");
+
+ if (!IsSectionRelative)
+ EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size);
+ else
+ EmitCOFFSecRel32(Sym);
}
void MCStreamer::EmitGPRel64Value(const MCExpr *Value) {
@@ -198,14 +166,14 @@ MCSymbol *MCStreamer::getDwarfLineTableSymbol(unsigned CUID) {
return Table.getLabel();
}
-MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
- if (FrameInfos.empty())
+MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() {
+ if (DwarfFrameInfos.empty())
return nullptr;
- return &FrameInfos.back();
+ return &DwarfFrameInfos.back();
}
-void MCStreamer::EnsureValidFrame() {
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+void MCStreamer::EnsureValidDwarfFrame() {
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame || CurFrame->End)
report_fatal_error("No open frame");
}
@@ -214,7 +182,7 @@ void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol) {
}
-void MCStreamer::InitSections() {
+void MCStreamer::InitSections(bool NoExecStack) {
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
@@ -239,18 +207,12 @@ void MCStreamer::EmitLabel(MCSymbol *Symbol) {
TS->emitLabel(Symbol);
}
-void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) {
- EnsureValidFrame();
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- CurFrame->CompactUnwindEncoding = CompactUnwindEncoding;
-}
-
void MCStreamer::EmitCFISections(bool EH, bool Debug) {
assert(EH || Debug);
}
void MCStreamer::EmitCFIStartProc(bool IsSimple) {
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (CurFrame && !CurFrame->End)
report_fatal_error("Starting a frame before finishing the previous one!");
@@ -258,15 +220,25 @@ void MCStreamer::EmitCFIStartProc(bool IsSimple) {
Frame.IsSimple = IsSimple;
EmitCFIStartProcImpl(Frame);
- FrameInfos.push_back(Frame);
+ const MCAsmInfo* MAI = Context.getAsmInfo();
+ if (MAI) {
+ for (const MCCFIInstruction& Inst : MAI->getInitialFrameState()) {
+ if (Inst.getOperation() == MCCFIInstruction::OpDefCfa ||
+ Inst.getOperation() == MCCFIInstruction::OpDefCfaRegister) {
+ Frame.CurrentCfaRegister = Inst.getRegister();
+ }
+ }
+ }
+
+ DwarfFrameInfos.push_back(Frame);
}
void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
}
void MCStreamer::EmitCFIEndProc() {
- EnsureValidFrame();
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ EnsureValidDwarfFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
EmitCFIEndProcImpl(*CurFrame);
}
@@ -277,7 +249,7 @@ void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
}
MCSymbol *MCStreamer::EmitCFICommon() {
- EnsureValidFrame();
+ EnsureValidDwarfFrame();
MCSymbol *Label = getContext().CreateTempSymbol();
EmitLabel(Label);
return Label;
@@ -287,15 +259,16 @@ void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createDefCfa(Label, Register, Offset);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
+ CurFrame->CurrentCfaRegister = static_cast<unsigned>(Register);
}
void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createDefCfaOffset(Label, Offset);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
@@ -303,7 +276,7 @@ void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createAdjustCfaOffset(Label, Adjustment);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
@@ -311,15 +284,16 @@ void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createDefCfaRegister(Label, Register);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
+ CurFrame->CurrentCfaRegister = static_cast<unsigned>(Register);
}
void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createOffset(Label, Register, Offset);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
@@ -327,21 +301,21 @@ void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createRelOffset(Label, Register, Offset);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIPersonality(const MCSymbol *Sym,
unsigned Encoding) {
- EnsureValidFrame();
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ EnsureValidDwarfFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Personality = Sym;
CurFrame->PersonalityEncoding = Encoding;
}
void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
- EnsureValidFrame();
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ EnsureValidDwarfFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Lsda = Sym;
CurFrame->LsdaEncoding = Encoding;
}
@@ -349,7 +323,7 @@ void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
void MCStreamer::EmitCFIRememberState() {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction = MCCFIInstruction::createRememberState(Label);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
@@ -357,7 +331,7 @@ void MCStreamer::EmitCFIRestoreState() {
// FIXME: Error if there is no matching cfi_remember_state.
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction = MCCFIInstruction::createRestoreState(Label);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
@@ -365,7 +339,7 @@ void MCStreamer::EmitCFISameValue(int64_t Register) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createSameValue(Label, Register);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
@@ -373,20 +347,20 @@ void MCStreamer::EmitCFIRestore(int64_t Register) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createRestore(Label, Register);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIEscape(StringRef Values) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction = MCCFIInstruction::createEscape(Label, Values);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFISignalFrame() {
- EnsureValidFrame();
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ EnsureValidDwarfFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->IsSignalFrame = true;
}
@@ -394,7 +368,7 @@ void MCStreamer::EmitCFIUndefined(int64_t Register) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createUndefined(Label, Register);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
@@ -402,7 +376,7 @@ void MCStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createRegister(Label, Register1, Register2);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
@@ -410,164 +384,167 @@ void MCStreamer::EmitCFIWindowSave() {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
MCCFIInstruction::createWindowSave(Label);
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
- W64UnwindInfos.push_back(Frame);
- CurrentW64UnwindInfo = W64UnwindInfos.back();
-}
-
-void MCStreamer::EnsureValidW64UnwindInfo() {
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- if (!CurFrame || CurFrame->End)
+void MCStreamer::EnsureValidWinFrameInfo() {
+ if (!CurrentWinFrameInfo || CurrentWinFrameInfo->End)
report_fatal_error("No open Win64 EH frame function!");
}
void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) {
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- if (CurFrame && !CurFrame->End)
+ if (CurrentWinFrameInfo && !CurrentWinFrameInfo->End)
report_fatal_error("Starting a function before ending the previous one!");
- MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
- Frame->Begin = getContext().CreateTempSymbol();
- Frame->Function = Symbol;
- EmitLabel(Frame->Begin);
- setCurrentW64UnwindInfo(Frame);
+
+ MCSymbol *StartProc = getContext().CreateTempSymbol();
+ EmitLabel(StartProc);
+
+ WinFrameInfos.push_back(new WinEH::FrameInfo(Symbol, StartProc));
+ CurrentWinFrameInfo = WinFrameInfos.back();
}
void MCStreamer::EmitWinCFIEndProc() {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- if (CurFrame->ChainedParent)
+ EnsureValidWinFrameInfo();
+ if (CurrentWinFrameInfo->ChainedParent)
report_fatal_error("Not all chained regions terminated!");
- CurFrame->End = getContext().CreateTempSymbol();
- EmitLabel(CurFrame->End);
+
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ CurrentWinFrameInfo->End = Label;
}
void MCStreamer::EmitWinCFIStartChained() {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- Frame->Begin = getContext().CreateTempSymbol();
- Frame->Function = CurFrame->Function;
- Frame->ChainedParent = CurFrame;
- EmitLabel(Frame->Begin);
- setCurrentW64UnwindInfo(Frame);
+ EnsureValidWinFrameInfo();
+
+ MCSymbol *StartProc = getContext().CreateTempSymbol();
+ EmitLabel(StartProc);
+
+ WinFrameInfos.push_back(new WinEH::FrameInfo(CurrentWinFrameInfo->Function,
+ StartProc, CurrentWinFrameInfo));
+ CurrentWinFrameInfo = WinFrameInfos.back();
}
void MCStreamer::EmitWinCFIEndChained() {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- if (!CurFrame->ChainedParent)
+ EnsureValidWinFrameInfo();
+ if (!CurrentWinFrameInfo->ChainedParent)
report_fatal_error("End of a chained region outside a chained region!");
- CurFrame->End = getContext().CreateTempSymbol();
- EmitLabel(CurFrame->End);
- CurrentW64UnwindInfo = CurFrame->ChainedParent;
+
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+
+ CurrentWinFrameInfo->End = Label;
+ CurrentWinFrameInfo =
+ const_cast<WinEH::FrameInfo *>(CurrentWinFrameInfo->ChainedParent);
}
void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind,
bool Except) {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- if (CurFrame->ChainedParent)
+ EnsureValidWinFrameInfo();
+ if (CurrentWinFrameInfo->ChainedParent)
report_fatal_error("Chained unwind areas can't have handlers!");
- CurFrame->ExceptionHandler = Sym;
+ CurrentWinFrameInfo->ExceptionHandler = Sym;
if (!Except && !Unwind)
report_fatal_error("Don't know what kind of handler this is!");
if (Unwind)
- CurFrame->HandlesUnwind = true;
+ CurrentWinFrameInfo->HandlesUnwind = true;
if (Except)
- CurFrame->HandlesExceptions = true;
+ CurrentWinFrameInfo->HandlesExceptions = true;
}
void MCStreamer::EmitWinEHHandlerData() {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- if (CurFrame->ChainedParent)
+ EnsureValidWinFrameInfo();
+ if (CurrentWinFrameInfo->ChainedParent)
report_fatal_error("Chained unwind areas can't have handlers!");
}
void MCStreamer::EmitWinCFIPushReg(unsigned Register) {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+ EnsureValidWinFrameInfo();
+
MCSymbol *Label = getContext().CreateTempSymbol();
- MCWin64EHInstruction Inst(Win64EH::UOP_PushNonVol, Label, Register);
EmitLabel(Label);
- CurFrame->Instructions.push_back(Inst);
+
+ WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol(Label, Register);
+ CurrentWinFrameInfo->Instructions.push_back(Inst);
}
void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- if (CurFrame->LastFrameInst >= 0)
+ EnsureValidWinFrameInfo();
+ if (CurrentWinFrameInfo->LastFrameInst >= 0)
report_fatal_error("Frame register and offset already specified!");
if (Offset & 0x0F)
report_fatal_error("Misaligned frame pointer offset!");
if (Offset > 240)
report_fatal_error("Frame offset must be less than or equal to 240!");
+
MCSymbol *Label = getContext().CreateTempSymbol();
- MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset);
EmitLabel(Label);
- CurFrame->LastFrameInst = CurFrame->Instructions.size();
- CurFrame->Instructions.push_back(Inst);
+
+ WinEH::Instruction Inst =
+ Win64EH::Instruction::SetFPReg(Label, Register, Offset);
+ CurrentWinFrameInfo->LastFrameInst = CurrentWinFrameInfo->Instructions.size();
+ CurrentWinFrameInfo->Instructions.push_back(Inst);
}
void MCStreamer::EmitWinCFIAllocStack(unsigned Size) {
- EnsureValidW64UnwindInfo();
+ EnsureValidWinFrameInfo();
if (Size == 0)
report_fatal_error("Allocation size must be non-zero!");
if (Size & 7)
report_fatal_error("Misaligned stack allocation!");
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+
MCSymbol *Label = getContext().CreateTempSymbol();
- MCWin64EHInstruction Inst(Label, Size);
EmitLabel(Label);
- CurFrame->Instructions.push_back(Inst);
+
+ WinEH::Instruction Inst = Win64EH::Instruction::Alloc(Label, Size);
+ CurrentWinFrameInfo->Instructions.push_back(Inst);
}
void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) {
- EnsureValidW64UnwindInfo();
+ EnsureValidWinFrameInfo();
if (Offset & 7)
report_fatal_error("Misaligned saved register offset!");
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+
MCSymbol *Label = getContext().CreateTempSymbol();
- MCWin64EHInstruction Inst(
- Offset > 512*1024-8 ? Win64EH::UOP_SaveNonVolBig : Win64EH::UOP_SaveNonVol,
- Label, Register, Offset);
EmitLabel(Label);
- CurFrame->Instructions.push_back(Inst);
+
+ WinEH::Instruction Inst =
+ Win64EH::Instruction::SaveNonVol(Label, Register, Offset);
+ CurrentWinFrameInfo->Instructions.push_back(Inst);
}
void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) {
- EnsureValidW64UnwindInfo();
+ EnsureValidWinFrameInfo();
if (Offset & 0x0F)
report_fatal_error("Misaligned saved vector register offset!");
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
+
MCSymbol *Label = getContext().CreateTempSymbol();
- MCWin64EHInstruction Inst(
- Offset > 512*1024-16 ? Win64EH::UOP_SaveXMM128Big : Win64EH::UOP_SaveXMM128,
- Label, Register, Offset);
EmitLabel(Label);
- CurFrame->Instructions.push_back(Inst);
+
+ WinEH::Instruction Inst =
+ Win64EH::Instruction::SaveXMM(Label, Register, Offset);
+ CurrentWinFrameInfo->Instructions.push_back(Inst);
}
void MCStreamer::EmitWinCFIPushFrame(bool Code) {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- if (CurFrame->Instructions.size() > 0)
+ EnsureValidWinFrameInfo();
+ if (CurrentWinFrameInfo->Instructions.size() > 0)
report_fatal_error("If present, PushMachFrame must be the first UOP");
+
MCSymbol *Label = getContext().CreateTempSymbol();
- MCWin64EHInstruction Inst(Win64EH::UOP_PushMachFrame, Label, Code);
EmitLabel(Label);
- CurFrame->Instructions.push_back(Inst);
+
+ WinEH::Instruction Inst = Win64EH::Instruction::PushMachFrame(Label, Code);
+ CurrentWinFrameInfo->Instructions.push_back(Inst);
}
void MCStreamer::EmitWinCFIEndProlog() {
- EnsureValidW64UnwindInfo();
- MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
- CurFrame->PrologEnd = getContext().CreateTempSymbol();
- EmitLabel(CurFrame->PrologEnd);
+ EnsureValidWinFrameInfo();
+
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+
+ CurrentWinFrameInfo->PrologEnd = Label;
}
void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
@@ -590,15 +567,11 @@ void MCStreamer::EmitRawText(const Twine &T) {
EmitRawTextImpl(T.toStringRef(Str));
}
-void MCStreamer::EmitW64Tables() {
- if (!getNumW64UnwindInfos())
- return;
-
- MCWin64EHUnwindEmitter::Emit(*this);
+void MCStreamer::EmitWindowsUnwindTables() {
}
void MCStreamer::Finish() {
- if (!FrameInfos.empty() && !FrameInfos.back().End)
+ if (!DwarfFrameInfos.empty() && !DwarfFrameInfos.back().End)
report_fatal_error("Unfinished frame!");
MCTargetStreamer *TS = getTargetStreamer();
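
Usage sketch (not part of the diff): the renamed Win64 CFI entry points above are driven in roughly this order when a target emits prologue unwind data. The register numbers and sizes are placeholders; the alignment and range constraints noted in the comments are the ones enforced by the report_fatal_error calls above.

#include "llvm/MC/MCStreamer.h"
using namespace llvm;

static void emitPrologueUnwind(MCStreamer &Streamer, const MCSymbol *Function) {
  Streamer.EmitWinCFIStartProc(Function);          // opens a new WinEH::FrameInfo
  Streamer.EmitWinCFIPushReg(/*Register=*/5);      // placeholder register number
  Streamer.EmitWinCFIAllocStack(/*Size=*/40);      // must be non-zero and 8-byte aligned
  Streamer.EmitWinCFISetFrame(/*Register=*/5, /*Offset=*/16); // 16-byte aligned, <= 240
  Streamer.EmitWinCFIEndProlog();
  // ... function body ...
  Streamer.EmitWinCFIEndProc();                    // closes CurrentWinFrameInfo
}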
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 4424c91..b8e42bd 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -17,8 +17,6 @@
using namespace llvm;
-MCSchedModel MCSchedModel::DefaultSchedModel; // For unknown processors.
-
/// InitMCProcessorInfo - Set or change the CPU (optionally supplemented
/// with feature string). Recompute feature bits and scheduling model.
void
@@ -33,7 +31,7 @@ MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) {
if (!CPU.empty())
CPUSchedModel = getSchedModelForCPU(CPU);
else
- CPUSchedModel = &MCSchedModel::DefaultSchedModel;
+ CPUSchedModel = MCSchedModel::GetDefaultSchedModel();
}
void
@@ -78,7 +76,7 @@ uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
}
-const MCSchedModel *
+MCSchedModel
MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
assert(ProcSchedModels && "Processor machine model not available!");
@@ -97,15 +95,15 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
errs() << "'" << CPU
<< "' is not a recognized processor for this target"
<< " (ignoring processor)\n";
- return &MCSchedModel::DefaultSchedModel;
+ return MCSchedModel::GetDefaultSchedModel();
}
assert(Found->Value && "Missing processor SchedModel value");
- return (const MCSchedModel *)Found->Value;
+ return *(const MCSchedModel *)Found->Value;
}
InstrItineraryData
MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
- const MCSchedModel *SchedModel = getSchedModelForCPU(CPU);
+ const MCSchedModel SchedModel = getSchedModelForCPU(CPU);
return InstrItineraryData(SchedModel, Stages, OperandCycles, ForwardingPaths);
}
diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp
index efd724a..3093ba2 100644
--- a/lib/MC/MCTargetOptions.cpp
+++ b/lib/MC/MCTargetOptions.cpp
@@ -13,8 +13,8 @@ namespace llvm {
MCTargetOptions::MCTargetOptions()
: SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false),
- MCSaveTempLabels(false), MCUseDwarfDirectory(false),
- ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
- DwarfVersion(0) {}
+ MCFatalWarnings(false), MCSaveTempLabels(false),
+ MCUseDwarfDirectory(false), ShowMCEncoding(false), ShowMCInst(false),
+ AsmVerbose(false), DwarfVersion(0) {}
} // end namespace llvm
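
For reference, a minimal sketch of constructing the options with the newly added MCFatalWarnings field. Its effect (promoting assembler warnings to fatal errors) is inferred from the name and is an assumption here; this hunk only adds the default initialization.

#include "llvm/MC/MCTargetOptions.h"
using namespace llvm;

static MCTargetOptions makeStrictMCOptions() {
  MCTargetOptions Opts;          // every flag defaults to false, DwarfVersion to 0
  Opts.MCFatalWarnings = true;   // new field added by this change
  return Opts;
}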
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index bb651647..dfadb3c 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -15,15 +15,16 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Win64EH.h"
namespace llvm {
// NOTE: All relocations generated here are 4-byte image-relative.
-static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &Insns) {
+static uint8_t CountOfUnwindCodes(std::vector<WinEH::Instruction> &Insns) {
uint8_t Count = 0;
for (const auto &I : Insns) {
- switch (I.getOperation()) {
+ switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
case Win64EH::UOP_PushNonVol:
case Win64EH::UOP_AllocSmall:
case Win64EH::UOP_SetFPReg:
@@ -39,86 +40,83 @@ static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &Insns) {
Count += 3;
break;
case Win64EH::UOP_AllocLarge:
- Count += (I.getSize() > 512 * 1024 - 8) ? 3 : 2;
+ Count += (I.Offset > 512 * 1024 - 8) ? 3 : 2;
break;
}
}
return Count;
}
-static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs,
- MCSymbol *rhs) {
- MCContext &context = streamer.getContext();
- const MCExpr *diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(
- lhs, context),
- MCSymbolRefExpr::Create(
- rhs, context),
- context);
- streamer.EmitAbsValue(diff, 1);
-
+static void EmitAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
+ const MCSymbol *RHS) {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Diff =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LHS, Context),
+ MCSymbolRefExpr::Create(RHS, Context), Context);
+ Streamer.EmitValue(Diff, 1);
}
-static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin,
- MCWin64EHInstruction &inst) {
+static void EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
+ WinEH::Instruction &inst) {
uint8_t b2;
uint16_t w;
- b2 = (inst.getOperation() & 0x0F);
- switch (inst.getOperation()) {
+ b2 = (inst.Operation & 0x0F);
+ switch (static_cast<Win64EH::UnwindOpcodes>(inst.Operation)) {
case Win64EH::UOP_PushNonVol:
- EmitAbsDifference(streamer, inst.getLabel(), begin);
- b2 |= (inst.getRegister() & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.Label, begin);
+ b2 |= (inst.Register & 0x0F) << 4;
streamer.EmitIntValue(b2, 1);
break;
case Win64EH::UOP_AllocLarge:
- EmitAbsDifference(streamer, inst.getLabel(), begin);
- if (inst.getSize() > 512*1024-8) {
+ EmitAbsDifference(streamer, inst.Label, begin);
+ if (inst.Offset > 512 * 1024 - 8) {
b2 |= 0x10;
streamer.EmitIntValue(b2, 1);
- w = inst.getSize() & 0xFFF8;
+ w = inst.Offset & 0xFFF8;
streamer.EmitIntValue(w, 2);
- w = inst.getSize() >> 16;
+ w = inst.Offset >> 16;
} else {
streamer.EmitIntValue(b2, 1);
- w = inst.getSize() >> 3;
+ w = inst.Offset >> 3;
}
streamer.EmitIntValue(w, 2);
break;
case Win64EH::UOP_AllocSmall:
- b2 |= (((inst.getSize()-8) >> 3) & 0x0F) << 4;
- EmitAbsDifference(streamer, inst.getLabel(), begin);
+ b2 |= (((inst.Offset - 8) >> 3) & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.Label, begin);
streamer.EmitIntValue(b2, 1);
break;
case Win64EH::UOP_SetFPReg:
- EmitAbsDifference(streamer, inst.getLabel(), begin);
+ EmitAbsDifference(streamer, inst.Label, begin);
streamer.EmitIntValue(b2, 1);
break;
case Win64EH::UOP_SaveNonVol:
case Win64EH::UOP_SaveXMM128:
- b2 |= (inst.getRegister() & 0x0F) << 4;
- EmitAbsDifference(streamer, inst.getLabel(), begin);
+ b2 |= (inst.Register & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.Label, begin);
streamer.EmitIntValue(b2, 1);
- w = inst.getOffset() >> 3;
- if (inst.getOperation() == Win64EH::UOP_SaveXMM128)
+ w = inst.Offset >> 3;
+ if (inst.Operation == Win64EH::UOP_SaveXMM128)
w >>= 1;
streamer.EmitIntValue(w, 2);
break;
case Win64EH::UOP_SaveNonVolBig:
case Win64EH::UOP_SaveXMM128Big:
- b2 |= (inst.getRegister() & 0x0F) << 4;
- EmitAbsDifference(streamer, inst.getLabel(), begin);
+ b2 |= (inst.Register & 0x0F) << 4;
+ EmitAbsDifference(streamer, inst.Label, begin);
streamer.EmitIntValue(b2, 1);
- if (inst.getOperation() == Win64EH::UOP_SaveXMM128Big)
- w = inst.getOffset() & 0xFFF0;
+ if (inst.Operation == Win64EH::UOP_SaveXMM128Big)
+ w = inst.Offset & 0xFFF0;
else
- w = inst.getOffset() & 0xFFF8;
+ w = inst.Offset & 0xFFF8;
streamer.EmitIntValue(w, 2);
- w = inst.getOffset() >> 16;
+ w = inst.Offset >> 16;
streamer.EmitIntValue(w, 2);
break;
case Win64EH::UOP_PushMachFrame:
- if (inst.isPushCodeFrame())
+ if (inst.Offset == 1)
b2 |= 0x10;
- EmitAbsDifference(streamer, inst.getLabel(), begin);
+ EmitAbsDifference(streamer, inst.Label, begin);
streamer.EmitIntValue(b2, 1);
break;
}
@@ -138,7 +136,7 @@ static void EmitSymbolRefWithOfs(MCStreamer &streamer,
}
static void EmitRuntimeFunction(MCStreamer &streamer,
- const MCWin64EHUnwindInfo *info) {
+ const WinEH::FrameInfo *info) {
MCContext &context = streamer.getContext();
streamer.EmitValueToAlignment(4);
@@ -149,14 +147,17 @@ static void EmitRuntimeFunction(MCStreamer &streamer,
context), 4);
}
-static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
+static void EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
// If this UNWIND_INFO already has a symbol, it's already been emitted.
- if (info->Symbol) return;
+ if (info->Symbol)
+ return;
MCContext &context = streamer.getContext();
+ MCSymbol *Label = context.CreateTempSymbol();
+
streamer.EmitValueToAlignment(4);
- info->Symbol = context.CreateTempSymbol();
- streamer.EmitLabel(info->Symbol);
+ streamer.EmitLabel(Label);
+ info->Symbol = Label;
// Upper 3 bits are the version number (currently 1).
uint8_t flags = 0x01;
@@ -180,17 +181,16 @@ static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
uint8_t frame = 0;
if (info->LastFrameInst >= 0) {
- MCWin64EHInstruction &frameInst = info->Instructions[info->LastFrameInst];
- assert(frameInst.getOperation() == Win64EH::UOP_SetFPReg);
- frame = (frameInst.getRegister() & 0x0F) |
- (frameInst.getOffset() & 0xF0);
+ WinEH::Instruction &frameInst = info->Instructions[info->LastFrameInst];
+ assert(frameInst.Operation == Win64EH::UOP_SetFPReg);
+ frame = (frameInst.Register & 0x0F) | (frameInst.Offset & 0xF0);
}
streamer.EmitIntValue(frame, 1);
// Emit unwind instructions (in reverse order).
uint8_t numInst = info->Instructions.size();
for (uint8_t c = 0; c < numInst; ++c) {
- MCWin64EHInstruction inst = info->Instructions.back();
+ WinEH::Instruction inst = info->Instructions.back();
info->Instructions.pop_back();
EmitUnwindCode(streamer, info->Begin, inst);
}
@@ -218,77 +218,38 @@ static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) {
}
}
-StringRef MCWin64EHUnwindEmitter::GetSectionSuffix(const MCSymbol *func) {
- if (!func || !func->isInSection()) return "";
- const MCSection *section = &func->getSection();
- const MCSectionCOFF *COFFSection;
- if ((COFFSection = dyn_cast<MCSectionCOFF>(section))) {
- StringRef name = COFFSection->getSectionName();
- size_t dollar = name.find('$');
- size_t dot = name.find('.', 1);
- if (dollar == StringRef::npos && dot == StringRef::npos)
- return "";
- if (dot == StringRef::npos)
- return name.substr(dollar);
- if (dollar == StringRef::npos || dot < dollar)
- return name.substr(dot);
- return name.substr(dollar);
- }
- return "";
-}
-
-static const MCSection *getWin64EHTableSection(StringRef suffix,
- MCContext &context) {
- if (suffix == "")
- return context.getObjectFileInfo()->getXDataSection();
-
- return context.getCOFFSection((".xdata"+suffix).str(),
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
-}
-
-static const MCSection *getWin64EHFuncTableSection(StringRef suffix,
- MCContext &context) {
- if (suffix == "")
- return context.getObjectFileInfo()->getPDataSection();
- return context.getCOFFSection((".pdata"+suffix).str(),
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
-}
-
-void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
- MCWin64EHUnwindInfo *info) {
- // Switch sections (the static function above is meant to be called from
- // here and from Emit().
- MCContext &context = streamer.getContext();
- const MCSection *xdataSect =
- getWin64EHTableSection(GetSectionSuffix(info->Function), context);
- streamer.SwitchSection(xdataSect);
-
- llvm::EmitUnwindInfo(streamer, info);
-}
-
-void MCWin64EHUnwindEmitter::Emit(MCStreamer &Streamer) {
+namespace Win64EH {
+void UnwindEmitter::Emit(MCStreamer &Streamer) const {
MCContext &Context = Streamer.getContext();
// Emit the unwind info structs first.
- for (const auto &CFI : Streamer.getW64UnwindInfos()) {
+ for (const auto &CFI : Streamer.getWinFrameInfos()) {
const MCSection *XData =
- getWin64EHTableSection(GetSectionSuffix(CFI->Function), Context);
+ getXDataSection(CFI->Function, Context);
Streamer.SwitchSection(XData);
EmitUnwindInfo(Streamer, CFI);
}
// Now emit RUNTIME_FUNCTION entries.
- for (const auto &CFI : Streamer.getW64UnwindInfos()) {
+ for (const auto &CFI : Streamer.getWinFrameInfos()) {
const MCSection *PData =
- getWin64EHFuncTableSection(GetSectionSuffix(CFI->Function), Context);
+ getPDataSection(CFI->Function, Context);
Streamer.SwitchSection(PData);
EmitRuntimeFunction(Streamer, CFI);
}
}
+void UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
+ WinEH::FrameInfo *info) const {
+ // Switch sections (the static function above is meant to be called from
+ // here and from Emit()).
+ MCContext &context = Streamer.getContext();
+ const MCSection *xdataSect =
+ getXDataSection(info->Function, context);
+ Streamer.SwitchSection(xdataSect);
+
+ llvm::EmitUnwindInfo(Streamer, info);
+}
+}
} // End of namespace llvm
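
A sketch of how an object streamer might now flush the collected Windows frames through the new emitter interface. That Win64EH::UnwindEmitter is default-constructible and declared in llvm/MC/MCWin64EH.h is an assumption based on the definitions above; getNumWinFrameInfos() is taken from the MCStreamer changes in this same patch.

#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCWin64EH.h"
using namespace llvm;

static void flushWin64UnwindTables(MCStreamer &Streamer) {
  if (!Streamer.getNumWinFrameInfos())   // nothing collected, nothing to emit
    return;
  Win64EH::UnwindEmitter Emitter;
  Emitter.Emit(Streamer);                // .xdata records first, then .pdata entries
}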
diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp
new file mode 100644
index 0000000..f0c354f
--- /dev/null
+++ b/lib/MC/MCWinEH.cpp
@@ -0,0 +1,84 @@
+//===- lib/MC/MCWinEH.cpp - Windows EH implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCWinEH.h"
+#include "llvm/Support/COFF.h"
+
+namespace llvm {
+namespace WinEH {
+static StringRef getSectionSuffix(const MCSymbol *Function) {
+ if (!Function || !Function->isInSection())
+ return "";
+
+ const MCSection *FunctionSection = &Function->getSection();
+ if (const auto Section = dyn_cast<MCSectionCOFF>(FunctionSection)) {
+ StringRef Name = Section->getSectionName();
+ size_t Dollar = Name.find('$');
+ size_t Dot = Name.find('.', 1);
+
+ if (Dollar == StringRef::npos && Dot == StringRef::npos)
+ return "";
+ if (Dot == StringRef::npos)
+ return Name.substr(Dollar);
+ if (Dollar == StringRef::npos || Dot < Dollar)
+ return Name.substr(Dot);
+
+ return Name.substr(Dollar);
+ }
+
+ return "";
+}
+
+static const MCSection *getUnwindInfoSection(
+ StringRef SecName, const MCSectionCOFF *UnwindSec, const MCSymbol *Function,
+ MCContext &Context) {
+ // If Function is in a COMDAT, get or create an unwind info section in that
+ // COMDAT group.
+ if (Function && Function->isInSection()) {
+ const MCSectionCOFF *FunctionSection =
+ cast<MCSectionCOFF>(&Function->getSection());
+ if (FunctionSection->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
+ return Context.getAssociativeCOFFSection(
+ UnwindSec, FunctionSection->getCOMDATSymbol());
+ }
+ }
+
+ // If Function is in a section other than .text, create a new .pdata section.
+ // Otherwise use the plain .pdata section.
+ StringRef Suffix = getSectionSuffix(Function);
+ if (Suffix.empty())
+ return UnwindSec;
+ return Context.getCOFFSection((SecName + Suffix).str(),
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+}
+
+const MCSection *UnwindEmitter::getPDataSection(const MCSymbol *Function,
+ MCContext &Context) {
+ const MCSectionCOFF *PData =
+ cast<MCSectionCOFF>(Context.getObjectFileInfo()->getPDataSection());
+ return getUnwindInfoSection(".pdata", PData, Function, Context);
+}
+
+const MCSection *UnwindEmitter::getXDataSection(const MCSymbol *Function,
+ MCContext &Context) {
+ const MCSectionCOFF *XData =
+ cast<MCSectionCOFF>(Context.getObjectFileInfo()->getXDataSection());
+ return getUnwindInfoSection(".xdata", XData, Function, Context);
+}
+
+}
+}
+
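
Illustrative use of the new section helpers: they return the plain .pdata/.xdata sections for ordinary .text functions, a suffixed section for functions in non-default text sections, and an associative COMDAT section for COMDAT functions. Whether getPDataSection/getXDataSection are static members of WinEH::UnwindEmitter, as the qualified definitions above suggest, is an assumption about MCWinEH.h.

#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCWinEH.h"
using namespace llvm;

// Sketch: pick the COMDAT-aware unwind sections for a given function symbol.
static void switchToUnwindSections(MCStreamer &Streamer, const MCSymbol *Function) {
  MCContext &Context = Streamer.getContext();
  const MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Function, Context);
  const MCSection *PData = WinEH::UnwindEmitter::getPDataSection(Function, Context);
  Streamer.SwitchSection(XData);   // UNWIND_INFO goes here
  // ...
  Streamer.SwitchSection(PData);   // RUNTIME_FUNCTION entries go here
}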
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 5214398..577c4b7 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -41,7 +41,7 @@ void MachObjectWriter::reset() {
bool MachObjectWriter::
doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
// Undefined symbols are always extern.
- if (SD->Symbol->isUndefined())
+ if (SD->getSymbol().isUndefined())
return true;
// References to weak definitions require external relocation entries; the
@@ -84,7 +84,7 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
MCValue Target;
- if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout))
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr))
report_fatal_error("unable to evaluate offset for variable '" +
S.getName() + "'");
@@ -525,15 +525,10 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
}
/// ComputeSymbolTable - Compute the symbol table data
-///
-/// \param StringTable [out] - The string table data.
-/// \param StringIndexMap [out] - Map from symbol names to offsets in the
-/// string table.
-void MachObjectWriter::
-ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
- std::vector<MachSymbolData> &LocalSymbolData,
- std::vector<MachSymbolData> &ExternalSymbolData,
- std::vector<MachSymbolData> &UndefinedSymbolData) {
+void MachObjectWriter::ComputeSymbolTable(
+ MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
+ std::vector<MachSymbolData> &ExternalSymbolData,
+ std::vector<MachSymbolData> &UndefinedSymbolData) {
// Build section lookup table.
DenseMap<const MCSection*, uint8_t> SectionIndexMap;
unsigned Index = 1;
@@ -542,37 +537,34 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
SectionIndexMap[&it->getSection()] = Index;
assert(Index <= 256 && "Too many sections!");
- // Index 0 is always the empty string.
- StringMap<uint64_t> StringIndexMap;
- StringTable += '\x00';
+ // Build the string table.
+ for (MCSymbolData &SD : Asm.symbols()) {
+ const MCSymbol &Symbol = SD.getSymbol();
+ if (!Asm.isSymbolLinkerVisible(Symbol))
+ continue;
+
+ StringTable.add(Symbol.getName());
+ }
+ StringTable.finalize(StringTableBuilder::MachO);
- // Build the symbol arrays and the string table, but only for non-local
- // symbols.
+ // Build the symbol arrays but only for non-local symbols.
//
- // The particular order that we collect the symbols and create the string
- // table, then sort the symbols is chosen to match 'as'. Even though it
- // doesn't matter for correctness, this is important for letting us diff .o
- // files.
+ // The particular order that we collect and then sort the symbols is chosen to
+ // match 'as'. Even though it doesn't matter for correctness, this is
+ // important for letting us diff .o files.
for (MCSymbolData &SD : Asm.symbols()) {
const MCSymbol &Symbol = SD.getSymbol();
// Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(SD.getSymbol()))
+ if (!Asm.isSymbolLinkerVisible(Symbol))
continue;
if (!SD.isExternal() && !Symbol.isUndefined())
continue;
- uint64_t &Entry = StringIndexMap[Symbol.getName()];
- if (!Entry) {
- Entry = StringTable.size();
- StringTable += Symbol.getName();
- StringTable += '\x00';
- }
-
MachSymbolData MSD;
MSD.SymbolData = &SD;
- MSD.StringIndex = Entry;
+ MSD.StringIndex = StringTable.getOffset(Symbol.getName());
if (Symbol.isUndefined()) {
MSD.SectionIndex = 0;
@@ -592,22 +584,15 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
const MCSymbol &Symbol = SD.getSymbol();
// Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(SD.getSymbol()))
+ if (!Asm.isSymbolLinkerVisible(Symbol))
continue;
if (SD.isExternal() || Symbol.isUndefined())
continue;
- uint64_t &Entry = StringIndexMap[Symbol.getName()];
- if (!Entry) {
- Entry = StringTable.size();
- StringTable += Symbol.getName();
- StringTable += '\x00';
- }
-
MachSymbolData MSD;
MSD.SymbolData = &SD;
- MSD.StringIndex = Entry;
+ MSD.StringIndex = StringTable.getOffset(Symbol.getName());
if (Symbol.isAbsolute()) {
MSD.SectionIndex = 0;
@@ -631,10 +616,6 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
ExternalSymbolData[i].SymbolData->setIndex(Index++);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
UndefinedSymbolData[i].SymbolData->setIndex(Index++);
-
- // The string table is padded to a multiple of 4.
- while (StringTable.size() % 4)
- StringTable += '\x00';
}
void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
@@ -664,7 +645,7 @@ void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm,
// and neither symbol is external, mark the variable as absolute.
const MCExpr *Expr = SD.getSymbol().getVariableValue();
MCValue Value;
- if (Expr->EvaluateAsRelocatable(Value, &Layout)) {
+ if (Expr->EvaluateAsRelocatable(Value, &Layout, nullptr)) {
if (Value.getSymA() && Value.getSymB())
const_cast<MCSymbol*>(&SD.getSymbol())->setAbsolute();
}
@@ -683,7 +664,7 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
markAbsoluteVariableSymbols(Asm, Layout);
// Compute symbol table information and bind symbol indices.
- ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+ ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
UndefinedSymbolData);
}
@@ -745,6 +726,10 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
return false;
}
+ // If they are not in the same section, we can't compute the diff.
+ if (&SecA != &SecB)
+ return false;
+
const MCFragment *FA = Asm.getSymbolData(SA).getFragment();
// Bail if the symbol has no fragment.
@@ -752,12 +737,7 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
return false;
A_Base = FA->getAtom();
- if (!A_Base)
- return false;
-
B_Base = FB.getAtom();
- if (!B_Base)
- return false;
// If the atoms are the same, they are guaranteed to have the same address.
if (A_Base == B_Base)
@@ -922,7 +902,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
sizeof(MachO::nlist_64) :
sizeof(MachO::nlist));
WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
- StringTableOffset, StringTable.size());
+ StringTableOffset, StringTable.data().size());
WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
FirstExternalSymbol, NumExternalSymbols,
@@ -1028,7 +1008,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
WriteNlist(UndefinedSymbolData[i], Layout);
// Write the string table.
- OS << StringTable.str();
+ OS << StringTable.data();
}
}
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
index a10f17e..bf8b7c0 100644
--- a/lib/MC/Makefile
+++ b/lib/MC/Makefile
@@ -10,7 +10,7 @@
LEVEL = ../..
LIBRARYNAME = LLVMMC
BUILD_ARCHIVE := 1
-PARALLEL_DIRS := MCAnalysis MCParser MCDisassembler
+PARALLEL_DIRS := MCParser MCDisassembler
include $(LEVEL)/Makefile.common
diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index db58ece..9de9363 100644
--- a/lib/MC/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -9,6 +9,8 @@
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Endian.h"
using namespace llvm;
@@ -25,19 +27,32 @@ static bool compareBySuffix(StringRef a, StringRef b) {
return sizeA > sizeB;
}
-void StringTableBuilder::finalize() {
+void StringTableBuilder::finalize(Kind kind) {
SmallVector<StringRef, 8> Strings;
+ Strings.reserve(StringIndexMap.size());
+
for (auto i = StringIndexMap.begin(), e = StringIndexMap.end(); i != e; ++i)
Strings.push_back(i->getKey());
std::sort(Strings.begin(), Strings.end(), compareBySuffix);
- // FIXME: Starting with a null byte is ELF specific. Generalize this so we
- // can use the class with other object formats.
- StringTable += '\x00';
+ switch (kind) {
+ case ELF:
+ case MachO:
+ // Start the table with a NUL byte.
+ StringTable += '\x00';
+ break;
+ case WinCOFF:
+ // Make room to write the table size later.
+ StringTable.append(4, '\x00');
+ break;
+ }
StringRef Previous;
for (StringRef s : Strings) {
+ if (kind == WinCOFF)
+ assert(s.size() > COFF::NameSize && "Short string in COFF string table!");
+
if (Previous.endswith(s)) {
StringIndexMap[s] = StringTable.size() - 1 - s.size();
continue;
@@ -48,4 +63,26 @@ void StringTableBuilder::finalize() {
StringTable += '\x00';
Previous = s;
}
+
+ switch (kind) {
+ case ELF:
+ break;
+ case MachO:
+ // Pad to multiple of 4.
+ while (StringTable.size() % 4)
+ StringTable += '\x00';
+ break;
+ case WinCOFF:
+ // Write the table size in the first word.
+ assert(StringTable.size() <= std::numeric_limits<uint32_t>::max());
+ uint32_t size = static_cast<uint32_t>(StringTable.size());
+ support::endian::write<uint32_t, support::little, support::unaligned>(
+ StringTable.data(), size);
+ break;
+ }
+}
+
+void StringTableBuilder::clear() {
+ StringTable.clear();
+ StringIndexMap.clear();
}
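
A usage sketch of the Kind-aware builder, mirroring how the Mach-O writer above now drives it; the symbol names are placeholders.

#include "llvm/MC/StringTableBuilder.h"
using namespace llvm;

static void buildMachOStringTable() {
  StringTableBuilder Builder;
  Builder.add("_main");                           // duplicates and shared suffixes are folded
  Builder.add("_helper");
  Builder.finalize(StringTableBuilder::MachO);    // NUL-prefixed, padded to a multiple of 4
  size_t MainOffset = Builder.getOffset("_main"); // offsets are only valid after finalize()
  StringRef Bytes = Builder.data();               // raw table, ready to stream out
  (void)MainOffset;
  (void)Bytes;
  Builder.clear();                                // reset for reuse (added in this change)
}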
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 27525c7..587be54 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
/// hasFlag - Determine if a feature has a flag; '+' or '-'
///
-static inline bool hasFlag(const StringRef Feature) {
+static inline bool hasFlag(StringRef Feature) {
assert(!Feature.empty() && "Empty string");
// Get first character
char Ch = Feature[0];
@@ -37,13 +37,13 @@ static inline bool hasFlag(const StringRef Feature) {
/// StripFlag - Return string stripped of flag.
///
-static inline std::string StripFlag(const StringRef Feature) {
+static inline std::string StripFlag(StringRef Feature) {
return hasFlag(Feature) ? Feature.substr(1) : Feature;
}
/// isEnabled - Return true if enable flag; '+'.
///
-static inline bool isEnabled(const StringRef Feature) {
+static inline bool isEnabled(StringRef Feature) {
assert(!Feature.empty() && "Empty string");
// Get first character
char Ch = Feature[0];
@@ -53,8 +53,8 @@ static inline bool isEnabled(const StringRef Feature) {
/// Split - Splits a string of comma separated items in to a vector of strings.
///
-static void Split(std::vector<std::string> &V, const StringRef S) {
- SmallVector<StringRef, 2> Tmp;
+static void Split(std::vector<std::string> &V, StringRef S) {
+ SmallVector<StringRef, 3> Tmp;
S.split(Tmp, ",", -1, false /* KeepEmpty */);
V.assign(Tmp.begin(), Tmp.end());
}
@@ -81,7 +81,7 @@ static std::string Join(const std::vector<std::string> &V) {
}
/// Adding features.
-void SubtargetFeatures::AddFeature(const StringRef String) {
+void SubtargetFeatures::AddFeature(StringRef String) {
// Don't add empty features or features we already have.
if (!String.empty())
// Convert to lowercase, prepend flag if we don't already have a flag.
@@ -136,7 +136,7 @@ static void Help(ArrayRef<SubtargetFeatureKV> CPUTable,
// SubtargetFeatures Implementation
//===----------------------------------------------------------------------===//
-SubtargetFeatures::SubtargetFeatures(const StringRef Initial) {
+SubtargetFeatures::SubtargetFeatures(StringRef Initial) {
// Break up string into separate features
Split(Features, Initial);
}
@@ -181,7 +181,7 @@ void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
/// ToggleFeature - Toggle a feature and returns the newly updated feature
/// bits.
uint64_t
-SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
+SubtargetFeatures::ToggleFeature(uint64_t Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
// Find feature in table.
@@ -213,7 +213,7 @@ SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
/// getFeatureBits - Get feature bits a CPU.
///
uint64_t
-SubtargetFeatures::getFeatureBits(const StringRef CPU,
+SubtargetFeatures::getFeatureBits(StringRef CPU,
ArrayRef<SubtargetFeatureKV> CPUTable,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
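
For completeness, a small sketch exercising the entry points whose signatures changed above from const StringRef to StringRef by value; the feature names are placeholders.

#include "llvm/MC/SubtargetFeature.h"
using namespace llvm;

static SubtargetFeatures makeExampleFeatures() {
  SubtargetFeatures Features("+sse2,-avx");   // comma-separated list, handled by Split()
  Features.AddFeature("neon");                // lowercased, '+' prepended if no flag given
  return Features;
}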
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index a462c0d..1046e04 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -26,8 +26,10 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TimeValue.h"
#include <cstdio>
@@ -71,7 +73,6 @@ public:
MCSymbolData const *MCData;
COFFSymbol(StringRef name);
- size_t size() const;
void set_name_offset(uint32_t Offset);
bool should_keep() const;
@@ -102,20 +103,6 @@ public:
static size_t size();
};
-// This class holds the COFF string table.
-class StringTable {
- typedef StringMap<size_t> map;
- map Map;
-
- void update_length();
-public:
- std::vector<char> Data;
-
- StringTable();
- size_t size() const;
- size_t insert(StringRef String);
-};
-
class WinCOFFObjectWriter : public MCObjectWriter {
public:
@@ -131,13 +118,26 @@ public:
COFF::header Header;
sections Sections;
symbols Symbols;
- StringTable Strings;
+ StringTableBuilder Strings;
// Maps used during object file creation.
section_map SectionMap;
symbol_map SymbolMap;
+ bool UseBigObj;
+
WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
+
+ void reset() override {
+ memset(&Header, 0, sizeof(Header));
+ Header.Machine = TargetObjectWriter->getMachine();
+ Sections.clear();
+ Symbols.clear();
+ Strings.clear();
+ SectionMap.clear();
+ SymbolMap.clear();
+ MCObjectWriter::reset();
+ }
COFFSymbol *createSymbol(StringRef Name);
COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
@@ -150,10 +150,10 @@ public:
void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler,
const MCAsmLayout &Layout);
- void MakeSymbolReal(COFFSymbol &S, size_t Index);
- void MakeSectionReal(COFFSection &S, size_t Number);
+ void SetSymbolName(COFFSymbol &S);
+ void SetSectionName(COFFSection &S);
- bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
+ bool ExportSymbol(const MCSymbol &Symbol, MCAssembler &Asm);
bool IsPhysicalSection(COFFSection *S);
@@ -170,6 +170,11 @@ public:
void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
+ bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB, bool InSet,
+ bool IsPCRel) const override;
+
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
@@ -179,12 +184,9 @@ public:
};
}
-static inline void write_uint32_le(void *Data, uint32_t const &Value) {
- uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
- Ptr[0] = (Value & 0x000000FF) >> 0;
- Ptr[1] = (Value & 0x0000FF00) >> 8;
- Ptr[2] = (Value & 0x00FF0000) >> 16;
- Ptr[3] = (Value & 0xFF000000) >> 24;
+static inline void write_uint32_le(void *Data, uint32_t Value) {
+ support::endian::write<uint32_t, support::little, support::unaligned>(Data,
+ Value);
}
//------------------------------------------------------------------------------
@@ -199,10 +201,6 @@ COFFSymbol::COFFSymbol(StringRef name)
memset(&Data, 0, sizeof(Data));
}
-size_t COFFSymbol::size() const {
- return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize);
-}
-
// In the case that the name does not fit within 8 bytes, the offset
// into the string table is stored in the last 4 bytes instead, leaving
// the first 4 bytes as 0.
@@ -254,55 +252,11 @@ size_t COFFSection::size() {
}
//------------------------------------------------------------------------------
-// StringTable class implementation
-
-/// Write the length of the string table into Data.
-/// The length of the string table includes uint32 length header.
-void StringTable::update_length() {
- write_uint32_le(&Data.front(), Data.size());
-}
-
-StringTable::StringTable() {
- // The string table data begins with the length of the entire string table
- // including the length header. Allocate space for this header.
- Data.resize(4);
- update_length();
-}
-
-size_t StringTable::size() const {
- return Data.size();
-}
-
-/// Add String to the table iff it is not already there.
-/// @returns the index into the string table where the string is now located.
-size_t StringTable::insert(StringRef String) {
- map::iterator i = Map.find(String);
-
- if (i != Map.end())
- return i->second;
-
- size_t Offset = Data.size();
-
- // Insert string data into string table.
- Data.insert(Data.end(), String.begin(), String.end());
- Data.push_back('\0');
-
- // Put a reference to it in the map.
- Map[String] = Offset;
-
- // Update the internal length field.
- update_length();
-
- return Offset;
-}
-
-//------------------------------------------------------------------------------
// WinCOFFObjectWriter class implementation
WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
raw_ostream &OS)
- : MCObjectWriter(OS, true)
- , TargetObjectWriter(MOTW) {
+ : MCObjectWriter(OS, true), TargetObjectWriter(MOTW) {
memset(&Header, 0, sizeof(Header));
Header.Machine = TargetObjectWriter->getMachine();
@@ -456,19 +410,22 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
// If no storage class was specified in the streamer, define it here.
if (coff_symbol->Data.StorageClass == 0) {
- bool external = ResSymData.isExternal() || !ResSymData.Fragment;
+ bool IsExternal =
+ ResSymData.isExternal() ||
+ (!ResSymData.getFragment() && !ResSymData.getSymbol().isVariable());
- coff_symbol->Data.StorageClass =
- external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+ coff_symbol->Data.StorageClass = IsExternal
+ ? COFF::IMAGE_SYM_CLASS_EXTERNAL
+ : COFF::IMAGE_SYM_CLASS_STATIC;
}
if (!Base) {
coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
} else {
const MCSymbolData &BaseData = Assembler.getSymbolData(*Base);
- if (BaseData.Fragment) {
+ if (BaseData.getFragment()) {
COFFSection *Sec =
- SectionMap[&BaseData.Fragment->getParent()->getSection()];
+ SectionMap[&BaseData.getFragment()->getParent()->getSection()];
if (coff_symbol->Section && coff_symbol->Section != Sec)
report_fatal_error("conflicting sections for symbol");
@@ -508,11 +465,9 @@ static void encodeBase64StringEntry(char* Buffer, uint64_t Value) {
}
}
-/// making a section real involves assigned it a number and putting
-/// name into the string table if needed
-void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
+void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
if (S.Name.size() > COFF::NameSize) {
- uint64_t StringTableEntry = Strings.insert(S.Name.c_str());
+ uint64_t StringTableEntry = Strings.getOffset(S.Name);
if (StringTableEntry <= Max6DecimalOffset) {
std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry));
@@ -530,32 +485,33 @@ void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
}
} else
std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
-
- S.Number = Number;
- S.Symbol->Data.SectionNumber = S.Number;
- S.Symbol->Aux[0].Aux.SectionDefinition.Number = S.Number;
}
-void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) {
- if (S.Name.size() > COFF::NameSize) {
- size_t StringTableEntry = Strings.insert(S.Name.c_str());
-
- S.set_name_offset(StringTableEntry);
- } else
+void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) {
+ if (S.Name.size() > COFF::NameSize)
+ S.set_name_offset(Strings.getOffset(S.Name));
+ else
std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
- S.Index = Index;
}
-bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
+bool WinCOFFObjectWriter::ExportSymbol(const MCSymbol &Symbol,
MCAssembler &Asm) {
// This doesn't seem to be right. Strings referred to from the .data section
// need symbols so they can be linked to code in the .text section right?
- // return Asm.isSymbolLinkerVisible(SymbolData.getSymbol());
+ // return Asm.isSymbolLinkerVisible(Symbol);
+
+ // Non-temporary labels should always be visible to the linker.
+ if (!Symbol.isTemporary())
+ return true;
+
+ // Absolute temporary labels are never visible.
+ if (!Symbol.isInSection())
+ return false;
// For now, all non-variable symbols are exported,
// the linker will sort the rest out for us.
- return SymbolData.isExternal() || !SymbolData.getSymbol().isVariable();
+ return !Symbol.isVariable();
}
bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
@@ -567,19 +523,39 @@ bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
// entity writing methods
void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
- WriteLE16(Header.Machine);
- WriteLE16(Header.NumberOfSections);
- WriteLE32(Header.TimeDateStamp);
- WriteLE32(Header.PointerToSymbolTable);
- WriteLE32(Header.NumberOfSymbols);
- WriteLE16(Header.SizeOfOptionalHeader);
- WriteLE16(Header.Characteristics);
+ if (UseBigObj) {
+ WriteLE16(COFF::IMAGE_FILE_MACHINE_UNKNOWN);
+ WriteLE16(0xFFFF);
+ WriteLE16(COFF::BigObjHeader::MinBigObjectVersion);
+ WriteLE16(Header.Machine);
+ WriteLE32(Header.TimeDateStamp);
+ for (uint8_t MagicChar : COFF::BigObjMagic)
+ Write8(MagicChar);
+ WriteLE32(0);
+ WriteLE32(0);
+ WriteLE32(0);
+ WriteLE32(0);
+ WriteLE32(Header.NumberOfSections);
+ WriteLE32(Header.PointerToSymbolTable);
+ WriteLE32(Header.NumberOfSymbols);
+ } else {
+ WriteLE16(Header.Machine);
+ WriteLE16(static_cast<int16_t>(Header.NumberOfSections));
+ WriteLE32(Header.TimeDateStamp);
+ WriteLE32(Header.PointerToSymbolTable);
+ WriteLE32(Header.NumberOfSymbols);
+ WriteLE16(Header.SizeOfOptionalHeader);
+ WriteLE16(Header.Characteristics);
+ }
}
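
For orientation, here are the two layouts this function emits, sketched as packed structs in write order. The 20- and 56-byte totals reflect one reading of the classic COFF and bigobj formats, and the field names are restated for illustration only.

    // Compile-time sketch (not the LLVM definitions) of the headers written
    // by WriteFileHeader.
    #include <cstdint>

    #pragma pack(push, 1)
    struct ClassicCOFFHeader {          // written when !UseBigObj
      uint16_t Machine;
      uint16_t NumberOfSections;        // 16-bit: at most 65535 sections
      uint32_t TimeDateStamp;
      uint32_t PointerToSymbolTable;
      uint32_t NumberOfSymbols;
      uint16_t SizeOfOptionalHeader;
      uint16_t Characteristics;
    };

    struct BigObjHeader {               // written when UseBigObj
      uint16_t Sig1;                    // IMAGE_FILE_MACHINE_UNKNOWN (0)
      uint16_t Sig2;                    // 0xFFFF
      uint16_t Version;                 // >= MinBigObjectVersion
      uint16_t Machine;
      uint32_t TimeDateStamp;
      uint8_t  UUID[16];                // BigObjMagic class id
      uint32_t Reserved[4];
      uint32_t NumberOfSections;        // widened to 32 bits
      uint32_t PointerToSymbolTable;
      uint32_t NumberOfSymbols;
    };
    #pragma pack(pop)

    static_assert(sizeof(ClassicCOFFHeader) == 20, "classic COFF header size");
    static_assert(sizeof(BigObjHeader) == 56, "bigobj header size");

    int main() { return 0; }
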
void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) {
WriteBytes(StringRef(S.Data.Name, COFF::NameSize));
WriteLE32(S.Data.Value);
- WriteLE16(S.Data.SectionNumber);
+ if (UseBigObj)
+ WriteLE32(S.Data.SectionNumber);
+ else
+ WriteLE16(static_cast<int16_t>(S.Data.SectionNumber));
WriteLE16(S.Data.Type);
Write8(S.Data.StorageClass);
Write8(S.Data.NumberOfAuxSymbols);
@@ -597,6 +573,8 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
WriteZeros(sizeof(i->Aux.FunctionDefinition.unused));
+ if (UseBigObj)
+ WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size);
break;
case ATbfAndefSymbol:
WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1));
@@ -604,24 +582,32 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2));
WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3));
+ if (UseBigObj)
+ WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size);
break;
case ATWeakExternal:
WriteLE32(i->Aux.WeakExternal.TagIndex);
WriteLE32(i->Aux.WeakExternal.Characteristics);
WriteZeros(sizeof(i->Aux.WeakExternal.unused));
+ if (UseBigObj)
+ WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size);
break;
case ATFile:
- WriteBytes(StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName),
- sizeof(i->Aux.File.FileName)));
+ WriteBytes(
+ StringRef(reinterpret_cast<const char *>(&i->Aux),
+ UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size));
break;
case ATSectionDefinition:
WriteLE32(i->Aux.SectionDefinition.Length);
WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations);
WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
WriteLE32(i->Aux.SectionDefinition.CheckSum);
- WriteLE16(i->Aux.SectionDefinition.Number);
+ WriteLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number));
Write8(i->Aux.SectionDefinition.Selection);
WriteZeros(sizeof(i->Aux.SectionDefinition.unused));
+ WriteLE16(static_cast<int16_t>(i->Aux.SectionDefinition.Number >> 16));
+ if (UseBigObj)
+ WriteZeros(COFF::Symbol32Size - COFF::Symbol16Size);
break;
}
}
@@ -654,45 +640,27 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
-
- static_assert(sizeof(((COFF::AuxiliaryFile *)nullptr)->FileName) == COFF::SymbolSize,
- "size mismatch for COFF::AuxiliaryFile::FileName");
- for (auto FI = Asm.file_names_begin(), FE = Asm.file_names_end();
- FI != FE; ++FI) {
- // round up to calculate the number of auxiliary symbols required
- unsigned Count = (FI->size() + COFF::SymbolSize - 1) / COFF::SymbolSize;
-
- COFFSymbol *file = createSymbol(".file");
- file->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG;
- file->Data.StorageClass = COFF::IMAGE_SYM_CLASS_FILE;
- file->Aux.resize(Count);
-
- unsigned Offset = 0;
- unsigned Length = FI->size();
- for (auto & Aux : file->Aux) {
- Aux.AuxType = ATFile;
-
- if (Length > COFF::SymbolSize) {
- memcpy(Aux.Aux.File.FileName, FI->c_str() + Offset, COFF::SymbolSize);
- Length = Length - COFF::SymbolSize;
- } else {
- memcpy(Aux.Aux.File.FileName, FI->c_str() + Offset, Length);
- memset(&Aux.Aux.File.FileName[Length], 0, COFF::SymbolSize - Length);
- Length = 0;
- }
-
- Offset = Offset + COFF::SymbolSize;
- }
- }
-
for (const auto & Section : Asm)
DefineSection(Section);
for (MCSymbolData &SD : Asm.symbols())
- if (ExportSymbol(SD, Asm))
+ if (ExportSymbol(SD.getSymbol(), Asm))
DefineSymbol(SD, Asm, Layout);
}
+bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbolData &DataA, const MCFragment &FB,
+ bool InSet, bool IsPCRel) const {
+ // MS LINK expects to be able to replace all references to a function with a
+ // thunk to implement their /INCREMENTAL feature. Make sure we don't optimize
+ // away any relocations to functions.
+ if ((((DataA.getFlags() & COFF::SF_TypeMask) >> COFF::SF_TypeShift) >>
+ COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
+ return false;
+ return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA, FB,
+ InSet, IsPCRel);
+}
+
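
The flag gymnastics above ultimately ask whether the symbol's COFF type carries the function complex type. A standalone sketch against the raw 16-bit Type field shows the check; the constants are restated here, and the real code reads MCSymbolData flags rather than the on-disk field.

    // Standalone sketch: the complex type lives in bits 4-7 of the 16-bit
    // Type field; IMAGE_SYM_DTYPE_FUNCTION is 2, so the familiar 0x20 value
    // is base type 0 plus complex type "function".
    #include <cstdint>
    #include <cstdio>

    constexpr unsigned SCT_COMPLEX_TYPE_SHIFT = 4;
    constexpr uint16_t IMAGE_SYM_DTYPE_FUNCTION = 2;

    static bool isFunctionType(uint16_t Type) {
      return ((Type >> SCT_COMPLEX_TYPE_SHIFT) & 0xF) == IMAGE_SYM_DTYPE_FUNCTION;
    }

    int main() {
      std::printf("%d\n", isFunctionType(0x20)); // 1: plain function symbol
      std::printf("%d\n", isFunctionType(0x00)); // 0: no type information
      return 0;
    }
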
void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -744,7 +712,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
// Offset of the symbol in the section
int64_t a = Layout.getSymbolOffset(&B_SD);
- // Ofeset of the relocation in the section
+ // Offset of the relocation in the section
int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
FixedValue = b - a;
@@ -765,8 +733,8 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
// Turn relocations for temporary symbols into section relocations.
if (coff_symbol->MCData->getSymbol().isTemporary() || CrossSection) {
Reloc.Symb = coff_symbol->Section->Symbol;
- FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment)
- + coff_symbol->MCData->getOffset();
+ FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->getFragment()) +
+ coff_symbol->MCData->getOffset();
} else
Reloc.Symb = coff_symbol;
@@ -828,26 +796,67 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
+ size_t SectionsSize = Sections.size();
+ if (SectionsSize > static_cast<size_t>(INT32_MAX))
+ report_fatal_error(
+ "PE COFF object files can't have more than 2147483647 sections");
+
// Assign symbol and section indexes and offsets.
- Header.NumberOfSections = 0;
+ int32_t NumberOfSections = static_cast<int32_t>(SectionsSize);
- DenseMap<COFFSection *, uint16_t> SectionIndices;
- for (auto & Section : Sections) {
- size_t Number = ++Header.NumberOfSections;
+ UseBigObj = NumberOfSections > COFF::MaxNumberOfSections16;
+
+ DenseMap<COFFSection *, int32_t> SectionIndices(
+ NextPowerOf2(NumberOfSections));
+
+ // Assign section numbers.
+ size_t Number = 1;
+ for (const auto &Section : Sections) {
SectionIndices[Section.get()] = Number;
- MakeSectionReal(*Section, Number);
+ Section->Number = Number;
+ Section->Symbol->Data.SectionNumber = Number;
+ Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Number;
+ ++Number;
}
+ Header.NumberOfSections = NumberOfSections;
Header.NumberOfSymbols = 0;
- for (auto & Symbol : Symbols) {
+ for (auto FI = Asm.file_names_begin(), FE = Asm.file_names_end();
+ FI != FE; ++FI) {
+ // round up to calculate the number of auxiliary symbols required
+ unsigned SymbolSize = UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size;
+ unsigned Count = (FI->size() + SymbolSize - 1) / SymbolSize;
+
+ COFFSymbol *file = createSymbol(".file");
+ file->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG;
+ file->Data.StorageClass = COFF::IMAGE_SYM_CLASS_FILE;
+ file->Aux.resize(Count);
+
+ unsigned Offset = 0;
+ unsigned Length = FI->size();
+ for (auto & Aux : file->Aux) {
+ Aux.AuxType = ATFile;
+
+ if (Length > SymbolSize) {
+ memcpy(&Aux.Aux, FI->c_str() + Offset, SymbolSize);
+ Length = Length - SymbolSize;
+ } else {
+ memcpy(&Aux.Aux, FI->c_str() + Offset, Length);
+ memset((char *)&Aux.Aux + Length, 0, SymbolSize - Length);
+ break;
+ }
+
+ Offset += SymbolSize;
+ }
+ }
+
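
The "round up" comment above is just a ceiling division over the symbol-record size. A tiny sketch, assuming the usual 18-byte classic and 20-byte bigobj record sizes, makes the arithmetic concrete.

    // Standalone sketch: how many auxiliary ".file" records a path needs,
    // given that each record carries SymbolSize bytes of the name.
    #include <cstdio>

    static unsigned auxRecordCount(unsigned NameLen, unsigned SymbolSize) {
      return (NameLen + SymbolSize - 1) / SymbolSize;
    }

    int main() {
      // "path/to/translation_unit.c" is 26 characters long.
      std::printf("%u\n", auxRecordCount(26, 18)); // 2 records, classic COFF
      std::printf("%u\n", auxRecordCount(26, 20)); // 2 records, bigobj
      std::printf("%u\n", auxRecordCount(40, 18)); // 3 records
      return 0;
    }
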
+ for (auto &Symbol : Symbols) {
// Update section number & offset for symbols that have them.
if (Symbol->Section)
Symbol->Data.SectionNumber = Symbol->Section->Number;
-
if (Symbol->should_keep()) {
- MakeSymbolReal(*Symbol, Header.NumberOfSymbols++);
-
+ Symbol->Index = Header.NumberOfSymbols++;
// Update auxiliary symbol info.
Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size();
Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols;
@@ -855,6 +864,22 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
Symbol->Index = -1;
}
+ // Build string table.
+ for (const auto &S : Sections)
+ if (S->Name.size() > COFF::NameSize)
+ Strings.add(S->Name);
+ for (const auto &S : Symbols)
+ if (S->should_keep() && S->Name.size() > COFF::NameSize)
+ Strings.add(S->Name);
+ Strings.finalize(StringTableBuilder::WinCOFF);
+
+ // Set names.
+ for (const auto &S : Sections)
+ SetSectionName(*S);
+ for (auto &S : Symbols)
+ if (S->should_keep())
+ SetSymbolName(*S);
+
// Fixup weak external references.
for (auto & Symbol : Symbols) {
if (Symbol->Other) {
@@ -897,7 +922,10 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
unsigned offset = 0;
- offset += COFF::HeaderSize;
+ if (UseBigObj)
+ offset += COFF::Header32Size;
+ else
+ offset += COFF::Header16Size;
offset += COFF::SectionSize * Header.NumberOfSections;
for (const auto & Section : Asm) {
@@ -918,7 +946,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
if (RelocationsOverflow) {
- // Signal overflow by setting NumberOfSections to max value. Actual
+ // Signal overflow by setting NumberOfRelocations to max value. Actual
// size is found in reloc #0. Microsoft tools understand this.
Sec->Header.NumberOfRelocations = 0xffff;
} else {
@@ -1014,7 +1042,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
if (Symbol->Index != -1)
WriteSymbol(*Symbol);
- OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
+ OS.write(Strings.data().data(), Strings.data().size());
}
MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) :
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index d391a3f..6a8054d 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -25,11 +25,11 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCWin64EH.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -61,7 +61,7 @@ void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst,
DF->getContents().append(Code.begin(), Code.end());
}
-void MCWinCOFFStreamer::InitSections() {
+void MCWinCOFFStreamer::InitSections(bool NoExecStack) {
// FIXME: this is identical to the ELF one.
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
@@ -133,7 +133,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
if (!CurSymbol)
FatalError("storage class specified outside of symbol definition");
- if (StorageClass & ~0xff)
+ if (StorageClass & ~COFF::SSC_Invalid)
FatalError(Twine("storage class value '") + itostr(StorageClass) +
"' out of range");
@@ -163,7 +163,7 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
MCFixup Fixup = MCFixup::Create(DF->getContents().size(), SRE, FK_SecRel_2);
DF->getFixups().push_back(Fixup);
- DF->getContents().resize(DF->getContents().size() + 4, 0);
+ DF->getContents().resize(DF->getContents().size() + 2, 0);
}
void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
@@ -184,14 +184,35 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
Symbol->getSection().getVariant() == MCSection::SV_COFF) &&
"Got non-COFF section in the COFF backend!");
- if (ByteAlignment > 32)
- report_fatal_error("alignment is limited to 32-bytes");
+ const Triple &T = getContext().getObjectFileInfo()->getTargetTriple();
+ if (T.isKnownWindowsMSVCEnvironment()) {
+ if (ByteAlignment > 32)
+ report_fatal_error("alignment is limited to 32-bytes");
+
+ // Round size up to alignment so that we will honor the alignment request.
+ Size = std::max(Size, static_cast<uint64_t>(ByteAlignment));
+ }
AssignSection(Symbol, nullptr);
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
SD.setExternal(true);
SD.setCommon(Size, ByteAlignment);
+
+ if (!T.isKnownWindowsMSVCEnvironment() && ByteAlignment > 1) {
+ SmallString<128> Directive;
+ raw_svector_ostream OS(Directive);
+ const MCObjectFileInfo *MFI = getContext().getObjectFileInfo();
+
+ OS << " -aligncomm:\"" << Symbol->getName() << "\","
+ << Log2_32_Ceil(ByteAlignment);
+ OS.flush();
+
+ PushSection();
+ SwitchSection(MFI->getDrectveSection());
+ EmitBytes(Directive);
+ PopSection();
+ }
}
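
For the non-MSVC path, the alignment request survives only as a linker directive, so a sketch of the string being built may help. Log2_32_Ceil is approximated with a small loop here, and the directive text simply restates the format used above.

    // Standalone sketch: the .drectve text appended for an aligned common
    // symbol on GNU-style environments.
    #include <cstdint>
    #include <cstdio>
    #include <string>

    static unsigned log2Ceil(uint32_t Value) {
      unsigned Bits = 0;
      while ((1u << Bits) < Value)
        ++Bits;
      return Bits;
    }

    static std::string alignCommDirective(const std::string &Name,
                                          uint32_t ByteAlignment) {
      return " -aligncomm:\"" + Name + "\"," +
             std::to_string(log2Ceil(ByteAlignment));
    }

    int main() {
      // A 16-byte aligned common symbol becomes: -aligncomm:"common_buf",4
      std::printf("%s\n", alignCommDirective("common_buf", 16).c_str());
      return 0;
    }
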
void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 6d09bdb..d169dbe 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -109,7 +109,7 @@ Archive::Child Archive::Child::getNext() const {
const char *NextLoc = Data.data() + SpaceToSkip;
// Check to see if this is past the end of the archive.
- if (NextLoc >= Parent->Data->getBufferEnd())
+ if (NextLoc >= Parent->Data.getBufferEnd())
return Child(Parent, nullptr);
return Child(Parent, NextLoc);
@@ -159,46 +159,36 @@ ErrorOr<StringRef> Archive::Child::getName() const {
return name;
}
-ErrorOr<std::unique_ptr<MemoryBuffer>>
-Archive::Child::getMemoryBuffer(bool FullPath) const {
+ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
ErrorOr<StringRef> NameOrErr = getName();
if (std::error_code EC = NameOrErr.getError())
return EC;
StringRef Name = NameOrErr.get();
- SmallString<128> Path;
- std::unique_ptr<MemoryBuffer> Ret(MemoryBuffer::getMemBuffer(
- getBuffer(),
- FullPath
- ? (Twine(Parent->getFileName()) + "(" + Name + ")").toStringRef(Path)
- : Name,
- false));
- return std::move(Ret);
+ return MemoryBufferRef(getBuffer(), Name);
}
ErrorOr<std::unique_ptr<Binary>>
Archive::Child::getAsBinary(LLVMContext *Context) const {
- std::unique_ptr<Binary> ret;
- ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = getMemoryBuffer();
+ ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
if (std::error_code EC = BuffOrErr.getError())
return EC;
- std::unique_ptr<MemoryBuffer> Buff(BuffOrErr.get().release());
- return createBinary(Buff, Context);
+ return createBinary(BuffOrErr.get(), Context);
}
-ErrorOr<Archive *> Archive::create(std::unique_ptr<MemoryBuffer> Source) {
+ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
std::error_code EC;
- std::unique_ptr<Archive> Ret(new Archive(std::move(Source), EC));
+ std::unique_ptr<Archive> Ret(new Archive(Source, EC));
if (EC)
return EC;
- return Ret.release();
+ return std::move(Ret);
}
-Archive::Archive(std::unique_ptr<MemoryBuffer> Source, std::error_code &ec)
- : Binary(Binary::ID_Archive, std::move(Source)), SymbolTable(child_end()) {
+Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
+ : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) {
// Check for sufficient magic.
- if (Data->getBufferSize() < 8 ||
- StringRef(Data->getBufferStart(), 8) != Magic) {
+ if (Data.getBufferSize() < 8 ||
+ StringRef(Data.getBufferStart(), 8) != Magic) {
ec = object_error::invalid_file_type;
return;
}
@@ -250,7 +240,7 @@ Archive::Archive(std::unique_ptr<MemoryBuffer> Source, std::error_code &ec)
if (ec)
return;
Name = NameOrErr.get();
- if (Name == "__.SYMDEF SORTED") {
+ if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
SymbolTable = i;
++i;
}
@@ -312,13 +302,13 @@ Archive::Archive(std::unique_ptr<MemoryBuffer> Source, std::error_code &ec)
}
Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
- if (Data->getBufferSize() == 8) // empty archive.
+ if (Data.getBufferSize() == 8) // empty archive.
return child_end();
if (SkipInternal)
return FirstRegular;
- const char *Loc = Data->getBufferStart() + strlen(Magic);
+ const char *Loc = Data.getBufferStart() + strlen(Magic);
Child c(this, Loc);
return c;
}
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index 9f6a685..c56eeb1 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -27,24 +27,23 @@ using namespace object;
Binary::~Binary() {}
-Binary::Binary(unsigned int Type, std::unique_ptr<MemoryBuffer> Source)
- : TypeID(Type), Data(std::move(Source)) {}
+Binary::Binary(unsigned int Type, MemoryBufferRef Source)
+ : TypeID(Type), Data(Source) {}
-StringRef Binary::getData() const {
- return Data->getBuffer();
-}
+StringRef Binary::getData() const { return Data.getBuffer(); }
-StringRef Binary::getFileName() const {
- return Data->getBufferIdentifier();
-}
+StringRef Binary::getFileName() const { return Data.getBufferIdentifier(); }
+
+MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; }
-ErrorOr<Binary *> object::createBinary(std::unique_ptr<MemoryBuffer> &Buffer,
- LLVMContext *Context) {
- sys::fs::file_magic Type = sys::fs::identify_magic(Buffer->getBuffer());
+ErrorOr<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
+ LLVMContext *Context) {
+ sys::fs::file_magic Type = sys::fs::identify_magic(Buffer.getBuffer());
switch (Type) {
case sys::fs::file_magic::archive:
- return Archive::create(std::move(Buffer));
+ return Archive::create(Buffer);
+ case sys::fs::file_magic::elf:
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::elf_executable:
case sys::fs::file_magic::elf_shared_object:
@@ -65,7 +64,7 @@ ErrorOr<Binary *> object::createBinary(std::unique_ptr<MemoryBuffer> &Buffer,
case sys::fs::file_magic::bitcode:
return ObjectFile::createSymbolicFile(Buffer, Type, Context);
case sys::fs::file_magic::macho_universal_binary:
- return MachOUniversalBinary::create(std::move(Buffer));
+ return MachOUniversalBinary::create(Buffer);
case sys::fs::file_magic::unknown:
case sys::fs::file_magic::windows_resource:
// Unrecognized object file format.
@@ -74,10 +73,18 @@ ErrorOr<Binary *> object::createBinary(std::unique_ptr<MemoryBuffer> &Buffer,
llvm_unreachable("Unexpected Binary File Type");
}
-ErrorOr<Binary *> object::createBinary(StringRef Path) {
+ErrorOr<OwningBinary<Binary>> object::createBinary(StringRef Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Path);
if (std::error_code EC = FileOrErr.getError())
return EC;
- return createBinary(FileOrErr.get());
+ std::unique_ptr<MemoryBuffer> &Buffer = FileOrErr.get();
+
+ ErrorOr<std::unique_ptr<Binary>> BinOrErr =
+ createBinary(Buffer->getMemBufferRef());
+ if (std::error_code EC = BinOrErr.getError())
+ return EC;
+ std::unique_ptr<Binary> &Bin = BinOrErr.get();
+
+ return OwningBinary<Binary>(std::move(Bin), std::move(Buffer));
}
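
The new return type bundles the parsed object with the buffer it points into. A generic sketch of that ownership pattern, with made-up names rather than the LLVM OwningBinary API, shows why the two must travel together.

    // Standalone sketch: a non-owning view over a buffer is only safe to
    // hand out when the buffer's owner travels with it.
    #include <cstddef>
    #include <cstdio>
    #include <memory>
    #include <string>
    #include <utility>

    struct Buffer { std::string Bytes; };

    struct View {                     // non-owning: points into a Buffer
      const char *Data;
      size_t Size;
    };

    template <typename T> struct OwningPair {
      std::unique_ptr<Buffer> Buf;    // declared first, destroyed last
      std::unique_ptr<T> Obj;         // references data owned by Buf
    };

    static OwningPair<View> open(std::string Bytes) {
      auto Buf = std::make_unique<Buffer>(Buffer{std::move(Bytes)});
      auto Obj =
          std::make_unique<View>(View{Buf->Bytes.data(), Buf->Bytes.size()});
      return {std::move(Buf), std::move(Obj)};
    }

    int main() {
      OwningPair<View> P = open("!<arch>\n");
      std::printf("%zu bytes\n", P.Obj->Size); // safe: P.Buf keeps data alive
      return 0;
    }
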
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 46ef87d..d5ff7d6 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -25,14 +25,13 @@
using namespace llvm;
using namespace object;
-using support::ulittle8_t;
using support::ulittle16_t;
using support::ulittle32_t;
+using support::ulittle64_t;
using support::little16_t;
// Returns false, and sets EC, if Size is greater than the buffer size.
-static bool checkSize(const MemoryBuffer &M, std::error_code &EC,
- uint64_t Size) {
+static bool checkSize(MemoryBufferRef M, std::error_code &EC, uint64_t Size) {
if (M.getBufferSize() < Size) {
EC = object_error::unexpected_eof;
return false;
@@ -40,17 +39,25 @@ static bool checkSize(const MemoryBuffer &M, std::error_code &EC,
return true;
}
+static std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr,
+ const uint64_t Size) {
+ if (Addr + Size < Addr || Addr + Size < Size ||
+ Addr + Size > uintptr_t(M.getBufferEnd()) ||
+ Addr < uintptr_t(M.getBufferStart())) {
+ return object_error::unexpected_eof;
+ }
+ return object_error::success;
+}
+
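
checkOffset is mostly about integer overflow. A standalone sketch, using plain pointers instead of a MemoryBufferRef, shows why the wrap-around tests have to come before the range comparison.

    // Standalone sketch: with unsigned arithmetic a huge Size wraps around,
    // so a naive "Addr + Size <= End" check would pass.
    #include <cstdint>
    #include <cstdio>

    static bool inBounds(uintptr_t Start, uintptr_t End, uintptr_t Addr,
                         uint64_t Size) {
      if (Addr + Size < Addr || Addr + Size < Size) // wrapped around
        return false;
      return Addr >= Start && Addr + Size <= End;
    }

    int main() {
      uintptr_t Start = 0x1000, End = 0x2000;
      std::printf("%d\n", inBounds(Start, End, 0x1800, 0x100));      // 1: ok
      std::printf("%d\n", inBounds(Start, End, 0x1800, UINT64_MAX)); // 0: wraps
      return 0;
    }
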
// Sets Obj unless any bytes in [Addr, Addr + Size) fall outside of M.
// Returns unexpected_eof if error.
template <typename T>
-static std::error_code getObject(const T *&Obj, const MemoryBuffer &M,
- const uint8_t *Ptr,
- const size_t Size = sizeof(T)) {
+static std::error_code getObject(const T *&Obj, MemoryBufferRef M,
+ const void *Ptr,
+ const uint64_t Size = sizeof(T)) {
uintptr_t Addr = uintptr_t(Ptr);
- if (Addr + Size < Addr || Addr + Size < Size ||
- Addr + Size > uintptr_t(M.getBufferEnd())) {
- return object_error::unexpected_eof;
- }
+ if (std::error_code EC = checkOffset(M, Addr, Size))
+ return EC;
Obj = reinterpret_cast<const T *>(Addr);
return object_error::success;
}
@@ -89,20 +96,19 @@ static bool decodeBase64StringEntry(StringRef Str, uint32_t &Result) {
return false;
}
-const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Ref) const {
- const coff_symbol *Addr = reinterpret_cast<const coff_symbol*>(Ref.p);
+template <typename coff_symbol_type>
+const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const {
+ const coff_symbol_type *Addr =
+ reinterpret_cast<const coff_symbol_type *>(Ref.p);
-# ifndef NDEBUG
+ assert(!checkOffset(Data, uintptr_t(Addr), sizeof(*Addr)));
+#ifndef NDEBUG
// Verify that the symbol points to a valid entry in the symbol table.
uintptr_t Offset = uintptr_t(Addr) - uintptr_t(base());
- if (Offset < COFFHeader->PointerToSymbolTable
- || Offset >= COFFHeader->PointerToSymbolTable
- + (COFFHeader->NumberOfSymbols * sizeof(coff_symbol)))
- report_fatal_error("Symbol was outside of symbol table.");
- assert((Offset - COFFHeader->PointerToSymbolTable) % sizeof(coff_symbol)
- == 0 && "Symbol did not point to the beginning of a symbol");
-# endif
+ assert((Offset - getPointerToSymbolTable()) % sizeof(coff_symbol_type) == 0 &&
+ "Symbol did not point to the beginning of a symbol");
+#endif
return Addr;
}
@@ -112,8 +118,7 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const {
# ifndef NDEBUG
// Verify that the section points to a valid entry in the section table.
- if (Addr < SectionTable
- || Addr >= (SectionTable + COFFHeader->NumberOfSections))
+ if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections()))
report_fatal_error("Section was outside of section table.");
uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable);
@@ -125,112 +130,180 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const {
}
void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const {
- const coff_symbol *Symb = toSymb(Ref);
- Symb += 1 + Symb->NumberOfAuxSymbols;
- Ref.p = reinterpret_cast<uintptr_t>(Symb);
+ auto End = reinterpret_cast<uintptr_t>(StringTable);
+ if (SymbolTable16) {
+ const coff_symbol16 *Symb = toSymb<coff_symbol16>(Ref);
+ Symb += 1 + Symb->NumberOfAuxSymbols;
+ Ref.p = std::min(reinterpret_cast<uintptr_t>(Symb), End);
+ } else if (SymbolTable32) {
+ const coff_symbol32 *Symb = toSymb<coff_symbol32>(Ref);
+ Symb += 1 + Symb->NumberOfAuxSymbols;
+ Ref.p = std::min(reinterpret_cast<uintptr_t>(Symb), End);
+ } else {
+ llvm_unreachable("no symbol table pointer!");
+ }
}
std::error_code COFFObjectFile::getSymbolName(DataRefImpl Ref,
StringRef &Result) const {
- const coff_symbol *Symb = toSymb(Ref);
+ COFFSymbolRef Symb = getCOFFSymbol(Ref);
return getSymbolName(Symb, Result);
}
std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
uint64_t &Result) const {
- const coff_symbol *Symb = toSymb(Ref);
- const coff_section *Section = nullptr;
- if (std::error_code EC = getSection(Symb->SectionNumber, Section))
- return EC;
+ COFFSymbolRef Symb = getCOFFSymbol(Ref);
- if (Symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
+ if (Symb.isAnyUndefined()) {
Result = UnknownAddressOrSize;
- else if (Section)
- Result = Section->VirtualAddress + Symb->Value;
- else
- Result = Symb->Value;
+ return object_error::success;
+ }
+ if (Symb.isCommon()) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+ int32_t SectionNumber = Symb.getSectionNumber();
+ if (!COFF::isReservedSectionNumber(SectionNumber)) {
+ const coff_section *Section = nullptr;
+ if (std::error_code EC = getSection(SectionNumber, Section))
+ return EC;
+
+ Result = Section->VirtualAddress + Symb.getValue();
+ return object_error::success;
+ }
+
+ Result = Symb.getValue();
return object_error::success;
}
std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
SymbolRef::Type &Result) const {
- const coff_symbol *Symb = toSymb(Ref);
+ COFFSymbolRef Symb = getCOFFSymbol(Ref);
+ int32_t SectionNumber = Symb.getSectionNumber();
Result = SymbolRef::ST_Other;
- if (Symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
- Symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) {
+
+ if (Symb.isAnyUndefined()) {
Result = SymbolRef::ST_Unknown;
- } else if (Symb->isFunctionDefinition()) {
+ } else if (Symb.isFunctionDefinition()) {
Result = SymbolRef::ST_Function;
- } else {
- uint32_t Characteristics = 0;
- if (!COFF::isReservedSectionNumber(Symb->SectionNumber)) {
- const coff_section *Section = nullptr;
- if (std::error_code EC = getSection(Symb->SectionNumber, Section))
- return EC;
- Characteristics = Section->Characteristics;
- }
- if (Characteristics & COFF::IMAGE_SCN_MEM_READ &&
- ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only.
+ } else if (Symb.isCommon()) {
+ Result = SymbolRef::ST_Data;
+ } else if (Symb.isFileRecord()) {
+ Result = SymbolRef::ST_File;
+ } else if (SectionNumber == COFF::IMAGE_SYM_DEBUG) {
+ Result = SymbolRef::ST_Debug;
+ } else if (!COFF::isReservedSectionNumber(SectionNumber)) {
+ const coff_section *Section = nullptr;
+ if (std::error_code EC = getSection(SectionNumber, Section))
+ return EC;
+ uint32_t Characteristics = Section->Characteristics;
+ if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
+ Result = SymbolRef::ST_Function;
+ else if (Characteristics & (COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA))
Result = SymbolRef::ST_Data;
}
return object_error::success;
}
uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const {
- const coff_symbol *Symb = toSymb(Ref);
+ COFFSymbolRef Symb = getCOFFSymbol(Ref);
uint32_t Result = SymbolRef::SF_None;
- // TODO: Correctly set SF_FormatSpecific, SF_Common
-
- if (Symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) {
- if (Symb->Value == 0)
- Result |= SymbolRef::SF_Undefined;
- else
- Result |= SymbolRef::SF_Common;
- }
-
-
- // TODO: This are certainly too restrictive.
- if (Symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+ if (Symb.isExternal() || Symb.isWeakExternal())
Result |= SymbolRef::SF_Global;
- if (Symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
+ if (Symb.isWeakExternal())
Result |= SymbolRef::SF_Weak;
- if (Symb->SectionNumber == COFF::IMAGE_SYM_ABSOLUTE)
+ if (Symb.getSectionNumber() == COFF::IMAGE_SYM_ABSOLUTE)
Result |= SymbolRef::SF_Absolute;
+ if (Symb.isFileRecord())
+ Result |= SymbolRef::SF_FormatSpecific;
+
+ if (Symb.isSectionDefinition())
+ Result |= SymbolRef::SF_FormatSpecific;
+
+ if (Symb.isCommon())
+ Result |= SymbolRef::SF_Common;
+
+ if (Symb.isAnyUndefined())
+ Result |= SymbolRef::SF_Undefined;
+
return Result;
}
std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
uint64_t &Result) const {
- // FIXME: Return the correct size. This requires looking at all the symbols
- // in the same section as this symbol, and looking for either the next
- // symbol, or the end of the section.
- const coff_symbol *Symb = toSymb(Ref);
- const coff_section *Section = nullptr;
- if (std::error_code EC = getSection(Symb->SectionNumber, Section))
- return EC;
+ COFFSymbolRef Symb = getCOFFSymbol(Ref);
- if (Symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
+ if (Symb.isAnyUndefined()) {
Result = UnknownAddressOrSize;
- else if (Section)
- Result = Section->SizeOfRawData - Symb->Value;
- else
+ return object_error::success;
+ }
+ if (Symb.isCommon()) {
+ Result = Symb.getValue();
+ return object_error::success;
+ }
+
+ // Let's attempt to get the size of the symbol by looking at the address of
+ // the symbol after the symbol in question.
+ uint64_t SymbAddr;
+ if (std::error_code EC = getSymbolAddress(Ref, SymbAddr))
+ return EC;
+ int32_t SectionNumber = Symb.getSectionNumber();
+ if (COFF::isReservedSectionNumber(SectionNumber)) {
+ // Absolute and debug symbols aren't sorted in any interesting way.
Result = 0;
+ return object_error::success;
+ }
+ const section_iterator SecEnd = section_end();
+ uint64_t AfterAddr = UnknownAddressOrSize;
+ for (const symbol_iterator &SymbI : symbols()) {
+ section_iterator SecI = SecEnd;
+ if (std::error_code EC = SymbI->getSection(SecI))
+ return EC;
+ // Check the symbol's section, skip it if it's in the wrong section.
+ // First, make sure it is in any section.
+ if (SecI == SecEnd)
+ continue;
+ // Second, make sure it is in the same section as the symbol in question.
+ if (!sectionContainsSymbol(SecI->getRawDataRefImpl(), Ref))
+ continue;
+ uint64_t Addr;
+ if (std::error_code EC = SymbI->getAddress(Addr))
+ return EC;
+ // We want to compare our symbol in question with the closest possible
+ // symbol that comes after.
+ if (AfterAddr > Addr && Addr > SymbAddr)
+ AfterAddr = Addr;
+ }
+ if (AfterAddr == UnknownAddressOrSize) {
+ // No symbol comes after this one, assume that everything after our symbol
+ // is part of it.
+ const coff_section *Section = nullptr;
+ if (std::error_code EC = getSection(SectionNumber, Section))
+ return EC;
+ Result = Section->SizeOfRawData - Symb.getValue();
+ } else {
+ // Take the difference between our symbol and the symbol that comes after
+ // our symbol.
+ Result = AfterAddr - SymbAddr;
+ }
+
return object_error::success;
}
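
The size heuristic above boils down to "distance to the next symbol in the same section, or the rest of the section if nothing follows". Here is a minimal sketch of that rule, with plain structs standing in for the symbol and section queries.

    // Standalone sketch of the getSymbolSize heuristic.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Sym { int Section; uint64_t Addr; };

    static uint64_t symbolSize(const Sym &S, const std::vector<Sym> &All,
                               uint64_t SectionEnd) {
      uint64_t After = UINT64_MAX;
      for (const Sym &Other : All)
        if (Other.Section == S.Section && Other.Addr > S.Addr)
          After = std::min(After, Other.Addr);
      return (After == UINT64_MAX ? SectionEnd : After) - S.Addr;
    }

    int main() {
      std::vector<Sym> Syms = {{1, 0x00}, {1, 0x40}, {1, 0x90}, {2, 0x00}};
      std::printf("%#llx\n", (unsigned long long)symbolSize(Syms[0], Syms, 0x100)); // 0x40
      std::printf("%#llx\n", (unsigned long long)symbolSize(Syms[2], Syms, 0x100)); // 0x70
      return 0;
    }
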
std::error_code
COFFObjectFile::getSymbolSection(DataRefImpl Ref,
section_iterator &Result) const {
- const coff_symbol *Symb = toSymb(Ref);
- if (COFF::isReservedSectionNumber(Symb->SectionNumber)) {
+ COFFSymbolRef Symb = getCOFFSymbol(Ref);
+ if (COFF::isReservedSectionNumber(Symb.getSectionNumber())) {
Result = section_end();
} else {
const coff_section *Sec = nullptr;
- if (std::error_code EC = getSection(Symb->SectionNumber, Sec))
+ if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec))
return EC;
DataRefImpl Ref;
Ref.p = reinterpret_cast<uintptr_t>(Sec);
@@ -251,18 +324,13 @@ std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref,
return getSectionName(Sec, Result);
}
-std::error_code COFFObjectFile::getSectionAddress(DataRefImpl Ref,
- uint64_t &Result) const {
+uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- Result = Sec->VirtualAddress;
- return object_error::success;
+ return Sec->VirtualAddress;
}
-std::error_code COFFObjectFile::getSectionSize(DataRefImpl Ref,
- uint64_t &Result) const {
- const coff_section *Sec = toSec(Ref);
- Result = Sec->SizeOfRawData;
- return object_error::success;
+uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const {
+ return getSectionSize(toSec(Ref));
}
std::error_code COFFObjectFile::getSectionContents(DataRefImpl Ref,
@@ -274,146 +342,144 @@ std::error_code COFFObjectFile::getSectionContents(DataRefImpl Ref,
return EC;
}
-std::error_code COFFObjectFile::getSectionAlignment(DataRefImpl Ref,
- uint64_t &Res) const {
+uint64_t COFFObjectFile::getSectionAlignment(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- if (!Sec)
- return object_error::parse_failed;
- Res = uint64_t(1) << (((Sec->Characteristics & 0x00F00000) >> 20) - 1);
- return object_error::success;
+ return uint64_t(1) << (((Sec->Characteristics & 0x00F00000) >> 20) - 1);
}
-std::error_code COFFObjectFile::isSectionText(DataRefImpl Ref,
- bool &Result) const {
+bool COFFObjectFile::isSectionText(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
- return object_error::success;
+ return Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
}
-std::error_code COFFObjectFile::isSectionData(DataRefImpl Ref,
- bool &Result) const {
+bool COFFObjectFile::isSectionData(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
- return object_error::success;
+ return Sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
}
-std::error_code COFFObjectFile::isSectionBSS(DataRefImpl Ref,
- bool &Result) const {
+bool COFFObjectFile::isSectionBSS(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
- return object_error::success;
+ return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
}
-std::error_code
-COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Ref,
- bool &Result) const {
- // FIXME: Unimplemented
- Result = true;
- return object_error::success;
+bool COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Ref) const {
+ // Sections marked 'Info', 'Remove', or 'Discardable' aren't required for
+ // execution.
+ const coff_section *Sec = toSec(Ref);
+ return !(Sec->Characteristics &
+ (COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE |
+ COFF::IMAGE_SCN_MEM_DISCARDABLE));
}
-std::error_code COFFObjectFile::isSectionVirtual(DataRefImpl Ref,
- bool &Result) const {
+bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
- return object_error::success;
+ return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
}
-std::error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Ref,
- bool &Result) const {
- // FIXME: Unimplemented.
- Result = false;
- return object_error::success;
+bool COFFObjectFile::isSectionZeroInit(DataRefImpl Ref) const {
+ const coff_section *Sec = toSec(Ref);
+ return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
}
-std::error_code COFFObjectFile::isSectionReadOnlyData(DataRefImpl Ref,
- bool &Result) const {
- // FIXME: Unimplemented.
- Result = false;
- return object_error::success;
+bool COFFObjectFile::isSectionReadOnlyData(DataRefImpl Ref) const {
+ const coff_section *Sec = toSec(Ref);
+ // Check if it's any sort of data section.
+ if (!(Sec->Characteristics & (COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)))
+ return false;
+ // If it's writable or executable or contains code, it isn't read-only data.
+ if (Sec->Characteristics &
+ (COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_WRITE))
+ return false;
+ return true;
}
-std::error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
- DataRefImpl SymbRef,
- bool &Result) const {
+bool COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
+ DataRefImpl SymbRef) const {
const coff_section *Sec = toSec(SecRef);
- const coff_symbol *Symb = toSymb(SymbRef);
- const coff_section *SymbSec = nullptr;
- if (std::error_code EC = getSection(Symb->SectionNumber, SymbSec))
- return EC;
- if (SymbSec == Sec)
- Result = true;
- else
- Result = false;
- return object_error::success;
-}
-
-relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Ref) const {
- const coff_section *Sec = toSec(Ref);
- DataRefImpl Ret;
- if (Sec->NumberOfRelocations == 0) {
- Ret.p = 0;
- } else {
- auto begin = reinterpret_cast<const coff_relocation*>(
- base() + Sec->PointerToRelocations);
- if (Sec->hasExtendedRelocations()) {
- // Skip the first relocation entry repurposed to store the number of
- // relocations.
- begin++;
- }
- Ret.p = reinterpret_cast<uintptr_t>(begin);
- }
- return relocation_iterator(RelocationRef(Ret, this));
+ COFFSymbolRef Symb = getCOFFSymbol(SymbRef);
+ int32_t SecNumber = (Sec - SectionTable) + 1;
+ return SecNumber == Symb.getSectionNumber();
}
static uint32_t getNumberOfRelocations(const coff_section *Sec,
- const uint8_t *base) {
+ MemoryBufferRef M, const uint8_t *base) {
// The field for the number of relocations in the COFF section table is only
// 16 bits wide. If a section has more than 65535 relocations, 0xFFFF is stored
// in the NumberOfRelocations field, and the actual relocation count is stored
// in the VirtualAddress field of the first relocation entry.
if (Sec->hasExtendedRelocations()) {
- auto *FirstReloc = reinterpret_cast<const coff_relocation*>(
- base + Sec->PointerToRelocations);
+ const coff_relocation *FirstReloc;
+ if (getObject(FirstReloc, M, reinterpret_cast<const coff_relocation*>(
+ base + Sec->PointerToRelocations)))
+ return 0;
return FirstReloc->VirtualAddress;
}
return Sec->NumberOfRelocations;
}
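
For reference, a small sketch of the extended-relocation convention this helper reads back; in the real format the section also carries the IMAGE_SCN_LNK_NRELOC_OVFL characteristic, which is omitted here for brevity.

    // Standalone sketch: a section with 65535 or more relocations pins the
    // 16-bit NumberOfRelocations field at 0xFFFF and parks the real count
    // in the VirtualAddress of relocation #0, which readers must skip.
    #include <cstdint>
    #include <cstdio>

    struct Reloc { uint32_t VirtualAddress; uint32_t SymbolTableIndex; uint16_t Type; };

    int main() {
      const uint32_t RealCount = 70000;

      // Writer side: overflowed count.
      uint16_t NumberOfRelocations = 0xFFFF;
      Reloc Table[2] = {{RealCount, 0, 0},   // reloc #0 repurposed as the count
                        {0x1000, 5, 4}};     // first "real" relocation

      // Reader side, mirroring getNumberOfRelocations()/getFirstReloc().
      bool Extended = NumberOfRelocations == 0xFFFF;
      uint32_t Count = Extended ? Table[0].VirtualAddress : NumberOfRelocations;
      const Reloc *First = Extended ? Table + 1 : Table;

      std::printf("count=%u firstVA=%#x\n", Count, First->VirtualAddress);
      return 0;
    }
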
+static const coff_relocation *
+getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) {
+ uint64_t NumRelocs = getNumberOfRelocations(Sec, M, Base);
+ if (!NumRelocs)
+ return nullptr;
+ auto begin = reinterpret_cast<const coff_relocation *>(
+ Base + Sec->PointerToRelocations);
+ if (Sec->hasExtendedRelocations()) {
+ // Skip the first relocation entry repurposed to store the number of
+ // relocations.
+ begin++;
+ }
+ if (checkOffset(M, uintptr_t(begin), sizeof(coff_relocation) * NumRelocs))
+ return nullptr;
+ return begin;
+}
+
+relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Ref) const {
+ const coff_section *Sec = toSec(Ref);
+ const coff_relocation *begin = getFirstReloc(Sec, Data, base());
+ DataRefImpl Ret;
+ Ret.p = reinterpret_cast<uintptr_t>(begin);
+ return relocation_iterator(RelocationRef(Ret, this));
+}
+
relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
+ const coff_relocation *I = getFirstReloc(Sec, Data, base());
+ if (I)
+ I += getNumberOfRelocations(Sec, Data, base());
DataRefImpl Ret;
- if (Sec->NumberOfRelocations == 0) {
- Ret.p = 0;
- } else {
- auto begin = reinterpret_cast<const coff_relocation*>(
- base() + Sec->PointerToRelocations);
- uint32_t NumReloc = getNumberOfRelocations(Sec, base());
- Ret.p = reinterpret_cast<uintptr_t>(begin + NumReloc);
- }
+ Ret.p = reinterpret_cast<uintptr_t>(I);
return relocation_iterator(RelocationRef(Ret, this));
}
// Initialize the pointer to the symbol table.
std::error_code COFFObjectFile::initSymbolTablePtr() {
- if (std::error_code EC = getObject(
- SymbolTable, *Data, base() + COFFHeader->PointerToSymbolTable,
- COFFHeader->NumberOfSymbols * sizeof(coff_symbol)))
- return EC;
+ if (COFFHeader)
+ if (std::error_code EC = getObject(
+ SymbolTable16, Data, base() + getPointerToSymbolTable(),
+ (uint64_t)getNumberOfSymbols() * getSymbolTableEntrySize()))
+ return EC;
+
+ if (COFFBigObjHeader)
+ if (std::error_code EC = getObject(
+ SymbolTable32, Data, base() + getPointerToSymbolTable(),
+ (uint64_t)getNumberOfSymbols() * getSymbolTableEntrySize()))
+ return EC;
// Find string table. The first four bytes of the string table contain the
// total size of the string table, including the size field itself. If the
// string table is empty, the value of the first four bytes would be 4.
- const uint8_t *StringTableAddr =
- base() + COFFHeader->PointerToSymbolTable +
- COFFHeader->NumberOfSymbols * sizeof(coff_symbol);
+ uint32_t StringTableOffset = getPointerToSymbolTable() +
+ getNumberOfSymbols() * getSymbolTableEntrySize();
+ const uint8_t *StringTableAddr = base() + StringTableOffset;
const ulittle32_t *StringTableSizePtr;
- if (std::error_code EC =
- getObject(StringTableSizePtr, *Data, StringTableAddr))
+ if (std::error_code EC = getObject(StringTableSizePtr, Data, StringTableAddr))
return EC;
StringTableSize = *StringTableSizePtr;
if (std::error_code EC =
- getObject(StringTable, *Data, StringTableAddr, StringTableSize))
+ getObject(StringTable, Data, StringTableAddr, StringTableSize))
return EC;
// Treat table sizes < 4 as empty because contrary to the PECOFF spec, some
@@ -477,8 +543,9 @@ std::error_code COFFObjectFile::initImportTablePtr() {
return object_error::success;
uint32_t ImportTableRva = DataEntry->RelativeVirtualAddress;
+ // -1 because the last entry is the null entry.
NumberOfImportDirectory = DataEntry->Size /
- sizeof(import_directory_table_entry);
+ sizeof(import_directory_table_entry) - 1;
// Find the section that contains the RVA. This is needed because the RVA is
// the import table's memory address which is different from its file offset.
@@ -490,6 +557,26 @@ std::error_code COFFObjectFile::initImportTablePtr() {
return object_error::success;
}
+// Initializes DelayImportDirectory and NumberOfDelayImportDirectory.
+std::error_code COFFObjectFile::initDelayImportTablePtr() {
+ const data_directory *DataEntry;
+ if (getDataDirectory(COFF::DELAY_IMPORT_DESCRIPTOR, DataEntry))
+ return object_error::success;
+ if (DataEntry->RelativeVirtualAddress == 0)
+ return object_error::success;
+
+ uint32_t RVA = DataEntry->RelativeVirtualAddress;
+ NumberOfDelayImportDirectory = DataEntry->Size /
+ sizeof(delay_import_directory_table_entry) - 1;
+
+ uintptr_t IntPtr = 0;
+ if (std::error_code EC = getRvaPtr(RVA, IntPtr))
+ return EC;
+ DelayImportDirectory = reinterpret_cast<
+ const delay_import_directory_table_entry *>(IntPtr);
+ return object_error::success;
+}
+
// Find the export table.
std::error_code COFFObjectFile::initExportTablePtr() {
// First, we get the RVA of the export table. If the file lacks a pointer to
@@ -511,15 +598,34 @@ std::error_code COFFObjectFile::initExportTablePtr() {
return object_error::success;
}
-COFFObjectFile::COFFObjectFile(std::unique_ptr<MemoryBuffer> Object,
- std::error_code &EC)
- : ObjectFile(Binary::ID_COFF, std::move(Object)), COFFHeader(nullptr),
- PE32Header(nullptr), PE32PlusHeader(nullptr), DataDirectory(nullptr),
- SectionTable(nullptr), SymbolTable(nullptr), StringTable(nullptr),
- StringTableSize(0), ImportDirectory(nullptr), NumberOfImportDirectory(0),
- ExportDirectory(nullptr) {
+std::error_code COFFObjectFile::initBaseRelocPtr() {
+ const data_directory *DataEntry;
+ if (getDataDirectory(COFF::BASE_RELOCATION_TABLE, DataEntry))
+ return object_error::success;
+ if (DataEntry->RelativeVirtualAddress == 0)
+ return object_error::success;
+
+ uintptr_t IntPtr = 0;
+ if (std::error_code EC = getRvaPtr(DataEntry->RelativeVirtualAddress, IntPtr))
+ return EC;
+ BaseRelocHeader = reinterpret_cast<const coff_base_reloc_block_header *>(
+ IntPtr);
+ BaseRelocEnd = reinterpret_cast<coff_base_reloc_block_header *>(
+ IntPtr + DataEntry->Size);
+ return object_error::success;
+}
+
+COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
+ : ObjectFile(Binary::ID_COFF, Object), COFFHeader(nullptr),
+ COFFBigObjHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr),
+ DataDirectory(nullptr), SectionTable(nullptr), SymbolTable16(nullptr),
+ SymbolTable32(nullptr), StringTable(nullptr), StringTableSize(0),
+ ImportDirectory(nullptr), NumberOfImportDirectory(0),
+ DelayImportDirectory(nullptr), NumberOfDelayImportDirectory(0),
+ ExportDirectory(nullptr), BaseRelocHeader(nullptr),
+ BaseRelocEnd(nullptr) {
// Check that we at least have enough room for a header.
- if (!checkSize(*Data, EC, sizeof(coff_file_header)))
+ if (!checkSize(Data, EC, sizeof(coff_file_header)))
return;
// The current location in the file where we are looking at.
@@ -530,37 +636,66 @@ COFFObjectFile::COFFObjectFile(std::unique_ptr<MemoryBuffer> Object,
bool HasPEHeader = false;
// Check if this is a PE/COFF file.
- if (base()[0] == 0x4d && base()[1] == 0x5a) {
+ if (checkSize(Data, EC, sizeof(dos_header) + sizeof(COFF::PEMagic))) {
// PE/COFF, seek through MS-DOS compatibility stub and 4-byte
// PE signature to find 'normal' COFF header.
- if (!checkSize(*Data, EC, 0x3c + 8))
- return;
- CurPtr = *reinterpret_cast<const ulittle16_t *>(base() + 0x3c);
- // Check the PE magic bytes. ("PE\0\0")
- if (std::memcmp(base() + CurPtr, "PE\0\0", 4) != 0) {
- EC = object_error::parse_failed;
- return;
+ const auto *DH = reinterpret_cast<const dos_header *>(base());
+ if (DH->Magic[0] == 'M' && DH->Magic[1] == 'Z') {
+ CurPtr = DH->AddressOfNewExeHeader;
+ // Check the PE magic bytes. ("PE\0\0")
+ if (memcmp(base() + CurPtr, COFF::PEMagic, sizeof(COFF::PEMagic)) != 0) {
+ EC = object_error::parse_failed;
+ return;
+ }
+ CurPtr += sizeof(COFF::PEMagic); // Skip the PE magic bytes.
+ HasPEHeader = true;
}
- CurPtr += 4; // Skip the PE magic bytes.
- HasPEHeader = true;
}
- if ((EC = getObject(COFFHeader, *Data, base() + CurPtr)))
+ if ((EC = getObject(COFFHeader, Data, base() + CurPtr)))
return;
- CurPtr += sizeof(coff_file_header);
+
+ // It might be a bigobj file, let's check. Note that COFF bigobj and COFF
+ // import libraries share a common prefix but bigobj is more restrictive.
+ if (!HasPEHeader && COFFHeader->Machine == COFF::IMAGE_FILE_MACHINE_UNKNOWN &&
+ COFFHeader->NumberOfSections == uint16_t(0xffff) &&
+ checkSize(Data, EC, sizeof(coff_bigobj_file_header))) {
+ if ((EC = getObject(COFFBigObjHeader, Data, base() + CurPtr)))
+ return;
+
+ // Verify that we are dealing with bigobj.
+ if (COFFBigObjHeader->Version >= COFF::BigObjHeader::MinBigObjectVersion &&
+ std::memcmp(COFFBigObjHeader->UUID, COFF::BigObjMagic,
+ sizeof(COFF::BigObjMagic)) == 0) {
+ COFFHeader = nullptr;
+ CurPtr += sizeof(coff_bigobj_file_header);
+ } else {
+ // It's not a bigobj.
+ COFFBigObjHeader = nullptr;
+ }
+ }
+ if (COFFHeader) {
+ // The prior checkSize call may have failed. This isn't a hard error
+ // because we were just trying to sniff out bigobj.
+ EC = object_error::success;
+ CurPtr += sizeof(coff_file_header);
+
+ if (COFFHeader->isImportLibrary())
+ return;
+ }
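
The sniffing above can be condensed into one predicate. The sketch below restates it with scalar parameters, a placeholder for the 16-byte BigObjMagic value, and the minimum version hard-coded as 2; all of those details should be read as assumptions.

    // Standalone sketch: distinguishing a bigobj file from plain COFF and
    // from an import library, which shares the same 0/0xFFFF prefix.
    #include <cstdint>
    #include <cstring>

    static const uint8_t kBigObjMagic[16] = {/* placeholder, not the real value */};

    static bool looksLikeBigObj(uint16_t Machine, uint16_t NumberOfSections,
                                uint16_t Version, const uint8_t UUID[16]) {
      if (Machine != 0 /*IMAGE_FILE_MACHINE_UNKNOWN*/ ||
          NumberOfSections != 0xFFFF)
        return false;                 // plain COFF object, or not even COFF
      return Version >= 2 /*MinBigObjectVersion*/ &&
             std::memcmp(UUID, kBigObjMagic, sizeof(kBigObjMagic)) == 0;
    }

    int main() {
      uint8_t UUID[16] = {};
      return looksLikeBigObj(0, 0xFFFF, 2, UUID) ? 0 : 1;
    }
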
if (HasPEHeader) {
const pe32_header *Header;
- if ((EC = getObject(Header, *Data, base() + CurPtr)))
+ if ((EC = getObject(Header, Data, base() + CurPtr)))
return;
const uint8_t *DataDirAddr;
uint64_t DataDirSize;
- if (Header->Magic == 0x10b) {
+ if (Header->Magic == COFF::PE32Header::PE32) {
PE32Header = Header;
DataDirAddr = base() + CurPtr + sizeof(pe32_header);
DataDirSize = sizeof(data_directory) * PE32Header->NumberOfRvaAndSize;
- } else if (Header->Magic == 0x20b) {
+ } else if (Header->Magic == COFF::PE32Header::PE32_PLUS) {
PE32PlusHeader = reinterpret_cast<const pe32plus_header *>(Header);
DataDirAddr = base() + CurPtr + sizeof(pe32plus_header);
DataDirSize = sizeof(data_directory) * PE32PlusHeader->NumberOfRvaAndSize;
@@ -569,37 +704,47 @@ COFFObjectFile::COFFObjectFile(std::unique_ptr<MemoryBuffer> Object,
EC = object_error::parse_failed;
return;
}
- if ((EC = getObject(DataDirectory, *Data, DataDirAddr, DataDirSize)))
+ if ((EC = getObject(DataDirectory, Data, DataDirAddr, DataDirSize)))
return;
CurPtr += COFFHeader->SizeOfOptionalHeader;
}
- if (COFFHeader->isImportLibrary())
- return;
-
- if ((EC = getObject(SectionTable, *Data, base() + CurPtr,
- COFFHeader->NumberOfSections * sizeof(coff_section))))
+ if ((EC = getObject(SectionTable, Data, base() + CurPtr,
+ (uint64_t)getNumberOfSections() * sizeof(coff_section))))
return;
// Initialize the pointer to the symbol table.
- if (COFFHeader->PointerToSymbolTable != 0)
+ if (getPointerToSymbolTable() != 0) {
if ((EC = initSymbolTablePtr()))
return;
+ } else {
+ // We had better not have any symbols if we don't have a symbol table.
+ if (getNumberOfSymbols() != 0) {
+ EC = object_error::parse_failed;
+ return;
+ }
+ }
// Initialize the pointer to the beginning of the import table.
if ((EC = initImportTablePtr()))
return;
+ if ((EC = initDelayImportTablePtr()))
+ return;
// Initialize the pointer to the export table.
if ((EC = initExportTablePtr()))
return;
+ // Initialize the pointer to the base relocation table.
+ if ((EC = initBaseRelocPtr()))
+ return;
+
EC = object_error::success;
}
basic_symbol_iterator COFFObjectFile::symbol_begin_impl() const {
DataRefImpl Ret;
- Ret.p = reinterpret_cast<uintptr_t>(SymbolTable);
+ Ret.p = getSymbolTable();
return basic_symbol_iterator(SymbolRef(Ret, this));
}
@@ -610,21 +755,6 @@ basic_symbol_iterator COFFObjectFile::symbol_end_impl() const {
return basic_symbol_iterator(SymbolRef(Ret, this));
}
-library_iterator COFFObjectFile::needed_library_begin() const {
- // TODO: implement
- report_fatal_error("Libraries needed unimplemented in COFFObjectFile");
-}
-
-library_iterator COFFObjectFile::needed_library_end() const {
- // TODO: implement
- report_fatal_error("Libraries needed unimplemented in COFFObjectFile");
-}
-
-StringRef COFFObjectFile::getLoadName() const {
- // COFF does not have this field.
- return "";
-}
-
import_directory_iterator COFFObjectFile::import_directory_begin() const {
return import_directory_iterator(
ImportDirectoryEntryRef(ImportDirectory, 0, this));
@@ -635,6 +765,19 @@ import_directory_iterator COFFObjectFile::import_directory_end() const {
ImportDirectoryEntryRef(ImportDirectory, NumberOfImportDirectory, this));
}
+delay_import_directory_iterator
+COFFObjectFile::delay_import_directory_begin() const {
+ return delay_import_directory_iterator(
+ DelayImportDirectoryEntryRef(DelayImportDirectory, 0, this));
+}
+
+delay_import_directory_iterator
+COFFObjectFile::delay_import_directory_end() const {
+ return delay_import_directory_iterator(
+ DelayImportDirectoryEntryRef(
+ DelayImportDirectory, NumberOfDelayImportDirectory, this));
+}
+
export_directory_iterator COFFObjectFile::export_directory_begin() const {
return export_directory_iterator(
ExportDirectoryEntryRef(ExportDirectory, 0, this));
@@ -656,18 +799,26 @@ section_iterator COFFObjectFile::section_begin() const {
section_iterator COFFObjectFile::section_end() const {
DataRefImpl Ret;
- int NumSections = COFFHeader->isImportLibrary()
- ? 0 : COFFHeader->NumberOfSections;
+ int NumSections =
+ COFFHeader && COFFHeader->isImportLibrary() ? 0 : getNumberOfSections();
Ret.p = reinterpret_cast<uintptr_t>(SectionTable + NumSections);
return section_iterator(SectionRef(Ret, this));
}
+base_reloc_iterator COFFObjectFile::base_reloc_begin() const {
+ return base_reloc_iterator(BaseRelocRef(BaseRelocHeader, this));
+}
+
+base_reloc_iterator COFFObjectFile::base_reloc_end() const {
+ return base_reloc_iterator(BaseRelocRef(BaseRelocEnd, this));
+}
+
uint8_t COFFObjectFile::getBytesInAddress() const {
return getArch() == Triple::x86_64 ? 8 : 4;
}
StringRef COFFObjectFile::getFileFormatName() const {
- switch(COFFHeader->Machine) {
+ switch(getMachine()) {
case COFF::IMAGE_FILE_MACHINE_I386:
return "COFF-i386";
case COFF::IMAGE_FILE_MACHINE_AMD64:
@@ -680,7 +831,7 @@ StringRef COFFObjectFile::getFileFormatName() const {
}
unsigned COFFObjectFile::getArch() const {
- switch(COFFHeader->Machine) {
+ switch (getMachine()) {
case COFF::IMAGE_FILE_MACHINE_I386:
return Triple::x86;
case COFF::IMAGE_FILE_MACHINE_AMD64:
@@ -692,16 +843,24 @@ unsigned COFFObjectFile::getArch() const {
}
}
-// This method is kept here because lld uses this. As soon as we make
-// lld to use getCOFFHeader, this method will be removed.
-std::error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const {
- return getCOFFHeader(Res);
+iterator_range<import_directory_iterator>
+COFFObjectFile::import_directories() const {
+ return make_range(import_directory_begin(), import_directory_end());
}
-std::error_code
-COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const {
- Res = COFFHeader;
- return object_error::success;
+iterator_range<delay_import_directory_iterator>
+COFFObjectFile::delay_import_directories() const {
+ return make_range(delay_import_directory_begin(),
+ delay_import_directory_end());
+}
+
+iterator_range<export_directory_iterator>
+COFFObjectFile::export_directories() const {
+ return make_range(export_directory_begin(), export_directory_end());
+}
+
+iterator_range<base_reloc_iterator> COFFObjectFile::base_relocs() const {
+ return make_range(base_reloc_begin(), base_reloc_end());
}
std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const {
@@ -719,28 +878,32 @@ std::error_code
COFFObjectFile::getDataDirectory(uint32_t Index,
const data_directory *&Res) const {
// Error if there's no data directory or the index is out of range.
- if (!DataDirectory)
+ if (!DataDirectory) {
+ Res = nullptr;
return object_error::parse_failed;
+ }
assert(PE32Header || PE32PlusHeader);
uint32_t NumEnt = PE32Header ? PE32Header->NumberOfRvaAndSize
: PE32PlusHeader->NumberOfRvaAndSize;
- if (Index > NumEnt)
+ if (Index >= NumEnt) {
+ Res = nullptr;
return object_error::parse_failed;
+ }
Res = &DataDirectory[Index];
return object_error::success;
}
std::error_code COFFObjectFile::getSection(int32_t Index,
const coff_section *&Result) const {
- // Check for special index values.
+ Result = nullptr;
if (COFF::isReservedSectionNumber(Index))
- Result = nullptr;
- else if (Index > 0 && Index <= COFFHeader->NumberOfSections)
+ return object_error::success;
+ if (static_cast<uint32_t>(Index) <= getNumberOfSections()) {
// We already verified the section table data, so no need to check again.
Result = SectionTable + (Index - 1);
- else
- return object_error::parse_failed;
- return object_error::success;
+ return object_error::success;
+ }
+ return object_error::parse_failed;
}
std::error_code COFFObjectFile::getString(uint32_t Offset,
@@ -754,71 +917,62 @@ std::error_code COFFObjectFile::getString(uint32_t Offset,
return object_error::success;
}
-std::error_code COFFObjectFile::getSymbol(uint32_t Index,
- const coff_symbol *&Result) const {
- if (Index < COFFHeader->NumberOfSymbols)
- Result = SymbolTable + Index;
- else
- return object_error::parse_failed;
- return object_error::success;
-}
-
-std::error_code COFFObjectFile::getSymbolName(const coff_symbol *Symbol,
+std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol,
StringRef &Res) const {
// Check for string table entry. First 4 bytes are 0.
- if (Symbol->Name.Offset.Zeroes == 0) {
- uint32_t Offset = Symbol->Name.Offset.Offset;
+ if (Symbol.getStringTableOffset().Zeroes == 0) {
+ uint32_t Offset = Symbol.getStringTableOffset().Offset;
if (std::error_code EC = getString(Offset, Res))
return EC;
return object_error::success;
}
- if (Symbol->Name.ShortName[7] == 0)
+ if (Symbol.getShortName()[COFF::NameSize - 1] == 0)
// Null terminated, let ::strlen figure out the length.
- Res = StringRef(Symbol->Name.ShortName);
+ Res = StringRef(Symbol.getShortName());
else
// Not null terminated, use all 8 bytes.
- Res = StringRef(Symbol->Name.ShortName, 8);
+ Res = StringRef(Symbol.getShortName(), COFF::NameSize);
return object_error::success;
}
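
The short-name/long-name split handled above is a fixed 8-byte field in the on-disk symbol record. A standalone sketch of decoding it, assuming a little-endian host:

    // Standalone sketch: if the first four bytes are zero, the next four are
    // an offset into the string table; otherwise the eight bytes hold the
    // name itself and are only NUL-terminated when shorter than eight chars.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <string>

    static std::string symbolName(const uint8_t Name[8], const char *StringTable) {
      uint32_t Zeroes, Offset;
      std::memcpy(&Zeroes, Name, 4);
      std::memcpy(&Offset, Name + 4, 4);       // little-endian host assumed
      if (Zeroes == 0)
        return StringTable + Offset;           // long name, NUL-terminated there
      size_t Len = 0;
      while (Len < 8 && Name[Len] != 0)
        ++Len;                                 // short name, maybe unterminated
      return std::string(reinterpret_cast<const char *>(Name), Len);
    }

    int main() {
      // First four bytes stand in for the table's size field, so real
      // offsets start at 4.
      const char Strings[] = "\0\0\0\0a_rather_long_symbol_name";
      uint8_t Short[8] = {'m', 'a', 'i', 'n', 0, 0, 0, 0};
      uint8_t Long[8] = {0, 0, 0, 0, 4, 0, 0, 0};
      std::printf("%s\n", symbolName(Short, Strings).c_str());
      std::printf("%s\n", symbolName(Long, Strings).c_str());
      return 0;
    }
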
-ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData(
- const coff_symbol *Symbol) const {
+ArrayRef<uint8_t>
+COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const {
const uint8_t *Aux = nullptr;
- if (Symbol->NumberOfAuxSymbols > 0) {
- // AUX data comes immediately after the symbol in COFF
- Aux = reinterpret_cast<const uint8_t *>(Symbol + 1);
+ size_t SymbolSize = getSymbolTableEntrySize();
+ if (Symbol.getNumberOfAuxSymbols() > 0) {
+ // AUX data comes immediately after the symbol in COFF
+ Aux = reinterpret_cast<const uint8_t *>(Symbol.getRawPtr()) + SymbolSize;
# ifndef NDEBUG
// Verify that the Aux symbol points to a valid entry in the symbol table.
uintptr_t Offset = uintptr_t(Aux) - uintptr_t(base());
- if (Offset < COFFHeader->PointerToSymbolTable
- || Offset >= COFFHeader->PointerToSymbolTable
- + (COFFHeader->NumberOfSymbols * sizeof(coff_symbol)))
+ if (Offset < getPointerToSymbolTable() ||
+ Offset >=
+ getPointerToSymbolTable() + (getNumberOfSymbols() * SymbolSize))
report_fatal_error("Aux Symbol data was outside of symbol table.");
- assert((Offset - COFFHeader->PointerToSymbolTable) % sizeof(coff_symbol)
- == 0 && "Aux Symbol data did not point to the beginning of a symbol");
+ assert((Offset - getPointerToSymbolTable()) % SymbolSize == 0 &&
+ "Aux Symbol data did not point to the beginning of a symbol");
# endif
}
- return ArrayRef<uint8_t>(Aux,
- Symbol->NumberOfAuxSymbols * sizeof(coff_symbol));
+ return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize);
}
std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
StringRef &Res) const {
StringRef Name;
- if (Sec->Name[7] == 0)
+ if (Sec->Name[COFF::NameSize - 1] == 0)
// Null terminated, let ::strlen figure out the length.
Name = Sec->Name;
else
// Not null terminated, use all 8 bytes.
- Name = StringRef(Sec->Name, 8);
+ Name = StringRef(Sec->Name, COFF::NameSize);
// Check for string table entry. First byte is '/'.
- if (Name[0] == '/') {
+ if (Name.startswith("/")) {
uint32_t Offset;
- if (Name[1] == '/') {
+ if (Name.startswith("//")) {
if (decodeBase64StringEntry(Name.substr(2), Offset))
return object_error::parse_failed;
} else {
@@ -833,18 +987,41 @@ std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
return object_error::success;
}
+uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const {
+ // SizeOfRawData and VirtualSize change what they represent depending on
+ // whether or not we have an executable image.
+ //
+ // For object files, SizeOfRawData contains the size of the section's data;
+ // VirtualSize is always zero.
+ //
+ // For executables, SizeOfRawData *must* be a multiple of FileAlignment; the
+ // actual section size is in VirtualSize. It is possible for VirtualSize to
+ // be greater than SizeOfRawData; the contents past that point should be
+ // considered to be zero.
+ uint32_t SectionSize;
+ if (Sec->VirtualSize)
+ SectionSize = std::min(Sec->VirtualSize, Sec->SizeOfRawData);
+ else
+ SectionSize = Sec->SizeOfRawData;
+
+ return SectionSize;
+}
+
std::error_code
COFFObjectFile::getSectionContents(const coff_section *Sec,
ArrayRef<uint8_t> &Res) const {
+ // PointerToRawData and SizeOfRawData won't make sense for BSS sections, so
+ // don't do anything interesting for them.
+ assert((Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0 &&
+ "BSS sections don't have contents!");
// The only thing that we need to verify is that the contents are contained
// within the file bounds. We don't need to make sure they don't cover other
// data, as there's nothing that says that is not allowed.
uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
- uintptr_t ConEnd = ConStart + Sec->SizeOfRawData;
- if (ConEnd > uintptr_t(Data->getBufferEnd()))
+ uint32_t SectionSize = getSectionSize(Sec);
+ if (checkOffset(Data, ConStart, SectionSize))
return object_error::parse_failed;
- Res = ArrayRef<uint8_t>(reinterpret_cast<const unsigned char*>(ConStart),
- Sec->SizeOfRawData);
+ Res = makeArrayRef(reinterpret_cast<const uint8_t *>(ConStart), SectionSize);
return object_error::success;
}
@@ -864,14 +1041,26 @@ std::error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
std::error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const {
- Res = toRel(Rel)->VirtualAddress;
+ const coff_relocation *R = toRel(Rel);
+ const support::ulittle32_t *VirtualAddressPtr;
+ if (std::error_code EC =
+ getObject(VirtualAddressPtr, Data, &R->VirtualAddress))
+ return EC;
+ Res = *VirtualAddressPtr;
return object_error::success;
}
symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
- const coff_relocation* R = toRel(Rel);
+ const coff_relocation *R = toRel(Rel);
DataRefImpl Ref;
- Ref.p = reinterpret_cast<uintptr_t>(SymbolTable + R->SymbolTableIndex);
+ if (R->SymbolTableIndex >= getNumberOfSymbols())
+ return symbol_end();
+ if (SymbolTable16)
+ Ref.p = reinterpret_cast<uintptr_t>(SymbolTable16 + R->SymbolTableIndex);
+ else if (SymbolTable32)
+ Ref.p = reinterpret_cast<uintptr_t>(SymbolTable32 + R->SymbolTableIndex);
+ else
+ return symbol_end();
return symbol_iterator(SymbolRef(Ref, this));
}
@@ -887,9 +1076,16 @@ COFFObjectFile::getCOFFSection(const SectionRef &Section) const {
return toSec(Section.getRawDataRefImpl());
}
-const coff_symbol *
-COFFObjectFile::getCOFFSymbol(const SymbolRef &Symbol) const {
- return toSymb(Symbol.getRawDataRefImpl());
+COFFSymbolRef COFFObjectFile::getCOFFSymbol(const DataRefImpl &Ref) const {
+ if (SymbolTable16)
+ return toSymb<coff_symbol16>(Ref);
+ if (SymbolTable32)
+ return toSymb<coff_symbol32>(Ref);
+ llvm_unreachable("no symbol table pointer!");
+}
+
+COFFSymbolRef COFFObjectFile::getCOFFSymbol(const SymbolRef &Symbol) const {
+ return getCOFFSymbol(Symbol.getRawDataRefImpl());
}
const coff_relocation *
@@ -907,7 +1103,7 @@ COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
const coff_relocation *Reloc = toRel(Rel);
StringRef Res;
- switch (COFFHeader->Machine) {
+ switch (getMachine()) {
case COFF::IMAGE_FILE_MACHINE_AMD64:
switch (Reloc->Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE);
@@ -982,11 +1178,11 @@ std::error_code
COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
const coff_relocation *Reloc = toRel(Rel);
- const coff_symbol *Symb = nullptr;
- if (std::error_code EC = getSymbol(Reloc->SymbolTableIndex, Symb))
- return EC;
DataRefImpl Sym;
- Sym.p = reinterpret_cast<uintptr_t>(Symb);
+ ErrorOr<COFFSymbolRef> Symb = getSymbol(Reloc->SymbolTableIndex);
+ if (std::error_code EC = Symb.getError())
+ return EC;
+ Sym.p = reinterpret_cast<uintptr_t>(Symb->getRawPtr());
StringRef SymName;
if (std::error_code EC = getSymbolName(Sym, SymName))
return EC;
@@ -994,14 +1190,8 @@ COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
return object_error::success;
}
-std::error_code COFFObjectFile::getLibraryNext(DataRefImpl LibData,
- LibraryRef &Result) const {
- report_fatal_error("getLibraryNext not implemented in COFFObjectFile");
-}
-
-std::error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData,
- StringRef &Result) const {
- report_fatal_error("getLibraryPath not implemented in COFFObjectFile");
+bool COFFObjectFile::isRelocatableObject() const {
+ return !DataDirectory;
}
bool ImportDirectoryEntryRef::
@@ -1015,29 +1205,148 @@ void ImportDirectoryEntryRef::moveNext() {
std::error_code ImportDirectoryEntryRef::getImportTableEntry(
const import_directory_table_entry *&Result) const {
- Result = ImportTable;
+ Result = ImportTable + Index;
return object_error::success;
}
+static imported_symbol_iterator
+makeImportedSymbolIterator(const COFFObjectFile *Object,
+ uintptr_t Ptr, int Index) {
+ if (Object->getBytesInAddress() == 4) {
+ auto *P = reinterpret_cast<const import_lookup_table_entry32 *>(Ptr);
+ return imported_symbol_iterator(ImportedSymbolRef(P, Index, Object));
+ }
+ auto *P = reinterpret_cast<const import_lookup_table_entry64 *>(Ptr);
+ return imported_symbol_iterator(ImportedSymbolRef(P, Index, Object));
+}
+
+static imported_symbol_iterator
+importedSymbolBegin(uint32_t RVA, const COFFObjectFile *Object) {
+ uintptr_t IntPtr = 0;
+ Object->getRvaPtr(RVA, IntPtr);
+ return makeImportedSymbolIterator(Object, IntPtr, 0);
+}
+
+static imported_symbol_iterator
+importedSymbolEnd(uint32_t RVA, const COFFObjectFile *Object) {
+ uintptr_t IntPtr = 0;
+ Object->getRvaPtr(RVA, IntPtr);
+ // Advance the index to the trailing null entry that terminates the table.
+ int Index = 0;
+ if (Object->getBytesInAddress() == 4) {
+ auto *Entry = reinterpret_cast<ulittle32_t *>(IntPtr);
+ while (*Entry++)
+ ++Index;
+ } else {
+ auto *Entry = reinterpret_cast<ulittle64_t *>(IntPtr);
+ while (*Entry++)
+ ++Index;
+ }
+ return makeImportedSymbolIterator(Object, IntPtr, Index);
+}
+
+imported_symbol_iterator
+ImportDirectoryEntryRef::imported_symbol_begin() const {
+ return importedSymbolBegin(ImportTable[Index].ImportLookupTableRVA,
+ OwningObject);
+}
+
+imported_symbol_iterator
+ImportDirectoryEntryRef::imported_symbol_end() const {
+ return importedSymbolEnd(ImportTable[Index].ImportLookupTableRVA,
+ OwningObject);
+}
+
+iterator_range<imported_symbol_iterator>
+ImportDirectoryEntryRef::imported_symbols() const {
+ return make_range(imported_symbol_begin(), imported_symbol_end());
+}
+
std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const {
uintptr_t IntPtr = 0;
if (std::error_code EC =
- OwningObject->getRvaPtr(ImportTable->NameRVA, IntPtr))
+ OwningObject->getRvaPtr(ImportTable[Index].NameRVA, IntPtr))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return object_error::success;
}
+std::error_code
+ImportDirectoryEntryRef::getImportLookupTableRVA(uint32_t &Result) const {
+ Result = ImportTable[Index].ImportLookupTableRVA;
+ return object_error::success;
+}
+
+std::error_code
+ImportDirectoryEntryRef::getImportAddressTableRVA(uint32_t &Result) const {
+ Result = ImportTable[Index].ImportAddressTableRVA;
+ return object_error::success;
+}
+
std::error_code ImportDirectoryEntryRef::getImportLookupEntry(
const import_lookup_table_entry32 *&Result) const {
uintptr_t IntPtr = 0;
- if (std::error_code EC =
- OwningObject->getRvaPtr(ImportTable->ImportLookupTableRVA, IntPtr))
+ uint32_t RVA = ImportTable[Index].ImportLookupTableRVA;
+ if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr))
return EC;
Result = reinterpret_cast<const import_lookup_table_entry32 *>(IntPtr);
return object_error::success;
}
+bool DelayImportDirectoryEntryRef::
+operator==(const DelayImportDirectoryEntryRef &Other) const {
+ return Table == Other.Table && Index == Other.Index;
+}
+
+void DelayImportDirectoryEntryRef::moveNext() {
+ ++Index;
+}
+
+imported_symbol_iterator
+DelayImportDirectoryEntryRef::imported_symbol_begin() const {
+ return importedSymbolBegin(Table[Index].DelayImportNameTable,
+ OwningObject);
+}
+
+imported_symbol_iterator
+DelayImportDirectoryEntryRef::imported_symbol_end() const {
+ return importedSymbolEnd(Table[Index].DelayImportNameTable,
+ OwningObject);
+}
+
+iterator_range<imported_symbol_iterator>
+DelayImportDirectoryEntryRef::imported_symbols() const {
+ return make_range(imported_symbol_begin(), imported_symbol_end());
+}
+
+std::error_code DelayImportDirectoryEntryRef::getName(StringRef &Result) const {
+ uintptr_t IntPtr = 0;
+ if (std::error_code EC = OwningObject->getRvaPtr(Table[Index].Name, IntPtr))
+ return EC;
+ Result = StringRef(reinterpret_cast<const char *>(IntPtr));
+ return object_error::success;
+}
+
+std::error_code DelayImportDirectoryEntryRef::
+getDelayImportTable(const delay_import_directory_table_entry *&Result) const {
+ Result = Table;
+ return object_error::success;
+}
+
+std::error_code DelayImportDirectoryEntryRef::
+getImportAddress(int AddrIndex, uint64_t &Result) const {
+ uint32_t RVA = Table[Index].DelayImportAddressTable +
+ AddrIndex * (OwningObject->is64() ? 8 : 4);
+ uintptr_t IntPtr = 0;
+ if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr))
+ return EC;
+ if (OwningObject->is64())
+ Result = *reinterpret_cast<const ulittle64_t *>(IntPtr);
+ else
+ Result = *reinterpret_cast<const ulittle32_t *>(IntPtr);
+ return object_error::success;
+}
+
bool ExportDirectoryEntryRef::
operator==(const ExportDirectoryEntryRef &Other) const {
return ExportTable == Other.ExportTable && Index == Other.Index;
@@ -1112,12 +1421,98 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
return object_error::success;
}
-ErrorOr<ObjectFile *>
-ObjectFile::createCOFFObjectFile(std::unique_ptr<MemoryBuffer> Object) {
+bool ImportedSymbolRef::
+operator==(const ImportedSymbolRef &Other) const {
+ return Entry32 == Other.Entry32 && Entry64 == Other.Entry64
+ && Index == Other.Index;
+}
+
+void ImportedSymbolRef::moveNext() {
+ ++Index;
+}
+
+std::error_code
+ImportedSymbolRef::getSymbolName(StringRef &Result) const {
+ uint32_t RVA;
+ if (Entry32) {
+ // If a symbol is imported only by ordinal, it has no name.
+ if (Entry32[Index].isOrdinal())
+ return object_error::success;
+ RVA = Entry32[Index].getHintNameRVA();
+ } else {
+ if (Entry64[Index].isOrdinal())
+ return object_error::success;
+ RVA = Entry64[Index].getHintNameRVA();
+ }
+ uintptr_t IntPtr = 0;
+ if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr))
+ return EC;
+ // +2 because the first two bytes are the hint.
+ Result = StringRef(reinterpret_cast<const char *>(IntPtr + 2));
+ return object_error::success;
+}
+
+std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const {
+ uint32_t RVA;
+ if (Entry32) {
+ if (Entry32[Index].isOrdinal()) {
+ Result = Entry32[Index].getOrdinal();
+ return object_error::success;
+ }
+ RVA = Entry32[Index].getHintNameRVA();
+ } else {
+ if (Entry64[Index].isOrdinal()) {
+ Result = Entry64[Index].getOrdinal();
+ return object_error::success;
+ }
+ RVA = Entry64[Index].getHintNameRVA();
+ }
+ uintptr_t IntPtr = 0;
+ if (std::error_code EC = OwningObject->getRvaPtr(RVA, IntPtr))
+ return EC;
+ Result = *reinterpret_cast<const ulittle16_t *>(IntPtr);
+ return object_error::success;
+}
+
+ErrorOr<std::unique_ptr<COFFObjectFile>>
+ObjectFile::createCOFFObjectFile(MemoryBufferRef Object) {
std::error_code EC;
- std::unique_ptr<COFFObjectFile> Ret(
- new COFFObjectFile(std::move(Object), EC));
+ std::unique_ptr<COFFObjectFile> Ret(new COFFObjectFile(Object, EC));
if (EC)
return EC;
- return Ret.release();
+ return std::move(Ret);
+}
+
+bool BaseRelocRef::operator==(const BaseRelocRef &Other) const {
+ return Header == Other.Header && Index == Other.Index;
+}
+
+void BaseRelocRef::moveNext() {
+ // Header->BlockSize is the size of the current block, including the
+ // size of the header itself.
+ uint32_t Size = sizeof(*Header) +
+ sizeof(coff_base_reloc_block_entry) * (Index + 1);
+ if (Size == Header->BlockSize) {
+ // .reloc contains a list of base relocation blocks. Each block
+ // consists of a header followed by entries. The header records
+ // how many entries follow. When we reach the end of the
+ // current block, proceed to the next block.
+ Header = reinterpret_cast<const coff_base_reloc_block_header *>(
+ reinterpret_cast<const uint8_t *>(Header) + Size);
+ Index = 0;
+ } else {
+ ++Index;
+ }
+}
+
+std::error_code BaseRelocRef::getType(uint8_t &Type) const {
+ auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1);
+ Type = Entry[Index].getType();
+ return object_error::success;
+}
+
+std::error_code BaseRelocRef::getRVA(uint32_t &Result) const {
+ auto *Entry = reinterpret_cast<const coff_base_reloc_block_entry *>(Header + 1);
+ Result = Header->PageRVA + Entry[Index].getOffset();
+ return object_error::success;
}
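
The ImportDirectoryEntryRef::imported_symbols() range and the ImportedSymbolRef accessors introduced above let a consumer walk a PE import table without touching the raw lookup-table entries. Below is a minimal sketch under stated assumptions: the ImportDirectoryEntryRef is assumed to come from a COFFObjectFile import iterator declared in the header (not part of this hunk), and dumpImportEntry is an illustrative helper, not part of the patch.

    // Sketch: print the DLL name and every imported symbol (or ordinal)
    // of one import directory entry, using only APIs defined above.
    #include "llvm/Object/COFF.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    static void dumpImportEntry(const ImportDirectoryEntryRef &Dir) {
      StringRef DLLName;
      if (Dir.getName(DLLName))            // non-zero error_code -> bail out
        return;
      outs() << "DLL: " << DLLName << "\n";
      for (const ImportedSymbolRef &Sym : Dir.imported_symbols()) {
        StringRef Name;
        if (Sym.getSymbolName(Name))
          continue;
        if (Name.empty()) {                // imported by ordinal only
          uint16_t Ordinal;
          if (!Sym.getOrdinal(Ordinal))
            outs() << "  ordinal #" << Ordinal << "\n";
          continue;
        }
        outs() << "  " << Name << "\n";
      }
    }
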
diff --git a/lib/Object/COFFYAML.cpp b/lib/Object/COFFYAML.cpp
index 49c5dda..9a24b53 100644
--- a/lib/Object/COFFYAML.cpp
+++ b/lib/Object/COFFYAML.cpp
@@ -168,6 +168,24 @@ void ScalarEnumerationTraits<COFF::RelocationTypeAMD64>::enumeration(
ECase(IMAGE_REL_AMD64_PAIR);
ECase(IMAGE_REL_AMD64_SSPAN32);
}
+
+void ScalarEnumerationTraits<COFF::WindowsSubsystem>::enumeration(
+ IO &IO, COFF::WindowsSubsystem &Value) {
+ ECase(IMAGE_SUBSYSTEM_UNKNOWN);
+ ECase(IMAGE_SUBSYSTEM_NATIVE);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_GUI);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_CUI);
+ ECase(IMAGE_SUBSYSTEM_OS2_CUI);
+ ECase(IMAGE_SUBSYSTEM_POSIX_CUI);
+ ECase(IMAGE_SUBSYSTEM_NATIVE_WINDOWS);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_CE_GUI);
+ ECase(IMAGE_SUBSYSTEM_EFI_APPLICATION);
+ ECase(IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER);
+ ECase(IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER);
+ ECase(IMAGE_SUBSYSTEM_EFI_ROM);
+ ECase(IMAGE_SUBSYSTEM_XBOX);
+ ECase(IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION);
+}
#undef ECase
#define BCase(X) IO.bitSetCase(Value, #X, COFF::X);
@@ -214,6 +232,21 @@ void ScalarBitSetTraits<COFF::SectionCharacteristics>::bitset(
BCase(IMAGE_SCN_MEM_READ);
BCase(IMAGE_SCN_MEM_WRITE);
}
+
+void ScalarBitSetTraits<COFF::DLLCharacteristics>::bitset(
+ IO &IO, COFF::DLLCharacteristics &Value) {
+ BCase(IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA);
+ BCase(IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE);
+ BCase(IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY);
+ BCase(IMAGE_DLL_CHARACTERISTICS_NX_COMPAT);
+ BCase(IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION);
+ BCase(IMAGE_DLL_CHARACTERISTICS_NO_SEH);
+ BCase(IMAGE_DLL_CHARACTERISTICS_NO_BIND);
+ BCase(IMAGE_DLL_CHARACTERISTICS_APPCONTAINER);
+ BCase(IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER);
+ BCase(IMAGE_DLL_CHARACTERISTICS_GUARD_CF);
+ BCase(IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE);
+}
#undef BCase
namespace {
@@ -285,6 +318,23 @@ struct NType {
RelocType Type;
};
+struct NWindowsSubsystem {
+ NWindowsSubsystem(IO &) : Subsystem(COFF::WindowsSubsystem(0)) {}
+ NWindowsSubsystem(IO &, uint16_t C) : Subsystem(COFF::WindowsSubsystem(C)) {}
+ uint16_t denormalize(IO &) { return Subsystem; }
+
+ COFF::WindowsSubsystem Subsystem;
+};
+
+struct NDLLCharacteristics {
+ NDLLCharacteristics(IO &) : Characteristics(COFF::DLLCharacteristics(0)) {}
+ NDLLCharacteristics(IO &, uint16_t C)
+ : Characteristics(COFF::DLLCharacteristics(C)) {}
+ uint16_t denormalize(IO &) { return Characteristics; }
+
+ COFF::DLLCharacteristics Characteristics;
+};
+
}
void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
@@ -306,6 +356,59 @@ void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
}
}
+void MappingTraits<COFF::DataDirectory>::mapping(IO &IO,
+ COFF::DataDirectory &DD) {
+ IO.mapRequired("RelativeVirtualAddress", DD.RelativeVirtualAddress);
+ IO.mapRequired("Size", DD.Size);
+}
+
+void MappingTraits<COFFYAML::PEHeader>::mapping(IO &IO,
+ COFFYAML::PEHeader &PH) {
+ MappingNormalization<NWindowsSubsystem, uint16_t> NWS(IO,
+ PH.Header.Subsystem);
+ MappingNormalization<NDLLCharacteristics, uint16_t> NDC(
+ IO, PH.Header.DLLCharacteristics);
+
+ IO.mapRequired("AddressOfEntryPoint", PH.Header.AddressOfEntryPoint);
+ IO.mapRequired("ImageBase", PH.Header.ImageBase);
+ IO.mapRequired("SectionAlignment", PH.Header.SectionAlignment);
+ IO.mapRequired("FileAlignment", PH.Header.FileAlignment);
+ IO.mapRequired("MajorOperatingSystemVersion",
+ PH.Header.MajorOperatingSystemVersion);
+ IO.mapRequired("MinorOperatingSystemVersion",
+ PH.Header.MinorOperatingSystemVersion);
+ IO.mapRequired("MajorImageVersion", PH.Header.MajorImageVersion);
+ IO.mapRequired("MinorImageVersion", PH.Header.MinorImageVersion);
+ IO.mapRequired("MajorSubsystemVersion", PH.Header.MajorSubsystemVersion);
+ IO.mapRequired("MinorSubsystemVersion", PH.Header.MinorSubsystemVersion);
+ IO.mapRequired("Subsystem", NWS->Subsystem);
+ IO.mapRequired("DLLCharacteristics", NDC->Characteristics);
+ IO.mapRequired("SizeOfStackReserve", PH.Header.SizeOfStackReserve);
+ IO.mapRequired("SizeOfStackCommit", PH.Header.SizeOfStackCommit);
+ IO.mapRequired("SizeOfHeapReserve", PH.Header.SizeOfHeapReserve);
+ IO.mapRequired("SizeOfHeapCommit", PH.Header.SizeOfHeapCommit);
+
+ IO.mapOptional("ExportTable", PH.DataDirectories[COFF::EXPORT_TABLE]);
+ IO.mapOptional("ImportTable", PH.DataDirectories[COFF::IMPORT_TABLE]);
+ IO.mapOptional("ResourceTable", PH.DataDirectories[COFF::RESOURCE_TABLE]);
+ IO.mapOptional("ExceptionTable", PH.DataDirectories[COFF::EXCEPTION_TABLE]);
+ IO.mapOptional("CertificateTable", PH.DataDirectories[COFF::CERTIFICATE_TABLE]);
+ IO.mapOptional("BaseRelocationTable",
+ PH.DataDirectories[COFF::BASE_RELOCATION_TABLE]);
+ IO.mapOptional("Debug", PH.DataDirectories[COFF::DEBUG]);
+ IO.mapOptional("Architecture", PH.DataDirectories[COFF::ARCHITECTURE]);
+ IO.mapOptional("GlobalPtr", PH.DataDirectories[COFF::GLOBAL_PTR]);
+ IO.mapOptional("TlsTable", PH.DataDirectories[COFF::TLS_TABLE]);
+ IO.mapOptional("LoadConfigTable",
+ PH.DataDirectories[COFF::LOAD_CONFIG_TABLE]);
+ IO.mapOptional("BoundImport", PH.DataDirectories[COFF::BOUND_IMPORT]);
+ IO.mapOptional("IAT", PH.DataDirectories[COFF::IAT]);
+ IO.mapOptional("DelayImportDescriptor",
+ PH.DataDirectories[COFF::DELAY_IMPORT_DESCRIPTOR]);
+ IO.mapOptional("ClrRuntimeHeader",
+ PH.DataDirectories[COFF::CLR_RUNTIME_HEADER]);
+}
+
void MappingTraits<COFF::header>::mapping(IO &IO, COFF::header &H) {
MappingNormalization<NMachine, uint16_t> NM(IO, H.Machine);
MappingNormalization<NHeaderCharacteristics, uint16_t> NC(IO,
@@ -380,12 +483,15 @@ void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) {
IO, Sec.Header.Characteristics);
IO.mapRequired("Name", Sec.Name);
IO.mapRequired("Characteristics", NC->Characteristics);
+ IO.mapOptional("VirtualAddress", Sec.Header.VirtualAddress, 0U);
+ IO.mapOptional("VirtualSize", Sec.Header.VirtualSize, 0U);
IO.mapOptional("Alignment", Sec.Alignment);
IO.mapRequired("SectionData", Sec.SectionData);
IO.mapOptional("Relocations", Sec.Relocations);
}
void MappingTraits<COFFYAML::Object>::mapping(IO &IO, COFFYAML::Object &Obj) {
+ IO.mapOptional("OptionalHeader", Obj.OptionalHeader);
IO.mapRequired("header", Obj.Header);
IO.mapRequired("sections", Obj.Sections);
IO.mapRequired("symbols", Obj.Symbols);
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index df4dd5e..11099bd 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -223,6 +223,8 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST32_ABS_LO12_NC);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST64_ABS_LO12_NC);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST128_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_GOTREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_GOTREL32);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_GOT_PAGE);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD64_GOT_LO12_NC);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G2);
@@ -266,6 +268,15 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLS_DTPREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLS_DTPMOD64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLS_TPREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_IRELATIVE);
default:
break;
}
@@ -519,6 +530,7 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_LO);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_HI);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_HA);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_PLTREL24);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL32);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TLS);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPMOD32);
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 4f0f60b..8ccb253 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -17,61 +17,56 @@
namespace llvm {
using namespace object;
-ErrorOr<ObjectFile *>
-ObjectFile::createELFObjectFile(std::unique_ptr<MemoryBuffer> &Obj) {
+ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source)
+ : ObjectFile(Type, Source) {}
+
+ErrorOr<std::unique_ptr<ObjectFile>>
+ObjectFile::createELFObjectFile(MemoryBufferRef Obj) {
std::pair<unsigned char, unsigned char> Ident =
- getElfArchType(Obj->getBuffer());
+ getElfArchType(Obj.getBuffer());
std::size_t MaxAlignment =
- 1ULL << countTrailingZeros(uintptr_t(Obj->getBufferStart()));
+ 1ULL << countTrailingZeros(uintptr_t(Obj.getBufferStart()));
std::error_code EC;
std::unique_ptr<ObjectFile> R;
if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
if (MaxAlignment >= 4)
- R.reset(new ELFObjectFile<ELFType<support::little, 4, false>>(
- std::move(Obj), EC));
+ R.reset(new ELFObjectFile<ELFType<support::little, 4, false>>(Obj, EC));
else
#endif
if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::little, 2, false>>(
- std::move(Obj), EC));
+ R.reset(new ELFObjectFile<ELFType<support::little, 2, false>>(Obj, EC));
else
return object_error::parse_failed;
else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
if (MaxAlignment >= 4)
- R.reset(new ELFObjectFile<ELFType<support::big, 4, false>>(std::move(Obj),
- EC));
+ R.reset(new ELFObjectFile<ELFType<support::big, 4, false>>(Obj, EC));
else
#endif
if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::big, 2, false>>(std::move(Obj),
- EC));
+ R.reset(new ELFObjectFile<ELFType<support::big, 2, false>>(Obj, EC));
else
return object_error::parse_failed;
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
if (MaxAlignment >= 8)
- R.reset(new ELFObjectFile<ELFType<support::big, 8, true>>(std::move(Obj),
- EC));
+ R.reset(new ELFObjectFile<ELFType<support::big, 8, true>>(Obj, EC));
else
#endif
if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::big, 2, true>>(std::move(Obj),
- EC));
+ R.reset(new ELFObjectFile<ELFType<support::big, 2, true>>(Obj, EC));
else
return object_error::parse_failed;
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
if (MaxAlignment >= 8)
- R.reset(new ELFObjectFile<ELFType<support::little, 8, true>>(
- std::move(Obj), EC));
+ R.reset(new ELFObjectFile<ELFType<support::little, 8, true>>(Obj, EC));
else
#endif
if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::little, 2, true>>(
- std::move(Obj), EC));
+ R.reset(new ELFObjectFile<ELFType<support::little, 2, true>>(Obj, EC));
else
return object_error::parse_failed;
}
@@ -80,7 +75,7 @@ ObjectFile::createELFObjectFile(std::unique_ptr<MemoryBuffer> &Obj) {
if (EC)
return EC;
- return R.release();
+ return std::move(R);
}
} // end namespace llvm
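
createELFObjectFile above keys the template instantiation off the EI_CLASS and EI_DATA bytes of e_ident (via getElfArchType) plus the buffer's alignment. The following stand-alone sketch shows that classification step written directly against the ELF identification bytes; the constants are redefined locally for illustration and elfArchType is a made-up name, not the LLVM helper.

    // Sketch: recover the (class, endianness) pair that drives the dispatch
    // in createELFObjectFile, reading only e_ident. Offsets 4 and 5 are
    // EI_CLASS and EI_DATA in the ELF specification.
    #include <cstddef>
    #include <cstring>
    #include <utility>

    enum { MyELFCLASS32 = 1, MyELFCLASS64 = 2,
           MyELFDATA2LSB = 1, MyELFDATA2MSB = 2 };

    static std::pair<unsigned char, unsigned char>
    elfArchType(const char *Buf, std::size_t Size) {
      if (Size < 6 || std::memcmp(Buf, "\177ELF", 4) != 0)
        return {0, 0};                     // not an ELF file
      return {(unsigned char)Buf[4],       // EI_CLASS: 32-bit or 64-bit
              (unsigned char)Buf[5]};      // EI_DATA: LSB or MSB encoding
    }
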
diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp
index dc3d467..f513c11 100644
--- a/lib/Object/ELFYAML.cpp
+++ b/lib/Object/ELFYAML.cpp
@@ -264,6 +264,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_MIPS_CPIC)
BCase(EF_MIPS_ABI2)
BCase(EF_MIPS_32BITMODE)
+ BCase(EF_MIPS_NAN2008)
BCase(EF_MIPS_ABI_O32)
BCase(EF_MIPS_MICROMIPS)
BCase(EF_MIPS_ARCH_ASE_M16)
@@ -298,6 +299,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
IO &IO, ELFYAML::ELF_SHT &Value) {
+ const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
+ assert(Object && "The IO context is not initialized");
#define ECase(X) IO.enumCase(Value, #X, ELF::X);
ECase(SHT_NULL)
ECase(SHT_PROGBITS)
@@ -325,15 +328,29 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_GNU_versym)
ECase(SHT_HIOS)
ECase(SHT_LOPROC)
- ECase(SHT_ARM_EXIDX)
- ECase(SHT_ARM_PREEMPTMAP)
- ECase(SHT_ARM_ATTRIBUTES)
- ECase(SHT_ARM_DEBUGOVERLAY)
- ECase(SHT_ARM_OVERLAYSECTION)
- ECase(SHT_HEX_ORDERED)
- ECase(SHT_X86_64_UNWIND)
- ECase(SHT_MIPS_REGINFO)
- ECase(SHT_MIPS_OPTIONS)
+ switch (Object->Header.Machine) {
+ case ELF::EM_ARM:
+ ECase(SHT_ARM_EXIDX)
+ ECase(SHT_ARM_PREEMPTMAP)
+ ECase(SHT_ARM_ATTRIBUTES)
+ ECase(SHT_ARM_DEBUGOVERLAY)
+ ECase(SHT_ARM_OVERLAYSECTION)
+ break;
+ case ELF::EM_HEXAGON:
+ ECase(SHT_HEX_ORDERED)
+ break;
+ case ELF::EM_X86_64:
+ ECase(SHT_X86_64_UNWIND)
+ break;
+ case ELF::EM_MIPS:
+ ECase(SHT_MIPS_REGINFO)
+ ECase(SHT_MIPS_OPTIONS)
+ ECase(SHT_MIPS_ABIFLAGS)
+ break;
+ default:
+ // Nothing to do.
+ break;
+ }
#undef ECase
}
@@ -378,6 +395,25 @@ void ScalarEnumerationTraits<ELFYAML::ELF_STV>::enumeration(
#undef ECase
}
+void ScalarBitSetTraits<ELFYAML::ELF_STO>::bitset(IO &IO,
+ ELFYAML::ELF_STO &Value) {
+ const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
+ assert(Object && "The IO context is not initialized");
+#define BCase(X) IO.bitSetCase(Value, #X, ELF::X);
+ switch (Object->Header.Machine) {
+ case ELF::EM_MIPS:
+ BCase(STO_MIPS_OPTIONAL)
+ BCase(STO_MIPS_PLT)
+ BCase(STO_MIPS_PIC)
+ BCase(STO_MIPS_MICROMIPS)
+ break;
+ default:
+ break; // Nothing to do
+ }
+#undef BCase
+#undef BCaseMask
+}
+
void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
IO &IO, ELFYAML::ELF_REL &Value) {
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
@@ -636,6 +672,92 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
ECase(R_386_IRELATIVE)
ECase(R_386_NUM)
break;
+ case ELF::EM_AARCH64:
+ ECase(R_AARCH64_NONE)
+ ECase(R_AARCH64_ABS64)
+ ECase(R_AARCH64_ABS32)
+ ECase(R_AARCH64_ABS16)
+ ECase(R_AARCH64_PREL64)
+ ECase(R_AARCH64_PREL32)
+ ECase(R_AARCH64_PREL16)
+ ECase(R_AARCH64_MOVW_UABS_G0)
+ ECase(R_AARCH64_MOVW_UABS_G0_NC)
+ ECase(R_AARCH64_MOVW_UABS_G1)
+ ECase(R_AARCH64_MOVW_UABS_G1_NC)
+ ECase(R_AARCH64_MOVW_UABS_G2)
+ ECase(R_AARCH64_MOVW_UABS_G2_NC)
+ ECase(R_AARCH64_MOVW_UABS_G3)
+ ECase(R_AARCH64_MOVW_SABS_G0)
+ ECase(R_AARCH64_MOVW_SABS_G1)
+ ECase(R_AARCH64_MOVW_SABS_G2)
+ ECase(R_AARCH64_LD_PREL_LO19)
+ ECase(R_AARCH64_ADR_PREL_LO21)
+ ECase(R_AARCH64_ADR_PREL_PG_HI21)
+ ECase(R_AARCH64_ADD_ABS_LO12_NC)
+ ECase(R_AARCH64_LDST8_ABS_LO12_NC)
+ ECase(R_AARCH64_TSTBR14)
+ ECase(R_AARCH64_CONDBR19)
+ ECase(R_AARCH64_JUMP26)
+ ECase(R_AARCH64_CALL26)
+ ECase(R_AARCH64_LDST16_ABS_LO12_NC)
+ ECase(R_AARCH64_LDST32_ABS_LO12_NC)
+ ECase(R_AARCH64_LDST64_ABS_LO12_NC)
+ ECase(R_AARCH64_LDST128_ABS_LO12_NC)
+ ECase(R_AARCH64_GOTREL64)
+ ECase(R_AARCH64_GOTREL32)
+ ECase(R_AARCH64_ADR_GOT_PAGE)
+ ECase(R_AARCH64_LD64_GOT_LO12_NC)
+ ECase(R_AARCH64_TLSLD_MOVW_DTPREL_G2)
+ ECase(R_AARCH64_TLSLD_MOVW_DTPREL_G1)
+ ECase(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC)
+ ECase(R_AARCH64_TLSLD_MOVW_DTPREL_G0)
+ ECase(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC)
+ ECase(R_AARCH64_TLSLD_ADD_DTPREL_HI12)
+ ECase(R_AARCH64_TLSLD_ADD_DTPREL_LO12)
+ ECase(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC)
+ ECase(R_AARCH64_TLSLD_LDST8_DTPREL_LO12)
+ ECase(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC)
+ ECase(R_AARCH64_TLSLD_LDST16_DTPREL_LO12)
+ ECase(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC)
+ ECase(R_AARCH64_TLSLD_LDST32_DTPREL_LO12)
+ ECase(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC)
+ ECase(R_AARCH64_TLSLD_LDST64_DTPREL_LO12)
+ ECase(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC)
+ ECase(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1)
+ ECase(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC)
+ ECase(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21)
+ ECase(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC)
+ ECase(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19)
+ ECase(R_AARCH64_TLSLE_MOVW_TPREL_G2)
+ ECase(R_AARCH64_TLSLE_MOVW_TPREL_G1)
+ ECase(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC)
+ ECase(R_AARCH64_TLSLE_MOVW_TPREL_G0)
+ ECase(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC)
+ ECase(R_AARCH64_TLSLE_ADD_TPREL_HI12)
+ ECase(R_AARCH64_TLSLE_ADD_TPREL_LO12)
+ ECase(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC)
+ ECase(R_AARCH64_TLSLE_LDST8_TPREL_LO12)
+ ECase(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC)
+ ECase(R_AARCH64_TLSLE_LDST16_TPREL_LO12)
+ ECase(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC)
+ ECase(R_AARCH64_TLSLE_LDST32_TPREL_LO12)
+ ECase(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC)
+ ECase(R_AARCH64_TLSLE_LDST64_TPREL_LO12)
+ ECase(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC)
+ ECase(R_AARCH64_TLSDESC_ADR_PAGE)
+ ECase(R_AARCH64_TLSDESC_LD64_LO12_NC)
+ ECase(R_AARCH64_TLSDESC_ADD_LO12_NC)
+ ECase(R_AARCH64_TLSDESC_CALL)
+ ECase(R_AARCH64_COPY)
+ ECase(R_AARCH64_GLOB_DAT)
+ ECase(R_AARCH64_JUMP_SLOT)
+ ECase(R_AARCH64_RELATIVE)
+ ECase(R_AARCH64_TLS_DTPREL64)
+ ECase(R_AARCH64_TLS_DTPMOD64)
+ ECase(R_AARCH64_TLS_TPREL64)
+ ECase(R_AARCH64_TLSDESC)
+ ECase(R_AARCH64_IRELATIVE)
+ break;
default:
llvm_unreachable("Unsupported architecture");
}
@@ -653,13 +775,30 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
IO.mapOptional("Entry", FileHdr.Entry, Hex64(0));
}
+namespace {
+struct NormalizedOther {
+ NormalizedOther(IO &)
+ : Visibility(ELFYAML::ELF_STV(0)), Other(ELFYAML::ELF_STO(0)) {}
+ NormalizedOther(IO &, uint8_t Original)
+ : Visibility(Original & 0x3), Other(Original & ~0x3) {}
+
+ uint8_t denormalize(IO &) { return Visibility | Other; }
+
+ ELFYAML::ELF_STV Visibility;
+ ELFYAML::ELF_STO Other;
+};
+}
+
void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Name", Symbol.Name, StringRef());
IO.mapOptional("Type", Symbol.Type, ELFYAML::ELF_STT(0));
IO.mapOptional("Section", Symbol.Section, StringRef());
IO.mapOptional("Value", Symbol.Value, Hex64(0));
IO.mapOptional("Size", Symbol.Size, Hex64(0));
- IO.mapOptional("Visibility", Symbol.Visibility, ELFYAML::ELF_STV(0));
+
+ MappingNormalization<NormalizedOther, uint8_t> Keys(IO, Symbol.Other);
+ IO.mapOptional("Visibility", Keys->Visibility, ELFYAML::ELF_STV(0));
+ IO.mapOptional("Other", Keys->Other, ELFYAML::ELF_STO(0));
}
void MappingTraits<ELFYAML::LocalGlobalWeakSymbols>::mapping(
diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp
index 9d25269..d2daab7 100644
--- a/lib/Object/Error.cpp
+++ b/lib/Object/Error.cpp
@@ -13,6 +13,7 @@
#include "llvm/Object/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
using namespace object;
@@ -25,7 +26,7 @@ public:
};
}
-const char *_object_error_category::name() const {
+const char *_object_error_category::name() const LLVM_NOEXCEPT {
return "llvm.object";
}
@@ -41,12 +42,15 @@ std::string _object_error_category::message(int EV) const {
return "Invalid data was encountered while parsing the file";
case object_error::unexpected_eof:
return "The end of the file was unexpectedly encountered";
+ case object_error::bitcode_section_not_found:
+ return "Bitcode section not found in object file";
}
llvm_unreachable("An enumerator of object_error does not have a message "
"defined.");
}
+static ManagedStatic<_object_error_category> error_category;
+
const std::error_category &object::object_category() {
- static _object_error_category o;
- return o;
+ return *error_category;
}
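
The error category is now held in a ManagedStatic, so it is constructed lazily on first use and torn down by llvm_shutdown() rather than relying on a function-local static. A minimal sketch of the same idiom; ExpensiveSingleton and currentState are illustrative names, not part of the patch.

    // Sketch of the ManagedStatic pattern used above: the object is built on
    // the first dereference and destroyed when llvm_shutdown() runs.
    #include "llvm/Support/ManagedStatic.h"

    namespace {
    struct ExpensiveSingleton {
      int State = 0;
    };
    } // end anonymous namespace

    static llvm::ManagedStatic<ExpensiveSingleton> TheSingleton;

    int currentState() {
      return TheSingleton->State;   // first call constructs the singleton
    }
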
diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp
index 5323d92..7256a2f 100644
--- a/lib/Object/IRObjectFile.cpp
+++ b/lib/Object/IRObjectFile.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
@@ -32,9 +33,8 @@
using namespace llvm;
using namespace object;
-IRObjectFile::IRObjectFile(std::unique_ptr<MemoryBuffer> Object,
- std::unique_ptr<Module> Mod)
- : SymbolicFile(Binary::ID_IR, std::move(Object)), M(std::move(Mod)) {
+IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
+ : SymbolicFile(Binary::ID_IR, Object), M(std::move(Mod)) {
// If we have a DataLayout, setup a mangler.
const DataLayout *DL = M->getDataLayout();
if (!DL)
@@ -76,7 +76,7 @@ IRObjectFile::IRObjectFile(std::unique_ptr<MemoryBuffer> Object,
std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm));
SourceMgr SrcMgr;
- SrcMgr.AddNewSourceBuffer(Buffer.release(), SMLoc());
+ SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, MCCtx, *Streamer, *MAI));
@@ -114,9 +114,6 @@ IRObjectFile::IRObjectFile(std::unique_ptr<MemoryBuffer> Object,
}
IRObjectFile::~IRObjectFile() {
- GVMaterializer *GVM = M->getMaterializer();
- if (GVM)
- GVM->releaseBuffer();
}
static const GlobalValue *getGV(DataRefImpl &Symb) {
@@ -207,16 +204,6 @@ std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
return object_error::success;
}
-static bool isDeclaration(const GlobalValue &V) {
- if (V.hasAvailableExternallyLinkage())
- return true;
-
- if (V.isMaterializable())
- return false;
-
- return V.isDeclaration();
-}
-
uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
const GlobalValue *GV = getGV(Symb);
@@ -227,7 +214,7 @@ uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
}
uint32_t Res = BasicSymbolRef::SF_None;
- if (isDeclaration(*GV))
+ if (GV->isDeclarationForLinker())
Res |= BasicSymbolRef::SF_Undefined;
if (GV->hasPrivateLinkage())
Res |= BasicSymbolRef::SF_FormatSpecific;
@@ -268,12 +255,55 @@ basic_symbol_iterator IRObjectFile::symbol_end_impl() const {
return basic_symbol_iterator(BasicSymbolRef(Ret, this));
}
-ErrorOr<IRObjectFile *> llvm::object::IRObjectFile::createIRObjectFile(
- std::unique_ptr<MemoryBuffer> Object, LLVMContext &Context) {
- ErrorOr<Module *> MOrErr = getLazyBitcodeModule(Object.get(), Context);
+ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) {
+ for (const SectionRef &Sec : Obj.sections()) {
+ StringRef SecName;
+ if (std::error_code EC = Sec.getName(SecName))
+ return EC;
+ if (SecName == ".llvmbc") {
+ StringRef SecContents;
+ if (std::error_code EC = Sec.getContents(SecContents))
+ return EC;
+ return MemoryBufferRef(SecContents, Obj.getFileName());
+ }
+ }
+
+ return object_error::bitcode_section_not_found;
+}
+
+ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) {
+ sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer());
+ switch (Type) {
+ case sys::fs::file_magic::bitcode:
+ return Object;
+ case sys::fs::file_magic::elf_relocatable:
+ case sys::fs::file_magic::macho_object:
+ case sys::fs::file_magic::coff_object: {
+ ErrorOr<std::unique_ptr<ObjectFile>> ObjFile =
+ ObjectFile::createObjectFile(Object, Type);
+ if (!ObjFile)
+ return ObjFile.getError();
+ return findBitcodeInObject(*ObjFile->get());
+ }
+ default:
+ return object_error::invalid_file_type;
+ }
+}
+
+ErrorOr<std::unique_ptr<IRObjectFile>>
+llvm::object::IRObjectFile::createIRObjectFile(MemoryBufferRef Object,
+ LLVMContext &Context) {
+ ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object);
+ if (!BCOrErr)
+ return BCOrErr.getError();
+
+ std::unique_ptr<MemoryBuffer> Buff(
+ MemoryBuffer::getMemBuffer(BCOrErr.get(), false));
+
+ ErrorOr<Module *> MOrErr = getLazyBitcodeModule(std::move(Buff), Context);
if (std::error_code EC = MOrErr.getError())
return EC;
std::unique_ptr<Module> M(MOrErr.get());
- return new IRObjectFile(std::move(Object), std::move(M));
+ return llvm::make_unique<IRObjectFile>(Object, std::move(M));
}
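
createIRObjectFile now takes a MemoryBufferRef and, via findBitcodeInMemBuffer/findBitcodeInObject, transparently locates an embedded .llvmbc section when handed a native object. A hedged usage sketch follows; MemoryBuffer::getFile, getMemBufferRef, the symbols() range and BasicSymbolRef::printName are assumed to be available in this tree but are not part of this patch, and printIRSymbols is an illustrative helper.

    // Sketch: load a file, let IRObjectFile find the bitcode in it (a raw
    // bitcode file or an object with a .llvmbc section), then print symbols.
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Object/IRObjectFile.h"
    #include "llvm/Support/ErrorOr.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>
    using namespace llvm;
    using namespace llvm::object;

    static std::error_code printIRSymbols(StringRef Path, LLVMContext &Ctx) {
      ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
          MemoryBuffer::getFile(Path);
      if (std::error_code EC = BufOrErr.getError())
        return EC;
      ErrorOr<std::unique_ptr<IRObjectFile>> IROrErr =
          IRObjectFile::createIRObjectFile((*BufOrErr)->getMemBufferRef(), Ctx);
      if (std::error_code EC = IROrErr.getError())
        return EC;
      for (const BasicSymbolRef &Sym : (*IROrErr)->symbols()) {
        if (std::error_code EC = Sym.printName(outs()))
          return EC;
        outs() << "\n";
      }
      return std::error_code();
    }
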
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
index 8acacba..bae578c 100644
--- a/lib/Object/LLVMBuild.txt
+++ b/lib/Object/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Object
parent = Libraries
-required_libraries = BitReader Core Support MC MCParser
+required_libraries = BitReader Core MC MCParser Support
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 4919114..bbef639 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -14,10 +14,14 @@
#include "llvm/Object/MachO.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@@ -27,220 +31,19 @@
using namespace llvm;
using namespace object;
-namespace llvm {
-
-namespace object {
-
-struct nlist_base {
- uint32_t n_strx;
- uint8_t n_type;
- uint8_t n_sect;
- uint16_t n_desc;
-};
-
-struct section_base {
- char sectname[16];
- char segname[16];
-};
-
-template<typename T>
-static void SwapStruct(T &Value);
-
-template<>
-void SwapStruct(MachO::any_relocation_info &H) {
- sys::swapByteOrder(H.r_word0);
- sys::swapByteOrder(H.r_word1);
-}
-
-template<>
-void SwapStruct(MachO::load_command &L) {
- sys::swapByteOrder(L.cmd);
- sys::swapByteOrder(L.cmdsize);
-}
-
-template<>
-void SwapStruct(nlist_base &S) {
- sys::swapByteOrder(S.n_strx);
- sys::swapByteOrder(S.n_desc);
-}
-
-template<>
-void SwapStruct(MachO::section &S) {
- sys::swapByteOrder(S.addr);
- sys::swapByteOrder(S.size);
- sys::swapByteOrder(S.offset);
- sys::swapByteOrder(S.align);
- sys::swapByteOrder(S.reloff);
- sys::swapByteOrder(S.nreloc);
- sys::swapByteOrder(S.flags);
- sys::swapByteOrder(S.reserved1);
- sys::swapByteOrder(S.reserved2);
-}
-
-template<>
-void SwapStruct(MachO::section_64 &S) {
- sys::swapByteOrder(S.addr);
- sys::swapByteOrder(S.size);
- sys::swapByteOrder(S.offset);
- sys::swapByteOrder(S.align);
- sys::swapByteOrder(S.reloff);
- sys::swapByteOrder(S.nreloc);
- sys::swapByteOrder(S.flags);
- sys::swapByteOrder(S.reserved1);
- sys::swapByteOrder(S.reserved2);
- sys::swapByteOrder(S.reserved3);
-}
-
-template<>
-void SwapStruct(MachO::nlist &S) {
- sys::swapByteOrder(S.n_strx);
- sys::swapByteOrder(S.n_desc);
- sys::swapByteOrder(S.n_value);
-}
-
-template<>
-void SwapStruct(MachO::nlist_64 &S) {
- sys::swapByteOrder(S.n_strx);
- sys::swapByteOrder(S.n_desc);
- sys::swapByteOrder(S.n_value);
-}
-
-template<>
-void SwapStruct(MachO::mach_header &H) {
- sys::swapByteOrder(H.magic);
- sys::swapByteOrder(H.cputype);
- sys::swapByteOrder(H.cpusubtype);
- sys::swapByteOrder(H.filetype);
- sys::swapByteOrder(H.ncmds);
- sys::swapByteOrder(H.sizeofcmds);
- sys::swapByteOrder(H.flags);
-}
-
-template<>
-void SwapStruct(MachO::mach_header_64 &H) {
- sys::swapByteOrder(H.magic);
- sys::swapByteOrder(H.cputype);
- sys::swapByteOrder(H.cpusubtype);
- sys::swapByteOrder(H.filetype);
- sys::swapByteOrder(H.ncmds);
- sys::swapByteOrder(H.sizeofcmds);
- sys::swapByteOrder(H.flags);
- sys::swapByteOrder(H.reserved);
-}
-
-template<>
-void SwapStruct(MachO::symtab_command &C) {
- sys::swapByteOrder(C.cmd);
- sys::swapByteOrder(C.cmdsize);
- sys::swapByteOrder(C.symoff);
- sys::swapByteOrder(C.nsyms);
- sys::swapByteOrder(C.stroff);
- sys::swapByteOrder(C.strsize);
-}
-
-template<>
-void SwapStruct(MachO::dysymtab_command &C) {
- sys::swapByteOrder(C.cmd);
- sys::swapByteOrder(C.cmdsize);
- sys::swapByteOrder(C.ilocalsym);
- sys::swapByteOrder(C.nlocalsym);
- sys::swapByteOrder(C.iextdefsym);
- sys::swapByteOrder(C.nextdefsym);
- sys::swapByteOrder(C.iundefsym);
- sys::swapByteOrder(C.nundefsym);
- sys::swapByteOrder(C.tocoff);
- sys::swapByteOrder(C.ntoc);
- sys::swapByteOrder(C.modtaboff);
- sys::swapByteOrder(C.nmodtab);
- sys::swapByteOrder(C.extrefsymoff);
- sys::swapByteOrder(C.nextrefsyms);
- sys::swapByteOrder(C.indirectsymoff);
- sys::swapByteOrder(C.nindirectsyms);
- sys::swapByteOrder(C.extreloff);
- sys::swapByteOrder(C.nextrel);
- sys::swapByteOrder(C.locreloff);
- sys::swapByteOrder(C.nlocrel);
-}
-
-template<>
-void SwapStruct(MachO::linkedit_data_command &C) {
- sys::swapByteOrder(C.cmd);
- sys::swapByteOrder(C.cmdsize);
- sys::swapByteOrder(C.dataoff);
- sys::swapByteOrder(C.datasize);
-}
-
-template<>
-void SwapStruct(MachO::segment_command &C) {
- sys::swapByteOrder(C.cmd);
- sys::swapByteOrder(C.cmdsize);
- sys::swapByteOrder(C.vmaddr);
- sys::swapByteOrder(C.vmsize);
- sys::swapByteOrder(C.fileoff);
- sys::swapByteOrder(C.filesize);
- sys::swapByteOrder(C.maxprot);
- sys::swapByteOrder(C.initprot);
- sys::swapByteOrder(C.nsects);
- sys::swapByteOrder(C.flags);
-}
-
-template<>
-void SwapStruct(MachO::segment_command_64 &C) {
- sys::swapByteOrder(C.cmd);
- sys::swapByteOrder(C.cmdsize);
- sys::swapByteOrder(C.vmaddr);
- sys::swapByteOrder(C.vmsize);
- sys::swapByteOrder(C.fileoff);
- sys::swapByteOrder(C.filesize);
- sys::swapByteOrder(C.maxprot);
- sys::swapByteOrder(C.initprot);
- sys::swapByteOrder(C.nsects);
- sys::swapByteOrder(C.flags);
-}
-
-template<>
-void SwapStruct(uint32_t &C) {
- sys::swapByteOrder(C);
-}
-
-template<>
-void SwapStruct(MachO::linker_options_command &C) {
- sys::swapByteOrder(C.cmd);
- sys::swapByteOrder(C.cmdsize);
- sys::swapByteOrder(C.count);
-}
-
-template<>
-void SwapStruct(MachO::version_min_command&C) {
- sys::swapByteOrder(C.cmd);
- sys::swapByteOrder(C.cmdsize);
- sys::swapByteOrder(C.version);
- sys::swapByteOrder(C.reserved);
-}
-
-template<>
-void SwapStruct(MachO::dylib_command&C) {
- sys::swapByteOrder(C.cmd);
- sys::swapByteOrder(C.cmdsize);
- sys::swapByteOrder(C.dylib.name);
- sys::swapByteOrder(C.dylib.timestamp);
- sys::swapByteOrder(C.dylib.current_version);
- sys::swapByteOrder(C.dylib.compatibility_version);
-}
-
-template<>
-void SwapStruct(MachO::data_in_code_entry &C) {
- sys::swapByteOrder(C.offset);
- sys::swapByteOrder(C.length);
- sys::swapByteOrder(C.kind);
+namespace {
+ struct section_base {
+ char sectname[16];
+ char segname[16];
+ };
}
template<typename T>
-T getStruct(const MachOObjectFile *O, const char *P) {
+static T getStruct(const MachOObjectFile *O, const char *P) {
T Cmd;
memcpy(&Cmd, P, sizeof(T));
if (O->isLittleEndian() != sys::IsLittleEndianHost)
- SwapStruct(Cmd);
+ MachO::swapStruct(Cmd);
return Cmd;
}
@@ -255,6 +58,17 @@ getSegmentLoadCommandNumSections(const MachOObjectFile *O,
return S.nsects;
}
+static bool isPageZeroSegment(const MachOObjectFile *O,
+ const MachOObjectFile::LoadCommandInfo &L) {
+ if (O->is64Bit()) {
+ MachO::segment_command_64 S = O->getSegment64LoadCommand(L);
+ return StringRef("__PAGEZERO").equals(S.segname);
+ }
+ MachO::segment_command S = O->getSegmentLoadCommand(L);
+ return StringRef("__PAGEZERO").equals(S.segname);
+}
+
+
static const char *
getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L,
unsigned Sec) {
@@ -274,10 +88,10 @@ static const char *getPtr(const MachOObjectFile *O, size_t Offset) {
return O->getData().substr(Offset, 1).data();
}
-static nlist_base
+static MachO::nlist_base
getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) {
const char *P = reinterpret_cast<const char *>(DRI.p);
- return getStruct<nlist_base>(O, P);
+ return getStruct<MachO::nlist_base>(O, P);
}
static StringRef parseSegmentOrSectionName(const char *P) {
@@ -330,11 +144,9 @@ static void printRelocationTargetName(const MachOObjectFile *O,
// to find a section beginning instead.
for (const SectionRef &Section : O->sections()) {
std::error_code ec;
- uint64_t Addr;
- StringRef Name;
- if ((ec = Section.getAddress(Addr)))
- report_fatal_error(ec.message());
+ StringRef Name;
+ uint64_t Addr = Section.getAddress();
if (Addr != Val)
continue;
if ((ec = Section.getName(Name)))
@@ -407,11 +219,6 @@ static unsigned getPlainRelocationType(const MachOObjectFile *O,
return RE.r_word1 & 0xf;
}
-static unsigned
-getScatteredRelocationType(const MachO::any_relocation_info &RE) {
- return (RE.r_word0 >> 24) & 0xf;
-}
-
static uint32_t getSectionFlags(const MachOObjectFile *O,
DataRefImpl Sec) {
if (O->is64Bit()) {
@@ -422,12 +229,12 @@ static uint32_t getSectionFlags(const MachOObjectFile *O,
return Sect.flags;
}
-MachOObjectFile::MachOObjectFile(std::unique_ptr<MemoryBuffer> Object,
- bool IsLittleEndian, bool Is64bits,
- std::error_code &EC)
- : ObjectFile(getMachOType(IsLittleEndian, Is64bits), std::move(Object)),
+MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
+ bool Is64bits, std::error_code &EC)
+ : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
- DataInCodeLoadCmd(nullptr) {
+ DataInCodeLoadCmd(nullptr), DyldInfoLoadCmd(nullptr),
+ UuidLoadCmd(nullptr), HasPageZeroSegment(false) {
uint32_t LoadCommandCount = this->getHeader().ncmds;
MachO::LoadCommandType SegmentLoadType = is64Bit() ?
MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT;
@@ -435,20 +242,49 @@ MachOObjectFile::MachOObjectFile(std::unique_ptr<MemoryBuffer> Object,
MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo();
for (unsigned I = 0; ; ++I) {
if (Load.C.cmd == MachO::LC_SYMTAB) {
- assert(!SymtabLoadCmd && "Multiple symbol tables");
+ // Multiple symbol tables
+ if (SymtabLoadCmd) {
+ EC = object_error::parse_failed;
+ return;
+ }
SymtabLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_DYSYMTAB) {
- assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
+ // Multiple dynamic symbol tables
+ if (DysymtabLoadCmd) {
+ EC = object_error::parse_failed;
+ return;
+ }
DysymtabLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {
- assert(!DataInCodeLoadCmd && "Multiple data in code tables");
+ // Multiple data in code tables
+ if (DataInCodeLoadCmd) {
+ EC = object_error::parse_failed;
+ return;
+ }
DataInCodeLoadCmd = Load.Ptr;
+ } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
+ Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
+ // Multiple dyldinfo load commands
+ if (DyldInfoLoadCmd) {
+ EC = object_error::parse_failed;
+ return;
+ }
+ DyldInfoLoadCmd = Load.Ptr;
+ } else if (Load.C.cmd == MachO::LC_UUID) {
+ // Multiple UUID load commands
+ if (UuidLoadCmd) {
+ EC = object_error::parse_failed;
+ return;
+ }
+ UuidLoadCmd = Load.Ptr;
} else if (Load.C.cmd == SegmentLoadType) {
uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
for (unsigned J = 0; J < NumSections; ++J) {
const char *Sec = getSectionPtr(this, Load, J);
Sections.push_back(Sec);
}
+ if (isPageZeroSegment(this, Load))
+ HasPageZeroSegment = true;
} else if (Load.C.cmd == MachO::LC_LOAD_DYLIB ||
Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
@@ -474,7 +310,7 @@ void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
std::error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
StringRef &Res) const {
StringRef StringTable = getStringTableData();
- nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
const char *Start = &StringTable.data()[Entry.n_strx];
Res = StringRef(Start);
return object_error::success;
@@ -528,7 +364,7 @@ std::error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
uint32_t &Result) const {
uint32_t flags = getSymbolFlags(DRI);
if (flags & SymbolRef::SF_Common) {
- nlist_base Entry = getSymbolTableEntryBase(this, DRI);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI);
Result = 1 << MachO::GET_COMM_ALIGN(Entry.n_desc);
} else {
Result = 0;
@@ -542,7 +378,7 @@ std::error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
uint64_t EndOffset = 0;
uint8_t SectionIndex;
- nlist_base Entry = getSymbolTableEntryBase(this, DRI);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI);
uint64_t Value;
getSymbolAddress(DRI, Value);
if (Value == UnknownAddressOrSize) {
@@ -574,11 +410,10 @@ std::error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
EndOffset = Value;
}
if (!EndOffset) {
- uint64_t Size;
DataRefImpl Sec;
Sec.d.a = SectionIndex-1;
- getSectionSize(Sec, Size);
- getSectionAddress(Sec, EndOffset);
+ uint64_t Size = getSectionSize(Sec);
+ EndOffset = getSectionAddress(Sec);
EndOffset += Size;
}
Result = EndOffset - BeginOffset;
@@ -587,7 +422,7 @@ std::error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
SymbolRef::Type &Res) const {
- nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
uint8_t n_type = Entry.n_type;
Res = SymbolRef::ST_Other;
@@ -610,7 +445,7 @@ std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
}
uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
- nlist_base Entry = getSymbolTableEntryBase(this, DRI);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI);
uint8_t MachOType = Entry.n_type;
uint16_t MachOFlags = Entry.n_desc;
@@ -639,6 +474,9 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
if (MachOFlags & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF))
Result |= SymbolRef::SF_Weak;
+ if (MachOFlags & (MachO::N_ARM_THUMB_DEF))
+ Result |= SymbolRef::SF_Thumb;
+
if ((MachOType & MachO::N_TYPE) == MachO::N_ABS)
Result |= SymbolRef::SF_Absolute;
@@ -647,7 +485,7 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
section_iterator &Res) const {
- nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
uint8_t index = Entry.n_sect;
if (index == 0) {
@@ -672,29 +510,16 @@ std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec,
return object_error::success;
}
-std::error_code MachOObjectFile::getSectionAddress(DataRefImpl Sec,
- uint64_t &Res) const {
- if (is64Bit()) {
- MachO::section_64 Sect = getSection64(Sec);
- Res = Sect.addr;
- } else {
- MachO::section Sect = getSection(Sec);
- Res = Sect.addr;
- }
- return object_error::success;
+uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const {
+ if (is64Bit())
+ return getSection64(Sec).addr;
+ return getSection(Sec).addr;
}
-std::error_code MachOObjectFile::getSectionSize(DataRefImpl Sec,
- uint64_t &Res) const {
- if (is64Bit()) {
- MachO::section_64 Sect = getSection64(Sec);
- Res = Sect.size;
- } else {
- MachO::section Sect = getSection(Sec);
- Res = Sect.size;
- }
-
- return object_error::success;
+uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
+ if (is64Bit())
+ return getSection64(Sec).size;
+ return getSection(Sec).size;
}
std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
@@ -716,8 +541,7 @@ std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
return object_error::success;
}
-std::error_code MachOObjectFile::getSectionAlignment(DataRefImpl Sec,
- uint64_t &Res) const {
+uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const {
uint32_t Align;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
@@ -727,92 +551,70 @@ std::error_code MachOObjectFile::getSectionAlignment(DataRefImpl Sec,
Align = Sect.align;
}
- Res = uint64_t(1) << Align;
- return object_error::success;
+ return uint64_t(1) << Align;
}
-std::error_code MachOObjectFile::isSectionText(DataRefImpl Sec,
- bool &Res) const {
+bool MachOObjectFile::isSectionText(DataRefImpl Sec) const {
uint32_t Flags = getSectionFlags(this, Sec);
- Res = Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
- return object_error::success;
+ return Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
}
-std::error_code MachOObjectFile::isSectionData(DataRefImpl Sec,
- bool &Result) const {
+bool MachOObjectFile::isSectionData(DataRefImpl Sec) const {
uint32_t Flags = getSectionFlags(this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
- Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
- !(SectionType == MachO::S_ZEROFILL ||
- SectionType == MachO::S_GB_ZEROFILL);
- return object_error::success;
+ return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
+ !(SectionType == MachO::S_ZEROFILL ||
+ SectionType == MachO::S_GB_ZEROFILL);
}
-std::error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec,
- bool &Result) const {
+bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const {
uint32_t Flags = getSectionFlags(this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
- Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
- (SectionType == MachO::S_ZEROFILL ||
- SectionType == MachO::S_GB_ZEROFILL);
- return object_error::success;
+ return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
+ (SectionType == MachO::S_ZEROFILL ||
+ SectionType == MachO::S_GB_ZEROFILL);
}
-std::error_code
-MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
- bool &Result) const {
+bool MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sect) const {
// FIXME: Unimplemented.
- Result = true;
- return object_error::success;
+ return true;
}
-std::error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
- bool &Result) const {
+bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const {
// FIXME: Unimplemented.
- Result = false;
- return object_error::success;
+ return false;
}
-std::error_code MachOObjectFile::isSectionZeroInit(DataRefImpl Sec,
- bool &Res) const {
+bool MachOObjectFile::isSectionZeroInit(DataRefImpl Sec) const {
uint32_t Flags = getSectionFlags(this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
- Res = SectionType == MachO::S_ZEROFILL ||
- SectionType == MachO::S_GB_ZEROFILL;
- return object_error::success;
+ return SectionType == MachO::S_ZEROFILL ||
+ SectionType == MachO::S_GB_ZEROFILL;
}
-std::error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
- bool &Result) const {
+bool MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec) const {
// Consider using the code from isSectionText to look for __const sections.
// Alternately, emit S_ATTR_PURE_INSTRUCTIONS and/or S_ATTR_SOME_INSTRUCTIONS
// to use section attributes to distinguish code from data.
// FIXME: Unimplemented.
- Result = false;
- return object_error::success;
+ return false;
}
-std::error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
- DataRefImpl Symb,
- bool &Result) const {
+bool MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb) const {
SymbolRef::Type ST;
this->getSymbolType(Symb, ST);
- if (ST == SymbolRef::ST_Unknown) {
- Result = false;
- return object_error::success;
- }
+ if (ST == SymbolRef::ST_Unknown)
+ return false;
- uint64_t SectBegin, SectEnd;
- getSectionAddress(Sec, SectBegin);
- getSectionSize(Sec, SectEnd);
+ uint64_t SectBegin = getSectionAddress(Sec);
+ uint64_t SectEnd = getSectionSize(Sec);
SectEnd += SectBegin;
uint64_t SymAddr;
getSymbolAddress(Symb, SymAddr);
- Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
-
- return object_error::success;
+ return (SymAddr >= SectBegin) && (SymAddr < SectEnd);
}
relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const {
@@ -850,8 +652,7 @@ std::error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
DataRefImpl Sec;
Sec.d.a = Rel.d.a;
- uint64_t SecAddress;
- getSectionAddress(Sec, SecAddress);
+ uint64_t SecAddress = getSectionAddress(Sec);
Res = SecAddress + Offset;
return object_error::success;
}
@@ -956,7 +757,6 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
res = Table[RType];
break;
}
- case Triple::arm64:
case Triple::aarch64: {
static const char *const Table[] = {
"ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR",
@@ -1210,16 +1010,6 @@ std::error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
return object_error::success;
}
-std::error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData,
- LibraryRef &Res) const {
- report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
-std::error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
- StringRef &Res) const {
- report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
//
// guessLibraryShortName() is passed a name of a dynamic library and returns a
// guess on what the short name is. The name is returned as a substring of the
@@ -1368,31 +1158,26 @@ guess_qtx:
// It is passed the index (0-based) of the library as translated from
// GET_LIBRARY_ORDINAL (1-based).
std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
- StringRef &Res) {
+ StringRef &Res) const {
if (Index >= Libraries.size())
return object_error::parse_failed;
- MachO::dylib_command D =
- getStruct<MachO::dylib_command>(this, Libraries[Index]);
- if (D.dylib.name >= D.cmdsize)
- return object_error::parse_failed;
-
  // If the cache of LibrariesShortNames has not been built up yet, do that
  // first for all the Libraries.
if (LibrariesShortNames.size() == 0) {
for (unsigned i = 0; i < Libraries.size(); i++) {
MachO::dylib_command D =
getStruct<MachO::dylib_command>(this, Libraries[i]);
- if (D.dylib.name >= D.cmdsize) {
- LibrariesShortNames.push_back(StringRef());
- continue;
- }
+ if (D.dylib.name >= D.cmdsize)
+ return object_error::parse_failed;
const char *P = (const char *)(Libraries[i]) + D.dylib.name;
StringRef Name = StringRef(P);
+ if (D.dylib.name+Name.size() >= D.cmdsize)
+ return object_error::parse_failed;
StringRef Suffix;
bool isFramework;
StringRef shortName = guessLibraryShortName(Name, isFramework, Suffix);
- if (shortName == StringRef())
+ if (shortName.empty())
LibrariesShortNames.push_back(Name);
else
LibrariesShortNames.push_back(shortName);
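A minimal usage sketch for the accessor, now callable on a const object (the MachOObjectFile pointer Obj and the index 0 are placeholders for the example, not part of the patch):

  StringRef ShortName;
  if (!Obj->getLibraryShortNameByIndex(0, ShortName))   // no error
    outs() << "short name of first linked dylib: " << ShortName << "\n";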
@@ -1447,16 +1232,6 @@ section_iterator MachOObjectFile::section_end() const {
return section_iterator(SectionRef(DRI, this));
}
-library_iterator MachOObjectFile::needed_library_begin() const {
- // TODO: implement
- report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
-library_iterator MachOObjectFile::needed_library_end() const {
- // TODO: implement
- report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
uint8_t MachOObjectFile::getBytesInAddress() const {
return is64Bit() ? 8 : 4;
}
@@ -1472,17 +1247,10 @@ StringRef MachOObjectFile::getFileFormatName() const {
case llvm::MachO::CPU_TYPE_POWERPC:
return "Mach-O 32-bit ppc";
default:
- assert((CPUType & llvm::MachO::CPU_ARCH_ABI64) == 0 &&
- "64-bit object file when we're not 64-bit?");
return "Mach-O 32-bit unknown";
}
}
- // Make sure the cpu type has the correct mask.
- assert((CPUType & llvm::MachO::CPU_ARCH_ABI64)
- == llvm::MachO::CPU_ARCH_ABI64 &&
- "32-bit object file when we're 64-bit?");
-
switch (CPUType) {
case llvm::MachO::CPU_TYPE_X86_64:
return "Mach-O 64-bit x86-64";
@@ -1504,7 +1272,7 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
case llvm::MachO::CPU_TYPE_ARM:
return Triple::arm;
case llvm::MachO::CPU_TYPE_ARM64:
- return Triple::arm64;
+ return Triple::aarch64;
case llvm::MachO::CPU_TYPE_POWERPC:
return Triple::ppc;
case llvm::MachO::CPU_TYPE_POWERPC64:
@@ -1514,7 +1282,11 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
}
}
-Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType) {
+Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType,
+ const char **McpuDefault) {
+ if (McpuDefault)
+ *McpuDefault = nullptr;
+
switch (CPUType) {
case MachO::CPU_TYPE_I386:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
@@ -1538,15 +1310,25 @@ Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType) {
return Triple("armv4t-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V5TEJ:
return Triple("armv5e-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_XSCALE:
+ return Triple("xscale-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V6:
return Triple("armv6-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V6M:
+ if (McpuDefault)
+ *McpuDefault = "cortex-m0";
return Triple("armv6m-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7:
+ return Triple("armv7-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7EM:
+ if (McpuDefault)
+ *McpuDefault = "cortex-m4";
return Triple("armv7em-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7K:
return Triple("armv7k-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7M:
+ if (McpuDefault)
+ *McpuDefault = "cortex-m3";
return Triple("armv7m-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7S:
return Triple("armv7s-apple-darwin");
@@ -1579,50 +1361,102 @@ Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType) {
}
}
+Triple MachOObjectFile::getThumbArch(uint32_t CPUType, uint32_t CPUSubType,
+ const char **McpuDefault) {
+ if (McpuDefault)
+ *McpuDefault = nullptr;
+
+ switch (CPUType) {
+ case MachO::CPU_TYPE_ARM:
+ switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_ARM_V4T:
+ return Triple("thumbv4t-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V5TEJ:
+ return Triple("thumbv5e-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_XSCALE:
+ return Triple("xscale-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V6:
+ return Triple("thumbv6-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V6M:
+ if (McpuDefault)
+ *McpuDefault = "cortex-m0";
+ return Triple("thumbv6m-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7:
+ return Triple("thumbv7-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7EM:
+ if (McpuDefault)
+ *McpuDefault = "cortex-m4";
+ return Triple("thumbv7em-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7K:
+ return Triple("thumbv7k-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7M:
+ if (McpuDefault)
+ *McpuDefault = "cortex-m3";
+ return Triple("thumbv7m-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7S:
+ return Triple("thumbv7s-apple-darwin");
+ default:
+ return Triple();
+ }
+ default:
+ return Triple();
+ }
+}
+
+Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType,
+ const char **McpuDefault,
+ Triple *ThumbTriple) {
+ Triple T = MachOObjectFile::getArch(CPUType, CPUSubType, McpuDefault);
+ *ThumbTriple = MachOObjectFile::getThumbArch(CPUType, CPUSubType,
+ McpuDefault);
+ return T;
+}
+
Triple MachOObjectFile::getHostArch() {
return Triple(sys::getDefaultTargetTriple());
}
-Triple MachOObjectFile::getArch(StringRef ArchFlag) {
- if (ArchFlag == "i386")
- return Triple("i386-apple-darwin");
- else if (ArchFlag == "x86_64")
- return Triple("x86_64-apple-darwin");
- else if (ArchFlag == "x86_64h")
- return Triple("x86_64h-apple-darwin");
- else if (ArchFlag == "armv4t" || ArchFlag == "arm")
- return Triple("armv4t-apple-darwin");
- else if (ArchFlag == "armv5e")
- return Triple("armv5e-apple-darwin");
- else if (ArchFlag == "armv6")
- return Triple("armv6-apple-darwin");
- else if (ArchFlag == "armv6m")
- return Triple("armv6m-apple-darwin");
- else if (ArchFlag == "armv7em")
- return Triple("armv7em-apple-darwin");
- else if (ArchFlag == "armv7k")
- return Triple("armv7k-apple-darwin");
- else if (ArchFlag == "armv7k")
- return Triple("armv7m-apple-darwin");
- else if (ArchFlag == "armv7s")
- return Triple("armv7s-apple-darwin");
- else if (ArchFlag == "arm64")
- return Triple("arm64-apple-darwin");
- else if (ArchFlag == "ppc")
- return Triple("ppc-apple-darwin");
- else if (ArchFlag == "ppc64")
- return Triple("ppc64-apple-darwin");
- else
- return Triple();
+bool MachOObjectFile::isValidArch(StringRef ArchFlag) {
+ return StringSwitch<bool>(ArchFlag)
+ .Case("i386", true)
+ .Case("x86_64", true)
+ .Case("x86_64h", true)
+ .Case("armv4t", true)
+ .Case("arm", true)
+ .Case("armv5e", true)
+ .Case("armv6", true)
+ .Case("armv6m", true)
+ .Case("armv7em", true)
+ .Case("armv7k", true)
+ .Case("armv7m", true)
+ .Case("armv7s", true)
+ .Case("arm64", true)
+ .Case("ppc", true)
+ .Case("ppc64", true)
+ .Default(false);
}
unsigned MachOObjectFile::getArch() const {
return getArch(getCPUType(this));
}
-StringRef MachOObjectFile::getLoadName() const {
- // TODO: Implement
- report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
+Triple MachOObjectFile::getArch(const char **McpuDefault,
+ Triple *ThumbTriple) const {
+ Triple T;
+ if (is64Bit()) {
+ MachO::mach_header_64 H_64;
+ H_64 = getHeader64();
+ T = MachOObjectFile::getArch(H_64.cputype, H_64.cpusubtype, McpuDefault);
+ *ThumbTriple = MachOObjectFile::getThumbArch(H_64.cputype, H_64.cpusubtype,
+ McpuDefault);
+ } else {
+ MachO::mach_header H;
+ H = getHeader();
+ T = MachOObjectFile::getArch(H.cputype, H.cpusubtype, McpuDefault);
+ *ThumbTriple = MachOObjectFile::getThumbArch(H.cputype, H.cpusubtype,
+ McpuDefault);
+ }
+ return T;
}
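A hedged sketch of calling the new instance overload (Obj is an assumed, already-parsed MachOObjectFile pointer; nothing below is added by the patch itself):

  const char *McpuDefault = nullptr;
  Triple ThumbTriple;
  Triple ArchTriple = Obj->getArch(&McpuDefault, &ThumbTriple);
  // For an armv7m slice, ArchTriple would be armv7m-apple-darwin,
  // ThumbTriple thumbv7m-apple-darwin, and McpuDefault "cortex-m3".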
relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const {
@@ -1658,6 +1492,620 @@ dice_iterator MachOObjectFile::end_dices() const {
return dice_iterator(DiceRef(DRI, this));
}
+ExportEntry::ExportEntry(ArrayRef<uint8_t> T)
+ : Trie(T), Malformed(false), Done(false) { }
+
+void ExportEntry::moveToFirst() {
+ pushNode(0);
+ pushDownUntilBottom();
+}
+
+void ExportEntry::moveToEnd() {
+ Stack.clear();
+ Done = true;
+}
+
+bool ExportEntry::operator==(const ExportEntry &Other) const {
+ // Common case, one at end, other iterating from begin.
+ if (Done || Other.Done)
+ return (Done == Other.Done);
+ // Not equal if different stack sizes.
+ if (Stack.size() != Other.Stack.size())
+ return false;
+ // Not equal if different cumulative strings.
+ if (!CumulativeString.str().equals(Other.CumulativeString.str()))
+ return false;
+ // Equal if all nodes in both stacks match.
+ for (unsigned i=0; i < Stack.size(); ++i) {
+ if (Stack[i].Start != Other.Stack[i].Start)
+ return false;
+ }
+ return true;
+}
+
+uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) {
+ unsigned Count;
+ uint64_t Result = decodeULEB128(Ptr, &Count);
+ Ptr += Count;
+ if (Ptr > Trie.end()) {
+ Ptr = Trie.end();
+ Malformed = true;
+ }
+ return Result;
+}
+
+StringRef ExportEntry::name() const {
+ return CumulativeString.str();
+}
+
+uint64_t ExportEntry::flags() const {
+ return Stack.back().Flags;
+}
+
+uint64_t ExportEntry::address() const {
+ return Stack.back().Address;
+}
+
+uint64_t ExportEntry::other() const {
+ return Stack.back().Other;
+}
+
+StringRef ExportEntry::otherName() const {
+ const char* ImportName = Stack.back().ImportName;
+ if (ImportName)
+ return StringRef(ImportName);
+ return StringRef();
+}
+
+uint32_t ExportEntry::nodeOffset() const {
+ return Stack.back().Start - Trie.begin();
+}
+
+ExportEntry::NodeState::NodeState(const uint8_t *Ptr)
+ : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0),
+ ImportName(nullptr), ChildCount(0), NextChildIndex(0),
+ ParentStringLength(0), IsExportNode(false) {
+}
+
+void ExportEntry::pushNode(uint64_t offset) {
+ const uint8_t *Ptr = Trie.begin() + offset;
+ NodeState State(Ptr);
+ uint64_t ExportInfoSize = readULEB128(State.Current);
+ State.IsExportNode = (ExportInfoSize != 0);
+ const uint8_t* Children = State.Current + ExportInfoSize;
+ if (State.IsExportNode) {
+ State.Flags = readULEB128(State.Current);
+ if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
+ State.Address = 0;
+ State.Other = readULEB128(State.Current); // dylib ordinal
+ State.ImportName = reinterpret_cast<const char*>(State.Current);
+ } else {
+ State.Address = readULEB128(State.Current);
+ if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
+ State.Other = readULEB128(State.Current);
+ }
+ }
+ State.ChildCount = *Children;
+ State.Current = Children + 1;
+ State.NextChildIndex = 0;
+ State.ParentStringLength = CumulativeString.size();
+ Stack.push_back(State);
+}
+
+void ExportEntry::pushDownUntilBottom() {
+ while (Stack.back().NextChildIndex < Stack.back().ChildCount) {
+ NodeState &Top = Stack.back();
+ CumulativeString.resize(Top.ParentStringLength);
+ for (;*Top.Current != 0; Top.Current++) {
+ char C = *Top.Current;
+ CumulativeString.push_back(C);
+ }
+ Top.Current += 1;
+ uint64_t childNodeIndex = readULEB128(Top.Current);
+ Top.NextChildIndex += 1;
+ pushNode(childNodeIndex);
+ }
+ if (!Stack.back().IsExportNode) {
+ Malformed = true;
+ moveToEnd();
+ }
+}
+
+// We have a trie data structure and need a way to walk it that is compatible
+// with the C++ iterator model. The solution is a non-recursive depth first
+// traversal where the iterator contains a stack of parent nodes along with a
+// string that is the accumulation of all edge strings along the parent chain
+// to this point.
+//
+// There is one "export" node for each exported symbol. But because some
+// symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export
+// node may have child nodes too.
+//
+// The algorithm for moveNext() is to keep moving down the leftmost unvisited
+// child until hitting a node with no children (which is an export node or
+// else the trie is malformed). On the way down, each node is pushed on the
+// stack ivar. If there are no more ways down, it pops up one level and tries to go
+// down a sibling path until a childless node is reached.
+void ExportEntry::moveNext() {
+ if (Stack.empty() || !Stack.back().IsExportNode) {
+ Malformed = true;
+ moveToEnd();
+ return;
+ }
+
+ Stack.pop_back();
+ while (!Stack.empty()) {
+ NodeState &Top = Stack.back();
+ if (Top.NextChildIndex < Top.ChildCount) {
+ pushDownUntilBottom();
+ // Now at the next export node.
+ return;
+ } else {
+ if (Top.IsExportNode) {
+ // This node has no children but is itself an export node.
+ CumulativeString.resize(Top.ParentStringLength);
+ return;
+ }
+ Stack.pop_back();
+ }
+ }
+ Done = true;
+}
+
+iterator_range<export_iterator>
+MachOObjectFile::exports(ArrayRef<uint8_t> Trie) {
+ ExportEntry Start(Trie);
+ Start.moveToFirst();
+
+ ExportEntry Finish(Trie);
+ Finish.moveToEnd();
+
+ return iterator_range<export_iterator>(export_iterator(Start),
+ export_iterator(Finish));
+}
+
+iterator_range<export_iterator> MachOObjectFile::exports() const {
+ return exports(getDyldInfoExportsTrie());
+}
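To make the trie walk above concrete, a minimal consumption sketch (Obj is an assumed MachOObjectFile pointer for a binary that actually carries an exports trie):

  for (const object::ExportEntry &Entry : Obj->exports())
    outs() << Entry.name() << " flags=" << Entry.flags()
           << " address=" << Entry.address() << "\n";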
+
+
+MachORebaseEntry::MachORebaseEntry(ArrayRef<uint8_t> Bytes, bool is64Bit)
+ : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0),
+ RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0),
+ PointerSize(is64Bit ? 8 : 4), Malformed(false), Done(false) {}
+
+void MachORebaseEntry::moveToFirst() {
+ Ptr = Opcodes.begin();
+ moveNext();
+}
+
+void MachORebaseEntry::moveToEnd() {
+ Ptr = Opcodes.end();
+ RemainingLoopCount = 0;
+ Done = true;
+}
+
+void MachORebaseEntry::moveNext() {
+  // If in the middle of some loop, move to the next rebase entry in the loop.
+ SegmentOffset += AdvanceAmount;
+ if (RemainingLoopCount) {
+ --RemainingLoopCount;
+ return;
+ }
+ if (Ptr == Opcodes.end()) {
+ Done = true;
+ return;
+ }
+ bool More = true;
+ while (More && !Malformed) {
+ // Parse next opcode and set up next loop.
+ uint8_t Byte = *Ptr++;
+ uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK;
+ uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK;
+ switch (Opcode) {
+ case MachO::REBASE_OPCODE_DONE:
+ More = false;
+ Done = true;
+ moveToEnd();
+ DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n");
+ break;
+ case MachO::REBASE_OPCODE_SET_TYPE_IMM:
+ RebaseType = ImmValue;
+ DEBUG_WITH_TYPE(
+ "mach-o-rebase",
+ llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: "
+ << "RebaseType=" << (int) RebaseType << "\n");
+ break;
+ case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
+ SegmentIndex = ImmValue;
+ SegmentOffset = readULEB128();
+ DEBUG_WITH_TYPE(
+ "mach-o-rebase",
+ llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
+ << "SegmentIndex=" << SegmentIndex << ", "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << "\n");
+ break;
+ case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
+ SegmentOffset += readULEB128();
+ DEBUG_WITH_TYPE("mach-o-rebase",
+ llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
+ break;
+ case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
+ SegmentOffset += ImmValue * PointerSize;
+ DEBUG_WITH_TYPE("mach-o-rebase",
+ llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
+ break;
+ case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES:
+ AdvanceAmount = PointerSize;
+ RemainingLoopCount = ImmValue - 1;
+ DEBUG_WITH_TYPE(
+ "mach-o-rebase",
+ llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
+ return;
+ case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
+ AdvanceAmount = PointerSize;
+ RemainingLoopCount = readULEB128() - 1;
+ DEBUG_WITH_TYPE(
+ "mach-o-rebase",
+ llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
+ return;
+ case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
+ AdvanceAmount = readULEB128() + PointerSize;
+ RemainingLoopCount = 0;
+ DEBUG_WITH_TYPE(
+ "mach-o-rebase",
+ llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
+ return;
+ case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
+ RemainingLoopCount = readULEB128() - 1;
+ AdvanceAmount = readULEB128() + PointerSize;
+ DEBUG_WITH_TYPE(
+ "mach-o-rebase",
+ llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
+ return;
+ default:
+ Malformed = true;
+ }
+ }
+}
+
+uint64_t MachORebaseEntry::readULEB128() {
+ unsigned Count;
+ uint64_t Result = decodeULEB128(Ptr, &Count);
+ Ptr += Count;
+ if (Ptr > Opcodes.end()) {
+ Ptr = Opcodes.end();
+ Malformed = true;
+ }
+ return Result;
+}
+
+uint32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; }
+
+uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; }
+
+StringRef MachORebaseEntry::typeName() const {
+ switch (RebaseType) {
+ case MachO::REBASE_TYPE_POINTER:
+ return "pointer";
+ case MachO::REBASE_TYPE_TEXT_ABSOLUTE32:
+ return "text abs32";
+ case MachO::REBASE_TYPE_TEXT_PCREL32:
+ return "text rel32";
+ }
+ return "unknown";
+}
+
+bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const {
+ assert(Opcodes == Other.Opcodes && "compare iterators of different files");
+ return (Ptr == Other.Ptr) &&
+ (RemainingLoopCount == Other.RemainingLoopCount) &&
+ (Done == Other.Done);
+}
+
+iterator_range<rebase_iterator>
+MachOObjectFile::rebaseTable(ArrayRef<uint8_t> Opcodes, bool is64) {
+ MachORebaseEntry Start(Opcodes, is64);
+ Start.moveToFirst();
+
+ MachORebaseEntry Finish(Opcodes, is64);
+ Finish.moveToEnd();
+
+ return iterator_range<rebase_iterator>(rebase_iterator(Start),
+ rebase_iterator(Finish));
+}
+
+iterator_range<rebase_iterator> MachOObjectFile::rebaseTable() const {
+ return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit());
+}
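As with the export trie, the rebase opcodes are exposed as a forward-iterable range; a hedged sketch (Obj again an assumed MachOObjectFile pointer):

  for (const object::MachORebaseEntry &Entry : Obj->rebaseTable())
    outs() << "segment " << Entry.segmentIndex()
           << " offset " << Entry.segmentOffset()
           << " type " << Entry.typeName() << "\n";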
+
+
+MachOBindEntry::MachOBindEntry(ArrayRef<uint8_t> Bytes, bool is64Bit,
+ Kind BK)
+ : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0),
+ Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0),
+ BindType(0), PointerSize(is64Bit ? 8 : 4),
+ TableKind(BK), Malformed(false), Done(false) {}
+
+void MachOBindEntry::moveToFirst() {
+ Ptr = Opcodes.begin();
+ moveNext();
+}
+
+void MachOBindEntry::moveToEnd() {
+ Ptr = Opcodes.end();
+ RemainingLoopCount = 0;
+ Done = true;
+}
+
+void MachOBindEntry::moveNext() {
+  // If in the middle of some loop, move to the next binding in the loop.
+ SegmentOffset += AdvanceAmount;
+ if (RemainingLoopCount) {
+ --RemainingLoopCount;
+ return;
+ }
+ if (Ptr == Opcodes.end()) {
+ Done = true;
+ return;
+ }
+ bool More = true;
+ while (More && !Malformed) {
+ // Parse next opcode and set up next loop.
+ uint8_t Byte = *Ptr++;
+ uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK;
+ uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK;
+ int8_t SignExtended;
+ const uint8_t *SymStart;
+ switch (Opcode) {
+ case MachO::BIND_OPCODE_DONE:
+ if (TableKind == Kind::Lazy) {
+        // Lazy bindings have a DONE opcode between entries. It has to be
+        // skipped to advance to the next entry, unless this is the last entry.
+ bool NotLastEntry = false;
+ for (const uint8_t *P = Ptr; P < Opcodes.end(); ++P) {
+ if (*P) {
+ NotLastEntry = true;
+ }
+ }
+ if (NotLastEntry)
+ break;
+ }
+ More = false;
+ Done = true;
+ moveToEnd();
+ DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n");
+ break;
+ case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
+ Ordinal = ImmValue;
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: "
+ << "Ordinal=" << Ordinal << "\n");
+ break;
+ case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
+ Ordinal = readULEB128();
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: "
+ << "Ordinal=" << Ordinal << "\n");
+ break;
+ case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
+ if (ImmValue) {
+ SignExtended = MachO::BIND_OPCODE_MASK | ImmValue;
+ Ordinal = SignExtended;
+ } else
+ Ordinal = 0;
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: "
+ << "Ordinal=" << Ordinal << "\n");
+ break;
+ case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
+ Flags = ImmValue;
+ SymStart = Ptr;
+ while (*Ptr) {
+ ++Ptr;
+ }
+ SymbolName = StringRef(reinterpret_cast<const char*>(SymStart),
+ Ptr-SymStart);
+ ++Ptr;
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: "
+ << "SymbolName=" << SymbolName << "\n");
+ if (TableKind == Kind::Weak) {
+ if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
+ return;
+ }
+ break;
+ case MachO::BIND_OPCODE_SET_TYPE_IMM:
+ BindType = ImmValue;
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: "
+ << "BindType=" << (int)BindType << "\n");
+ break;
+ case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
+ Addend = readSLEB128();
+ if (TableKind == Kind::Lazy)
+ Malformed = true;
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: "
+ << "Addend=" << Addend << "\n");
+ break;
+ case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
+ SegmentIndex = ImmValue;
+ SegmentOffset = readULEB128();
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
+ << "SegmentIndex=" << SegmentIndex << ", "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << "\n");
+ break;
+ case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
+ SegmentOffset += readULEB128();
+ DEBUG_WITH_TYPE("mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
+ break;
+ case MachO::BIND_OPCODE_DO_BIND:
+ AdvanceAmount = PointerSize;
+ RemainingLoopCount = 0;
+ DEBUG_WITH_TYPE("mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_DO_BIND: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
+ return;
+ case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
+ AdvanceAmount = readULEB128() + PointerSize;
+ RemainingLoopCount = 0;
+ if (TableKind == Kind::Lazy)
+ Malformed = true;
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
+ return;
+ case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
+ AdvanceAmount = ImmValue * PointerSize + PointerSize;
+ RemainingLoopCount = 0;
+ if (TableKind == Kind::Lazy)
+ Malformed = true;
+ DEBUG_WITH_TYPE("mach-o-bind",
+ llvm::dbgs()
+ << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
+ return;
+ case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
+ RemainingLoopCount = readULEB128() - 1;
+ AdvanceAmount = readULEB128() + PointerSize;
+ if (TableKind == Kind::Lazy)
+ Malformed = true;
+ DEBUG_WITH_TYPE(
+ "mach-o-bind",
+ llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
+ return;
+ default:
+ Malformed = true;
+ }
+ }
+}
+
+uint64_t MachOBindEntry::readULEB128() {
+ unsigned Count;
+ uint64_t Result = decodeULEB128(Ptr, &Count);
+ Ptr += Count;
+ if (Ptr > Opcodes.end()) {
+ Ptr = Opcodes.end();
+ Malformed = true;
+ }
+ return Result;
+}
+
+int64_t MachOBindEntry::readSLEB128() {
+ unsigned Count;
+ int64_t Result = decodeSLEB128(Ptr, &Count);
+ Ptr += Count;
+ if (Ptr > Opcodes.end()) {
+ Ptr = Opcodes.end();
+ Malformed = true;
+ }
+ return Result;
+}
+
+
+uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; }
+
+uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; }
+
+StringRef MachOBindEntry::typeName() const {
+ switch (BindType) {
+ case MachO::BIND_TYPE_POINTER:
+ return "pointer";
+ case MachO::BIND_TYPE_TEXT_ABSOLUTE32:
+ return "text abs32";
+ case MachO::BIND_TYPE_TEXT_PCREL32:
+ return "text rel32";
+ }
+ return "unknown";
+}
+
+StringRef MachOBindEntry::symbolName() const { return SymbolName; }
+
+int64_t MachOBindEntry::addend() const { return Addend; }
+
+uint32_t MachOBindEntry::flags() const { return Flags; }
+
+int MachOBindEntry::ordinal() const { return Ordinal; }
+
+bool MachOBindEntry::operator==(const MachOBindEntry &Other) const {
+ assert(Opcodes == Other.Opcodes && "compare iterators of different files");
+ return (Ptr == Other.Ptr) &&
+ (RemainingLoopCount == Other.RemainingLoopCount) &&
+ (Done == Other.Done);
+}
+
+iterator_range<bind_iterator>
+MachOObjectFile::bindTable(ArrayRef<uint8_t> Opcodes, bool is64,
+ MachOBindEntry::Kind BKind) {
+ MachOBindEntry Start(Opcodes, is64, BKind);
+ Start.moveToFirst();
+
+ MachOBindEntry Finish(Opcodes, is64, BKind);
+ Finish.moveToEnd();
+
+ return iterator_range<bind_iterator>(bind_iterator(Start),
+ bind_iterator(Finish));
+}
+
+iterator_range<bind_iterator> MachOObjectFile::bindTable() const {
+ return bindTable(getDyldInfoBindOpcodes(), is64Bit(),
+ MachOBindEntry::Kind::Regular);
+}
+
+iterator_range<bind_iterator> MachOObjectFile::lazyBindTable() const {
+ return bindTable(getDyldInfoLazyBindOpcodes(), is64Bit(),
+ MachOBindEntry::Kind::Lazy);
+}
+
+iterator_range<bind_iterator> MachOObjectFile::weakBindTable() const {
+ return bindTable(getDyldInfoWeakBindOpcodes(), is64Bit(),
+ MachOBindEntry::Kind::Weak);
+}
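The three bind tables follow the same iteration pattern; for example (Obj is an assumed MachOObjectFile pointer, and the same loop works with lazyBindTable() or weakBindTable()):

  for (const object::MachOBindEntry &Entry : Obj->bindTable())
    outs() << Entry.symbolName() << " ordinal=" << Entry.ordinal()
           << " addend=" << Entry.addend() << "\n";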
+
StringRef
MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
@@ -1668,14 +2116,14 @@ ArrayRef<char>
MachOObjectFile::getSectionRawName(DataRefImpl Sec) const {
const section_base *Base =
reinterpret_cast<const section_base *>(Sections[Sec.d.a]);
- return ArrayRef<char>(Base->sectname);
+ return makeArrayRef(Base->sectname);
}
ArrayRef<char>
MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const {
const section_base *Base =
reinterpret_cast<const section_base *>(Sections[Sec.d.a]);
- return ArrayRef<char>(Base->segname);
+ return makeArrayRef(Base->segname);
}
bool
@@ -1710,6 +2158,11 @@ uint32_t MachOObjectFile::getScatteredRelocationValue(
return RE.r_word1;
}
+uint32_t MachOObjectFile::getScatteredRelocationType(
+ const MachO::any_relocation_info &RE) const {
+ return (RE.r_word0 >> 24) & 0xf;
+}
+
unsigned MachOObjectFile::getAnyRelocationAddress(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
@@ -1831,6 +2284,31 @@ MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::dylib_command>(this, L.Ptr);
}
+MachO::dyld_info_command
+MachOObjectFile::getDyldInfoLoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::dyld_info_command>(this, L.Ptr);
+}
+
+MachO::dylinker_command
+MachOObjectFile::getDylinkerCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::dylinker_command>(this, L.Ptr);
+}
+
+MachO::uuid_command
+MachOObjectFile::getUuidCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::uuid_command>(this, L.Ptr);
+}
+
+MachO::source_version_command
+MachOObjectFile::getSourceVersionCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::source_version_command>(this, L.Ptr);
+}
+
+MachO::entry_point_command
+MachOObjectFile::getEntryPointCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::entry_point_command>(this, L.Ptr);
+}
+
MachO::any_relocation_info
MachOObjectFile::getRelocation(DataRefImpl Rel) const {
@@ -1880,11 +2358,47 @@ MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset,
}
MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const {
- return getStruct<MachO::symtab_command>(this, SymtabLoadCmd);
+ if (SymtabLoadCmd)
+ return getStruct<MachO::symtab_command>(this, SymtabLoadCmd);
+
+  // If there is no SymtabLoadCmd, return a load command with zeroed fields.
+ MachO::symtab_command Cmd;
+ Cmd.cmd = MachO::LC_SYMTAB;
+ Cmd.cmdsize = sizeof(MachO::symtab_command);
+ Cmd.symoff = 0;
+ Cmd.nsyms = 0;
+ Cmd.stroff = 0;
+ Cmd.strsize = 0;
+ return Cmd;
}
MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const {
- return getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd);
+ if (DysymtabLoadCmd)
+ return getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd);
+
+  // If there is no DysymtabLoadCmd, return a load command with zeroed fields.
+ MachO::dysymtab_command Cmd;
+ Cmd.cmd = MachO::LC_DYSYMTAB;
+ Cmd.cmdsize = sizeof(MachO::dysymtab_command);
+ Cmd.ilocalsym = 0;
+ Cmd.nlocalsym = 0;
+ Cmd.iextdefsym = 0;
+ Cmd.nextdefsym = 0;
+ Cmd.iundefsym = 0;
+ Cmd.nundefsym = 0;
+ Cmd.tocoff = 0;
+ Cmd.ntoc = 0;
+ Cmd.modtaboff = 0;
+ Cmd.nmodtab = 0;
+ Cmd.extrefsymoff = 0;
+ Cmd.nextrefsyms = 0;
+ Cmd.indirectsymoff = 0;
+ Cmd.nindirectsyms = 0;
+ Cmd.extreloff = 0;
+ Cmd.nextrel = 0;
+ Cmd.locreloff = 0;
+ Cmd.nlocrel = 0;
+ return Cmd;
}
MachO::linkedit_data_command
@@ -1901,6 +2415,69 @@ MachOObjectFile::getDataInCodeLoadCommand() const {
return Cmd;
}
+ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
+ if (!DyldInfoLoadCmd)
+ return ArrayRef<uint8_t>();
+
+ MachO::dyld_info_command DyldInfo
+ = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
+ getPtr(this, DyldInfo.rebase_off));
+ return ArrayRef<uint8_t>(Ptr, DyldInfo.rebase_size);
+}
+
+ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const {
+ if (!DyldInfoLoadCmd)
+ return ArrayRef<uint8_t>();
+
+ MachO::dyld_info_command DyldInfo
+ = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
+ getPtr(this, DyldInfo.bind_off));
+ return ArrayRef<uint8_t>(Ptr, DyldInfo.bind_size);
+}
+
+ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
+ if (!DyldInfoLoadCmd)
+ return ArrayRef<uint8_t>();
+
+ MachO::dyld_info_command DyldInfo
+ = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
+ getPtr(this, DyldInfo.weak_bind_off));
+ return ArrayRef<uint8_t>(Ptr, DyldInfo.weak_bind_size);
+}
+
+ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
+ if (!DyldInfoLoadCmd)
+ return ArrayRef<uint8_t>();
+
+ MachO::dyld_info_command DyldInfo
+ = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
+ getPtr(this, DyldInfo.lazy_bind_off));
+ return ArrayRef<uint8_t>(Ptr, DyldInfo.lazy_bind_size);
+}
+
+ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
+ if (!DyldInfoLoadCmd)
+ return ArrayRef<uint8_t>();
+
+ MachO::dyld_info_command DyldInfo
+ = getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t*>(
+ getPtr(this, DyldInfo.export_off));
+ return ArrayRef<uint8_t>(Ptr, DyldInfo.export_size);
+}
+
+ArrayRef<uint8_t> MachOObjectFile::getUuid() const {
+ if (!UuidLoadCmd)
+ return ArrayRef<uint8_t>();
+ // Returning a pointer is fine as uuid doesn't need endian swapping.
+ const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid);
+ return ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Ptr), 16);
+}
+
StringRef MachOObjectFile::getStringTableData() const {
MachO::symtab_command S = getSymtabLoadCommand();
return getData().substr(S.stroff, S.strsize);
@@ -1908,7 +2485,7 @@ StringRef MachOObjectFile::getStringTableData() const {
bool MachOObjectFile::is64Bit() const {
return getType() == getMachOType(false, true) ||
- getType() == getMachOType(true, true);
+ getType() == getMachOType(true, true);
}
void MachOObjectFile::ReadULEB128s(uint64_t Index,
@@ -1923,30 +2500,28 @@ void MachOObjectFile::ReadULEB128s(uint64_t Index,
}
}
-const char *MachOObjectFile::getSectionPointer(DataRefImpl Rel) const {
- return Sections[Rel.d.a];
+bool MachOObjectFile::isRelocatableObject() const {
+ return getHeader().filetype == MachO::MH_OBJECT;
}
-ErrorOr<ObjectFile *>
-ObjectFile::createMachOObjectFile(std::unique_ptr<MemoryBuffer> &Buffer) {
- StringRef Magic = Buffer->getBuffer().slice(0, 4);
+ErrorOr<std::unique_ptr<MachOObjectFile>>
+ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer) {
+ StringRef Magic = Buffer.getBuffer().slice(0, 4);
std::error_code EC;
std::unique_ptr<MachOObjectFile> Ret;
if (Magic == "\xFE\xED\xFA\xCE")
- Ret.reset(new MachOObjectFile(std::move(Buffer), false, false, EC));
+ Ret.reset(new MachOObjectFile(Buffer, false, false, EC));
else if (Magic == "\xCE\xFA\xED\xFE")
- Ret.reset(new MachOObjectFile(std::move(Buffer), true, false, EC));
+ Ret.reset(new MachOObjectFile(Buffer, true, false, EC));
else if (Magic == "\xFE\xED\xFA\xCF")
- Ret.reset(new MachOObjectFile(std::move(Buffer), false, true, EC));
+ Ret.reset(new MachOObjectFile(Buffer, false, true, EC));
else if (Magic == "\xCF\xFA\xED\xFE")
- Ret.reset(new MachOObjectFile(std::move(Buffer), true, true, EC));
+ Ret.reset(new MachOObjectFile(Buffer, true, true, EC));
else
return object_error::parse_failed;
if (EC)
return EC;
- return Ret.release();
+ return std::move(Ret);
}
-} // end namespace object
-} // end namespace llvm
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index 4ba5d96..77aeb63 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -67,14 +67,13 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch(
}
}
-ErrorOr<std::unique_ptr<ObjectFile>>
+ErrorOr<std::unique_ptr<MachOObjectFile>>
MachOUniversalBinary::ObjectForArch::getAsObjectFile() const {
if (Parent) {
StringRef ParentData = Parent->getData();
StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
- std::string ObjectName = Parent->getFileName().str();
- std::unique_ptr<MemoryBuffer> ObjBuffer(
- MemoryBuffer::getMemBuffer(ObjectData, ObjectName, false));
+ StringRef ObjectName = Parent->getFileName();
+ MemoryBufferRef ObjBuffer(ObjectData, ObjectName);
return ObjectFile::createMachOObjectFile(ObjBuffer);
}
return object_error::parse_failed;
@@ -85,13 +84,12 @@ std::error_code MachOUniversalBinary::ObjectForArch::getAsArchive(
if (Parent) {
StringRef ParentData = Parent->getData();
StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
- std::string ObjectName = Parent->getFileName().str();
- std::unique_ptr<MemoryBuffer> ObjBuffer(
- MemoryBuffer::getMemBuffer(ObjectData, ObjectName, false));
- ErrorOr<Archive *> Obj = Archive::create(std::move(ObjBuffer));
+ StringRef ObjectName = Parent->getFileName();
+ MemoryBufferRef ObjBuffer(ObjectData, ObjectName);
+ ErrorOr<std::unique_ptr<Archive>> Obj = Archive::create(ObjBuffer);
if (std::error_code EC = Obj.getError())
return EC;
- Result.reset(Obj.get());
+ Result = std::move(Obj.get());
return object_error::success;
}
return object_error::parse_failed;
@@ -99,21 +97,20 @@ std::error_code MachOUniversalBinary::ObjectForArch::getAsArchive(
void MachOUniversalBinary::anchor() { }
-ErrorOr<MachOUniversalBinary *>
-MachOUniversalBinary::create(std::unique_ptr<MemoryBuffer> Source) {
+ErrorOr<std::unique_ptr<MachOUniversalBinary>>
+MachOUniversalBinary::create(MemoryBufferRef Source) {
std::error_code EC;
std::unique_ptr<MachOUniversalBinary> Ret(
- new MachOUniversalBinary(std::move(Source), EC));
+ new MachOUniversalBinary(Source, EC));
if (EC)
return EC;
- return Ret.release();
+ return std::move(Ret);
}
-MachOUniversalBinary::MachOUniversalBinary(std::unique_ptr<MemoryBuffer> Source,
+MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source,
std::error_code &ec)
- : Binary(Binary::ID_MachOUniversalBinary, std::move(Source)),
- NumberOfObjects(0) {
- if (Data->getBufferSize() < sizeof(MachO::fat_header)) {
+ : Binary(Binary::ID_MachOUniversalBinary, Source), NumberOfObjects(0) {
+ if (Data.getBufferSize() < sizeof(MachO::fat_header)) {
ec = object_error::invalid_file_type;
return;
}
@@ -142,7 +139,7 @@ static bool getCTMForArch(Triple::ArchType Arch, MachO::CPUType &CTM) {
}
}
-ErrorOr<std::unique_ptr<ObjectFile>>
+ErrorOr<std::unique_ptr<MachOObjectFile>>
MachOUniversalBinary::getObjectForArch(Triple::ArchType Arch) const {
MachO::CPUType CTM;
if (!getCTMForArch(Arch, CTM))
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 567d87f..84a5df0 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -19,12 +19,13 @@
using namespace llvm;
using namespace object;
-inline ObjectFile *unwrap(LLVMObjectFileRef OF) {
- return reinterpret_cast<ObjectFile*>(OF);
+inline OwningBinary<ObjectFile> *unwrap(LLVMObjectFileRef OF) {
+ return reinterpret_cast<OwningBinary<ObjectFile> *>(OF);
}
-inline LLVMObjectFileRef wrap(const ObjectFile *OF) {
- return reinterpret_cast<LLVMObjectFileRef>(const_cast<ObjectFile*>(OF));
+inline LLVMObjectFileRef wrap(const OwningBinary<ObjectFile> *OF) {
+ return reinterpret_cast<LLVMObjectFileRef>(
+ const_cast<OwningBinary<ObjectFile> *>(OF));
}
inline section_iterator *unwrap(LLVMSectionIteratorRef SI) {
@@ -60,10 +61,14 @@ wrap(const relocation_iterator *SI) {
// ObjectFile creation
LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
std::unique_ptr<MemoryBuffer> Buf(unwrap(MemBuf));
- ErrorOr<ObjectFile *> ObjOrErr(ObjectFile::createObjectFile(Buf));
- Buf.release();
- ObjectFile *Obj = ObjOrErr ? ObjOrErr.get() : nullptr;
- return wrap(Obj);
+ ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr(
+ ObjectFile::createObjectFile(Buf->getMemBufferRef()));
+ std::unique_ptr<ObjectFile> Obj;
+ if (!ObjOrErr)
+ return nullptr;
+
+ auto *Ret = new OwningBinary<ObjectFile>(std::move(ObjOrErr.get()), std::move(Buf));
+ return wrap(Ret);
}
void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
@@ -71,8 +76,9 @@ void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
}
// ObjectFile Section iterators
-LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) {
- section_iterator SI = unwrap(ObjectFile)->section_begin();
+LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef OF) {
+ OwningBinary<ObjectFile> *OB = unwrap(OF);
+ section_iterator SI = OB->getBinary()->section_begin();
return wrap(new section_iterator(SI));
}
@@ -80,9 +86,10 @@ void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) {
delete unwrap(SI);
}
-LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
- LLVMSectionIteratorRef SI) {
- return (*unwrap(SI) == unwrap(ObjectFile)->section_end()) ? 1 : 0;
+LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef OF,
+ LLVMSectionIteratorRef SI) {
+ OwningBinary<ObjectFile> *OB = unwrap(OF);
+ return (*unwrap(SI) == OB->getBinary()->section_end()) ? 1 : 0;
}
void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
@@ -96,8 +103,9 @@ void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
}
// ObjectFile Symbol iterators
-LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile) {
- symbol_iterator SI = unwrap(ObjectFile)->symbol_begin();
+LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef OF) {
+ OwningBinary<ObjectFile> *OB = unwrap(OF);
+ symbol_iterator SI = OB->getBinary()->symbol_begin();
return wrap(new symbol_iterator(SI));
}
@@ -105,9 +113,10 @@ void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI) {
delete unwrap(SI);
}
-LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
- LLVMSymbolIteratorRef SI) {
- return (*unwrap(SI) == unwrap(ObjectFile)->symbol_end()) ? 1 : 0;
+LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef OF,
+ LLVMSymbolIteratorRef SI) {
+ OwningBinary<ObjectFile> *OB = unwrap(OF);
+ return (*unwrap(SI) == OB->getBinary()->symbol_end()) ? 1 : 0;
}
void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) {
@@ -123,10 +132,7 @@ const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
}
uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
- uint64_t ret;
- if (std::error_code ec = (*unwrap(SI))->getSize(ret))
- report_fatal_error(ec.message());
- return ret;
+ return (*unwrap(SI))->getSize();
}
const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
@@ -137,18 +143,12 @@ const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
}
uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) {
- uint64_t ret;
- if (std::error_code ec = (*unwrap(SI))->getAddress(ret))
- report_fatal_error(ec.message());
- return ret;
+ return (*unwrap(SI))->getAddress();
}
LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI,
LLVMSymbolIteratorRef Sym) {
- bool ret;
- if (std::error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret))
- report_fatal_error(ec.message());
- return ret;
+ return (*unwrap(SI))->containsSymbol(**unwrap(Sym));
}
// Section Relocation iterators
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index f5488c6..fd78271 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -23,8 +25,8 @@ using namespace object;
void ObjectFile::anchor() { }
-ObjectFile::ObjectFile(unsigned int Type, std::unique_ptr<MemoryBuffer> Source)
- : SymbolicFile(Type, std::move(Source)) {}
+ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source)
+ : SymbolicFile(Type, Source) {}
std::error_code ObjectFile::printSymbolName(raw_ostream &OS,
DataRefImpl Symb) const {
@@ -45,11 +47,11 @@ section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const {
return section_iterator(SectionRef(Sec, this));
}
-ErrorOr<ObjectFile *>
-ObjectFile::createObjectFile(std::unique_ptr<MemoryBuffer> &Object,
- sys::fs::file_magic Type) {
+ErrorOr<std::unique_ptr<ObjectFile>>
+ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) {
+ StringRef Data = Object.getBuffer();
if (Type == sys::fs::file_magic::unknown)
- Type = sys::fs::identify_magic(Object->getBuffer());
+ Type = sys::fs::identify_magic(Data);
switch (Type) {
case sys::fs::file_magic::unknown:
@@ -58,6 +60,7 @@ ObjectFile::createObjectFile(std::unique_ptr<MemoryBuffer> &Object,
case sys::fs::file_magic::macho_universal_binary:
case sys::fs::file_magic::windows_resource:
return object_error::invalid_file_type;
+ case sys::fs::file_magic::elf:
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::elf_executable:
case sys::fs::file_magic::elf_shared_object:
@@ -77,15 +80,24 @@ ObjectFile::createObjectFile(std::unique_ptr<MemoryBuffer> &Object,
case sys::fs::file_magic::coff_object:
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
- return createCOFFObjectFile(std::move(Object));
+ return createCOFFObjectFile(Object);
}
llvm_unreachable("Unexpected Object File Type");
}
-ErrorOr<ObjectFile *> ObjectFile::createObjectFile(StringRef ObjectPath) {
+ErrorOr<OwningBinary<ObjectFile>>
+ObjectFile::createObjectFile(StringRef ObjectPath) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFile(ObjectPath);
if (std::error_code EC = FileOrErr.getError())
return EC;
- return createObjectFile(FileOrErr.get());
+ std::unique_ptr<MemoryBuffer> Buffer = std::move(FileOrErr.get());
+
+ ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
+ createObjectFile(Buffer->getMemBufferRef());
+ if (std::error_code EC = ObjOrErr.getError())
+ return EC;
+ std::unique_ptr<ObjectFile> Obj = std::move(ObjOrErr.get());
+
+ return OwningBinary<ObjectFile>(std::move(Obj), std::move(Buffer));
}
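A hedged sketch of the adjusted calling convention for path-based loads, which now hands back the MemoryBuffer together with the ObjectFile ("a.out" is a placeholder path):

  ErrorOr<OwningBinary<ObjectFile>> BinaryOrErr =
      ObjectFile::createObjectFile("a.out");
  if (std::error_code EC = BinaryOrErr.getError())
    report_fatal_error(EC.message());
  ObjectFile &Obj = *BinaryOrErr->getBinary();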
diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h
index 10e70ef..7dacbdf 100644
--- a/lib/Object/RecordStreamer.h
+++ b/lib/Object/RecordStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_OBJECT_RECORD_STREAMER
-#define LLVM_OBJECT_RECORD_STREAMER
+#ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H
+#define LLVM_LIB_OBJECT_RECORDSTREAMER_H
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp
index 30cf1a0..ffd3dbc 100644
--- a/lib/Object/SymbolicFile.cpp
+++ b/lib/Object/SymbolicFile.cpp
@@ -19,34 +19,31 @@
using namespace llvm;
using namespace object;
-SymbolicFile::SymbolicFile(unsigned int Type,
- std::unique_ptr<MemoryBuffer> Source)
- : Binary(Type, std::move(Source)) {}
+SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source)
+ : Binary(Type, Source) {}
SymbolicFile::~SymbolicFile() {}
-ErrorOr<SymbolicFile *>
-SymbolicFile::createSymbolicFile(std::unique_ptr<MemoryBuffer> &Object,
- sys::fs::file_magic Type,
- LLVMContext *Context) {
+ErrorOr<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
+ MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) {
+ StringRef Data = Object.getBuffer();
if (Type == sys::fs::file_magic::unknown)
- Type = sys::fs::identify_magic(Object->getBuffer());
+ Type = sys::fs::identify_magic(Data);
switch (Type) {
case sys::fs::file_magic::bitcode:
if (Context)
- return IRObjectFile::createIRObjectFile(std::move(Object), *Context);
+ return IRObjectFile::createIRObjectFile(Object, *Context);
// Fallthrough
case sys::fs::file_magic::unknown:
case sys::fs::file_magic::archive:
case sys::fs::file_magic::macho_universal_binary:
case sys::fs::file_magic::windows_resource:
return object_error::invalid_file_type;
- case sys::fs::file_magic::elf_relocatable:
+ case sys::fs::file_magic::elf:
case sys::fs::file_magic::elf_executable:
case sys::fs::file_magic::elf_shared_object:
case sys::fs::file_magic::elf_core:
- case sys::fs::file_magic::macho_object:
case sys::fs::file_magic::macho_executable:
case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib:
case sys::fs::file_magic::macho_core:
@@ -56,10 +53,26 @@ SymbolicFile::createSymbolicFile(std::unique_ptr<MemoryBuffer> &Object,
case sys::fs::file_magic::macho_bundle:
case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
case sys::fs::file_magic::macho_dsym_companion:
- case sys::fs::file_magic::coff_object:
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
return ObjectFile::createObjectFile(Object, Type);
+ case sys::fs::file_magic::elf_relocatable:
+ case sys::fs::file_magic::macho_object:
+ case sys::fs::file_magic::coff_object: {
+ ErrorOr<std::unique_ptr<ObjectFile>> Obj =
+ ObjectFile::createObjectFile(Object, Type);
+ if (!Obj || !Context)
+ return std::move(Obj);
+
+ ErrorOr<MemoryBufferRef> BCData =
+ IRObjectFile::findBitcodeInObject(*Obj->get());
+ if (!BCData)
+ return std::move(Obj);
+
+ return IRObjectFile::createIRObjectFile(
+ MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()),
+ *Context);
+ }
}
llvm_unreachable("Unexpected Binary File Type");
}
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index 5848bb1..041e552 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -54,6 +54,15 @@ Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
return nullptr;
}
+Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1) const {
+ // FIXME: Make search efficient?
+ for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1))
+ return *it;
+ return nullptr;
+}
+
Arg *ArgList::getLastArg(OptSpecifier Id) const {
Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index 6842f4d..dca02c1 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -264,6 +264,11 @@ InputArgList *OptTable::ParseArgs(const char *const *ArgBegin,
MissingArgIndex = MissingArgCount = 0;
unsigned Index = 0, End = ArgEnd - ArgBegin;
while (Index < End) {
+    // Ignore nullptrs; they are response files' EOL markers.
+ if (Args->getArgString(Index) == nullptr) {
+ ++Index;
+ continue;
+ }
// Ignore empty arguments (other things may still take them as arguments).
StringRef Str = Args->getArgString(Index);
if (Str == "") {
@@ -300,7 +305,18 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
llvm_unreachable("Invalid option with help text.");
case Option::MultiArgClass:
- llvm_unreachable("Cannot print metavar for this kind of option.");
+ if (const char *MetaVarName = Opts.getOptionMetaVar(Id)) {
+ // For MultiArgs, metavar is full list of all argument names.
+ Name += ' ';
+ Name += MetaVarName;
+ }
+ else {
+ // For MultiArgs<N>, if metavar not supplied, print <value> N times.
+ for (unsigned i=0, e=O.getNumArgs(); i< e; ++i) {
+ Name += " <value>";
+ }
+ }
+ break;
case Option::FlagClass:
break;
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index 10662a3..cdc63c3 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -169,7 +169,8 @@ Arg *Option::accept(const ArgList &Args,
return nullptr;
Index += 2;
- if (Index > Args.getNumInputArgStrings())
+ if (Index > Args.getNumInputArgStrings() ||
+ Args.getArgString(Index - 1) == nullptr)
return nullptr;
return new Arg(UnaliasedOption, Spelling,
@@ -200,7 +201,8 @@ Arg *Option::accept(const ArgList &Args,
// Otherwise it must be separate.
Index += 2;
- if (Index > Args.getNumInputArgStrings())
+ if (Index > Args.getNumInputArgStrings() ||
+ Args.getArgString(Index - 1) == nullptr)
return nullptr;
return new Arg(UnaliasedOption, Spelling,
@@ -209,7 +211,8 @@ Arg *Option::accept(const ArgList &Args,
case JoinedAndSeparateClass:
// Always matches.
Index += 2;
- if (Index > Args.getNumInputArgStrings())
+ if (Index > Args.getNumInputArgStrings() ||
+ Args.getArgString(Index - 1) == nullptr)
return nullptr;
return new Arg(UnaliasedOption, Spelling, Index - 2,
@@ -221,7 +224,8 @@ Arg *Option::accept(const ArgList &Args,
if (ArgSize != strlen(Args.getArgString(Index)))
return nullptr;
Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
- while (Index < Args.getNumInputArgStrings())
+ while (Index < Args.getNumInputArgStrings() &&
+ Args.getArgString(Index) != nullptr)
A->getValues().push_back(Args.getArgString(Index++));
return A;
}
diff --git a/lib/ProfileData/Android.mk b/lib/ProfileData/Android.mk
index f4b3fa9..1e1d5f2 100644
--- a/lib/ProfileData/Android.mk
+++ b/lib/ProfileData/Android.mk
@@ -1,9 +1,15 @@
LOCAL_PATH:= $(call my-dir)
profiledata_SRC_FILES := \
- InstrProf.cpp \
- InstrProfReader.cpp \
- InstrProfWriter.cpp \
+ CoverageMapping.cpp \
+ CoverageMappingReader.cpp \
+ CoverageMappingWriter.cpp \
+ InstrProf.cpp \
+ InstrProfReader.cpp \
+ InstrProfWriter.cpp \
+ SampleProf.cpp \
+ SampleProfReader.cpp \
+ SampleProfWriter.cpp
# For the host
# =====================================================
diff --git a/lib/ProfileData/CMakeLists.txt b/lib/ProfileData/CMakeLists.txt
index aefb16c..b9d472d 100644
--- a/lib/ProfileData/CMakeLists.txt
+++ b/lib/ProfileData/CMakeLists.txt
@@ -2,4 +2,10 @@ add_llvm_library(LLVMProfileData
InstrProf.cpp
InstrProfReader.cpp
InstrProfWriter.cpp
+ CoverageMapping.cpp
+ CoverageMappingWriter.cpp
+ CoverageMappingReader.cpp
+ SampleProf.cpp
+ SampleProfReader.cpp
+ SampleProfWriter.cpp
)
diff --git a/lib/ProfileData/CoverageMapping.cpp b/lib/ProfileData/CoverageMapping.cpp
new file mode 100644
index 0000000..0ccebc2
--- /dev/null
+++ b/lib/ProfileData/CoverageMapping.cpp
@@ -0,0 +1,475 @@
+//=-- CoverageMapping.cpp - Code coverage mapping support ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for clang's and llvm's instrumentation based
+// code coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/CoverageMapping.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ProfileData/CoverageMappingReader.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace coverage;
+
+#define DEBUG_TYPE "coverage-mapping"
+
+Counter CounterExpressionBuilder::get(const CounterExpression &E) {
+ auto It = ExpressionIndices.find(E);
+ if (It != ExpressionIndices.end())
+ return Counter::getExpression(It->second);
+ unsigned I = Expressions.size();
+ Expressions.push_back(E);
+ ExpressionIndices[E] = I;
+ return Counter::getExpression(I);
+}
+
+void CounterExpressionBuilder::extractTerms(
+ Counter C, int Sign, SmallVectorImpl<std::pair<unsigned, int>> &Terms) {
+ switch (C.getKind()) {
+ case Counter::Zero:
+ break;
+ case Counter::CounterValueReference:
+ Terms.push_back(std::make_pair(C.getCounterID(), Sign));
+ break;
+ case Counter::Expression:
+ const auto &E = Expressions[C.getExpressionID()];
+ extractTerms(E.LHS, Sign, Terms);
+ extractTerms(E.RHS, E.Kind == CounterExpression::Subtract ? -Sign : Sign,
+ Terms);
+ break;
+ }
+}
+
+Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
+ // Gather constant terms.
+ llvm::SmallVector<std::pair<unsigned, int>, 32> Terms;
+ extractTerms(ExpressionTree, +1, Terms);
+
+ // If there are no terms, this is just a zero. The algorithm below assumes at
+ // least one term.
+ if (Terms.size() == 0)
+ return Counter::getZero();
+
+ // Group the terms by counter ID.
+ std::sort(Terms.begin(), Terms.end(),
+ [](const std::pair<unsigned, int> &LHS,
+ const std::pair<unsigned, int> &RHS) {
+ return LHS.first < RHS.first;
+ });
+
+ // Combine terms by counter ID to eliminate counters that sum to zero.
+ auto Prev = Terms.begin();
+ for (auto I = Prev + 1, E = Terms.end(); I != E; ++I) {
+ if (I->first == Prev->first) {
+ Prev->second += I->second;
+ continue;
+ }
+ ++Prev;
+ *Prev = *I;
+ }
+ Terms.erase(++Prev, Terms.end());
+
+ Counter C;
+ // Create additions. We do this before subtractions to avoid constructs like
+ // ((0 - X) + Y), as opposed to (Y - X).
+ for (auto Term : Terms) {
+ if (Term.second <= 0)
+ continue;
+ for (int I = 0; I < Term.second; ++I)
+ if (C.isZero())
+ C = Counter::getCounter(Term.first);
+ else
+ C = get(CounterExpression(CounterExpression::Add, C,
+ Counter::getCounter(Term.first)));
+ }
+
+ // Create subtractions.
+ for (auto Term : Terms) {
+ if (Term.second >= 0)
+ continue;
+ for (int I = 0; I < -Term.second; ++I)
+ C = get(CounterExpression(CounterExpression::Subtract, C,
+ Counter::getCounter(Term.first)));
+ }
+ return C;
+}
+
+Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS) {
+ return simplify(get(CounterExpression(CounterExpression::Add, LHS, RHS)));
+}
+
+Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) {
+ return simplify(
+ get(CounterExpression(CounterExpression::Subtract, LHS, RHS)));
+}
+
+void CounterMappingContext::dump(const Counter &C,
+ llvm::raw_ostream &OS) const {
+ switch (C.getKind()) {
+ case Counter::Zero:
+ OS << '0';
+ return;
+ case Counter::CounterValueReference:
+ OS << '#' << C.getCounterID();
+ break;
+ case Counter::Expression: {
+ if (C.getExpressionID() >= Expressions.size())
+ return;
+ const auto &E = Expressions[C.getExpressionID()];
+ OS << '(';
+ dump(E.LHS, OS);
+ OS << (E.Kind == CounterExpression::Subtract ? " - " : " + ");
+ dump(E.RHS, OS);
+ OS << ')';
+ break;
+ }
+ }
+ if (CounterValues.empty())
+ return;
+ ErrorOr<int64_t> Value = evaluate(C);
+ if (!Value)
+ return;
+ OS << '[' << *Value << ']';
+}
+
+ErrorOr<int64_t> CounterMappingContext::evaluate(const Counter &C) const {
+ switch (C.getKind()) {
+ case Counter::Zero:
+ return 0;
+ case Counter::CounterValueReference:
+ if (C.getCounterID() >= CounterValues.size())
+ return std::make_error_code(std::errc::argument_out_of_domain);
+ return CounterValues[C.getCounterID()];
+ case Counter::Expression: {
+ if (C.getExpressionID() >= Expressions.size())
+ return std::make_error_code(std::errc::argument_out_of_domain);
+ const auto &E = Expressions[C.getExpressionID()];
+ ErrorOr<int64_t> LHS = evaluate(E.LHS);
+ if (!LHS)
+ return LHS;
+ ErrorOr<int64_t> RHS = evaluate(E.RHS);
+ if (!RHS)
+ return RHS;
+ return E.Kind == CounterExpression::Subtract ? *LHS - *RHS : *LHS + *RHS;
+ }
+ }
+ llvm_unreachable("Unhandled CounterKind");
+}
+
+void FunctionRecordIterator::skipOtherFiles() {
+ while (Current != Records.end() && !Filename.empty() &&
+ Filename != Current->Filenames[0])
+ ++Current;
+ if (Current == Records.end())
+ *this = FunctionRecordIterator();
+}
+
+ErrorOr<std::unique_ptr<CoverageMapping>>
+CoverageMapping::load(ObjectFileCoverageMappingReader &CoverageReader,
+ IndexedInstrProfReader &ProfileReader) {
+ auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
+
+ std::vector<uint64_t> Counts;
+ for (const auto &Record : CoverageReader) {
+ Counts.clear();
+ if (std::error_code EC = ProfileReader.getFunctionCounts(
+ Record.FunctionName, Record.FunctionHash, Counts)) {
+ if (EC != instrprof_error::hash_mismatch &&
+ EC != instrprof_error::unknown_function)
+ return EC;
+ Coverage->MismatchedFunctionCount++;
+ continue;
+ }
+
+ assert(Counts.size() != 0 && "Function's counts are empty");
+ FunctionRecord Function(Record.FunctionName, Record.Filenames,
+ Counts.front());
+ CounterMappingContext Ctx(Record.Expressions, Counts);
+ for (const auto &Region : Record.MappingRegions) {
+ ErrorOr<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
+ if (!ExecutionCount)
+ break;
+ Function.CountedRegions.push_back(CountedRegion(Region, *ExecutionCount));
+ }
+ if (Function.CountedRegions.size() != Record.MappingRegions.size()) {
+ Coverage->MismatchedFunctionCount++;
+ continue;
+ }
+
+ Coverage->Functions.push_back(std::move(Function));
+ }
+
+ return std::move(Coverage);
+}
+
+ErrorOr<std::unique_ptr<CoverageMapping>>
+CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename) {
+ auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename);
+ if (auto EC = CounterMappingBuff.getError())
+ return EC;
+ ObjectFileCoverageMappingReader CoverageReader(CounterMappingBuff.get());
+ if (auto EC = CoverageReader.readHeader())
+ return EC;
+ std::unique_ptr<IndexedInstrProfReader> ProfileReader;
+ if (auto EC = IndexedInstrProfReader::create(ProfileFilename, ProfileReader))
+ return EC;
+ return load(CoverageReader, *ProfileReader);
+}
+
+namespace {
+/// \brief Distributes functions into instantiation sets.
+///
+/// An instantiation set is a collection of functions that have the same source
+/// code, i.e., template function specializations.
+class FunctionInstantiationSetCollector {
+ typedef DenseMap<std::pair<unsigned, unsigned>,
+ std::vector<const FunctionRecord *>> MapT;
+ MapT InstantiatedFunctions;
+
+public:
+ void insert(const FunctionRecord &Function, unsigned FileID) {
+ auto I = Function.CountedRegions.begin(), E = Function.CountedRegions.end();
+ while (I != E && I->FileID != FileID)
+ ++I;
+ assert(I != E && "function does not cover the given file");
+ auto &Functions = InstantiatedFunctions[I->startLoc()];
+ Functions.push_back(&Function);
+ }
+
+ MapT::iterator begin() { return InstantiatedFunctions.begin(); }
+
+ MapT::iterator end() { return InstantiatedFunctions.end(); }
+};
+
+class SegmentBuilder {
+ std::vector<CoverageSegment> Segments;
+ SmallVector<const CountedRegion *, 8> ActiveRegions;
+
+ /// Start a segment with no count specified.
+ void startSegment(unsigned Line, unsigned Col) {
+ DEBUG(dbgs() << "Top level segment at " << Line << ":" << Col << "\n");
+ Segments.emplace_back(Line, Col, /*IsRegionEntry=*/false);
+ }
+
+ /// Start a segment with the given Region's count.
+ void startSegment(unsigned Line, unsigned Col, bool IsRegionEntry,
+ const CountedRegion &Region) {
+ if (Segments.empty())
+ Segments.emplace_back(Line, Col, IsRegionEntry);
+ CoverageSegment S = Segments.back();
+ // Avoid creating empty regions.
+ if (S.Line != Line || S.Col != Col) {
+ Segments.emplace_back(Line, Col, IsRegionEntry);
+ S = Segments.back();
+ }
+ DEBUG(dbgs() << "Segment at " << Line << ":" << Col);
+ // Set this region's count.
+ if (Region.Kind != coverage::CounterMappingRegion::SkippedRegion) {
+ DEBUG(dbgs() << " with count " << Region.ExecutionCount);
+ Segments.back().setCount(Region.ExecutionCount);
+ }
+ DEBUG(dbgs() << "\n");
+ }
+
+ /// Start a segment for the given region.
+ void startSegment(const CountedRegion &Region) {
+ startSegment(Region.LineStart, Region.ColumnStart, true, Region);
+ }
+
+ /// Pop the top region off the active stack, starting a new segment with
+ /// the containing Region's count.
+ void popRegion() {
+ const CountedRegion *Active = ActiveRegions.back();
+ unsigned Line = Active->LineEnd, Col = Active->ColumnEnd;
+ ActiveRegions.pop_back();
+ if (ActiveRegions.empty())
+ startSegment(Line, Col);
+ else
+ startSegment(Line, Col, false, *ActiveRegions.back());
+ }
+
+public:
+ /// Build a list of CoverageSegments from a sorted list of Regions.
+ std::vector<CoverageSegment> buildSegments(ArrayRef<CountedRegion> Regions) {
+ for (const auto &Region : Regions) {
+ // Pop any regions that end before this one starts.
+ while (!ActiveRegions.empty() &&
+ ActiveRegions.back()->endLoc() <= Region.startLoc())
+ popRegion();
+ if (Segments.size() && Segments.back().Line == Region.LineStart &&
+ Segments.back().Col == Region.ColumnStart) {
+ if (Region.Kind != coverage::CounterMappingRegion::SkippedRegion)
+ Segments.back().addCount(Region.ExecutionCount);
+ } else {
+ // Add this region to the stack.
+ ActiveRegions.push_back(&Region);
+ startSegment(Region);
+ }
+ }
+ // Pop any regions that are left in the stack.
+ while (!ActiveRegions.empty())
+ popRegion();
+ return Segments;
+ }
+};
+}
+
+std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
+ std::vector<StringRef> Filenames;
+ for (const auto &Function : getCoveredFunctions())
+ for (const auto &Filename : Function.Filenames)
+ Filenames.push_back(Filename);
+ std::sort(Filenames.begin(), Filenames.end());
+ auto Last = std::unique(Filenames.begin(), Filenames.end());
+ Filenames.erase(Last, Filenames.end());
+ return Filenames;
+}
+
+static Optional<unsigned> findMainViewFileID(StringRef SourceFile,
+ const FunctionRecord &Function) {
+ llvm::SmallVector<bool, 8> IsExpandedFile(Function.Filenames.size(), false);
+ llvm::SmallVector<bool, 8> FilenameEquivalence(Function.Filenames.size(),
+ false);
+ for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
+ if (SourceFile == Function.Filenames[I])
+ FilenameEquivalence[I] = true;
+ for (const auto &CR : Function.CountedRegions)
+ if (CR.Kind == CounterMappingRegion::ExpansionRegion &&
+ FilenameEquivalence[CR.FileID])
+ IsExpandedFile[CR.ExpandedFileID] = true;
+ for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
+ if (FilenameEquivalence[I] && !IsExpandedFile[I])
+ return I;
+ return None;
+}
+
+static Optional<unsigned> findMainViewFileID(const FunctionRecord &Function) {
+ llvm::SmallVector<bool, 8> IsExpandedFile(Function.Filenames.size(), false);
+ for (const auto &CR : Function.CountedRegions)
+ if (CR.Kind == CounterMappingRegion::ExpansionRegion)
+ IsExpandedFile[CR.ExpandedFileID] = true;
+ for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
+ if (!IsExpandedFile[I])
+ return I;
+ return None;
+}
+
+static SmallSet<unsigned, 8> gatherFileIDs(StringRef SourceFile,
+ const FunctionRecord &Function) {
+ SmallSet<unsigned, 8> IDs;
+ for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I)
+ if (SourceFile == Function.Filenames[I])
+ IDs.insert(I);
+ return IDs;
+}
+
+/// Sort a nested sequence of regions from a single file.
+template <class It> static void sortNestedRegions(It First, It Last) {
+ std::sort(First, Last,
+ [](const CountedRegion &LHS, const CountedRegion &RHS) {
+ if (LHS.startLoc() == RHS.startLoc())
+ // When LHS completely contains RHS, we sort LHS first.
+ return RHS.endLoc() < LHS.endLoc();
+ return LHS.startLoc() < RHS.startLoc();
+ });
+}
+
+static bool isExpansion(const CountedRegion &R, unsigned FileID) {
+ return R.Kind == CounterMappingRegion::ExpansionRegion && R.FileID == FileID;
+}
+
+CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) {
+ CoverageData FileCoverage(Filename);
+ std::vector<coverage::CountedRegion> Regions;
+
+ for (const auto &Function : Functions) {
+ auto MainFileID = findMainViewFileID(Filename, Function);
+ if (!MainFileID)
+ continue;
+ auto FileIDs = gatherFileIDs(Filename, Function);
+ for (const auto &CR : Function.CountedRegions)
+ if (FileIDs.count(CR.FileID)) {
+ Regions.push_back(CR);
+ if (isExpansion(CR, *MainFileID))
+ FileCoverage.Expansions.emplace_back(CR, Function);
+ }
+ }
+
+ sortNestedRegions(Regions.begin(), Regions.end());
+ FileCoverage.Segments = SegmentBuilder().buildSegments(Regions);
+
+ return FileCoverage;
+}
+
+std::vector<const FunctionRecord *>
+CoverageMapping::getInstantiations(StringRef Filename) {
+ FunctionInstantiationSetCollector InstantiationSetCollector;
+ for (const auto &Function : Functions) {
+ auto MainFileID = findMainViewFileID(Filename, Function);
+ if (!MainFileID)
+ continue;
+ InstantiationSetCollector.insert(Function, *MainFileID);
+ }
+
+ std::vector<const FunctionRecord *> Result;
+ for (const auto &InstantiationSet : InstantiationSetCollector) {
+ if (InstantiationSet.second.size() < 2)
+ continue;
+ for (auto Function : InstantiationSet.second)
+ Result.push_back(Function);
+ }
+ return Result;
+}
+
+CoverageData
+CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) {
+ auto MainFileID = findMainViewFileID(Function);
+ if (!MainFileID)
+ return CoverageData();
+
+ CoverageData FunctionCoverage(Function.Filenames[*MainFileID]);
+ std::vector<coverage::CountedRegion> Regions;
+ for (const auto &CR : Function.CountedRegions)
+ if (CR.FileID == *MainFileID) {
+ Regions.push_back(CR);
+ if (isExpansion(CR, *MainFileID))
+ FunctionCoverage.Expansions.emplace_back(CR, Function);
+ }
+
+ sortNestedRegions(Regions.begin(), Regions.end());
+ FunctionCoverage.Segments = SegmentBuilder().buildSegments(Regions);
+
+ return FunctionCoverage;
+}
+
+CoverageData
+CoverageMapping::getCoverageForExpansion(const ExpansionRecord &Expansion) {
+ CoverageData ExpansionCoverage(
+ Expansion.Function.Filenames[Expansion.FileID]);
+ std::vector<coverage::CountedRegion> Regions;
+ for (const auto &CR : Expansion.Function.CountedRegions)
+ if (CR.FileID == Expansion.FileID) {
+ Regions.push_back(CR);
+ if (isExpansion(CR, Expansion.FileID))
+ ExpansionCoverage.Expansions.emplace_back(CR, Expansion.Function);
+ }
+
+ sortNestedRegions(Regions.begin(), Regions.end());
+ ExpansionCoverage.Segments = SegmentBuilder().buildSegments(Regions);
+
+ return ExpansionCoverage;
+}
diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp
new file mode 100644
index 0000000..6476d28
--- /dev/null
+++ b/lib/ProfileData/CoverageMappingReader.cpp
@@ -0,0 +1,553 @@
+//=-- CoverageMappingReader.cpp - Code coverage mapping reader ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading coverage mapping data for
+// instrumentation-based coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/CoverageMappingReader.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+using namespace coverage;
+using namespace object;
+
+#define DEBUG_TYPE "coverage-mapping"
+
+void CoverageMappingIterator::increment() {
+ // Check if all the records were read or if an error occurred while reading
+ // the next record.
+ if (Reader->readNextRecord(Record))
+ *this = CoverageMappingIterator();
+}
+
+std::error_code RawCoverageReader::readULEB128(uint64_t &Result) {
+ if (Data.size() < 1)
+ return error(instrprof_error::truncated);
+ unsigned N = 0;
+ Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+ if (N > Data.size())
+ return error(instrprof_error::malformed);
+ Data = Data.substr(N);
+ return success();
+}
+
+std::error_code RawCoverageReader::readIntMax(uint64_t &Result,
+ uint64_t MaxPlus1) {
+ if (auto Err = readULEB128(Result))
+ return Err;
+ if (Result >= MaxPlus1)
+ return error(instrprof_error::malformed);
+ return success();
+}
+
+std::error_code RawCoverageReader::readSize(uint64_t &Result) {
+ if (auto Err = readULEB128(Result))
+ return Err;
+ // Sanity check the number.
+ if (Result > Data.size())
+ return error(instrprof_error::malformed);
+ return success();
+}
+
+std::error_code RawCoverageReader::readString(StringRef &Result) {
+ uint64_t Length;
+ if (auto Err = readSize(Length))
+ return Err;
+ Result = Data.substr(0, Length);
+ Data = Data.substr(Length);
+ return success();
+}
+
+std::error_code RawCoverageFilenamesReader::read() {
+ uint64_t NumFilenames;
+ if (auto Err = readSize(NumFilenames))
+ return Err;
+ for (size_t I = 0; I < NumFilenames; ++I) {
+ StringRef Filename;
+ if (auto Err = readString(Filename))
+ return Err;
+ Filenames.push_back(Filename);
+ }
+ return success();
+}
+
+std::error_code RawCoverageMappingReader::decodeCounter(unsigned Value,
+ Counter &C) {
+ auto Tag = Value & Counter::EncodingTagMask;
+ switch (Tag) {
+ case Counter::Zero:
+ C = Counter::getZero();
+ return success();
+ case Counter::CounterValueReference:
+ C = Counter::getCounter(Value >> Counter::EncodingTagBits);
+ return success();
+ default:
+ break;
+ }
+ Tag -= Counter::Expression;
+ switch (Tag) {
+ case CounterExpression::Subtract:
+ case CounterExpression::Add: {
+ auto ID = Value >> Counter::EncodingTagBits;
+ if (ID >= Expressions.size())
+ return error(instrprof_error::malformed);
+ Expressions[ID].Kind = CounterExpression::ExprKind(Tag);
+ C = Counter::getExpression(ID);
+ break;
+ }
+ default:
+ return error(instrprof_error::malformed);
+ }
+ return success();
+}
+
+std::error_code RawCoverageMappingReader::readCounter(Counter &C) {
+ uint64_t EncodedCounter;
+ if (auto Err =
+ readIntMax(EncodedCounter, std::numeric_limits<unsigned>::max()))
+ return Err;
+ if (auto Err = decodeCounter(EncodedCounter, C))
+ return Err;
+ return success();
+}
+
+static const unsigned EncodingExpansionRegionBit = 1
+ << Counter::EncodingTagBits;
+
+/// \brief Read the sub-array of regions for the given inferred file id.
+/// \param NumFileIDs the number of file ids that are defined for this
+/// function.
+std::error_code RawCoverageMappingReader::readMappingRegionsSubArray(
+ std::vector<CounterMappingRegion> &MappingRegions, unsigned InferredFileID,
+ size_t NumFileIDs) {
+ uint64_t NumRegions;
+ if (auto Err = readSize(NumRegions))
+ return Err;
+ unsigned LineStart = 0;
+ for (size_t I = 0; I < NumRegions; ++I) {
+ Counter C;
+ CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion;
+
+ // Read the combined counter + region kind.
+ uint64_t EncodedCounterAndRegion;
+ if (auto Err = readIntMax(EncodedCounterAndRegion,
+ std::numeric_limits<unsigned>::max()))
+ return Err;
+ unsigned Tag = EncodedCounterAndRegion & Counter::EncodingTagMask;
+ uint64_t ExpandedFileID = 0;
+ if (Tag != Counter::Zero) {
+ if (auto Err = decodeCounter(EncodedCounterAndRegion, C))
+ return Err;
+ } else {
+ // Is it an expansion region?
+ if (EncodedCounterAndRegion & EncodingExpansionRegionBit) {
+ Kind = CounterMappingRegion::ExpansionRegion;
+ ExpandedFileID = EncodedCounterAndRegion >>
+ Counter::EncodingCounterTagAndExpansionRegionTagBits;
+ if (ExpandedFileID >= NumFileIDs)
+ return error(instrprof_error::malformed);
+ } else {
+ switch (EncodedCounterAndRegion >>
+ Counter::EncodingCounterTagAndExpansionRegionTagBits) {
+ case CounterMappingRegion::CodeRegion:
+ // Don't do anything when we have a code region with a zero counter.
+ break;
+ case CounterMappingRegion::SkippedRegion:
+ Kind = CounterMappingRegion::SkippedRegion;
+ break;
+ default:
+ return error(instrprof_error::malformed);
+ }
+ }
+ }
+
+ // Read the source range.
+ uint64_t LineStartDelta, CodeBeforeColumnStart, NumLines, ColumnEnd;
+ if (auto Err =
+ readIntMax(LineStartDelta, std::numeric_limits<unsigned>::max()))
+ return Err;
+ if (auto Err = readULEB128(CodeBeforeColumnStart))
+ return Err;
+ bool HasCodeBefore = CodeBeforeColumnStart & 1;
+ uint64_t ColumnStart = CodeBeforeColumnStart >>
+ CounterMappingRegion::EncodingHasCodeBeforeBits;
+ if (ColumnStart > std::numeric_limits<unsigned>::max())
+ return error(instrprof_error::malformed);
+ if (auto Err = readIntMax(NumLines, std::numeric_limits<unsigned>::max()))
+ return Err;
+ if (auto Err = readIntMax(ColumnEnd, std::numeric_limits<unsigned>::max()))
+ return Err;
+ LineStart += LineStartDelta;
+ // Adjust the column locations for the empty regions that are supposed to
+ // cover whole lines. Those regions should be encoded with the
+ // column range (1 -> std::numeric_limits<unsigned>::max()), but because
+ // the encoded std::numeric_limits<unsigned>::max() is several bytes long,
+ // we set the column range to (0 -> 0) to ensure that the column start and
+ // column end take up one byte each.
+ // The std::numeric_limits<unsigned>::max() is used to represent a column
+ // position at the end of the line without knowing the length of that line.
+ if (ColumnStart == 0 && ColumnEnd == 0) {
+ ColumnStart = 1;
+ ColumnEnd = std::numeric_limits<unsigned>::max();
+ }
+
+ DEBUG({
+ dbgs() << "Counter in file " << InferredFileID << " " << LineStart << ":"
+ << ColumnStart << " -> " << (LineStart + NumLines) << ":"
+ << ColumnEnd << ", ";
+ if (Kind == CounterMappingRegion::ExpansionRegion)
+ dbgs() << "Expands to file " << ExpandedFileID;
+ else
+ CounterMappingContext(Expressions).dump(C, dbgs());
+ dbgs() << "\n";
+ });
+
+ MappingRegions.push_back(CounterMappingRegion(
+ C, InferredFileID, LineStart, ColumnStart, LineStart + NumLines,
+ ColumnEnd, HasCodeBefore, Kind));
+ MappingRegions.back().ExpandedFileID = ExpandedFileID;
+ }
+ return success();
+}
+
+std::error_code RawCoverageMappingReader::read(CoverageMappingRecord &Record) {
+
+ // Read the virtual file mapping.
+ llvm::SmallVector<unsigned, 8> VirtualFileMapping;
+ uint64_t NumFileMappings;
+ if (auto Err = readSize(NumFileMappings))
+ return Err;
+ for (size_t I = 0; I < NumFileMappings; ++I) {
+ uint64_t FilenameIndex;
+ if (auto Err = readIntMax(FilenameIndex, TranslationUnitFilenames.size()))
+ return Err;
+ VirtualFileMapping.push_back(FilenameIndex);
+ }
+
+ // Construct the files using unique filenames and virtual file mapping.
+ for (auto I : VirtualFileMapping) {
+ Filenames.push_back(TranslationUnitFilenames[I]);
+ }
+
+ // Read the expressions.
+ uint64_t NumExpressions;
+ if (auto Err = readSize(NumExpressions))
+ return Err;
+ // Create an array of dummy expressions that get the proper counters
+ // when the expressions are read, and the proper kinds when the counters
+ // are decoded.
+ Expressions.resize(
+ NumExpressions,
+ CounterExpression(CounterExpression::Subtract, Counter(), Counter()));
+ for (size_t I = 0; I < NumExpressions; ++I) {
+ if (auto Err = readCounter(Expressions[I].LHS))
+ return Err;
+ if (auto Err = readCounter(Expressions[I].RHS))
+ return Err;
+ }
+
+ // Read the mapping regions sub-arrays.
+ for (unsigned InferredFileID = 0, S = VirtualFileMapping.size();
+ InferredFileID < S; ++InferredFileID) {
+ if (auto Err = readMappingRegionsSubArray(MappingRegions, InferredFileID,
+ VirtualFileMapping.size()))
+ return Err;
+ }
+
+ // Set the counters for the expansion regions, i.e. the counter of an
+ // expansion region is the counter of the first region from the expanded
+ // file. Perform multiple passes to correctly propagate the counters
+ // through all the nested expansion regions.
+ SmallVector<CounterMappingRegion *, 8> FileIDExpansionRegionMapping;
+ FileIDExpansionRegionMapping.resize(VirtualFileMapping.size(), nullptr);
+ for (unsigned Pass = 1, S = VirtualFileMapping.size(); Pass < S; ++Pass) {
+ for (auto &R : MappingRegions) {
+ if (R.Kind != CounterMappingRegion::ExpansionRegion)
+ continue;
+ assert(!FileIDExpansionRegionMapping[R.ExpandedFileID]);
+ FileIDExpansionRegionMapping[R.ExpandedFileID] = &R;
+ }
+ for (auto &R : MappingRegions) {
+ if (FileIDExpansionRegionMapping[R.FileID]) {
+ FileIDExpansionRegionMapping[R.FileID]->Count = R.Count;
+ FileIDExpansionRegionMapping[R.FileID] = nullptr;
+ }
+ }
+ }
+
+ Record.FunctionName = FunctionName;
+ Record.Filenames = Filenames;
+ Record.Expressions = Expressions;
+ Record.MappingRegions = MappingRegions;
+ return success();
+}
+
+ObjectFileCoverageMappingReader::ObjectFileCoverageMappingReader(
+ StringRef FileName)
+ : CurrentRecord(0) {
+ auto File = llvm::object::ObjectFile::createObjectFile(FileName);
+ if (!File)
+ error(File.getError());
+ else
+ Object = std::move(File.get());
+}
+
+namespace {
+/// \brief The coverage mapping data for a single function.
+/// It points to the function's name.
+template <typename IntPtrT> struct CoverageMappingFunctionRecord {
+ IntPtrT FunctionNamePtr;
+ uint32_t FunctionNameSize;
+ uint32_t CoverageMappingSize;
+ uint64_t FunctionHash;
+};
+
+/// \brief The coverage mapping data for a single translation unit.
+/// It points to the array of function coverage mapping records and the encoded
+/// filenames array.
+template <typename IntPtrT> struct CoverageMappingTURecord {
+ uint32_t FunctionRecordsSize;
+ uint32_t FilenamesSize;
+ uint32_t CoverageMappingsSize;
+ uint32_t Version;
+};
+
+/// \brief A helper structure to access the data from a section
+/// in an object file.
+struct SectionData {
+ StringRef Data;
+ uint64_t Address;
+
+ std::error_code load(SectionRef &Section) {
+ if (auto Err = Section.getContents(Data))
+ return Err;
+ Address = Section.getAddress();
+ return instrprof_error::success;
+ }
+
+ std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) {
+ if (Pointer < Address)
+ return instrprof_error::malformed;
+ auto Offset = Pointer - Address;
+ if (Offset + Size > Data.size())
+ return instrprof_error::malformed;
+ Result = Data.substr(Pointer - Address, Size);
+ return instrprof_error::success;
+ }
+};
+}
+
+template <typename T>
+std::error_code readCoverageMappingData(
+ SectionData &ProfileNames, StringRef Data,
+ std::vector<ObjectFileCoverageMappingReader::ProfileMappingRecord> &Records,
+ std::vector<StringRef> &Filenames) {
+ llvm::DenseSet<T> UniqueFunctionMappingData;
+
+ // Read the records in the coverage data section.
+ while (!Data.empty()) {
+ if (Data.size() < sizeof(CoverageMappingTURecord<T>))
+ return instrprof_error::malformed;
+ auto TU = reinterpret_cast<const CoverageMappingTURecord<T> *>(Data.data());
+ Data = Data.substr(sizeof(CoverageMappingTURecord<T>));
+ switch (TU->Version) {
+ case CoverageMappingVersion1:
+ break;
+ default:
+ return instrprof_error::unsupported_version;
+ }
+ auto Version = CoverageMappingVersion(TU->Version);
+
+ // Get the function records.
+ auto FunctionRecords =
+ reinterpret_cast<const CoverageMappingFunctionRecord<T> *>(Data.data());
+ if (Data.size() <
+ sizeof(CoverageMappingFunctionRecord<T>) * TU->FunctionRecordsSize)
+ return instrprof_error::malformed;
+ Data = Data.substr(sizeof(CoverageMappingFunctionRecord<T>) *
+ TU->FunctionRecordsSize);
+
+ // Get the filenames.
+ if (Data.size() < TU->FilenamesSize)
+ return instrprof_error::malformed;
+ auto RawFilenames = Data.substr(0, TU->FilenamesSize);
+ Data = Data.substr(TU->FilenamesSize);
+ size_t FilenamesBegin = Filenames.size();
+ RawCoverageFilenamesReader Reader(RawFilenames, Filenames);
+ if (auto Err = Reader.read())
+ return Err;
+
+ // Get the coverage mappings.
+ if (Data.size() < TU->CoverageMappingsSize)
+ return instrprof_error::malformed;
+ auto CoverageMappings = Data.substr(0, TU->CoverageMappingsSize);
+ Data = Data.substr(TU->CoverageMappingsSize);
+
+ for (unsigned I = 0; I < TU->FunctionRecordsSize; ++I) {
+ auto &MappingRecord = FunctionRecords[I];
+
+ // Get the coverage mapping.
+ if (CoverageMappings.size() < MappingRecord.CoverageMappingSize)
+ return instrprof_error::malformed;
+ auto Mapping =
+ CoverageMappings.substr(0, MappingRecord.CoverageMappingSize);
+ CoverageMappings =
+ CoverageMappings.substr(MappingRecord.CoverageMappingSize);
+
+ // Ignore this record if we already have a record that points to the same
+ // function name.
+ // This is useful for ignoring the redundant records for functions
+ // with ODR linkage.
+ if (!UniqueFunctionMappingData.insert(MappingRecord.FunctionNamePtr)
+ .second)
+ continue;
+ StringRef FunctionName;
+ if (auto Err =
+ ProfileNames.get(MappingRecord.FunctionNamePtr,
+ MappingRecord.FunctionNameSize, FunctionName))
+ return Err;
+ Records.push_back(ObjectFileCoverageMappingReader::ProfileMappingRecord(
+ Version, FunctionName, MappingRecord.FunctionHash, Mapping,
+ FilenamesBegin, Filenames.size() - FilenamesBegin));
+ }
+ }
+
+ return instrprof_error::success;
+}
+
+static const char *TestingFormatMagic = "llvmcovmtestdata";
+
+static std::error_code decodeTestingFormat(StringRef Data,
+ SectionData &ProfileNames,
+ StringRef &CoverageMapping) {
+ Data = Data.substr(StringRef(TestingFormatMagic).size());
+ if (Data.size() < 1)
+ return instrprof_error::truncated;
+ unsigned N = 0;
+ auto ProfileNamesSize =
+ decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+ if (N > Data.size())
+ return instrprof_error::malformed;
+ Data = Data.substr(N);
+ if (Data.size() < 1)
+ return instrprof_error::truncated;
+ N = 0;
+ ProfileNames.Address =
+ decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+ if (N > Data.size())
+ return instrprof_error::malformed;
+ Data = Data.substr(N);
+ if (Data.size() < ProfileNamesSize)
+ return instrprof_error::malformed;
+ ProfileNames.Data = Data.substr(0, ProfileNamesSize);
+ CoverageMapping = Data.substr(ProfileNamesSize);
+ return instrprof_error::success;
+}
+
+ObjectFileCoverageMappingReader::ObjectFileCoverageMappingReader(
+ std::unique_ptr<MemoryBuffer> &ObjectBuffer, sys::fs::file_magic Type)
+ : CurrentRecord(0) {
+ if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) {
+ // This is a special format used for testing.
+ SectionData ProfileNames;
+ StringRef CoverageMapping;
+ if (auto Err = decodeTestingFormat(ObjectBuffer->getBuffer(), ProfileNames,
+ CoverageMapping)) {
+ error(Err);
+ return;
+ }
+ error(readCoverageMappingData<uint64_t>(ProfileNames, CoverageMapping,
+ MappingRecords, Filenames));
+ Object = OwningBinary<ObjectFile>(std::unique_ptr<ObjectFile>(),
+ std::move(ObjectBuffer));
+ return;
+ }
+
+ auto File = object::ObjectFile::createObjectFile(
+ ObjectBuffer->getMemBufferRef(), Type);
+ if (!File)
+ error(File.getError());
+ else
+ Object = OwningBinary<ObjectFile>(std::move(File.get()),
+ std::move(ObjectBuffer));
+}
+
+std::error_code ObjectFileCoverageMappingReader::readHeader() {
+ const ObjectFile *OF = Object.getBinary();
+ if (!OF)
+ return getError();
+ auto BytesInAddress = OF->getBytesInAddress();
+ if (BytesInAddress != 4 && BytesInAddress != 8)
+ return error(instrprof_error::malformed);
+
+ // Look for the sections that we are interested in.
+ int FoundSectionCount = 0;
+ SectionRef ProfileNames, CoverageMapping;
+ for (const auto &Section : OF->sections()) {
+ StringRef Name;
+ if (auto Err = Section.getName(Name))
+ return Err;
+ if (Name == "__llvm_prf_names") {
+ ProfileNames = Section;
+ } else if (Name == "__llvm_covmap") {
+ CoverageMapping = Section;
+ } else
+ continue;
+ ++FoundSectionCount;
+ }
+ if (FoundSectionCount != 2)
+ return error(instrprof_error::bad_header);
+
+ // Get the contents of the given sections.
+ StringRef Data;
+ if (auto Err = CoverageMapping.getContents(Data))
+ return Err;
+ SectionData ProfileNamesData;
+ if (auto Err = ProfileNamesData.load(ProfileNames))
+ return Err;
+
+ // Load the data from the found sections.
+ std::error_code Err;
+ if (BytesInAddress == 4)
+ Err = readCoverageMappingData<uint32_t>(ProfileNamesData, Data,
+ MappingRecords, Filenames);
+ else
+ Err = readCoverageMappingData<uint64_t>(ProfileNamesData, Data,
+ MappingRecords, Filenames);
+ if (Err)
+ return error(Err);
+
+ return success();
+}
+
+std::error_code
+ObjectFileCoverageMappingReader::readNextRecord(CoverageMappingRecord &Record) {
+ if (CurrentRecord >= MappingRecords.size())
+ return error(instrprof_error::eof);
+
+ FunctionsFilenames.clear();
+ Expressions.clear();
+ MappingRegions.clear();
+ auto &R = MappingRecords[CurrentRecord];
+ RawCoverageMappingReader Reader(
+ R.FunctionName, R.CoverageMapping,
+ makeArrayRef(Filenames.data() + R.FilenamesBegin, R.FilenamesSize),
+ FunctionsFilenames, Expressions, MappingRegions);
+ if (auto Err = Reader.read(Record))
+ return Err;
+ Record.FunctionHash = R.FunctionHash;
+ ++CurrentRecord;
+ return success();
+}
diff --git a/lib/ProfileData/CoverageMappingWriter.cpp b/lib/ProfileData/CoverageMappingWriter.cpp
new file mode 100644
index 0000000..6969c2a
--- /dev/null
+++ b/lib/ProfileData/CoverageMappingWriter.cpp
@@ -0,0 +1,187 @@
+//=-- CoverageMappingWriter.cpp - Code coverage mapping writer -------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing coverage mapping data for
+// instrumentation-based coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/CoverageMappingWriter.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+using namespace coverage;
+
+void CoverageFilenamesSectionWriter::write(raw_ostream &OS) {
+ encodeULEB128(Filenames.size(), OS);
+ for (const auto &Filename : Filenames) {
+ encodeULEB128(Filename.size(), OS);
+ OS << Filename;
+ }
+}
+
+namespace {
+/// \brief Gather only the expressions that are used by the mapping
+/// regions in this function.
+class CounterExpressionsMinimizer {
+ ArrayRef<CounterExpression> Expressions;
+ llvm::SmallVector<CounterExpression, 16> UsedExpressions;
+ std::vector<unsigned> AdjustedExpressionIDs;
+
+public:
+ void mark(Counter C) {
+ if (!C.isExpression())
+ return;
+ unsigned ID = C.getExpressionID();
+ AdjustedExpressionIDs[ID] = 1;
+ mark(Expressions[ID].LHS);
+ mark(Expressions[ID].RHS);
+ }
+
+ void gatherUsed(Counter C) {
+ if (!C.isExpression() || !AdjustedExpressionIDs[C.getExpressionID()])
+ return;
+ AdjustedExpressionIDs[C.getExpressionID()] = UsedExpressions.size();
+ const auto &E = Expressions[C.getExpressionID()];
+ UsedExpressions.push_back(E);
+ gatherUsed(E.LHS);
+ gatherUsed(E.RHS);
+ }
+
+ CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions,
+ ArrayRef<CounterMappingRegion> MappingRegions)
+ : Expressions(Expressions) {
+ AdjustedExpressionIDs.resize(Expressions.size(), 0);
+ for (const auto &I : MappingRegions)
+ mark(I.Count);
+ for (const auto &I : MappingRegions)
+ gatherUsed(I.Count);
+ }
+
+ ArrayRef<CounterExpression> getExpressions() const { return UsedExpressions; }
+
+ /// \brief Adjust the given counter to correctly transition from the old
+ /// expression ids to the new expression ids.
+ Counter adjust(Counter C) const {
+ if (C.isExpression())
+ C = Counter::getExpression(AdjustedExpressionIDs[C.getExpressionID()]);
+ return C;
+ }
+};
+}
+
+/// \brief Encode the counter.
+///
+/// The encoding uses the following format:
+/// Low 2 bits - Tag:
+/// Counter::Zero(0) - A Counter with kind Counter::Zero
+/// Counter::CounterValueReference(1) - A counter with kind
+/// Counter::CounterValueReference
+/// Counter::Expression(2) + CounterExpression::Subtract(0) -
+/// A counter with kind Counter::Expression and an expression
+/// with kind CounterExpression::Subtract
+/// Counter::Expression(2) + CounterExpression::Add(1) -
+/// A counter with kind Counter::Expression and an expression
+/// with kind CounterExpression::Add
+/// Remaining bits - Counter/Expression ID.
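+///
+/// For example, a reference to counter #5 encodes as (5 << 2) | 1 == 0x15,
+/// and a reference to expression 3 whose kind is Add encodes as
+/// (3 << 2) | (2 + 1) == 0xf.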
+static unsigned encodeCounter(ArrayRef<CounterExpression> Expressions,
+ Counter C) {
+ unsigned Tag = unsigned(C.getKind());
+ if (C.isExpression())
+ Tag += Expressions[C.getExpressionID()].Kind;
+ unsigned ID = C.getCounterID();
+ assert(ID <=
+ (std::numeric_limits<unsigned>::max() >> Counter::EncodingTagBits));
+ return Tag | (ID << Counter::EncodingTagBits);
+}
+
+static void writeCounter(ArrayRef<CounterExpression> Expressions, Counter C,
+ raw_ostream &OS) {
+ encodeULEB128(encodeCounter(Expressions, C), OS);
+}
+
+void CoverageMappingWriter::write(raw_ostream &OS) {
+ // Sort the regions in an ascending order by the file id and the starting
+ // location.
+ std::sort(MappingRegions.begin(), MappingRegions.end());
+
+ // Write out the fileid -> filename mapping.
+ encodeULEB128(VirtualFileMapping.size(), OS);
+ for (const auto &FileID : VirtualFileMapping)
+ encodeULEB128(FileID, OS);
+
+ // Write out the expressions.
+ CounterExpressionsMinimizer Minimizer(Expressions, MappingRegions);
+ auto MinExpressions = Minimizer.getExpressions();
+ encodeULEB128(MinExpressions.size(), OS);
+ for (const auto &E : MinExpressions) {
+ writeCounter(MinExpressions, Minimizer.adjust(E.LHS), OS);
+ writeCounter(MinExpressions, Minimizer.adjust(E.RHS), OS);
+ }
+
+ // Write out the mapping regions.
+ // Split the regions into subarrays where each region in a
+ // subarray has a fileID which is the index of that subarray.
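+ // For example, regions with file ids 0, 0 and 1 (after the sort above) are
+ // written as a sub-array of two regions followed by a sub-array of one.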
+ unsigned PrevLineStart = 0;
+ unsigned CurrentFileID = ~0U;
+ for (auto I = MappingRegions.begin(), E = MappingRegions.end(); I != E; ++I) {
+ if (I->FileID != CurrentFileID) {
+ // Ensure that all file ids have at least one mapping region.
+ assert(I->FileID == (CurrentFileID + 1));
+ // Find the number of regions with this file id.
+ unsigned RegionCount = 1;
+ for (auto J = I + 1; J != E && I->FileID == J->FileID; ++J)
+ ++RegionCount;
+ // Start a new region sub-array.
+ encodeULEB128(RegionCount, OS);
+
+ CurrentFileID = I->FileID;
+ PrevLineStart = 0;
+ }
+ Counter Count = Minimizer.adjust(I->Count);
+ switch (I->Kind) {
+ case CounterMappingRegion::CodeRegion:
+ writeCounter(MinExpressions, Count, OS);
+ break;
+ case CounterMappingRegion::ExpansionRegion: {
+ assert(Count.isZero());
+ assert(I->ExpandedFileID <=
+ (std::numeric_limits<unsigned>::max() >>
+ Counter::EncodingCounterTagAndExpansionRegionTagBits));
+ // Mark an expansion region with a set bit that follows the counter tag,
+ // and pack the expanded file id into the remaining bits.
+ unsigned EncodedTagExpandedFileID =
+ (1 << Counter::EncodingTagBits) |
+ (I->ExpandedFileID
+ << Counter::EncodingCounterTagAndExpansionRegionTagBits);
+ encodeULEB128(EncodedTagExpandedFileID, OS);
+ break;
+ }
+ case CounterMappingRegion::SkippedRegion:
+ assert(Count.isZero());
+ encodeULEB128(unsigned(I->Kind)
+ << Counter::EncodingCounterTagAndExpansionRegionTagBits,
+ OS);
+ break;
+ }
+ assert(I->LineStart >= PrevLineStart);
+ encodeULEB128(I->LineStart - PrevLineStart, OS);
+ uint64_t CodeBeforeColumnStart =
+ uint64_t(I->HasCodeBefore) |
+ (uint64_t(I->ColumnStart)
+ << CounterMappingRegion::EncodingHasCodeBeforeBits);
+ encodeULEB128(CodeBeforeColumnStart, OS);
+ assert(I->LineEnd >= I->LineStart);
+ encodeULEB128(I->LineEnd - I->LineStart, OS);
+ encodeULEB128(I->ColumnEnd, OS);
+ PrevLineStart = I->LineStart;
+ }
+ // Ensure that all file ids have at least one mapping region.
+ assert(CurrentFileID == (VirtualFileMapping.size() - 1));
+}
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 0121222..900dff9 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -14,6 +14,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -55,7 +56,8 @@ class InstrProfErrorCategoryType : public std::error_category {
};
}
+static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory;
+
const std::error_category &llvm::instrprof_category() {
- static InstrProfErrorCategoryType C;
- return C;
+ return *ErrorCategory;
}
diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h
index 7761704..c2bc46c 100644
--- a/lib/ProfileData/InstrProfIndexed.h
+++ b/lib/ProfileData/InstrProfIndexed.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
-#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
+#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
@@ -46,10 +46,10 @@ static inline uint64_t ComputeHash(HashT Type, StringRef K) {
}
const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
-const uint64_t Version = 1;
+const uint64_t Version = 2;
const HashT HashType = HashT::MD5;
}
} // end namespace llvm
-#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#endif
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index 0b36728..0160a64 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -21,32 +21,34 @@
using namespace llvm;
-static std::error_code
-setupMemoryBuffer(std::string Path, std::unique_ptr<MemoryBuffer> &Buffer) {
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+setupMemoryBuffer(std::string Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFileOrSTDIN(Path);
if (std::error_code EC = BufferOrErr.getError())
return EC;
- Buffer = std::move(BufferOrErr.get());
+ auto Buffer = std::move(BufferOrErr.get());
// Sanity check the file.
if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
return instrprof_error::too_large;
- return instrprof_error::success;
+ return std::move(Buffer);
}
static std::error_code initializeReader(InstrProfReader &Reader) {
return Reader.readHeader();
}
-std::error_code
-InstrProfReader::create(std::string Path,
- std::unique_ptr<InstrProfReader> &Result) {
+ErrorOr<std::unique_ptr<InstrProfReader>>
+InstrProfReader::create(std::string Path) {
// Set up the buffer to read.
- std::unique_ptr<MemoryBuffer> Buffer;
- if (std::error_code EC = setupMemoryBuffer(Path, Buffer))
+ auto BufferOrError = setupMemoryBuffer(Path);
+ if (std::error_code EC = BufferOrError.getError())
return EC;
+ auto Buffer = std::move(BufferOrError.get());
+ std::unique_ptr<InstrProfReader> Result;
+
// Create the reader.
if (IndexedInstrProfReader::hasFormat(*Buffer))
Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
@@ -58,16 +60,20 @@ InstrProfReader::create(std::string Path,
Result.reset(new TextInstrProfReader(std::move(Buffer)));
// Initialize the reader and return the result.
- return initializeReader(*Result);
+ if (std::error_code EC = initializeReader(*Result))
+ return EC;
+
+ return std::move(Result);
}
std::error_code IndexedInstrProfReader::create(
std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
// Set up the buffer to read.
- std::unique_ptr<MemoryBuffer> Buffer;
- if (std::error_code EC = setupMemoryBuffer(Path, Buffer))
+ auto BufferOrError = setupMemoryBuffer(Path);
+ if (std::error_code EC = BufferOrError.getError())
return EC;
+ auto Buffer = std::move(BufferOrError.get());
// Create the reader.
if (!IndexedInstrProfReader::hasFormat(*Buffer))
return instrprof_error::bad_magic;
@@ -83,8 +89,8 @@ void InstrProfIterator::Increment() {
}
std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
- // Skip empty lines.
- while (!Line.is_at_end() && Line->empty())
+ // Skip empty lines and comments.
+ while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
++Line;
// If we hit EOF while looking for a name, we're done.
if (Line.is_at_end())
@@ -190,6 +196,9 @@ RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
// garbage at the end of the file.
if (CurrentPos + sizeof(RawHeader) > End)
return instrprof_error::malformed;
+ // The writer ensures each profile is padded to start at an aligned address.
+ if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())
+ return instrprof_error::malformed;
// The magic should have the same byte order as in the previous header.
uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
if (Magic != swap(getRawMagic<IntPtrT>()))
@@ -307,8 +316,8 @@ std::error_code IndexedInstrProfReader::readHeader() {
return error(instrprof_error::bad_magic);
// Read the version.
- uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
- if (Version != IndexedInstrProf::Version)
+ FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur);
+ if (FormatVersion > IndexedInstrProf::Version)
return error(instrprof_error::unsupported_version);
// Read the maximal function count.
@@ -331,18 +340,31 @@ std::error_code IndexedInstrProfReader::readHeader() {
}
std::error_code IndexedInstrProfReader::getFunctionCounts(
- StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
- const auto &Iter = Index->find(FuncName);
+ StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) {
+ auto Iter = Index->find(FuncName);
if (Iter == Index->end())
return error(instrprof_error::unknown_function);
- // Found it. Make sure it's valid before giving back a result.
- const InstrProfRecord &Record = *Iter;
- if (Record.Name.empty())
- return error(instrprof_error::malformed);
- FuncHash = Record.Hash;
- Counts = Record.Counts;
- return success();
+ // Found it. Look for counters with the right hash.
+ ArrayRef<uint64_t> Data = (*Iter).Data;
+ uint64_t NumCounts;
+ for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
+ // The function hash comes first.
+ uint64_t FoundHash = Data[I++];
+ // In v1, the hash is followed directly by the counts (the rest of the
+ // data). In later versions, the number of counts comes next.
+ if (I == E)
+ return error(instrprof_error::malformed);
+ NumCounts = FormatVersion == 1 ? E - I : Data[I++];
+ // If we have more counts than data, this is bogus.
+ if (I + NumCounts > E)
+ return error(instrprof_error::malformed);
+ // Check for a match and fill the vector if there is one.
+ if (FoundHash == FuncHash) {
+ Counts = Data.slice(I, NumCounts);
+ return success();
+ }
+ }
+ return error(instrprof_error::hash_mismatch);
}
std::error_code
@@ -351,10 +373,30 @@ IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
if (RecordIterator == Index->data_end())
return error(instrprof_error::eof);
- // Read the next one.
- Record = *RecordIterator;
- ++RecordIterator;
- if (Record.Name.empty())
+ // Record the current function name.
+ Record.Name = (*RecordIterator).Name;
+
+ ArrayRef<uint64_t> Data = (*RecordIterator).Data;
+ // Valid data starts with a hash and either a count or the number of counts.
+ if (CurrentOffset + 1 > Data.size())
return error(instrprof_error::malformed);
+ // First we have a function hash.
+ Record.Hash = Data[CurrentOffset++];
+ // In version 1 we knew the number of counters implicitly, but in newer
+ // versions we store the number of counters next.
+ uint64_t NumCounts =
+ FormatVersion == 1 ? Data.size() - CurrentOffset : Data[CurrentOffset++];
+ if (CurrentOffset + NumCounts > Data.size())
+ return error(instrprof_error::malformed);
+ // And finally the counts themselves.
+ Record.Counts = Data.slice(CurrentOffset, NumCounts);
+
+ // If we've exhausted this function's data, increment the record.
+ CurrentOffset += NumCounts;
+ if (CurrentOffset == Data.size()) {
+ ++RecordIterator;
+ CurrentOffset = 0;
+ }
+
return success();
}
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index e55c299..ad1b876 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -45,7 +45,9 @@ public:
offset_type N = K.size();
LE.write<offset_type>(N);
- offset_type M = (1 + V->Counts.size()) * sizeof(uint64_t);
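+ // Each counter group is emitted as the function hash, the number of
+ // counters, and the counters themselves, all as 64-bit values, hence
+ // 2 + size() words per group.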
+ offset_type M = 0;
+ for (const auto &Counts : *V)
+ M += (2 + Counts.second.size()) * sizeof(uint64_t);
LE.write<offset_type>(M);
return std::make_pair(N, M);
@@ -59,9 +61,13 @@ public:
offset_type) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
- LE.write<uint64_t>(V->Hash);
- for (uint64_t I : V->Counts)
- LE.write<uint64_t>(I);
+
+ for (const auto &Counts : *V) {
+ LE.write<uint64_t>(Counts.first);
+ LE.write<uint64_t>(Counts.second.size());
+ for (uint64_t I : Counts.second)
+ LE.write<uint64_t>(I);
+ }
}
};
}
@@ -70,41 +76,43 @@ std::error_code
InstrProfWriter::addFunctionCounts(StringRef FunctionName,
uint64_t FunctionHash,
ArrayRef<uint64_t> Counters) {
- auto Where = FunctionData.find(FunctionName);
- if (Where == FunctionData.end()) {
- // If this is the first time we've seen this function, just add it.
- auto &Data = FunctionData[FunctionName];
- Data.Hash = FunctionHash;
- Data.Counts = Counters;
+ auto &CounterData = FunctionData[FunctionName];
+
+ auto Where = CounterData.find(FunctionHash);
+ if (Where == CounterData.end()) {
+ // We've never seen a function with this name and hash; add it.
+ CounterData[FunctionHash] = Counters;
+ // We keep track of the max function count as we go for simplicity.
+ if (Counters[0] > MaxFunctionCount)
+ MaxFunctionCount = Counters[0];
return instrprof_error::success;
}
- auto &Data = Where->getValue();
- // We can only add to existing functions if they match, so we check the hash
- // and number of counters.
- if (Data.Hash != FunctionHash)
- return instrprof_error::hash_mismatch;
- if (Data.Counts.size() != Counters.size())
+ // We're updating a function we've seen before.
+ auto &FoundCounters = Where->second;
+ // If the number of counters doesn't match, we either have bad data or a hash
+ // collision.
+ if (FoundCounters.size() != Counters.size())
return instrprof_error::count_mismatch;
- // These match, add up the counters.
+
for (size_t I = 0, E = Counters.size(); I < E; ++I) {
- if (Data.Counts[I] + Counters[I] < Data.Counts[I])
+ if (FoundCounters[I] + Counters[I] < FoundCounters[I])
return instrprof_error::counter_overflow;
- Data.Counts[I] += Counters[I];
+ FoundCounters[I] += Counters[I];
}
+ // We keep track of the max function count as we go for simplicity.
+ if (FoundCounters[0] > MaxFunctionCount)
+ MaxFunctionCount = FoundCounters[0];
+
return instrprof_error::success;
}
void InstrProfWriter::write(raw_fd_ostream &OS) {
OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
- uint64_t MaxFunctionCount = 0;
// Populate the hash table generator.
- for (const auto &I : FunctionData) {
+ for (const auto &I : FunctionData)
Generator.insert(I.getKey(), &I.getValue());
- if (I.getValue().Counts[0] > MaxFunctionCount)
- MaxFunctionCount = I.getValue().Counts[0];
- }
using namespace llvm::support;
endian::Writer<little> LE(OS);
diff --git a/lib/ProfileData/LLVMBuild.txt b/lib/ProfileData/LLVMBuild.txt
index 0a8cbe3..a7f471f 100644
--- a/lib/ProfileData/LLVMBuild.txt
+++ b/lib/ProfileData/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = ProfileData
parent = Libraries
-required_libraries = Support
+required_libraries = Core Support Object
diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp
new file mode 100644
index 0000000..920c48a
--- /dev/null
+++ b/lib/ProfileData/SampleProf.cpp
@@ -0,0 +1,51 @@
+//=-- SampleProf.cpp - Sample profiling format support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common definitions used in the reading and writing of
+// sample profile data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+namespace {
+class SampleProfErrorCategoryType : public std::error_category {
+ const char *name() const LLVM_NOEXCEPT override { return "llvm.sampleprof"; }
+ std::string message(int IE) const override {
+ sampleprof_error E = static_cast<sampleprof_error>(IE);
+ switch (E) {
+ case sampleprof_error::success:
+ return "Success";
+ case sampleprof_error::bad_magic:
+ return "Invalid file format (bad magic)";
+ case sampleprof_error::unsupported_version:
+ return "Unsupported format version";
+ case sampleprof_error::too_large:
+ return "Too much profile data";
+ case sampleprof_error::truncated:
+ return "Truncated profile data";
+ case sampleprof_error::malformed:
+ return "Malformed profile data";
+ case sampleprof_error::unrecognized_format:
+ return "Unrecognized profile encoding format";
+ }
+ llvm_unreachable("A value of sampleprof_error has no message.");
+ }
+};
+}
+
+static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory;
+
+const std::error_category &llvm::sampleprof_category() {
+ return *ErrorCategory;
+}
diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp
new file mode 100644
index 0000000..b39bfd6
--- /dev/null
+++ b/lib/ProfileData/SampleProfReader.cpp
@@ -0,0 +1,399 @@
+//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that reads LLVM sample profiles. It
+// supports two file formats: text and binary. The textual representation
+// is useful for debugging and testing purposes. The binary representation
+// is more compact, resulting in smaller file sizes. However, they can
+// both be used interchangeably.
+//
+// NOTE: If you are making changes to the file format, please remember
+// to document them in the Clang documentation at
+// tools/clang/docs/UsersManual.rst.
+//
+// Text format
+// -----------
+//
+// Sample profiles are written as ASCII text. The file is divided into
+// sections, which correspond to each of the functions executed at runtime.
+// Each section has the following format
+//
+// function1:total_samples:total_head_samples
+// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
+// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
+// ...
+// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
+//
+// The file may contain blank lines between sections and within a
+// section. However, the spacing within a single line is fixed. Additional
+// spaces will result in an error while reading the file.
+//
+// Function names must be mangled in order for the profile loader to
+// match them in the current translation unit. The two numbers in the
+// function header specify how many total samples were accumulated in the
+// function (first number), and the total number of samples accumulated
+// in the prologue of the function (second number). This head sample
+// count provides an indicator of how frequently the function is invoked.
+//
+// Each sampled line may contain several items. Some are optional (marked
+// below):
+//
+// a. Source line offset. This number represents the line number
+// in the function where the sample was collected. The line number is
+// always relative to the line where the symbol of the function is
+// defined. So, if the function has its header at line 280, the offset
+// 13 is at line 293 in the file.
+//
+// Note that this offset should never be a negative number. This could
+// happen in cases like macros. The debug machinery will register the
+// line number at the point of macro expansion. So, if the macro was
+// expanded in a line before the start of the function, the profile
+// converter should emit a 0 as the offset (this means that the optimizers
+// will not be able to associate a meaningful weight to the instructions
+// in the macro).
+//
+// b. [OPTIONAL] Discriminator. This is used if the sampled program
+// was compiled with DWARF discriminator support
+// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
+// DWARF discriminators are unsigned integer values that allow the
+// compiler to distinguish between multiple execution paths on the
+// same source line location.
+//
+// For example, consider the line of code ``if (cond) foo(); else bar();``.
+// If the predicate ``cond`` is true 80% of the time, then the edge
+// into function ``foo`` should be considered to be taken most of the
+// time. But both calls to ``foo`` and ``bar`` are at the same source
+// line, so a sample count at that line is not sufficient. The
+// compiler needs to know which part of that line is taken more
+// frequently.
+//
+// This is what discriminators provide. In this case, the calls to
+// ``foo`` and ``bar`` will be at the same line, but will have
+// different discriminator values. This allows the compiler to correctly
+// set edge weights into ``foo`` and ``bar``.
+//
+// c. Number of samples. This is an integer quantity representing the
+// number of samples collected by the profiler at this source
+// location.
+//
+// d. [OPTIONAL] Potential call targets and samples. If present, this
+// line contains a call instruction. This models both direct and
+// indirect calls. Each called target is listed together with the
+// number of samples. For example,
+//
+// 130: 7 foo:3 bar:2 baz:7
+//
+// The above means that at relative line offset 130 there is a call
+// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
+// with ``baz()`` being the relatively more frequently called target.
+//
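+// Putting it all together, a small (purely illustrative) profile section
+// could look like this:
+//
+// _Z3foov:1540:10
+// 1: 100
+// 2.1: 300 _Z3barv:200 _Z3bazv:100
+//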
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+
+using namespace llvm::sampleprof;
+using namespace llvm;
+
+/// \brief Print the samples collected for a function on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+void FunctionSamples::print(raw_ostream &OS) {
+ OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
+ << " sampled lines\n";
+ for (const auto &SI : BodySamples) {
+ LineLocation Loc = SI.first;
+ const SampleRecord &Sample = SI.second;
+ OS << "\tline offset: " << Loc.LineOffset
+ << ", discriminator: " << Loc.Discriminator
+ << ", number of samples: " << Sample.getSamples();
+ if (Sample.hasCalls()) {
+ OS << ", calls:";
+ for (const auto &I : Sample.getCallTargets())
+ OS << " " << I.first() << ":" << I.second;
+ }
+ OS << "\n";
+ }
+ OS << "\n";
+}
+
+/// \brief Dump the function profile for \p FName.
+///
+/// \param FName Name of the function to print.
+/// \param OS Stream to emit the output to.
+void SampleProfileReader::dumpFunctionProfile(StringRef FName,
+ raw_ostream &OS) {
+ OS << "Function: " << FName << ": ";
+ Profiles[FName].print(OS);
+}
+
+/// \brief Dump all the function profiles found on stream \p OS.
+void SampleProfileReader::dump(raw_ostream &OS) {
+ for (const auto &I : Profiles)
+ dumpFunctionProfile(I.getKey(), OS);
+}
+
+/// \brief Load samples from a text file.
+///
+/// See the documentation at the top of the file for an explanation of
+/// the expected format.
+///
+/// \returns sampleprof_error::success if the file was loaded successfully,
+/// or the corresponding error code otherwise.
+std::error_code SampleProfileReaderText::read() {
+ line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
+
+ // Read the profile of each function. Since each function may be
+ // mentioned more than once, and we are collecting flat profiles,
+ // accumulate samples as we parse them.
+ Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$");
+ Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$");
+ Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)");
+ while (!LineIt.is_at_eof()) {
+ // Read the header of each function.
+ //
+ // Note that for function identifiers we are actually expecting
+ // mangled names, but we may not always get them. This happens when
+ // the compiler decides not to emit the function (e.g., it was inlined
+ // and removed). In this case, the binary will not have the linkage
+ // name for the function, so the profiler will emit the function's
+ // unmangled name, which may contain characters like ':' and '>' in its
+ // name (member functions, templates, etc).
+ //
+ // The only requirement we place on the identifier, then, is that it
+ // should not begin with a number.
+ SmallVector<StringRef, 4> Matches;
+ if (!HeadRE.match(*LineIt, &Matches)) {
+ reportParseError(LineIt.line_number(),
+ "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
+ return sampleprof_error::malformed;
+ }
+ assert(Matches.size() == 4);
+ StringRef FName = Matches[1];
+ unsigned NumSamples, NumHeadSamples;
+ Matches[2].getAsInteger(10, NumSamples);
+ Matches[3].getAsInteger(10, NumHeadSamples);
+ Profiles[FName] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[FName];
+ FProfile.addTotalSamples(NumSamples);
+ FProfile.addHeadSamples(NumHeadSamples);
+ ++LineIt;
+
+ // Now read the body. The body of the function ends when we reach
+ // EOF or when we see the start of the next function.
+ while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) {
+ if (!LineSampleRE.match(*LineIt, &Matches)) {
+ reportParseError(
+ LineIt.line_number(),
+ "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt);
+ return sampleprof_error::malformed;
+ }
+ assert(Matches.size() == 5);
+ unsigned LineOffset, NumSamples, Discriminator = 0;
+ Matches[1].getAsInteger(10, LineOffset);
+ if (Matches[2] != "")
+ Matches[2].getAsInteger(10, Discriminator);
+ Matches[3].getAsInteger(10, NumSamples);
+
+ // If there are function calls in this line, generate a call sample
+ // entry for each call.
+ std::string CallsLine(Matches[4]);
+ while (CallsLine != "") {
+ SmallVector<StringRef, 3> CallSample;
+ if (!CallSampleRE.match(CallsLine, &CallSample)) {
+ reportParseError(LineIt.line_number(),
+ "Expected 'mangled_name:NUM', found " + CallsLine);
+ return sampleprof_error::malformed;
+ }
+ StringRef CalledFunction = CallSample[1];
+ unsigned CalledFunctionSamples;
+ CallSample[2].getAsInteger(10, CalledFunctionSamples);
+ FProfile.addCalledTargetSamples(LineOffset, Discriminator,
+ CalledFunction, CalledFunctionSamples);
+ CallsLine = CallSampleRE.sub("", CallsLine);
+ }
+
+ FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
+ ++LineIt;
+ }
+ }
+
+ return sampleprof_error::success;
+}
+
+template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
+ unsigned NumBytesRead = 0;
+ std::error_code EC;
+ uint64_t Val = decodeULEB128(Data, &NumBytesRead);
+
+ if (Val > std::numeric_limits<T>::max())
+ EC = sampleprof_error::malformed;
+ else if (Data + NumBytesRead > End)
+ EC = sampleprof_error::truncated;
+ else
+ EC = sampleprof_error::success;
+
+ if (EC) {
+ reportParseError(0, EC.message());
+ return EC;
+ }
+
+ Data += NumBytesRead;
+ return static_cast<T>(Val);
+}
+
+ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
+ std::error_code EC;
+ StringRef Str(reinterpret_cast<const char *>(Data));
+ if (Data + Str.size() + 1 > End) {
+ EC = sampleprof_error::truncated;
+ reportParseError(0, EC.message());
+ return EC;
+ }
+
+ Data += Str.size() + 1;
+ return Str;
+}
+
+std::error_code SampleProfileReaderBinary::read() {
+ while (!at_eof()) {
+ auto FName(readString());
+ if (std::error_code EC = FName.getError())
+ return EC;
+
+ Profiles[*FName] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[*FName];
+
+ auto Val = readNumber<unsigned>();
+ if (std::error_code EC = Val.getError())
+ return EC;
+ FProfile.addTotalSamples(*Val);
+
+ Val = readNumber<unsigned>();
+ if (std::error_code EC = Val.getError())
+ return EC;
+ FProfile.addHeadSamples(*Val);
+
+ // Read the samples in the body.
+ auto NumRecords = readNumber<unsigned>();
+ if (std::error_code EC = NumRecords.getError())
+ return EC;
+ for (unsigned I = 0; I < *NumRecords; ++I) {
+ auto LineOffset = readNumber<uint64_t>();
+ if (std::error_code EC = LineOffset.getError())
+ return EC;
+
+ auto Discriminator = readNumber<uint64_t>();
+ if (std::error_code EC = Discriminator.getError())
+ return EC;
+
+ auto NumSamples = readNumber<uint64_t>();
+ if (std::error_code EC = NumSamples.getError())
+ return EC;
+
+ auto NumCalls = readNumber<unsigned>();
+ if (std::error_code EC = NumCalls.getError())
+ return EC;
+
+ for (unsigned J = 0; J < *NumCalls; ++J) {
+ auto CalledFunction(readString());
+ if (std::error_code EC = CalledFunction.getError())
+ return EC;
+
+ auto CalledFunctionSamples = readNumber<uint64_t>();
+ if (std::error_code EC = CalledFunctionSamples.getError())
+ return EC;
+
+ FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
+ *CalledFunction,
+ *CalledFunctionSamples);
+ }
+
+ FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
+ }
+ }
+
+ return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderBinary::readHeader() {
+ Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
+ End = Data + Buffer->getBufferSize();
+
+ // Read and check the magic identifier.
+ auto Magic = readNumber<uint64_t>();
+ if (std::error_code EC = Magic.getError())
+ return EC;
+ else if (*Magic != SPMagic())
+ return sampleprof_error::bad_magic;
+
+ // Read the version number.
+ auto Version = readNumber<uint64_t>();
+ if (std::error_code EC = Version.getError())
+ return EC;
+ else if (*Version != SPVersion())
+ return sampleprof_error::unsupported_version;
+
+ return sampleprof_error::success;
+}
+
+bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) {
+ const uint8_t *Data =
+ reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
+ uint64_t Magic = decodeULEB128(Data);
+ return Magic == SPMagic();
+}
+
+/// \brief Prepare a memory buffer for the contents of \p Filename.
+///
+/// \returns an error code indicating the status of the buffer.
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+setupMemoryBuffer(std::string Filename) {
+ auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
+ if (std::error_code EC = BufferOrErr.getError())
+ return EC;
+ auto Buffer = std::move(BufferOrErr.get());
+
+ // Sanity check the file.
+ if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
+ return sampleprof_error::too_large;
+
+ return std::move(Buffer);
+}
+
+/// \brief Create a sample profile reader based on the format of the input file.
+///
+/// \param Filename The file to open.
+///
+/// The reader to instantiate is chosen according to \p Filename's format.
+///
+/// \param C The LLVM context to use to emit diagnostics.
+///
+/// \returns an error code indicating the status of the created reader.
+ErrorOr<std::unique_ptr<SampleProfileReader>>
+SampleProfileReader::create(StringRef Filename, LLVMContext &C) {
+ auto BufferOrError = setupMemoryBuffer(Filename);
+ if (std::error_code EC = BufferOrError.getError())
+ return EC;
+
+ auto Buffer = std::move(BufferOrError.get());
+ std::unique_ptr<SampleProfileReader> Reader;
+ if (SampleProfileReaderBinary::hasFormat(*Buffer))
+ Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C));
+ else
+ Reader.reset(new SampleProfileReaderText(std::move(Buffer), C));
+
+ if (std::error_code EC = Reader->readHeader())
+ return EC;
+
+ return std::move(Reader);
+}
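
For orientation, here is a minimal sketch of how a tool might drive the new
reader API above. The wrapper function, file name and error reporting are
illustrative only, and it assumes the SampleProfileReader base class exposes
the virtual read() entry point implemented by the text and binary readers:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/ProfileData/SampleProfReader.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;
  using namespace llvm::sampleprof;

  static bool dumpProfile(StringRef Path) {
    LLVMContext Ctx;
    // create() probes the file and picks the text or binary reader.
    auto ReaderOrErr = SampleProfileReader::create(Path, Ctx);
    if (std::error_code EC = ReaderOrErr.getError()) {
      errs() << Path << ": " << EC.message() << "\n";
      return false;
    }
    std::unique_ptr<SampleProfileReader> Reader = std::move(ReaderOrErr.get());
    // Parse every function profile in the file.
    if (std::error_code EC = Reader->read()) {
      errs() << Path << ": " << EC.message() << "\n";
      return false;
    }
    // Print one FunctionSamples entry per function.
    Reader->dump(errs());
    return true;
  }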
diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp
new file mode 100644
index 0000000..8525045
--- /dev/null
+++ b/lib/ProfileData/SampleProfWriter.cpp
@@ -0,0 +1,126 @@
+//===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that writes LLVM sample profiles. It
+// supports two file formats: text and binary. The textual representation
+// is useful for debugging and testing purposes. The binary representation
+// is more compact, resulting in smaller file sizes. However, they can
+// both be used interchangeably.
+//
+// See lib/ProfileData/SampleProfReader.cpp for documentation on each of the
+// supported formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/SampleProfWriter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/Regex.h"
+
+using namespace llvm::sampleprof;
+using namespace llvm;
+
+/// \brief Write samples to a text file.
+bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) {
+ if (S.empty())
+ return true;
+
+ OS << FName << ":" << S.getTotalSamples() << ":" << S.getHeadSamples()
+ << "\n";
+
+ for (const auto &I : S.getBodySamples()) {
+ LineLocation Loc = I.first;
+ const SampleRecord &Sample = I.second;
+ if (Loc.Discriminator == 0)
+ OS << Loc.LineOffset << ": ";
+ else
+ OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
+
+ OS << Sample.getSamples();
+
+ for (const auto &J : Sample.getCallTargets())
+ OS << " " << J.first() << ":" << J.second;
+ OS << "\n";
+ }
+
+ return true;
+}
+
+SampleProfileWriterBinary::SampleProfileWriterBinary(StringRef F,
+ std::error_code &EC)
+ : SampleProfileWriter(F, EC, sys::fs::F_None) {
+ if (EC)
+ return;
+
+ // Write the file header.
+ encodeULEB128(SPMagic(), OS);
+ encodeULEB128(SPVersion(), OS);
+}
+
+/// \brief Write samples to a binary file.
+///
+/// \returns true if the samples were written successfully, false otherwise.
+bool SampleProfileWriterBinary::write(StringRef FName,
+ const FunctionSamples &S) {
+ if (S.empty())
+ return true;
+
+ OS << FName;
+ encodeULEB128(0, OS);
+ encodeULEB128(S.getTotalSamples(), OS);
+ encodeULEB128(S.getHeadSamples(), OS);
+ encodeULEB128(S.getBodySamples().size(), OS);
+ for (const auto &I : S.getBodySamples()) {
+ LineLocation Loc = I.first;
+ const SampleRecord &Sample = I.second;
+ encodeULEB128(Loc.LineOffset, OS);
+ encodeULEB128(Loc.Discriminator, OS);
+ encodeULEB128(Sample.getSamples(), OS);
+ encodeULEB128(Sample.getCallTargets().size(), OS);
+ for (const auto &J : Sample.getCallTargets()) {
+ std::string Callee = J.first();
+ unsigned CalleeSamples = J.second;
+ OS << Callee;
+ encodeULEB128(0, OS);
+ encodeULEB128(CalleeSamples, OS);
+ }
+ }
+
+ return true;
+}
+
+/// \brief Create a sample profile writer based on the specified format.
+///
+/// \param Filename The file to create.
+///
+/// The writer to instantiate is chosen according to the specified format.
+///
+/// \param Format Encoding format for the profile file.
+///
+/// \returns an error code indicating the status of the created writer.
+ErrorOr<std::unique_ptr<SampleProfileWriter>>
+SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
+ std::error_code EC;
+ std::unique_ptr<SampleProfileWriter> Writer;
+
+ if (Format == SPF_Binary)
+ Writer.reset(new SampleProfileWriterBinary(Filename, EC));
+ else if (Format == SPF_Text)
+ Writer.reset(new SampleProfileWriterText(Filename, EC));
+ else
+ EC = sampleprof_error::unrecognized_format;
+
+ if (EC)
+ return EC;
+
+ return std::move(Writer);
+}
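
Similarly, a small illustrative sketch of producing a profile with the new
writer. The function name, offsets and counts are invented, and it assumes
FunctionSamples and its add*() mutators (used by the reader above) are
visible through the writer header:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ProfileData/SampleProfWriter.h"

  using namespace llvm;
  using namespace llvm::sampleprof;

  static std::error_code writeToyProfile(StringRef Path) {
    auto WriterOrErr = SampleProfileWriter::create(Path, SPF_Text);
    if (std::error_code EC = WriterOrErr.getError())
      return EC;

    FunctionSamples S;
    S.addTotalSamples(100);
    S.addHeadSamples(10);
    // One sampled body line, plus a call on that line resolved to "callee".
    S.addBodySamples(/*LineOffset=*/1, /*Discriminator=*/0, /*Num=*/60);
    S.addCalledTargetSamples(/*LineOffset=*/1, /*Discriminator=*/0,
                             "callee", /*Num=*/40);

    // write() returns true on success for both the text and binary writers.
    (*WriterOrErr)->write("main", S);
    return std::error_code();
  }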
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 7989e30..295b16c 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -35,8 +35,7 @@ using namespace llvm;
/* Assumed in hexadecimal significand parsing, and conversion to
hexadecimal strings. */
-#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
-COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
+static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
namespace llvm {
@@ -212,15 +211,15 @@ skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
{
StringRef::iterator p = begin;
*dot = end;
- while (*p == '0' && p != end)
+ while (p != end && *p == '0')
p++;
- if (*p == '.') {
+ if (p != end && *p == '.') {
*dot = p++;
assert(end - begin != 1 && "Significand has no digits");
- while (*p == '0' && p != end)
+ while (p != end && *p == '0')
p++;
}
@@ -927,7 +926,10 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
assert(semantics == rhs.semantics);
precision = semantics->precision;
- newPartsCount = partCountForBits(precision * 2);
+
+ // Allocate space for twice as many bits as the original significand, plus one
+ // extra bit for the addition to overflow into.
+ newPartsCount = partCountForBits(precision * 2 + 1);
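+ // (For example, IEEE single precision has a 24-bit significand, so this
+ // reserves 2 * 24 + 1 = 49 bits, which still fits in one 64-bit part.)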
if (newPartsCount > 4)
fullSignificand = new integerPart[newPartsCount];
@@ -949,13 +951,14 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
// *this = a23 . a22 ... a0 * 2^e1
// rhs = b23 . b22 ... b0 * 2^e2
// the result of multiplication is:
- // *this = c47 c46 . c45 ... c0 * 2^(e1+e2)
- // Note that there are two significant bits at the left-hand side of the
- // radix point. Move the radix point toward left by one bit, and adjust
- // exponent accordingly.
- exponent += 1;
-
- if (addend) {
+ // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
+ // Note that there are three significant bits at the left-hand side of the
+ // radix point: two for the multiplication, and an overflow bit for the
+ // addition (that will always be zero at this point). Move the radix point
+ // toward left by two bits, and adjust exponent accordingly.
+ exponent += 2;
+
+ if (addend && addend->isNonZero()) {
// The intermediate result of the multiplication has "2 * precision"
// significant bits; adjust the addend to be consistent with mul result.
//
@@ -965,13 +968,13 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
opStatus status;
unsigned int extendedPrecision;
- /* Normalize our MSB. */
- extendedPrecision = 2 * precision;
- if (omsb != extendedPrecision) {
+ // Normalize our MSB to one below the top bit to allow for overflow.
+ extendedPrecision = 2 * precision + 1;
+ if (omsb != extendedPrecision - 1) {
assert(extendedPrecision > omsb);
APInt::tcShiftLeft(fullSignificand, newPartsCount,
- extendedPrecision - omsb);
- exponent -= extendedPrecision - omsb;
+ (extendedPrecision - 1) - omsb);
+ exponent -= (extendedPrecision - 1) - omsb;
}
/* Create new semantics. */
@@ -988,6 +991,14 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
assert(status == opOK);
(void)status;
+
+ // Shift the significand of the addend right by one bit. This guarantees
+ // that the high bit of the significand is zero (same as fullSignificand),
+ // so the addition will overflow (if it does overflow at all) into the top bit.
+ lost_fraction = extendedAddend.shiftSignificandRight(1);
+ assert(lost_fraction == lfExactlyZero &&
+ "Lost precision while shifting addend for fused-multiply-add.");
+
lost_fraction = addOrSubtractSignificand(extendedAddend, false);
/* Restore our state. */
@@ -1003,7 +1014,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
// having "precision" significant-bits. First, move the radix point from
// position "2*precision - 1" to "precision - 1". The exponent needs to be
// adjusted by "2*precision - 1" - "precision - 1" = "precision".
- exponent -= precision;
+ exponent -= precision + 1;
// In case MSB resides at the left-hand side of radix point, shift the
// mantissa right by some amount to make sure the MSB reside right before
@@ -1801,7 +1812,7 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
extended-precision calculation. */
if (isFiniteNonZero() &&
multiplicand.isFiniteNonZero() &&
- addend.isFiniteNonZero()) {
+ addend.isFinite()) {
lostFraction lost_fraction;
lost_fraction = multiplySignificand(multiplicand, &addend);
@@ -3377,7 +3388,9 @@ void APFloat::makeLargest(bool Negative) {
// internal consistency.
const unsigned NumUnusedHighBits =
PartCount*integerPartWidth - semantics->precision;
- significand[PartCount - 1] = ~integerPart(0) >> NumUnusedHighBits;
+ significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
+ ? (~integerPart(0) >> NumUnusedHighBits)
+ : 0;
}
/// Make this number the smallest magnitude denormal number in the given
@@ -3904,3 +3917,20 @@ APFloat::makeZero(bool Negative) {
exponent = semantics->minExponent-1;
APInt::tcSet(significandParts(), 0, partCount());
}
+
+APFloat llvm::scalbn(APFloat X, int Exp) {
+ if (X.isInfinity() || X.isZero() || X.isNaN())
+ return std::move(X);
+
+ auto MaxExp = X.getSemantics().maxExponent;
+ auto MinExp = X.getSemantics().minExponent;
+ if (Exp > (MaxExp - X.exponent))
+ // Overflow saturates to infinity.
+ return APFloat::getInf(X.getSemantics(), X.isNegative());
+ if (Exp < (MinExp - X.exponent))
+ // Underflow saturates to zero.
+ return APFloat::getZero(X.getSemantics(), X.isNegative());
+
+ X.exponent += Exp;
+ return std::move(X);
+}
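
As a small illustration of the saturating behaviour added here (values chosen
arbitrarily, IEEE single precision assumed):

  APFloat X(1.0f);
  APFloat Y = llvm::scalbn(X, 10);       // 2^10 * 1.0 = 1024.0
  APFloat I = llvm::scalbn(X, 100000);   // exponent overflow saturates to +inf
  APFloat Z = llvm::scalbn(X, -100000);  // exponent underflow saturates to +0.0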
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index fa929eb..c20eeb2 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -454,8 +454,10 @@ APInt APInt::XorSlowCase(const APInt& RHS) const {
for (unsigned i = 0; i < numWords; ++i)
val[i] = pVal[i] ^ RHS.pVal[i];
+ APInt Result(val, getBitWidth());
// 0^0==1 so clear the high bits in case they got set.
- return APInt(val, getBitWidth()).clearUnusedBits();
+ Result.clearUnusedBits();
+ return Result;
}
APInt APInt::operator*(const APInt& RHS) const {
@@ -473,7 +475,8 @@ APInt APInt::operator+(const APInt& RHS) const {
return APInt(BitWidth, VAL + RHS.VAL);
APInt Result(BitWidth, 0);
add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
- return Result.clearUnusedBits();
+ Result.clearUnusedBits();
+ return Result;
}
APInt APInt::operator-(const APInt& RHS) const {
@@ -482,7 +485,8 @@ APInt APInt::operator-(const APInt& RHS) const {
return APInt(BitWidth, VAL - RHS.VAL);
APInt Result(BitWidth, 0);
sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
- return Result.clearUnusedBits();
+ Result.clearUnusedBits();
+ return Result;
}
bool APInt::EqualSlowCase(const APInt& RHS) const {
@@ -1114,7 +1118,9 @@ APInt APInt::ashr(unsigned shiftAmt) const {
uint64_t fillValue = (isNegative() ? -1ULL : 0);
for (unsigned i = breakWord+1; i < getNumWords(); ++i)
val[i] = fillValue;
- return APInt(val, BitWidth).clearUnusedBits();
+ APInt Result(val, BitWidth);
+ Result.clearUnusedBits();
+ return Result;
}
/// Logical right-shift this APInt by shiftAmt.
@@ -1151,7 +1157,9 @@ APInt APInt::lshr(unsigned shiftAmt) const {
// If we are shifting less than a word, compute the shift with a simple carry
if (shiftAmt < APINT_BITS_PER_WORD) {
lshrNear(val, pVal, getNumWords(), shiftAmt);
- return APInt(val, BitWidth).clearUnusedBits();
+ APInt Result(val, BitWidth);
+ Result.clearUnusedBits();
+ return Result;
}
// Compute some values needed by the remaining shift algorithms
@@ -1164,7 +1172,9 @@ APInt APInt::lshr(unsigned shiftAmt) const {
val[i] = pVal[i+offset];
for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
val[i] = 0;
- return APInt(val,BitWidth).clearUnusedBits();
+ APInt Result(val, BitWidth);
+ Result.clearUnusedBits();
+ return Result;
}
// Shift the low order words
@@ -1178,7 +1188,9 @@ APInt APInt::lshr(unsigned shiftAmt) const {
// Remaining words are 0
for (unsigned i = breakWord+1; i < getNumWords(); ++i)
val[i] = 0;
- return APInt(val, BitWidth).clearUnusedBits();
+ APInt Result(val, BitWidth);
+ Result.clearUnusedBits();
+ return Result;
}
/// Left-shift this APInt by shiftAmt.
@@ -1211,7 +1223,9 @@ APInt APInt::shlSlowCase(unsigned shiftAmt) const {
val[i] = pVal[i] << shiftAmt | carry;
carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
}
- return APInt(val, BitWidth).clearUnusedBits();
+ APInt Result(val, BitWidth);
+ Result.clearUnusedBits();
+ return Result;
}
// Compute some values needed by the remaining shift algorithms
@@ -1224,7 +1238,9 @@ APInt APInt::shlSlowCase(unsigned shiftAmt) const {
val[i] = 0;
for (unsigned i = offset; i < getNumWords(); i++)
val[i] = pVal[i-offset];
- return APInt(val,BitWidth).clearUnusedBits();
+ APInt Result(val, BitWidth);
+ Result.clearUnusedBits();
+ return Result;
}
// Copy whole words from this to Result.
@@ -1235,7 +1251,9 @@ APInt APInt::shlSlowCase(unsigned shiftAmt) const {
val[offset] = pVal[0] << wordShift;
for (i = 0; i < offset; ++i)
val[i] = 0;
- return APInt(val, BitWidth).clearUnusedBits();
+ APInt Result(val, BitWidth);
+ Result.clearUnusedBits();
+ return Result;
}
APInt APInt::rotl(const APInt &rotateAmt) const {
@@ -1303,7 +1321,7 @@ APInt APInt::sqrt() const {
// Okay, all the short cuts are exhausted. We must compute it. The following
// is a classical Babylonian method for computing the square root. This code
- // was adapted to APINt from a wikipedia article on such computations.
+ // was adapted to APInt from a wikipedia article on such computations.
// See http://www.wikipedia.org/ and go to the page named
// Calculate_an_integer_square_root.
unsigned nbits = BitWidth, i = 4;
@@ -2046,19 +2064,29 @@ APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
return Res;
}
-APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
- Overflow = ShAmt >= getBitWidth();
+APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
+ Overflow = ShAmt.uge(getBitWidth());
if (Overflow)
- ShAmt = getBitWidth()-1;
+ return APInt(BitWidth, 0);
if (isNonNegative()) // Don't allow sign change.
- Overflow = ShAmt >= countLeadingZeros();
+ Overflow = ShAmt.uge(countLeadingZeros());
else
- Overflow = ShAmt >= countLeadingOnes();
+ Overflow = ShAmt.uge(countLeadingOnes());
return *this << ShAmt;
}
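+
+/// Unsigned left-shift with overflow detection: Overflow is set when the
+/// shift amount is at least the bit width, or when any set bit would be
+/// shifted out (i.e. when ShAmt exceeds the number of leading zeros).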
+APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
+ Overflow = ShAmt.uge(getBitWidth());
+ if (Overflow)
+ return APInt(BitWidth, 0);
+
+ Overflow = ShAmt.ugt(countLeadingZeros());
+
+ return *this << ShAmt;
+}
+
@@ -2270,8 +2298,7 @@ void APInt::print(raw_ostream &OS, bool isSigned) const {
// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
// and unrestricting assumption.
-#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
-COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
+static_assert(integerPartWidth % 2 == 0, "Part width must be divisible by 2!");
/* Some handy functions local to this file. */
namespace {
diff --git a/lib/Support/Android.mk b/lib/Support/Android.mk
index 7968697..34448a7 100644
--- a/lib/Support/Android.mk
+++ b/lib/Support/Android.mk
@@ -22,7 +22,6 @@ support_SRC_FILES := \
DataExtractor.cpp \
Debug.cpp \
DeltaAlgorithm.cpp \
- Disassembler.cpp \
Dwarf.cpp \
DynamicLibrary.cpp \
Errno.cpp \
@@ -44,10 +43,12 @@ support_SRC_FILES := \
LockFileManager.cpp \
MD5.cpp \
ManagedStatic.cpp \
+ MathExtras.cpp \
Memory.cpp \
MemoryBuffer.cpp \
MemoryObject.cpp \
Mutex.cpp \
+ Options.cpp \
Path.cpp \
PluginLoader.cpp \
PrettyStackTrace.cpp \
@@ -61,15 +62,14 @@ support_SRC_FILES := \
Signals.cpp \
SmallPtrSet.cpp \
SmallVector.cpp \
+ StreamingMemoryObject.cpp \
SourceMgr.cpp \
SpecialCaseList.cpp \
Statistic.cpp \
- StreamableMemoryObject.cpp \
StringExtras.cpp \
StringMap.cpp \
StringPool.cpp \
StringRef.cpp \
- StringRefMemoryObject.cpp \
SystemUtils.cpp \
TargetRegistry.cpp \
Threading.cpp \
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 9ecd559..fa62591 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -1,3 +1,32 @@
+set(system_libs)
+if( NOT MSVC )
+ if( MINGW )
+ set(system_libs ${system_libs} imagehlp psapi shell32)
+ elseif( CMAKE_HOST_UNIX )
+ if( HAVE_LIBRT )
+ set(system_libs ${system_libs} rt)
+ endif()
+ if( HAVE_LIBDL )
+ set(system_libs ${system_libs} ${CMAKE_DL_LIBS})
+ endif()
+ if(LLVM_ENABLE_TERMINFO)
+ if(HAVE_TERMINFO)
+ set(system_libs ${system_libs} ${TERMINFO_LIBS})
+ endif()
+ endif()
+ if( LLVM_ENABLE_THREADS AND HAVE_LIBATOMIC )
+ set(system_libs ${system_libs} atomic)
+ endif()
+ if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
+ set(system_libs ${system_libs} pthread)
+ endif()
+ if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ )
+ set(system_libs ${system_libs} z)
+ endif()
+ set(system_libs ${system_libs} m)
+ endif( MINGW )
+endif( NOT MSVC )
+
add_llvm_library(LLVMSupport
APFloat.cpp
APInt.cpp
@@ -36,9 +65,11 @@ add_llvm_library(LLVMSupport
Locale.cpp
LockFileManager.cpp
ManagedStatic.cpp
+ MathExtras.cpp
MemoryBuffer.cpp
MemoryObject.cpp
MD5.cpp
+ Options.cpp
PluginLoader.cpp
PrettyStackTrace.cpp
RandomNumberGenerator.cpp
@@ -49,12 +80,11 @@ add_llvm_library(LLVMSupport
SourceMgr.cpp
SpecialCaseList.cpp
Statistic.cpp
- StreamableMemoryObject.cpp
+ StreamingMemoryObject.cpp
StringExtras.cpp
StringMap.cpp
StringPool.cpp
StringRef.cpp
- StringRefMemoryObject.cpp
SystemUtils.cpp
Timer.cpp
ToolOutputFile.cpp
@@ -73,11 +103,9 @@ add_llvm_library(LLVMSupport
# System
Atomic.cpp
- Disassembler.cpp
DynamicLibrary.cpp
Errno.cpp
Host.cpp
- IncludeFile.cpp
Memory.cpp
Mutex.cpp
Path.cpp
@@ -117,38 +145,8 @@ add_llvm_library(LLVMSupport
Windows/ThreadLocal.inc
Windows/TimeValue.inc
Windows/Watchdog.inc
- )
-set(system_libs)
-if( NOT MSVC )
- if( MINGW )
- set(system_libs ${system_libs} imagehlp psapi shell32)
- elseif( CMAKE_HOST_UNIX )
- if( HAVE_LIBRT )
- set(system_libs ${system_libs} rt)
- endif()
- if( HAVE_LIBDL )
- set(system_libs ${system_libs} ${CMAKE_DL_LIBS})
- endif()
- if(LLVM_ENABLE_TERMINFO)
- if(HAVE_TERMINFO)
- set(system_libs ${system_libs} ${TERMINFO_LIBS})
- endif()
- endif()
- if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
- set(system_libs ${system_libs} pthread)
- endif()
- if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ )
- set(system_libs ${system_libs} z)
- endif()
- endif( MINGW )
-endif( NOT MSVC )
-
-if(POLICY CMP0022 AND BUILD_SHARED_LIBS)
- # FIXME: Should this be really PUBLIC?
- target_link_libraries(LLVMSupport PUBLIC ${system_libs})
-else()
- target_link_libraries(LLVMSupport ${cmake_2_8_12_INTERFACE} ${system_libs})
-endif()
+ LINK_LIBS ${system_libs}
+ )
set_property(TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS "${system_libs}")
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 87348f7..985c877 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/CommandLine.h"
+#include "llvm-c/Support.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
@@ -113,9 +114,15 @@ void Option::addArgument() {
}
void Option::removeArgument() {
- assert(NextRegistered && "argument never registered");
- assert(RegisteredOptionList == this && "argument is not the last registered");
- RegisteredOptionList = NextRegistered;
+ if (RegisteredOptionList == this) {
+ RegisteredOptionList = NextRegistered;
+ MarkOptionsChanged();
+ return;
+ }
+ Option *O = RegisteredOptionList;
+ for (; O->NextRegistered != this; O = O->NextRegistered)
+ ;
+ O->NextRegistered = NextRegistered;
MarkOptionsChanged();
}
@@ -158,7 +165,7 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
// Handle named options.
for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
// Add argument to the argument map!
- if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) {
+ if (!OptionsMap.insert(std::make_pair(OptionNames[i], O)).second) {
errs() << ProgramName << ": CommandLine Error: Option '"
<< OptionNames[i] << "' registered more than once!\n";
HadErrors = true;
@@ -474,13 +481,18 @@ static bool isGNUSpecial(char C) {
}
void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
- SmallVectorImpl<const char *> &NewArgv) {
+ SmallVectorImpl<const char *> &NewArgv,
+ bool MarkEOLs) {
SmallString<128> Token;
for (size_t I = 0, E = Src.size(); I != E; ++I) {
// Consume runs of whitespace.
if (Token.empty()) {
- while (I != E && isWhitespace(Src[I]))
+ while (I != E && isWhitespace(Src[I])) {
+ // Mark the end of lines in response files
+ if (MarkEOLs && Src[I] == '\n')
+ NewArgv.push_back(nullptr);
++I;
+ }
if (I == E) break;
}
@@ -521,6 +533,9 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
NewArgv.push_back(Saver.SaveString(Token.c_str()));
+ // Mark the end of response files
+ if (MarkEOLs)
+ NewArgv.push_back(nullptr);
}
/// Backslashes are interpreted in a rather complicated way in the Windows-style
@@ -562,7 +577,8 @@ static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) {
}
void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
- SmallVectorImpl<const char *> &NewArgv) {
+ SmallVectorImpl<const char *> &NewArgv,
+ bool MarkEOLs) {
SmallString<128> Token;
// This is a small state machine to consume characters until it reaches the
@@ -572,8 +588,12 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// INIT state indicates that the current input index is at the start of
// the string or between tokens.
if (State == INIT) {
- if (isWhitespace(Src[I]))
+ if (isWhitespace(Src[I])) {
+ // Mark the end of lines in response files
+ if (MarkEOLs && Src[I] == '\n')
+ NewArgv.push_back(nullptr);
continue;
+ }
if (Src[I] == '"') {
State = QUOTED;
continue;
@@ -596,6 +616,9 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
NewArgv.push_back(Saver.SaveString(Token.c_str()));
Token.clear();
State = INIT;
+ // Mark the end of lines in response files
+ if (MarkEOLs && Src[I] == '\n')
+ NewArgv.push_back(nullptr);
continue;
}
if (Src[I] == '"') {
@@ -626,20 +649,24 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
NewArgv.push_back(Saver.SaveString(Token.c_str()));
+ // Mark the end of response files
+ if (MarkEOLs)
+ NewArgv.push_back(nullptr);
}
static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
TokenizerCallback Tokenizer,
- SmallVectorImpl<const char *> &NewArgv) {
+ SmallVectorImpl<const char *> &NewArgv,
+ bool MarkEOLs = false) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
MemoryBuffer::getFile(FName);
if (!MemBufOrErr)
return false;
- std::unique_ptr<MemoryBuffer> MemBuf = std::move(MemBufOrErr.get());
- StringRef Str(MemBuf->getBufferStart(), MemBuf->getBufferSize());
+ MemoryBuffer &MemBuf = *MemBufOrErr.get();
+ StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize());
// If we have a UTF-16 byte order mark, convert to UTF-8 for parsing.
- ArrayRef<char> BufRef(MemBuf->getBufferStart(), MemBuf->getBufferEnd());
+ ArrayRef<char> BufRef(MemBuf.getBufferStart(), MemBuf.getBufferEnd());
std::string UTF8Buf;
if (hasUTF16ByteOrderMark(BufRef)) {
if (!convertUTF16ToUTF8String(BufRef, UTF8Buf))
@@ -648,7 +675,7 @@ static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
}
// Tokenize the contents into NewArgv.
- Tokenizer(Str, Saver, NewArgv);
+ Tokenizer(Str, Saver, NewArgv, MarkEOLs);
return true;
}
@@ -656,13 +683,19 @@ static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
/// \brief Expand response files on a command line recursively using the given
/// StringSaver and tokenization strategy.
bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
- SmallVectorImpl<const char *> &Argv) {
+ SmallVectorImpl<const char *> &Argv,
+ bool MarkEOLs) {
unsigned RspFiles = 0;
bool AllExpanded = true;
// Don't cache Argv.size() because it can change.
for (unsigned I = 0; I != Argv.size(); ) {
const char *Arg = Argv[I];
+ // Check if it is an EOL marker
+ if (Arg == nullptr) {
+ ++I;
+ continue;
+ }
if (Arg[0] != '@') {
++I;
continue;
@@ -678,7 +711,8 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
// FIXME: If a nested response file uses a relative path, is it relative to
// the cwd of the process or the response file?
SmallVector<const char *, 0> ExpandedArgv;
- if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv)) {
+ if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv,
+ MarkEOLs)) {
// We couldn't read this file, so we leave it in the argument stream and
// move on.
AllExpanded = false;
@@ -1018,13 +1052,12 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
}
// Loop over args and make sure all required args are specified!
- for (StringMap<Option*>::iterator I = Opts.begin(),
- E = Opts.end(); I != E; ++I) {
- switch (I->second->getNumOccurrencesFlag()) {
+ for (const auto &Opt : Opts) {
+ switch (Opt.second->getNumOccurrencesFlag()) {
case Required:
case OneOrMore:
- if (I->second->getNumOccurrences() == 0) {
- I->second->error("must be specified at least once!");
+ if (Opt.second->getNumOccurrences() == 0) {
+ Opt.second->error("must be specified at least once!");
ErrorParsing = true;
}
// Fall through
@@ -1422,7 +1455,7 @@ sortOpts(StringMap<Option*> &OptMap,
continue;
// If we've already seen this option, don't add it to the list again.
- if (!OptionSet.insert(I->second))
+ if (!OptionSet.insert(I->second).second)
continue;
Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(),
@@ -1807,3 +1840,8 @@ void cl::getRegisteredOptions(StringMap<Option*> &Map)
GetOptionInfo(PositionalOpts, SinkOpts, Map);
return;
}
+
+void LLVMParseCommandLineOptions(int argc, const char *const *argv,
+ const char *Overview) {
+ llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
+}
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
index 32653de..dbf6465 100644
--- a/lib/Support/DataStream.cpp
+++ b/lib/Support/DataStream.cpp
@@ -32,12 +32,12 @@ using namespace llvm;
#define DEBUG_TYPE "Data-stream"
// Interface goals:
-// * StreamableMemoryObject doesn't care about complexities like using
+// * StreamingMemoryObject doesn't care about complexities like using
// threads/async callbacks to actually overlap download+compile
// * Don't want to duplicate Data in memory
// * Don't need to know total Data len in advance
// Non-goals:
-// StreamableMemoryObject already has random access so this interface only does
+// StreamingMemoryObject already has random access so this interface only does
// in-order streaming (no arbitrary seeking, else we'd have to buffer all the
// Data here in addition to MemoryObject). This also means that if we want
// to be able to free Data, BitstreamBytes/BitcodeReader will implement it
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index ad4d4ef..8246542 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -50,14 +51,14 @@ DebugBufferSize("debug-buffer-size",
cl::Hidden,
cl::init(0));
-static std::string CurrentDebugType;
+static ManagedStatic<std::string> CurrentDebugType;
namespace {
struct DebugOnlyOpt {
void operator=(const std::string &Val) const {
DebugFlag |= !Val.empty();
- CurrentDebugType = Val;
+ *CurrentDebugType = Val;
}
};
@@ -86,7 +87,7 @@ static void debug_user_sig_handler(void *Cookie) {
// with the -debug-only=X option.
//
bool llvm::isCurrentDebugType(const char *DebugType) {
- return CurrentDebugType.empty() || DebugType == CurrentDebugType;
+ return CurrentDebugType->empty() || DebugType == *CurrentDebugType;
}
/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X
@@ -94,7 +95,7 @@ bool llvm::isCurrentDebugType(const char *DebugType) {
/// debug output to be produced.
///
void llvm::setCurrentDebugType(const char *Type) {
- CurrentDebugType = Type;
+ *CurrentDebugType = Type;
}
/// dbgs - Return a circular-buffered debug stream.
diff --git a/lib/Support/Disassembler.cpp b/lib/Support/Disassembler.cpp
deleted file mode 100644
index 27df3a9..0000000
--- a/lib/Support/Disassembler.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-//===- lib/Support/Disassembler.cpp -----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the necessary glue to call external disassembler
-// libraries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Disassembler.h"
-#include "llvm/Config/config.h"
-#include <cassert>
-#include <iomanip>
-#include <sstream>
-#include <string>
-
-#if USE_UDIS86
-#include <udis86.h>
-#endif
-
-using namespace llvm;
-
-bool llvm::sys::hasDisassembler()
-{
-#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
- // We have option to enable udis86 library.
-# if USE_UDIS86
- return true;
-#else
- return false;
-#endif
-#else
- return false;
-#endif
-}
-
-std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
- uint64_t pc) {
-#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \
- && USE_UDIS86
- std::stringstream res;
-
- unsigned bits;
-# if defined(__i386__)
- bits = 32;
-# else
- bits = 64;
-# endif
-
- ud_t ud_obj;
-
- ud_init(&ud_obj);
- ud_set_input_buffer(&ud_obj, start, length);
- ud_set_mode(&ud_obj, bits);
- ud_set_pc(&ud_obj, pc);
- ud_set_syntax(&ud_obj, UD_SYN_ATT);
-
- res << std::setbase(16)
- << std::setw(bits/4);
-
- while (ud_disassemble(&ud_obj)) {
- res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
- }
-
- return res.str();
-#else
- return "No disassembler available. See configure help for options.\n";
-#endif
-}
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index c9efa61..4b6337e 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -17,8 +17,6 @@
using namespace llvm;
using namespace dwarf;
-/// TagString - Return the string for the specified tag.
-///
const char *llvm::dwarf::TagString(unsigned Tag) {
switch (Tag) {
case DW_TAG_array_type: return "DW_TAG_array_type";
@@ -82,6 +80,7 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
case DW_TAG_hi_user: return "DW_TAG_hi_user";
case DW_TAG_auto_variable: return "DW_TAG_auto_variable";
case DW_TAG_arg_variable: return "DW_TAG_arg_variable";
+ case DW_TAG_expression: return "DW_TAG_expression";
case DW_TAG_rvalue_reference_type: return "DW_TAG_rvalue_reference_type";
case DW_TAG_template_alias: return "DW_TAG_template_alias";
case DW_TAG_coarray_type: return "DW_TAG_coarray_type";
@@ -103,8 +102,6 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
return nullptr;
}
-/// ChildrenString - Return the string for the specified children flag.
-///
const char *llvm::dwarf::ChildrenString(unsigned Children) {
switch (Children) {
case DW_CHILDREN_no: return "DW_CHILDREN_no";
@@ -113,8 +110,6 @@ const char *llvm::dwarf::ChildrenString(unsigned Children) {
return nullptr;
}
-/// AttributeString - Return the string for the specified attribute.
-///
const char *llvm::dwarf::AttributeString(unsigned Attribute) {
switch (Attribute) {
case DW_AT_sibling: return "DW_AT_sibling";
@@ -274,8 +269,6 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
return nullptr;
}
-/// FormEncodingString - Return the string for the specified form encoding.
-///
const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
switch (Encoding) {
case DW_FORM_addr: return "DW_FORM_addr";
@@ -311,8 +304,6 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
return nullptr;
}
-/// OperationEncodingString - Return the string for the specified operation
-/// encoding.
const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
switch (Encoding) {
case DW_OP_addr: return "DW_OP_addr";
@@ -480,8 +471,6 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
return nullptr;
}
-/// AttributeEncodingString - Return the string for the specified attribute
-/// encoding.
const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
switch (Encoding) {
case DW_ATE_address: return "DW_ATE_address";
@@ -506,8 +495,6 @@ const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
return nullptr;
}
-/// DecimalSignString - Return the string for the specified decimal sign
-/// attribute.
const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
switch (Sign) {
case DW_DS_unsigned: return "DW_DS_unsigned";
@@ -519,8 +506,6 @@ const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
return nullptr;
}
-/// EndianityString - Return the string for the specified endianity.
-///
const char *llvm::dwarf::EndianityString(unsigned Endian) {
switch (Endian) {
case DW_END_default: return "DW_END_default";
@@ -532,8 +517,6 @@ const char *llvm::dwarf::EndianityString(unsigned Endian) {
return nullptr;
}
-/// AccessibilityString - Return the string for the specified accessibility.
-///
const char *llvm::dwarf::AccessibilityString(unsigned Access) {
switch (Access) {
// Accessibility codes
@@ -544,8 +527,6 @@ const char *llvm::dwarf::AccessibilityString(unsigned Access) {
return nullptr;
}
-/// VisibilityString - Return the string for the specified visibility.
-///
const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
switch (Visibility) {
case DW_VIS_local: return "DW_VIS_local";
@@ -555,8 +536,6 @@ const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
return nullptr;
}
-/// VirtualityString - Return the string for the specified virtuality.
-///
const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
switch (Virtuality) {
case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none";
@@ -566,8 +545,6 @@ const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
return nullptr;
}
-/// LanguageString - Return the string for the specified language.
-///
const char *llvm::dwarf::LanguageString(unsigned Language) {
switch (Language) {
case DW_LANG_C89: return "DW_LANG_C89";
@@ -598,13 +575,12 @@ const char *llvm::dwarf::LanguageString(unsigned Language) {
case DW_LANG_C_plus_plus_11: return "DW_LANG_C_plus_plus_11";
case DW_LANG_OCaml: return "DW_LANG_OCaml";
case DW_LANG_lo_user: return "DW_LANG_lo_user";
+ case DW_LANG_Mips_Assembler: return "DW_LANG_Mips_Assembler";
case DW_LANG_hi_user: return "DW_LANG_hi_user";
}
return nullptr;
}
-/// CaseString - Return the string for the specified identifier case.
-///
const char *llvm::dwarf::CaseString(unsigned Case) {
switch (Case) {
case DW_ID_case_sensitive: return "DW_ID_case_sensitive";
@@ -615,8 +591,6 @@ const char *llvm::dwarf::CaseString(unsigned Case) {
return nullptr;
}
-/// ConventionString - Return the string for the specified calling convention.
-///
const char *llvm::dwarf::ConventionString(unsigned Convention) {
switch (Convention) {
case DW_CC_normal: return "DW_CC_normal";
@@ -628,8 +602,6 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) {
return nullptr;
}
-/// InlineCodeString - Return the string for the specified inline code.
-///
const char *llvm::dwarf::InlineCodeString(unsigned Code) {
switch (Code) {
case DW_INL_not_inlined: return "DW_INL_not_inlined";
@@ -640,8 +612,6 @@ const char *llvm::dwarf::InlineCodeString(unsigned Code) {
return nullptr;
}
-/// ArrayOrderString - Return the string for the specified array order.
-///
const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
switch (Order) {
case DW_ORD_row_major: return "DW_ORD_row_major";
@@ -650,8 +620,6 @@ const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
return nullptr;
}
-/// DiscriminantString - Return the string for the specified discriminant
-/// descriptor.
const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
switch (Discriminant) {
case DW_DSC_label: return "DW_DSC_label";
@@ -660,8 +628,6 @@ const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
return nullptr;
}
-/// LNStandardString - Return the string for the specified line number standard.
-///
const char *llvm::dwarf::LNStandardString(unsigned Standard) {
switch (Standard) {
case DW_LNS_copy: return "DW_LNS_copy";
@@ -680,8 +646,6 @@ const char *llvm::dwarf::LNStandardString(unsigned Standard) {
return nullptr;
}
-/// LNExtendedString - Return the string for the specified line number extended
-/// opcode encodings.
const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
switch (Encoding) {
// Line Number Extended Opcode Encodings
@@ -695,8 +659,6 @@ const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
return nullptr;
}
-/// MacinfoString - Return the string for the specified macinfo type encodings.
-///
const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
switch (Encoding) {
// Macinfo Type Encodings
@@ -709,8 +671,6 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
return nullptr;
}
-/// CallFrameString - Return the string for the specified call frame instruction
-/// encodings.
const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
switch (Encoding) {
case DW_CFA_nop: return "DW_CFA_nop";
@@ -748,6 +708,36 @@ const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
return nullptr;
}
+const char *llvm::dwarf::ApplePropertyString(unsigned Prop) {
+ switch (Prop) {
+ case DW_APPLE_PROPERTY_readonly:
+ return "DW_APPLE_PROPERTY_readonly";
+ case DW_APPLE_PROPERTY_getter:
+ return "DW_APPLE_PROPERTY_getter";
+ case DW_APPLE_PROPERTY_assign:
+ return "DW_APPLE_PROPERTY_assign";
+ case DW_APPLE_PROPERTY_readwrite:
+ return "DW_APPLE_PROPERTY_readwrite";
+ case DW_APPLE_PROPERTY_retain:
+ return "DW_APPLE_PROPERTY_retain";
+ case DW_APPLE_PROPERTY_copy:
+ return "DW_APPLE_PROPERTY_copy";
+ case DW_APPLE_PROPERTY_nonatomic:
+ return "DW_APPLE_PROPERTY_nonatomic";
+ case DW_APPLE_PROPERTY_setter:
+ return "DW_APPLE_PROPERTY_setter";
+ case DW_APPLE_PROPERTY_atomic:
+ return "DW_APPLE_PROPERTY_atomic";
+ case DW_APPLE_PROPERTY_weak:
+ return "DW_APPLE_PROPERTY_weak";
+ case DW_APPLE_PROPERTY_strong:
+ return "DW_APPLE_PROPERTY_strong";
+ case DW_APPLE_PROPERTY_unsafe_unretained:
+ return "DW_APPLE_PROPERTY_unsafe_unretained";
+ }
+ return nullptr;
+}
+
const char *llvm::dwarf::AtomTypeString(unsigned AT) {
switch (AT) {
case dwarf::DW_ATOM_null:
@@ -795,3 +785,34 @@ const char *llvm::dwarf::GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage
}
llvm_unreachable("Unknown GDBIndexEntryLinkage value");
}
+
+const char *llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) {
+ switch (Attr) {
+ case DW_AT_accessibility:
+ return AccessibilityString(Val);
+ case DW_AT_virtuality:
+ return VirtualityString(Val);
+ case DW_AT_language:
+ return LanguageString(Val);
+ case DW_AT_encoding:
+ return AttributeEncodingString(Val);
+ case DW_AT_decimal_sign:
+ return DecimalSignString(Val);
+ case DW_AT_endianity:
+ return EndianityString(Val);
+ case DW_AT_visibility:
+ return VisibilityString(Val);
+ case DW_AT_identifier_case:
+ return CaseString(Val);
+ case DW_AT_calling_convention:
+ return ConventionString(Val);
+ case DW_AT_inline:
+ return InlineCodeString(Val);
+ case DW_AT_ordering:
+ return ArrayOrderString(Val);
+ case DW_AT_discr_value:
+ return DiscriminantString(Val);
+ }
+
+ return nullptr;
+}
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index c36007f..8e65066 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/Threading.h"
@@ -41,18 +42,18 @@ using namespace llvm;
static fatal_error_handler_t ErrorHandler = nullptr;
static void *ErrorHandlerUserData = nullptr;
-static sys::Mutex ErrorHandlerMutex;
+static ManagedStatic<sys::Mutex> ErrorHandlerMutex;
void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
void *user_data) {
- llvm::MutexGuard Lock(ErrorHandlerMutex);
+ llvm::MutexGuard Lock(*ErrorHandlerMutex);
assert(!ErrorHandler && "Error handler already registered!\n");
ErrorHandler = handler;
ErrorHandlerUserData = user_data;
}
void llvm::remove_fatal_error_handler() {
- llvm::MutexGuard Lock(ErrorHandlerMutex);
+ llvm::MutexGuard Lock(*ErrorHandlerMutex);
ErrorHandler = nullptr;
ErrorHandlerUserData = nullptr;
}
@@ -75,7 +76,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
{
// Only acquire the mutex while reading the handler, so as not to invoke a
// user-supplied callback under a lock.
- llvm::MutexGuard Lock(ErrorHandlerMutex);
+ llvm::MutexGuard Lock(*ErrorHandlerMutex);
handler = ErrorHandler;
handlerData = ErrorHandlerUserData;
}
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index 2e740ca..c62655d 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -14,18 +14,16 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>
using llvm::sys::fs::mapped_file_region;
namespace llvm {
-FileOutputBuffer::FileOutputBuffer(mapped_file_region * R,
+FileOutputBuffer::FileOutputBuffer(std::unique_ptr<mapped_file_region> R,
StringRef Path, StringRef TmpPath)
- : Region(R)
- , FinalPath(Path)
- , TempPath(TmpPath) {
-}
+ : Region(std::move(R)), FinalPath(Path), TempPath(TmpPath) {}
FileOutputBuffer::~FileOutputBuffer() {
sys::fs::remove(Twine(TempPath));
@@ -73,21 +71,20 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size,
if (EC)
return EC;
- std::unique_ptr<mapped_file_region> MappedFile(new mapped_file_region(
- FD, true, mapped_file_region::readwrite, Size, 0, EC));
+ auto MappedFile = llvm::make_unique<mapped_file_region>(
+ FD, true, mapped_file_region::readwrite, Size, 0, EC);
if (EC)
return EC;
- Result.reset(new FileOutputBuffer(MappedFile.get(), FilePath, TempFilePath));
- if (Result)
- MappedFile.release();
+ Result.reset(
+ new FileOutputBuffer(std::move(MappedFile), FilePath, TempFilePath));
return std::error_code();
}
std::error_code FileOutputBuffer::commit(int64_t NewSmallerSize) {
// Unmap buffer, letting OS flush dirty pages to file on disk.
- Region.reset(nullptr);
+ Region.reset();
// If requested, resize file as part of commit.
if ( NewSmallerSize != -1 ) {
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 8a23491..5316f04 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -182,7 +182,7 @@ int llvm::DiffFilesWithTolerance(StringRef NameA,
*Error = EC.message();
return 2;
}
- std::unique_ptr<MemoryBuffer> F1 = std::move(F1OrErr.get());
+ MemoryBuffer &F1 = *F1OrErr.get();
ErrorOr<std::unique_ptr<MemoryBuffer>> F2OrErr = MemoryBuffer::getFile(NameB);
if (std::error_code EC = F2OrErr.getError()) {
@@ -190,17 +190,17 @@ int llvm::DiffFilesWithTolerance(StringRef NameA,
*Error = EC.message();
return 2;
}
- std::unique_ptr<MemoryBuffer> F2 = std::move(F2OrErr.get());
+ MemoryBuffer &F2 = *F2OrErr.get();
// Okay, now that we opened the files, scan them for the first difference.
- const char *File1Start = F1->getBufferStart();
- const char *File2Start = F2->getBufferStart();
- const char *File1End = F1->getBufferEnd();
- const char *File2End = F2->getBufferEnd();
+ const char *File1Start = F1.getBufferStart();
+ const char *File2Start = F2.getBufferStart();
+ const char *File1End = F1.getBufferEnd();
+ const char *File2End = F2.getBufferEnd();
const char *F1P = File1Start;
const char *F2P = File2Start;
- uint64_t A_size = F1->getBufferSize();
- uint64_t B_size = F2->getBufferSize();
+ uint64_t A_size = F1.getBufferSize();
+ uint64_t B_size = F2.getBufferSize();
// Are the buffers identical? Common case: Handle this efficiently.
if (A_size == B_size &&
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index e68ee43..054df52 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -105,9 +105,10 @@ struct GraphSession {
SmallVector<StringRef, 8> parts;
Names.split(parts, "|");
for (auto Name : parts) {
- ProgramPath = sys::FindProgramByName(Name);
- if (!ProgramPath.empty())
+ if (ErrorOr<std::string> P = sys::findProgramByName(Name)) {
+ ProgramPath = *P;
return true;
+ }
Log << " Tried '" << Name << "'\n";
}
return false;
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index e2dd6d5..8782e2e 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -759,13 +759,13 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
#endif
if (LLVMFeatureStr != "")
- Features.GetOrCreateValue(LLVMFeatureStr).setValue(true);
+ Features[LLVMFeatureStr] = true;
}
#if defined(__aarch64__)
// If we have all crypto bits we can add the feature
if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
- Features.GetOrCreateValue("crypto").setValue(true);
+ Features["crypto"] = true;
#endif
return true;
diff --git a/lib/Support/IncludeFile.cpp b/lib/Support/IncludeFile.cpp
deleted file mode 100644
index e67acb3..0000000
--- a/lib/Support/IncludeFile.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===- lib/Support/IncludeFile.cpp - Ensure Linking Of Implementation -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the IncludeFile constructor.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/IncludeFile.h"
-
-using namespace llvm;
-
-// This constructor is used to ensure linking of other modules. See the
-// llvm/Support/IncludeFile.h header for details.
-IncludeFile::IncludeFile(const void*) {}
diff --git a/lib/Support/LineIterator.cpp b/lib/Support/LineIterator.cpp
index 947a8fb..5baa1a3 100644
--- a/lib/Support/LineIterator.cpp
+++ b/lib/Support/LineIterator.cpp
@@ -12,16 +12,39 @@
using namespace llvm;
-line_iterator::line_iterator(const MemoryBuffer &Buffer, char CommentMarker)
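+/// Returns true if \p P points at a line terminator: a '\n', or the start
+/// of a "\r\n" sequence.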
+static bool isAtLineEnd(const char *P) {
+ if (*P == '\n')
+ return true;
+ if (*P == '\r' && *(P + 1) == '\n')
+ return true;
+ return false;
+}
+
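+/// If \p P points at a line terminator, advances \p P past it ('\n' or
+/// "\r\n") and returns true; otherwise returns false and leaves \p P
+/// unchanged.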
+static bool skipIfAtLineEnd(const char *&P) {
+ if (*P == '\n') {
+ ++P;
+ return true;
+ }
+ if (*P == '\r' && *(P + 1) == '\n') {
+ P += 2;
+ return true;
+ }
+ return false;
+}
+
+line_iterator::line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks,
+ char CommentMarker)
: Buffer(Buffer.getBufferSize() ? &Buffer : nullptr),
- CommentMarker(CommentMarker), LineNumber(1),
+ CommentMarker(CommentMarker), SkipBlanks(SkipBlanks), LineNumber(1),
CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : nullptr,
0) {
// Ensure that if we are constructed on a non-empty memory buffer that it is
// a null terminated buffer.
if (Buffer.getBufferSize()) {
assert(Buffer.getBufferEnd()[0] == '\0');
- advance();
+ // Make sure we don't skip a leading newline if we're keeping blanks
+ if (SkipBlanks || !isAtLineEnd(Buffer.getBufferStart()))
+ advance();
}
}
@@ -29,25 +52,27 @@ void line_iterator::advance() {
assert(Buffer && "Cannot advance past the end!");
const char *Pos = CurrentLine.end();
- assert(Pos == Buffer->getBufferStart() || *Pos == '\n' || *Pos == '\0');
+ assert(Pos == Buffer->getBufferStart() || isAtLineEnd(Pos) || *Pos == '\0');
- if (CommentMarker == '\0') {
+ if (skipIfAtLineEnd(Pos))
+ ++LineNumber;
+ if (!SkipBlanks && isAtLineEnd(Pos)) {
+ // Nothing to do for a blank line.
+ } else if (CommentMarker == '\0') {
// If we're not stripping comments, this is simpler.
- size_t Blanks = 0;
- while (Pos[Blanks] == '\n')
- ++Blanks;
- Pos += Blanks;
- LineNumber += Blanks;
+ while (skipIfAtLineEnd(Pos))
+ ++LineNumber;
} else {
// Skip comments and count line numbers, which is a bit more complex.
for (;;) {
+ if (isAtLineEnd(Pos) && !SkipBlanks)
+ break;
if (*Pos == CommentMarker)
do {
++Pos;
- } while (*Pos != '\0' && *Pos != '\n');
- if (*Pos != '\n')
+ } while (*Pos != '\0' && !isAtLineEnd(Pos));
+ if (!skipIfAtLineEnd(Pos))
break;
- ++Pos;
++LineNumber;
}
}
@@ -61,9 +86,9 @@ void line_iterator::advance() {
// Measure the line.
size_t Length = 0;
- do {
+ while (Pos[Length] != '\0' && !isAtLineEnd(&Pos[Length])) {
++Length;
- } while (Pos[Length] != '\0' && Pos[Length] != '\n');
+ }
CurrentLine = StringRef(Pos, Length);
}
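
[Editor's note] With this change line_iterator understands "\r\n" endings and can optionally report blank lines. A small usage sketch, assuming the new argument order shown above (buffer, SkipBlanks, CommentMarker); the buffer contents are made up:

    #include "llvm/Support/LineIterator.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      std::unique_ptr<MemoryBuffer> Buf =
          MemoryBuffer::getMemBuffer("a\r\n\n# comment\nb\n", "<test>");
      // Keep blank lines (SkipBlanks = false) but strip lines starting with '#'.
      for (line_iterator I(*Buf, /*SkipBlanks=*/false, '#'), E; I != E; ++I)
        outs() << I.line_number() << ": '" << *I << "'\n";
      return 0;
    }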
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 3f224e0..5b82c36 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -39,11 +39,11 @@ LockFileManager::readLockFile(StringRef LockFileName) {
sys::fs::remove(LockFileName);
return None;
}
- std::unique_ptr<MemoryBuffer> MB = std::move(MBOrErr.get());
+ MemoryBuffer &MB = *MBOrErr.get();
StringRef Hostname;
StringRef PIDStr;
- std::tie(Hostname, PIDStr) = getToken(MB->getBuffer(), " ");
+ std::tie(Hostname, PIDStr) = getToken(MB.getBuffer(), " ");
PIDStr = PIDStr.substr(PIDStr.find_first_not_of(" "));
int PID;
if (!PIDStr.getAsInteger(10, PID)) {
@@ -204,8 +204,8 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
// If the lock file is still expected to be there, check whether it still
// is.
if (!LockFileGone) {
- bool Exists;
- if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) {
+ if (sys::fs::access(LockFileName.c_str(), sys::fs::AccessMode::Exist) ==
+ errc::no_such_file_or_directory) {
LockFileGone = true;
LockFileJustDisappeared = true;
}
diff --git a/lib/Support/MD5.cpp b/lib/Support/MD5.cpp
index 514466c..ceab580 100644
--- a/lib/Support/MD5.cpp
+++ b/lib/Support/MD5.cpp
@@ -208,11 +208,11 @@ void MD5::update(ArrayRef<uint8_t> Data) {
memcpy(&buffer[used], Ptr, free);
Ptr = Ptr + free;
Size -= free;
- body(ArrayRef<uint8_t>(buffer, 64));
+ body(makeArrayRef(buffer, 64));
}
if (Size >= 64) {
- Ptr = body(ArrayRef<uint8_t>(Ptr, Size & ~(unsigned long) 0x3f));
+ Ptr = body(makeArrayRef(Ptr, Size & ~(unsigned long) 0x3f));
Size &= 0x3f;
}
@@ -229,7 +229,7 @@ void MD5::update(StringRef Str) {
/// \brief Finish the hash and place the resulting hash into \p result.
/// \param result is assumed to be a minimum of 16-bytes in size.
-void MD5::final(MD5Result &result) {
+void MD5::final(MD5Result &Result) {
unsigned long used, free;
used = lo & 0x3f;
@@ -240,7 +240,7 @@ void MD5::final(MD5Result &result) {
if (free < 8) {
memset(&buffer[used], 0, free);
- body(ArrayRef<uint8_t>(buffer, 64));
+ body(makeArrayRef(buffer, 64));
used = 0;
free = 64;
}
@@ -257,30 +257,30 @@ void MD5::final(MD5Result &result) {
buffer[62] = hi >> 16;
buffer[63] = hi >> 24;
- body(ArrayRef<uint8_t>(buffer, 64));
-
- result[0] = a;
- result[1] = a >> 8;
- result[2] = a >> 16;
- result[3] = a >> 24;
- result[4] = b;
- result[5] = b >> 8;
- result[6] = b >> 16;
- result[7] = b >> 24;
- result[8] = c;
- result[9] = c >> 8;
- result[10] = c >> 16;
- result[11] = c >> 24;
- result[12] = d;
- result[13] = d >> 8;
- result[14] = d >> 16;
- result[15] = d >> 24;
+ body(makeArrayRef(buffer, 64));
+
+ Result[0] = a;
+ Result[1] = a >> 8;
+ Result[2] = a >> 16;
+ Result[3] = a >> 24;
+ Result[4] = b;
+ Result[5] = b >> 8;
+ Result[6] = b >> 16;
+ Result[7] = b >> 24;
+ Result[8] = c;
+ Result[9] = c >> 8;
+ Result[10] = c >> 16;
+ Result[11] = c >> 24;
+ Result[12] = d;
+ Result[13] = d >> 8;
+ Result[14] = d >> 16;
+ Result[15] = d >> 24;
}
-void MD5::stringifyResult(MD5Result &result, SmallString<32> &Str) {
+void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
raw_svector_ostream Res(Str);
for (int i = 0; i < 16; ++i)
- Res << format("%.2x", result[i]);
+ Res << format("%.2x", Result[i]);
}
}
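
[Editor's note] The hunk above only renames the parameter and switches to makeArrayRef; the hashing interface itself is unchanged. For reference, a small sketch of how that interface is driven:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/MD5.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      MD5 Hash;
      Hash.update("hello ");
      Hash.update("world");
      MD5::MD5Result Result;
      Hash.final(Result);                   // fills the 16-byte digest
      SmallString<32> Digest;
      Hash.stringifyResult(Result, Digest); // lower-case hex string
      outs() << Digest << "\n";
      return 0;
    }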
diff --git a/lib/Support/MathExtras.cpp b/lib/Support/MathExtras.cpp
new file mode 100644
index 0000000..ba09245
--- /dev/null
+++ b/lib/Support/MathExtras.cpp
@@ -0,0 +1,32 @@
+//===-- MathExtras.cpp - Implement the MathExtras header --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MathExtras.h header
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MathExtras.h"
+
+#ifdef _MSC_VER
+#include <limits>
+#else
+#include <math.h>
+#endif
+
+namespace llvm {
+
+#if defined(_MSC_VER)
+ // Visual Studio defines the HUGE_VAL class of macros using purposeful
+ // constant arithmetic overflow, which it then warns on when encountered.
+ const float huge_valf = std::numeric_limits<float>::infinity();
+#else
+ const float huge_valf = HUGE_VALF;
+#endif
+
+}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 5f4b7da..7eb0752 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -64,14 +64,17 @@ static void CopyStringRef(char *Memory, StringRef Data) {
namespace {
struct NamedBufferAlloc {
- StringRef Name;
- NamedBufferAlloc(StringRef Name) : Name(Name) {}
+ const Twine &Name;
+ NamedBufferAlloc(const Twine &Name) : Name(Name) {}
};
}
void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
- char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
- CopyStringRef(Mem + N, Alloc.Name);
+ SmallString<256> NameBuf;
+ StringRef NameRef = Alloc.Name.toStringRef(NameBuf);
+
+ char *Mem = static_cast<char *>(operator new(N + NameRef.size() + 1));
+ CopyStringRef(Mem + N, NameRef);
return Mem;
}
@@ -94,71 +97,86 @@ public:
};
}
-/// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note
-/// that InputData must be a null terminated if RequiresNullTerminator is true!
-MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
- StringRef BufferName,
- bool RequiresNullTerminator) {
- return new (NamedBufferAlloc(BufferName))
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
+ uint64_t Offset, bool RequiresNullTerminator, bool IsVolatileSize);
+
+std::unique_ptr<MemoryBuffer>
+MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName,
+ bool RequiresNullTerminator) {
+ auto *Ret = new (NamedBufferAlloc(BufferName))
MemoryBufferMem(InputData, RequiresNullTerminator);
+ return std::unique_ptr<MemoryBuffer>(Ret);
+}
+
+std::unique_ptr<MemoryBuffer>
+MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) {
+ return std::unique_ptr<MemoryBuffer>(getMemBuffer(
+ Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator));
}
-/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
-/// copying the contents and taking ownership of it. This has no requirements
-/// on EndPtr[0].
-MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData,
- StringRef BufferName) {
- MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName);
- if (!Buf) return nullptr;
+std::unique_ptr<MemoryBuffer>
+MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) {
+ std::unique_ptr<MemoryBuffer> Buf =
+ getNewUninitMemBuffer(InputData.size(), BufferName);
+ if (!Buf)
+ return nullptr;
memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
InputData.size());
return Buf;
}
-/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
-/// that is not initialized. Note that the caller should initialize the
-/// memory allocated by this method. The memory is owned by the MemoryBuffer
-/// object.
-MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
- StringRef BufferName) {
+std::unique_ptr<MemoryBuffer>
+MemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) {
// Allocate space for the MemoryBuffer, the data and the name. It is important
// that MemoryBuffer and data are aligned so PointerIntPair works with them.
// TODO: Is 16-byte alignment enough? We copy small object files with large
// alignment expectations into this buffer.
+ SmallString<256> NameBuf;
+ StringRef NameRef = BufferName.toStringRef(NameBuf);
size_t AlignedStringLen =
- RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1, 16);
+ RoundUpToAlignment(sizeof(MemoryBufferMem) + NameRef.size() + 1, 16);
size_t RealLen = AlignedStringLen + Size + 1;
char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
- if (!Mem) return nullptr;
+ if (!Mem)
+ return nullptr;
// The name is stored after the class itself.
- CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName);
+ CopyStringRef(Mem + sizeof(MemoryBufferMem), NameRef);
// The buffer begins after the name and must be aligned.
char *Buf = Mem + AlignedStringLen;
Buf[Size] = 0; // Null terminate buffer.
- return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
+ auto *Ret = new (Mem) MemoryBufferMem(StringRef(Buf, Size), true);
+ return std::unique_ptr<MemoryBuffer>(Ret);
}
-/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
-/// is completely initialized to zeros. Note that the caller should
-/// initialize the memory allocated by this method. The memory is owned by
-/// the MemoryBuffer object.
-MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
- MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
- if (!SB) return nullptr;
+std::unique_ptr<MemoryBuffer>
+MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
+ std::unique_ptr<MemoryBuffer> SB = getNewUninitMemBuffer(Size, BufferName);
+ if (!SB)
+ return nullptr;
memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
return SB;
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getFileOrSTDIN(StringRef Filename, int64_t FileSize) {
- if (Filename == "-")
+MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize) {
+ SmallString<256> NameBuf;
+ StringRef NameRef = Filename.toStringRef(NameBuf);
+
+ if (NameRef == "-")
return getSTDIN();
return getFile(Filename, FileSize);
}
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
+ uint64_t Offset) {
+ return getFileAux(FilePath, -1, MapSize, Offset, false, false);
+}
+
//===----------------------------------------------------------------------===//
// MemoryBuffer::getFile implementation.
@@ -206,7 +224,7 @@ public:
}
static ErrorOr<std::unique_ptr<MemoryBuffer>>
-getMemoryBufferForStream(int FD, StringRef BufferName) {
+getMemoryBufferForStream(int FD, const Twine &BufferName) {
const ssize_t ChunkSize = 4096*4;
SmallString<ChunkSize> Buffer;
ssize_t ReadBytes;
@@ -221,40 +239,32 @@ getMemoryBufferForStream(int FD, StringRef BufferName) {
Buffer.set_size(Buffer.size() + ReadBytes);
} while (ReadBytes != 0);
- std::unique_ptr<MemoryBuffer> Ret(
- MemoryBuffer::getMemBufferCopy(Buffer, BufferName));
- return std::move(Ret);
+ return MemoryBuffer::getMemBufferCopy(Buffer, BufferName);
}
-static ErrorOr<std::unique_ptr<MemoryBuffer>>
-getFileAux(const char *Filename, int64_t FileSize, bool RequiresNullTerminator,
- bool IsVolatileSize);
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getFile(Twine Filename, int64_t FileSize,
+MemoryBuffer::getFile(const Twine &Filename, int64_t FileSize,
bool RequiresNullTerminator, bool IsVolatileSize) {
- // Ensure the path is null terminated.
- SmallString<256> PathBuf;
- StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf);
- return getFileAux(NullTerminatedName.data(), FileSize, RequiresNullTerminator,
- IsVolatileSize);
+ return getFileAux(Filename, FileSize, FileSize, 0,
+ RequiresNullTerminator, IsVolatileSize);
}
static ErrorOr<std::unique_ptr<MemoryBuffer>>
-getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize,
+getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
bool IsVolatileSize);
static ErrorOr<std::unique_ptr<MemoryBuffer>>
-getFileAux(const char *Filename, int64_t FileSize, bool RequiresNullTerminator,
- bool IsVolatileSize) {
+getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
+ uint64_t Offset, bool RequiresNullTerminator, bool IsVolatileSize) {
int FD;
std::error_code EC = sys::fs::openFileForRead(Filename, FD);
if (EC)
return EC;
ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
- getOpenFileImpl(FD, Filename, FileSize, FileSize, 0,
+ getOpenFileImpl(FD, Filename, FileSize, MapSize, Offset,
RequiresNullTerminator, IsVolatileSize);
close(FD);
return Ret;
@@ -305,11 +315,19 @@ static bool shouldUseMmap(int FD,
if ((FileSize & (PageSize -1)) == 0)
return false;
+#if defined(__CYGWIN__)
+ // Don't try to map files that are exactly a multiple of the physical page size
+ // if we need a null terminator.
+ // FIXME: We should reorganize again getPageSize() on Win32.
+ if ((FileSize & (4096 - 1)) == 0)
+ return false;
+#endif
+
return true;
}
static ErrorOr<std::unique_ptr<MemoryBuffer>>
-getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize,
+getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
bool IsVolatileSize) {
static int PageSize = sys::process::get_self()->page_size();
@@ -347,15 +365,15 @@ getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize,
return std::move(Result);
}
- MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
+ std::unique_ptr<MemoryBuffer> Buf =
+ MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
if (!Buf) {
// Failed to create a buffer. The only way it can fail is if
// new(std::nothrow) returns 0.
return make_error_code(errc::not_enough_memory);
}
- std::unique_ptr<MemoryBuffer> SB(Buf);
- char *BufPtr = const_cast<char*>(SB->getBufferStart());
+ char *BufPtr = const_cast<char *>(Buf->getBufferStart());
size_t BytesLeft = MapSize;
#ifndef HAVE_PREAD
@@ -383,21 +401,22 @@ getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize,
BufPtr += NumRead;
}
- return std::move(SB);
+ return std::move(Buf);
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getOpenFile(int FD, const char *Filename, uint64_t FileSize,
+MemoryBuffer::getOpenFile(int FD, const Twine &Filename, uint64_t FileSize,
bool RequiresNullTerminator, bool IsVolatileSize) {
return getOpenFileImpl(FD, Filename, FileSize, FileSize, 0,
RequiresNullTerminator, IsVolatileSize);
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getOpenFileSlice(int FD, const char *Filename, uint64_t MapSize,
- int64_t Offset, bool IsVolatileSize) {
+MemoryBuffer::getOpenFileSlice(int FD, const Twine &Filename, uint64_t MapSize,
+ int64_t Offset) {
+ assert(MapSize != uint64_t(-1));
return getOpenFileImpl(FD, Filename, -1, MapSize, Offset, false,
- IsVolatileSize);
+ /*IsVolatileSize*/ false);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
@@ -409,3 +428,9 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
return getMemoryBufferForStream(0, "<stdin>");
}
+
+MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
+ StringRef Data = getBuffer();
+ StringRef Identifier = getBufferIdentifier();
+ return MemoryBufferRef(Data, Identifier);
+}
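
[Editor's note] After this patch the MemoryBuffer factory functions express ownership in their return types: raw buffers come back as std::unique_ptr<MemoryBuffer> and file reads as ErrorOr<std::unique_ptr<MemoryBuffer>>, with Twine accepted for names and paths. A sketch of the resulting call sites (the path is hypothetical):

    #include "llvm/Support/ErrorOr.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      std::unique_ptr<MemoryBuffer> Copy =
          MemoryBuffer::getMemBufferCopy("some data", "<copy>");
      outs() << Copy->getBufferIdentifier() << ": " << Copy->getBuffer() << "\n";

      ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
          MemoryBuffer::getFileOrSTDIN("/no/such/file");
      if (std::error_code EC = FileOrErr.getError()) {
        errs() << "open failed: " << EC.message() << "\n";
        return 1;
      }
      outs() << (*FileOrErr)->getBufferSize() << " bytes read\n";
      return 0;
    }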
diff --git a/lib/Support/MemoryObject.cpp b/lib/Support/MemoryObject.cpp
index 02b5b50..d796acf 100644
--- a/lib/Support/MemoryObject.cpp
+++ b/lib/Support/MemoryObject.cpp
@@ -12,22 +12,3 @@ using namespace llvm;
MemoryObject::~MemoryObject() {
}
-
-int MemoryObject::readBytes(uint64_t address,
- uint64_t size,
- uint8_t* buf) const {
- uint64_t current = address;
- uint64_t limit = getBase() + getExtent();
-
- if (current + size > limit)
- return -1;
-
- while (current - address < size) {
- if (readByte(current, &buf[(current - address)]))
- return -1;
-
- current++;
- }
-
- return 0;
-}
diff --git a/lib/Support/Options.cpp b/lib/Support/Options.cpp
new file mode 100644
index 0000000..7125845
--- /dev/null
+++ b/lib/Support/Options.cpp
@@ -0,0 +1,33 @@
+//===- llvm/Support/Options.cpp - Debug options support ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper objects for defining debug options using the
+// new API built on cl::opt, but not requiring the use of static globals.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Options.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+OptionRegistry::~OptionRegistry() {
+ for (auto IT = Options.begin(); IT != Options.end(); ++IT)
+ delete IT->second;
+}
+
+void OptionRegistry::addOption(void *Key, cl::Option *O) {
+  assert(Options.find(Key) == Options.end() &&
+         "Argument with this key already registered");
+ Options.insert(std::make_pair(Key, O));
+}
+
+static ManagedStatic<OptionRegistry> OR;
+
+OptionRegistry &OptionRegistry::instance() { return *OR; }
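
[Editor's note] A heavily hedged sketch of driving the registry with just the two functions defined above (instance and addOption); the option name and key below are invented, and the real convenience entry points live in the templates in llvm/Support/Options.h rather than in this file:

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/Options.h"

    using namespace llvm;

    // addOption only needs a unique void* per option; a static tag works.
    static char MyOptionKey;

    void registerMyDebugOption() {
      // Heap-allocate the cl::opt instead of making it a static global; the
      // registry deletes it in its destructor (see ~OptionRegistry above).
      auto *Opt = new cl::opt<bool>("my-debug-flag",
                                    cl::desc("hypothetical example flag"),
                                    cl::init(false));
      OptionRegistry::instance().addOption(&MyOptionKey, Opt);
    }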
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index d5a0ec5..a7a9919 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
@@ -164,9 +165,6 @@ enum FSEntity {
FS_Name
};
-// Implemented in Unix/Path.inc and Windows/Path.inc.
-static std::error_code TempDir(SmallVectorImpl<char> &result);
-
static std::error_code createUniqueEntity(const Twine &Model, int &ResultFD,
SmallVectorImpl<char> &ResultPath,
bool MakeAbsolute, unsigned Mode,
@@ -178,8 +176,7 @@ static std::error_code createUniqueEntity(const Twine &Model, int &ResultFD,
// Make model absolute by prepending a temp directory if it's not already.
if (!sys::path::is_absolute(Twine(ModelStorage))) {
SmallString<128> TDir;
- if (std::error_code EC = TempDir(TDir))
- return EC;
+ sys::path::system_temp_directory(true, TDir);
sys::path::append(TDir, Twine(ModelStorage));
ModelStorage.swap(TDir);
}
@@ -214,13 +211,13 @@ retry_random_path:
}
case FS_Name: {
- bool Exists;
- std::error_code EC = sys::fs::exists(ResultPath.begin(), Exists);
+ std::error_code EC =
+ sys::fs::access(ResultPath.begin(), sys::fs::AccessMode::Exist);
+ if (EC == errc::no_such_file_or_directory)
+ return std::error_code();
if (EC)
return EC;
- if (Exists)
- goto retry_random_path;
- return std::error_code();
+ goto retry_random_path;
}
case FS_Dir: {
@@ -308,7 +305,30 @@ const_iterator &const_iterator::operator++() {
return *this;
}
-const_iterator &const_iterator::operator--() {
+bool const_iterator::operator==(const const_iterator &RHS) const {
+ return Path.begin() == RHS.Path.begin() && Position == RHS.Position;
+}
+
+ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
+ return Position - RHS.Position;
+}
+
+reverse_iterator rbegin(StringRef Path) {
+ reverse_iterator I;
+ I.Path = Path;
+ I.Position = Path.size();
+ return ++I;
+}
+
+reverse_iterator rend(StringRef Path) {
+ reverse_iterator I;
+ I.Path = Path;
+ I.Component = Path.substr(0, 0);
+ I.Position = 0;
+ return I;
+}
+
+reverse_iterator &reverse_iterator::operator++() {
// If we're at the end and the previous char was a '/', return '.' unless
// we are the root path.
size_t root_dir_pos = root_dir_start(Path);
@@ -335,20 +355,12 @@ const_iterator &const_iterator::operator--() {
return *this;
}
-bool const_iterator::operator==(const const_iterator &RHS) const {
- return Path.begin() == RHS.Path.begin() &&
+bool reverse_iterator::operator==(const reverse_iterator &RHS) const {
+ return Path.begin() == RHS.Path.begin() && Component == RHS.Component &&
Position == RHS.Position;
}
-bool const_iterator::operator!=(const const_iterator &RHS) const {
- return !(*this == RHS);
-}
-
-ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
- return Position - RHS.Position;
-}
-
-const StringRef root_path(StringRef path) {
+StringRef root_path(StringRef path) {
const_iterator b = begin(path),
pos = b,
e = end(path);
@@ -380,7 +392,7 @@ const StringRef root_path(StringRef path) {
return StringRef();
}
-const StringRef root_name(StringRef path) {
+StringRef root_name(StringRef path) {
const_iterator b = begin(path),
e = end(path);
if (b != e) {
@@ -402,7 +414,7 @@ const StringRef root_name(StringRef path) {
return StringRef();
}
-const StringRef root_directory(StringRef path) {
+StringRef root_directory(StringRef path) {
const_iterator b = begin(path),
pos = b,
e = end(path);
@@ -431,7 +443,7 @@ const StringRef root_directory(StringRef path) {
return StringRef();
}
-const StringRef relative_path(StringRef path) {
+StringRef relative_path(StringRef path) {
StringRef root = root_path(path);
return path.substr(root.size());
}
@@ -483,7 +495,7 @@ void append(SmallVectorImpl<char> &path,
path::append(path, *begin);
}
-const StringRef parent_path(StringRef path) {
+StringRef parent_path(StringRef path) {
size_t end_pos = parent_path_end(path);
if (end_pos == StringRef::npos)
return StringRef();
@@ -525,17 +537,27 @@ void native(const Twine &path, SmallVectorImpl<char> &result) {
native(result);
}
-void native(SmallVectorImpl<char> &path) {
+void native(SmallVectorImpl<char> &Path) {
#ifdef LLVM_ON_WIN32
- std::replace(path.begin(), path.end(), '/', '\\');
+ std::replace(Path.begin(), Path.end(), '/', '\\');
+#else
+ for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) {
+ if (*PI == '\\') {
+ auto PN = PI + 1;
+ if (PN < PE && *PN == '\\')
+ ++PI; // increment once, the for loop will move over the escaped slash
+ else
+ *PI = '/';
+ }
+ }
#endif
}
-const StringRef filename(StringRef path) {
- return *(--end(path));
+StringRef filename(StringRef path) {
+ return *rbegin(path);
}
-const StringRef stem(StringRef path) {
+StringRef stem(StringRef path) {
StringRef fname = filename(path);
size_t pos = fname.find_last_of('.');
if (pos == StringRef::npos)
@@ -548,7 +570,7 @@ const StringRef stem(StringRef path) {
return fname.substr(0, pos);
}
-const StringRef extension(StringRef path) {
+StringRef extension(StringRef path) {
StringRef fname = filename(path);
size_t pos = fname.find_last_of('.');
if (pos == StringRef::npos)
@@ -573,62 +595,10 @@ bool is_separator(char value) {
static const char preferred_separator_string[] = { preferred_separator, '\0' };
-const StringRef get_separator() {
+StringRef get_separator() {
return preferred_separator_string;
}
-void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
- result.clear();
-
-#if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
- // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
- // macros defined in <unistd.h> on darwin >= 9
- int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
- : _CS_DARWIN_USER_CACHE_DIR;
- size_t ConfLen = confstr(ConfName, nullptr, 0);
- if (ConfLen > 0) {
- do {
- result.resize(ConfLen);
- ConfLen = confstr(ConfName, result.data(), result.size());
- } while (ConfLen > 0 && ConfLen != result.size());
-
- if (ConfLen > 0) {
- assert(result.back() == 0);
- result.pop_back();
- return;
- }
-
- result.clear();
- }
-#endif
-
- // Check whether the temporary directory is specified by an environment
- // variable.
- const char *EnvironmentVariable;
-#ifdef LLVM_ON_WIN32
- EnvironmentVariable = "TEMP";
-#else
- EnvironmentVariable = "TMPDIR";
-#endif
- if (char *RequestedDir = getenv(EnvironmentVariable)) {
- result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
- return;
- }
-
- // Fall back to a system default.
- const char *DefaultResult;
-#ifdef LLVM_ON_WIN32
- (void)erasedOnReboot;
- DefaultResult = "C:\\TEMP";
-#else
- if (erasedOnReboot)
- DefaultResult = "/tmp";
- else
- DefaultResult = "/var/tmp";
-#endif
- result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
-}
-
bool has_root_name(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
@@ -932,10 +902,23 @@ file_magic identify_magic(StringRef Magic) {
return file_magic::unknown;
switch ((unsigned char)Magic[0]) {
case 0x00: {
- // COFF short import library file
+ // COFF bigobj or short import library file
if (Magic[1] == (char)0x00 && Magic[2] == (char)0xff &&
- Magic[3] == (char)0xff)
- return file_magic::coff_import_library;
+ Magic[3] == (char)0xff) {
+ size_t MinSize = offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
+ if (Magic.size() < MinSize)
+ return file_magic::coff_import_library;
+
+ int BigObjVersion = *reinterpret_cast<const support::ulittle16_t*>(
+ Magic.data() + offsetof(COFF::BigObjHeader, Version));
+ if (BigObjVersion < COFF::BigObjHeader::MinBigObjectVersion)
+ return file_magic::coff_import_library;
+
+ const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
+ if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) != 0)
+ return file_magic::coff_import_library;
+ return file_magic::coff_object;
+ }
// Windows resource file
const char Expected[] = { 0, 0, 0, 0, '\x20', 0, 0, 0, '\xff' };
if (Magic.size() >= sizeof(Expected) &&
@@ -975,6 +958,9 @@ file_magic identify_magic(StringRef Magic) {
case 3: return file_magic::elf_shared_object;
case 4: return file_magic::elf_core;
}
+ else
+ // It's still some type of ELF file.
+ return file_magic::elf;
}
break;
@@ -1016,7 +1002,7 @@ file_magic identify_magic(StringRef Magic) {
case 6: return file_magic::macho_dynamically_linked_shared_lib;
case 7: return file_magic::macho_dynamic_linker;
case 8: return file_magic::macho_bundle;
- case 9: return file_magic::macho_dynamic_linker;
+ case 9: return file_magic::macho_dynamically_linked_shared_lib_stub;
case 10: return file_magic::macho_dsym_companion;
}
break;
@@ -1037,12 +1023,13 @@ file_magic identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;
- case 0x4d: // Possible MS-DOS stub on Windows PE file
- if (Magic[1] == 0x5a) {
+ case 'M': // Possible MS-DOS stub on Windows PE file
+ if (Magic[1] == 'Z') {
uint32_t off =
*reinterpret_cast<const support::ulittle32_t*>(Magic.data() + 0x3c);
// PE/COFF file, either EXE or DLL.
- if (off < Magic.size() && memcmp(Magic.data() + off, "PE\0\0",4) == 0)
+ if (off < Magic.size() &&
+ memcmp(Magic.data()+off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0)
return file_magic::pecoff_executable;
}
break;
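
[Editor's note] The path decomposition helpers now return StringRef by value, and rbegin/rend give reverse iteration over components (filename() above is simply *rbegin(path)). A short sketch, using a made-up path:

    #include "llvm/Support/Path.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      StringRef P = "/usr/lib/libfoo.so.1";
      outs() << "parent:    " << sys::path::parent_path(P) << "\n"; // /usr/lib
      outs() << "filename:  " << sys::path::filename(P)    << "\n"; // libfoo.so.1
      outs() << "stem:      " << sys::path::stem(P)        << "\n"; // libfoo.so
      outs() << "extension: " << sys::path::extension(P)   << "\n"; // .1
      // Walk the components from the back using the new reverse iterators.
      for (auto I = sys::path::rbegin(P), E = sys::path::rend(P); I != E; ++I)
        outs() << "  component: " << *I << "\n";
      return 0;
    }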
diff --git a/lib/Support/ScaledNumber.cpp b/lib/Support/ScaledNumber.cpp
index 3fe027b..fc6d4e7 100644
--- a/lib/Support/ScaledNumber.cpp
+++ b/lib/Support/ScaledNumber.cpp
@@ -220,6 +220,9 @@ std::string ScaledNumberBase::toString(uint64_t D, int16_t E, int Width,
} else if (E > -64) {
Above0 = D >> -E;
Below0 = D << (64 + E);
+ } else if (E == -64) {
+ // Special case: shift by 64 bits is undefined behavior.
+ Below0 = D;
} else if (E > -120) {
Below0 = D >> (-E - 64);
Extra = D << (128 + E);
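
[Editor's note] The new E == -64 case exists because the following branch would compute D << (128 + E), i.e. a shift of a 64-bit operand by 64 bits, which is undefined behavior in C++; that width has to be handled explicitly. A generic illustration of the guard pattern (not the exact Above0/Below0 split used here):

    #include <cstdint>

    // Shifting a uint64_t by >= 64 bits is UB, so handle that width up front.
    uint64_t shiftRightLogical(uint64_t D, unsigned Bits) {
      if (Bits >= 64)
        return 0;      // writing "D >> Bits" here would be undefined behavior
      return D >> Bits;
    }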
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index a80e095..c87ee7d 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -34,18 +34,19 @@ void SmallPtrSetImplBase::shrink_and_clear() {
memset(CurArray, -1, CurArraySize*sizeof(void*));
}
-bool SmallPtrSetImplBase::insert_imp(const void * Ptr) {
+std::pair<const void *const *, bool>
+SmallPtrSetImplBase::insert_imp(const void *Ptr) {
if (isSmall()) {
// Check to see if it is already in the set.
for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
APtr != E; ++APtr)
if (*APtr == Ptr)
- return false;
-
+ return std::make_pair(APtr, false);
+
// Nope, there isn't. If we stay small, just 'pushback' now.
- if (NumElements < CurArraySize-1) {
+ if (NumElements < CurArraySize) {
SmallArray[NumElements++] = Ptr;
- return true;
+ return std::make_pair(SmallArray + (NumElements - 1), true);
}
// Otherwise, hit the big set case, which will call grow.
}
@@ -61,14 +62,15 @@ bool SmallPtrSetImplBase::insert_imp(const void * Ptr) {
// Okay, we know we have space. Find a hash bucket.
const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr));
- if (*Bucket == Ptr) return false; // Already inserted, good.
-
+ if (*Bucket == Ptr)
+ return std::make_pair(Bucket, false); // Already inserted, good.
+
// Otherwise, insert it!
if (*Bucket == getTombstoneMarker())
--NumTombstones;
*Bucket = Ptr;
++NumElements; // Track density.
- return true;
+ return std::make_pair(Bucket, true);
}
bool SmallPtrSetImplBase::erase_imp(const void * Ptr) {
@@ -200,13 +202,12 @@ SmallPtrSetImplBase::SmallPtrSetImplBase(const void **SmallStorage,
if (that.isSmall()) {
CurArray = SmallArray;
memcpy(CurArray, that.CurArray, sizeof(void *) * CurArraySize);
- return;
+ } else {
+ // Otherwise, we steal the large memory allocation and no copy is needed.
+ CurArray = that.CurArray;
+ that.CurArray = that.SmallArray;
}
- // Otherwise, we steal the large memory allocation and no copy is needed.
- CurArray = that.CurArray;
- that.CurArray = that.SmallArray;
-
// Make the "that" object small and empty.
that.CurArraySize = SmallSize;
assert(that.CurArray == that.SmallArray);
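
[Editor's note] insert_imp now reports both the slot and whether an insertion happened, which lets the public SmallPtrSet::insert expose the usual pair<iterator, bool> result. A small sketch of that caller-side view (assuming the matching header change):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      int A = 1, B = 2;
      SmallPtrSet<int *, 4> Set;
      auto Res = Set.insert(&A);
      outs() << "first insert of &A new?  " << (Res.second ? "yes" : "no") << "\n";
      Res = Set.insert(&A);
      outs() << "second insert of &A new? " << (Res.second ? "yes" : "no") << "\n";
      Set.insert(&B);
      outs() << "size: " << Set.size() << "\n"; // 2
      return 0;
    }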
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 003cb56..b50a66b 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -42,11 +42,6 @@ SourceMgr::~SourceMgr() {
// Delete the line # cache if allocated.
if (LineNoCacheTy *Cache = getCache(LineNoCache))
delete Cache;
-
- while (!Buffers.empty()) {
- delete Buffers.back().Buffer;
- Buffers.pop_back();
- }
}
unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
@@ -54,20 +49,20 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
std::string &IncludedFile) {
IncludedFile = Filename;
ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
- MemoryBuffer::getFile(IncludedFile.c_str());
+ MemoryBuffer::getFile(IncludedFile);
// If the file didn't exist directly, see if it's in an include path.
for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
++i) {
IncludedFile =
IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
- NewBufOrErr = MemoryBuffer::getFile(IncludedFile.c_str());
+ NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
}
if (!NewBufOrErr)
return 0;
- return AddNewSourceBuffer(NewBufOrErr.get().release(), IncludeLoc);
+ return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
}
unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp
index 21e43c5..785cc60 100644
--- a/lib/Support/SpecialCaseList.cpp
+++ b/lib/Support/SpecialCaseList.cpp
@@ -48,10 +48,10 @@ struct SpecialCaseList::Entry {
SpecialCaseList::SpecialCaseList() : Entries() {}
-SpecialCaseList *SpecialCaseList::create(
- const StringRef Path, std::string &Error) {
+std::unique_ptr<SpecialCaseList> SpecialCaseList::create(StringRef Path,
+ std::string &Error) {
if (Path.empty())
- return new SpecialCaseList();
+ return std::unique_ptr<SpecialCaseList>(new SpecialCaseList());
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFile(Path);
if (std::error_code EC = FileOrErr.getError()) {
@@ -61,17 +61,17 @@ SpecialCaseList *SpecialCaseList::create(
return create(FileOrErr.get().get(), Error);
}
-SpecialCaseList *SpecialCaseList::create(
- const MemoryBuffer *MB, std::string &Error) {
+std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
+ std::string &Error) {
std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
if (!SCL->parse(MB, Error))
return nullptr;
- return SCL.release();
+ return SCL;
}
-SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
+std::unique_ptr<SpecialCaseList> SpecialCaseList::createOrDie(StringRef Path) {
std::string Error;
- if (SpecialCaseList *SCL = create(Path, Error))
+ if (auto SCL = create(Path, Error))
return SCL;
report_fatal_error(Error);
}
@@ -103,18 +103,6 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
std::string Regexp = SplitRegexp.first;
StringRef Category = SplitRegexp.second;
- // Backwards compatibility.
- if (Prefix == "global-init") {
- Prefix = "global";
- Category = "init";
- } else if (Prefix == "global-init-type") {
- Prefix = "type";
- Category = "init";
- } else if (Prefix == "global-init-src") {
- Prefix = "src";
- Category = "init";
- }
-
// See if we can store Regexp in Strings.
if (Regex::isLiteralERE(Regexp)) {
Entries[Prefix][Category].Strings.insert(Regexp);
@@ -157,8 +145,8 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
SpecialCaseList::~SpecialCaseList() {}
-bool SpecialCaseList::inSection(const StringRef Section, const StringRef Query,
- const StringRef Category) const {
+bool SpecialCaseList::inSection(StringRef Section, StringRef Query,
+ StringRef Category) const {
StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
if (I == Entries.end()) return false;
StringMap<Entry>::const_iterator II = I->second.find(Category);
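
[Editor's note] create() now hands back std::unique_ptr<SpecialCaseList>, so callers no longer juggle raw pointers. A sketch of building a list from an in-memory buffer (the list line below follows the usual "prefix:pattern=category" form and is invented for illustration):

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/SpecialCaseList.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      std::unique_ptr<MemoryBuffer> MB =
          MemoryBuffer::getMemBufferCopy("src:foo.cc=init\n");
      std::string Error;
      std::unique_ptr<SpecialCaseList> SCL =
          SpecialCaseList::create(MB.get(), Error);
      if (!SCL) {
        errs() << "parse error: " << Error << "\n";
        return 1;
      }
      outs() << (SCL->inSection("src", "foo.cc", "init") ? "match" : "no match")
             << "\n";
      return 0;
    }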
diff --git a/lib/Support/StreamableMemoryObject.cpp b/lib/Support/StreamingMemoryObject.cpp
index 5cb0680..68beeef 100644
--- a/lib/Support/StreamableMemoryObject.cpp
+++ b/lib/Support/StreamingMemoryObject.cpp
@@ -1,4 +1,4 @@
-//===- StreamableMemoryObject.cpp - Streamable data interface -------------===//
+//===- StreamingMemoryObject.cpp - Streamable data interface -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/StreamableMemoryObject.h"
+#include "llvm/Support/StreamingMemoryObject.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
@@ -18,27 +18,22 @@ using namespace llvm;
namespace {
-class RawMemoryObject : public StreamableMemoryObject {
+class RawMemoryObject : public MemoryObject {
public:
RawMemoryObject(const unsigned char *Start, const unsigned char *End) :
FirstChar(Start), LastChar(End) {
assert(LastChar >= FirstChar && "Invalid start/end range");
}
- uint64_t getBase() const override { return 0; }
uint64_t getExtent() const override {
return LastChar - FirstChar;
}
- int readByte(uint64_t address, uint8_t* ptr) const override;
- int readBytes(uint64_t address, uint64_t size,
- uint8_t *buf) const override;
+ uint64_t readBytes(uint8_t *Buf, uint64_t Size,
+ uint64_t Address) const override;
const uint8_t *getPointer(uint64_t address, uint64_t size) const override;
bool isValidAddress(uint64_t address) const override {
return validAddress(address);
}
- bool isObjectEnd(uint64_t address) const override {
- return objectEnd(address);
- }
private:
const uint8_t* const FirstChar;
@@ -49,26 +44,25 @@ private:
bool validAddress(uint64_t address) const {
return static_cast<std::ptrdiff_t>(address) < LastChar - FirstChar;
}
- bool objectEnd(uint64_t address) const {
- return static_cast<std::ptrdiff_t>(address) == LastChar - FirstChar;
- }
RawMemoryObject(const RawMemoryObject&) LLVM_DELETED_FUNCTION;
void operator=(const RawMemoryObject&) LLVM_DELETED_FUNCTION;
};
-int RawMemoryObject::readByte(uint64_t address, uint8_t* ptr) const {
- if (!validAddress(address)) return -1;
- *ptr = *((uint8_t *)(uintptr_t)(address + FirstChar));
- return 0;
-}
+uint64_t RawMemoryObject::readBytes(uint8_t *Buf, uint64_t Size,
+ uint64_t Address) const {
+ uint64_t BufferSize = LastChar - FirstChar;
+ if (Address >= BufferSize)
+ return 0;
-int RawMemoryObject::readBytes(uint64_t address,
- uint64_t size,
- uint8_t *buf) const {
- if (!validAddress(address) || !validAddress(address + size - 1)) return -1;
- memcpy(buf, (uint8_t *)(uintptr_t)(address + FirstChar), size);
- return size;
+ uint64_t End = Address + Size;
+ if (End > BufferSize)
+ End = BufferSize;
+
+ assert(static_cast<int64_t>(End - Address) >= 0);
+ Size = End - Address;
+ memcpy(Buf, Address + FirstChar, Size);
+ return Size;
}
const uint8_t *RawMemoryObject::getPointer(uint64_t address,
@@ -85,12 +79,6 @@ bool StreamingMemoryObject::isValidAddress(uint64_t address) const {
return fetchToPos(address);
}
-bool StreamingMemoryObject::isObjectEnd(uint64_t address) const {
- if (ObjectSize) return address == ObjectSize;
- fetchToPos(address);
- return address == ObjectSize && address != 0;
-}
-
uint64_t StreamingMemoryObject::getExtent() const {
if (ObjectSize) return ObjectSize;
size_t pos = BytesRead + kChunkSize;
@@ -99,18 +87,19 @@ uint64_t StreamingMemoryObject::getExtent() const {
return ObjectSize;
}
-int StreamingMemoryObject::readByte(uint64_t address, uint8_t* ptr) const {
- if (!fetchToPos(address)) return -1;
- *ptr = Bytes[address + BytesSkipped];
- return 0;
-}
-
-int StreamingMemoryObject::readBytes(uint64_t address,
- uint64_t size,
- uint8_t *buf) const {
- if (!fetchToPos(address + size - 1)) return -1;
- memcpy(buf, &Bytes[address + BytesSkipped], size);
- return 0;
+uint64_t StreamingMemoryObject::readBytes(uint8_t *Buf, uint64_t Size,
+ uint64_t Address) const {
+ fetchToPos(Address + Size - 1);
+ if (Address >= BytesRead)
+ return 0;
+
+ uint64_t End = Address + Size;
+ if (End > BytesRead)
+ End = BytesRead;
+ assert(static_cast<int64_t>(End - Address) >= 0);
+ Size = End - Address;
+ memcpy(Buf, &Bytes[Address + BytesSkipped], Size);
+ return Size;
}
bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
@@ -125,13 +114,11 @@ void StreamingMemoryObject::setKnownObjectSize(size_t size) {
Bytes.reserve(size);
}
-StreamableMemoryObject *getNonStreamedMemoryObject(
- const unsigned char *Start, const unsigned char *End) {
+MemoryObject *getNonStreamedMemoryObject(const unsigned char *Start,
+ const unsigned char *End) {
return new RawMemoryObject(Start, End);
}
-StreamableMemoryObject::~StreamableMemoryObject() { }
-
StreamingMemoryObject::StreamingMemoryObject(DataStreamer *streamer) :
Bytes(kChunkSize), Streamer(streamer), BytesRead(0), BytesSkipped(0),
ObjectSize(0), EOFReached(false) {
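
[Editor's note] readBytes now clamps to the available range and returns the number of bytes actually copied instead of a 0/-1 error flag. A sketch against the non-streamed variant, assuming getNonStreamedMemoryObject is declared in the renamed StreamingMemoryObject.h:

    #include "llvm/Support/MemoryObject.h"
    #include "llvm/Support/StreamingMemoryObject.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>

    using namespace llvm;

    int main() {
      static const unsigned char Data[] = {'a', 'b', 'c', 'd'};
      std::unique_ptr<MemoryObject> MO(
          getNonStreamedMemoryObject(Data, Data + sizeof(Data)));

      uint8_t Buf[8];
      // Ask for 8 bytes starting at offset 2; only 2 remain, so 2 are copied.
      uint64_t N = MO->readBytes(Buf, sizeof(Buf), /*Address=*/2);
      outs() << "copied " << N << " bytes\n";
      return 0;
    }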
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index cde8258..ddece08 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -50,7 +50,7 @@ static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
/// compare_lower - Compare strings, ignoring case.
int StringRef::compare_lower(StringRef RHS) const {
- if (int Res = ascii_strncasecmp(Data, RHS.Data, min(Length, RHS.Length)))
+ if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
return Res;
if (Length == RHS.Length)
return 0;
@@ -71,7 +71,7 @@ bool StringRef::endswith_lower(StringRef Suffix) const {
/// compare_numeric - Compare strings, handle embedded numbers.
int StringRef::compare_numeric(StringRef RHS) const {
- for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+ for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
// Check for sequences of digits.
if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
// The longer sequence of numbers is considered larger.
@@ -105,8 +105,8 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
bool AllowReplacements,
unsigned MaxEditDistance) const {
return llvm::ComputeEditDistance(
- llvm::ArrayRef<char>(data(), size()),
- llvm::ArrayRef<char>(Other.data(), Other.size()),
+ makeArrayRef(data(), size()),
+ makeArrayRef(Other.data(), Other.size()),
AllowReplacements, MaxEditDistance);
}
@@ -146,7 +146,7 @@ size_t StringRef::find(StringRef Str, size_t From) const {
// For short haystacks or unsupported needles fall back to the naive algorithm
if (Length < 16 || N > 255 || N == 0) {
- for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
+ for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
if (substr(i, N).equals(Str))
return i;
return npos;
@@ -201,7 +201,7 @@ StringRef::size_type StringRef::find_first_of(StringRef Chars,
for (size_type i = 0; i != Chars.size(); ++i)
CharBits.set((unsigned char)Chars[i]);
- for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (CharBits.test((unsigned char)Data[i]))
return i;
return npos;
@@ -210,7 +210,7 @@ StringRef::size_type StringRef::find_first_of(StringRef Chars,
/// find_first_not_of - Find the first character in the string that is not
/// \arg C or npos if not found.
StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
- for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (Data[i] != C)
return i;
return npos;
@@ -226,7 +226,7 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
for (size_type i = 0; i != Chars.size(); ++i)
CharBits.set((unsigned char)Chars[i]);
- for (size_type i = min(From, Length), e = Length; i != e; ++i)
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (!CharBits.test((unsigned char)Data[i]))
return i;
return npos;
@@ -242,7 +242,7 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars,
for (size_type i = 0; i != Chars.size(); ++i)
CharBits.set((unsigned char)Chars[i]);
- for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
if (CharBits.test((unsigned char)Data[i]))
return i;
return npos;
@@ -251,7 +251,7 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars,
/// find_last_not_of - Find the last character in the string that is not
/// \arg C, or npos if not found.
StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
- for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
if (Data[i] != C)
return i;
return npos;
@@ -267,7 +267,7 @@ StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
for (size_type i = 0, e = Chars.size(); i != e; ++i)
CharBits.set((unsigned char)Chars[i]);
- for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
if (!CharBits.test((unsigned char)Data[i]))
return i;
return npos;
diff --git a/lib/Support/StringRefMemoryObject.cpp b/lib/Support/StringRefMemoryObject.cpp
deleted file mode 100644
index e035ed1..0000000
--- a/lib/Support/StringRefMemoryObject.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-//===- lib/Support/StringRefMemoryObject.cpp --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/StringRefMemoryObject.h"
-
-using namespace llvm;
-
-int StringRefMemoryObject::readByte(uint64_t Addr, uint8_t *Byte) const {
- if (Addr >= Base + getExtent() || Addr < Base)
- return -1;
- *Byte = Bytes[Addr - Base];
- return 0;
-}
-
-int StringRefMemoryObject::readBytes(uint64_t Addr,
- uint64_t Size,
- uint8_t *Buf) const {
- uint64_t Offset = Addr - Base;
- if (Addr >= Base + getExtent() || Offset + Size > getExtent() || Addr < Base)
- return -1;
- memcpy(Buf, Bytes.data() + Offset, Size);
- return 0;
-}
diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp
index 4a70797..136b93e 100644
--- a/lib/Support/TimeValue.cpp
+++ b/lib/Support/TimeValue.cpp
@@ -22,12 +22,6 @@ const TimeValue::SecondsType
const TimeValue::SecondsType
TimeValue::Win32ZeroTimeSeconds = -12591158400ULL;
-const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 );
-const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 );
-const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 );
-const TimeValue TimeValue::PosixZeroTime = TimeValue ( PosixZeroTimeSeconds,0 );
-const TimeValue TimeValue::Win32ZeroTime = TimeValue ( Win32ZeroTimeSeconds,0 );
-
void
TimeValue::normalize( void ) {
if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 210bda7..e1a531a 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -66,10 +66,10 @@ raw_ostream *llvm::CreateInfoOutputFile() {
// each time -stats or -time-passes wants to print output to it. To
// compensate for this, the test-suite Makefiles have code to delete the
// info output file before running commands which write to it.
- std::string Error;
- raw_ostream *Result = new raw_fd_ostream(
- OutputFilename.c_str(), Error, sys::fs::F_Append | sys::fs::F_Text);
- if (Error.empty())
+ std::error_code EC;
+ raw_ostream *Result = new raw_fd_ostream(OutputFilename, EC,
+ sys::fs::F_Append | sys::fs::F_Text);
+ if (!EC)
return Result;
errs() << "Error opening info-output-file '"
diff --git a/lib/Support/ToolOutputFile.cpp b/lib/Support/ToolOutputFile.cpp
index b5fb20f..8ae977d 100644
--- a/lib/Support/ToolOutputFile.cpp
+++ b/lib/Support/ToolOutputFile.cpp
@@ -16,8 +16,8 @@
#include "llvm/Support/Signals.h"
using namespace llvm;
-tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
- : Filename(filename), Keep(false) {
+tool_output_file::CleanupInstaller::CleanupInstaller(StringRef Filename)
+ : Filename(Filename), Keep(false) {
// Arrange for the file to be deleted if the process is killed.
if (Filename != "-")
sys::RemoveFileOnSignal(Filename);
@@ -34,14 +34,13 @@ tool_output_file::CleanupInstaller::~CleanupInstaller() {
sys::DontRemoveFileOnSignal(Filename);
}
-tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+tool_output_file::tool_output_file(StringRef Filename, std::error_code &EC,
sys::fs::OpenFlags Flags)
- : Installer(filename), OS(filename, ErrorInfo, Flags) {
+ : Installer(Filename), OS(Filename, EC, Flags) {
// If open fails, no cleanup is needed.
- if (!ErrorInfo.empty())
+ if (EC)
Installer.Keep = true;
}
-tool_output_file::tool_output_file(const char *Filename, int FD)
- : Installer(Filename), OS(FD, true) {
-}
+tool_output_file::tool_output_file(StringRef Filename, int FD)
+ : Installer(Filename), OS(FD, true) {}
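
[Editor's note] tool_output_file now takes StringRef and reports failures through std::error_code, matching the raw_fd_ostream change in Timer.cpp above. A sketch of a typical caller (the output file name is arbitrary):

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/ToolOutputFile.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      std::error_code EC;
      tool_output_file Out("output.txt", EC, sys::fs::F_Text);
      if (EC) {
        errs() << "could not open output.txt: " << EC.message() << "\n";
        return 1;
      }
      Out.os() << "hello\n";
      Out.keep(); // without this the file is removed when Out is destroyed
      return 0;
    }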
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index b74ee13..4a4773e 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -23,8 +23,6 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case aarch64_be: return "aarch64_be";
case arm: return "arm";
case armeb: return "armeb";
- case arm64: return "arm64";
- case arm64_be: return "arm64_be";
case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
@@ -47,7 +45,11 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case nvptx: return "nvptx";
case nvptx64: return "nvptx64";
case le32: return "le32";
+ case le64: return "le64";
case amdil: return "amdil";
+ case amdil64: return "amdil64";
+ case hsail: return "hsail";
+ case hsail64: return "hsail64";
case spir: return "spir";
case spir64: return "spir64";
case kalimba: return "kalimba";
@@ -69,9 +71,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case thumb:
case thumbeb: return "arm";
- case arm64:
- case arm64_be: return "arm64";
-
case ppc64:
case ppc64le:
case ppc: return "ppc";
@@ -99,8 +98,15 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case nvptx64: return "nvptx";
case le32: return "le32";
- case amdil: return "amdil";
- case spir: return "spir";
+ case le64: return "le64";
+
+ case amdil:
+ case amdil64: return "amdil";
+
+ case hsail:
+ case hsail64: return "hsail";
+
+ case spir:
case spir64: return "spir";
case kalimba: return "kalimba";
}
@@ -118,6 +124,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
case Freescale: return "fsl";
case IBM: return "ibm";
case ImaginationTechnologies: return "img";
+ case MipsTechnologies: return "mti";
case NVIDIA: return "nvidia";
case CSR: return "csr";
}
@@ -129,8 +136,6 @@ const char *Triple::getOSTypeName(OSType Kind) {
switch (Kind) {
case UnknownOS: return "unknown";
- case AuroraUX: return "auroraux";
- case Cygwin: return "cygwin";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
@@ -139,7 +144,6 @@ const char *Triple::getOSTypeName(OSType Kind) {
case Linux: return "linux";
case Lv2: return "lv2";
case MacOSX: return "macosx";
- case MinGW32: return "mingw32";
case NetBSD: return "netbsd";
case OpenBSD: return "openbsd";
case Solaris: return "solaris";
@@ -181,10 +185,9 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return StringSwitch<Triple::ArchType>(Name)
.Case("aarch64", aarch64)
.Case("aarch64_be", aarch64_be)
+ .Case("arm64", aarch64) // "arm64" is an alias for "aarch64"
.Case("arm", arm)
.Case("armeb", armeb)
- .Case("arm64", arm64)
- .Case("arm64_be", arm64_be)
.Case("mips", mips)
.Case("mipsel", mipsel)
.Case("mips64", mips64)
@@ -208,40 +211,59 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("nvptx", nvptx)
.Case("nvptx64", nvptx64)
.Case("le32", le32)
+ .Case("le64", le64)
.Case("amdil", amdil)
+ .Case("amdil64", amdil64)
+ .Case("hsail", hsail)
+ .Case("hsail64", hsail64)
.Case("spir", spir)
.Case("spir64", spir64)
.Case("kalimba", kalimba)
.Default(UnknownArch);
}
-// Returns architecture name that is understood by the target assembler.
-const char *Triple::getArchNameForAssembler() {
- if (!isOSDarwin() && getVendor() != Triple::Apple)
- return nullptr;
-
- return StringSwitch<const char*>(getArchName())
- .Case("i386", "i386")
- .Case("x86_64", "x86_64")
- .Case("powerpc", "ppc")
- .Case("powerpc64", "ppc64")
- .Case("powerpc64le", "ppc64le")
- .Case("arm", "arm")
- .Cases("armv4t", "thumbv4t", "armv4t")
- .Cases("armv5", "armv5e", "thumbv5", "thumbv5e", "armv5")
- .Cases("armv6", "thumbv6", "armv6")
- .Cases("armv7", "thumbv7", "armv7")
- .Case("armeb", "armeb")
- .Case("arm64", "arm64")
- .Case("arm64_be", "arm64")
- .Case("r600", "r600")
- .Case("nvptx", "nvptx")
- .Case("nvptx64", "nvptx64")
- .Case("le32", "le32")
- .Case("amdil", "amdil")
- .Case("spir", "spir")
- .Case("spir64", "spir64")
- .Default(nullptr);
+static Triple::ArchType parseARMArch(StringRef ArchName) {
+ size_t offset = StringRef::npos;
+ Triple::ArchType arch = Triple::UnknownArch;
+ bool isThumb = ArchName.startswith("thumb");
+
+ if (ArchName.equals("arm"))
+ return Triple::arm;
+ if (ArchName.equals("armeb"))
+ return Triple::armeb;
+ if (ArchName.equals("thumb"))
+ return Triple::thumb;
+ if (ArchName.equals("thumbeb"))
+ return Triple::thumbeb;
+ if (ArchName.equals("arm64") || ArchName.equals("aarch64"))
+ return Triple::aarch64;
+ if (ArchName.equals("aarch64_be"))
+ return Triple::aarch64_be;
+
+ if (ArchName.startswith("armv")) {
+ offset = 3;
+ arch = Triple::arm;
+ } else if (ArchName.startswith("armebv")) {
+ offset = 5;
+ arch = Triple::armeb;
+ } else if (ArchName.startswith("thumbv")) {
+ offset = 5;
+ arch = Triple::thumb;
+ } else if (ArchName.startswith("thumbebv")) {
+ offset = 7;
+ arch = Triple::thumbeb;
+ }
+ return StringSwitch<Triple::ArchType>(ArchName.substr(offset))
+ .Cases("v2", "v2a", isThumb ? Triple::UnknownArch : arch)
+ .Cases("v3", "v3m", isThumb ? Triple::UnknownArch : arch)
+ .Cases("v4", "v4t", arch)
+ .Cases("v5", "v5e", "v5t", "v5te", "v5tej", arch)
+ .Cases("v6", "v6j", "v6k", "v6m", arch)
+ .Cases("v6t2", "v6z", "v6zk", arch)
+ .Cases("v7", "v7a", "v7em", "v7l", arch)
+ .Cases("v7m", "v7r", "v7s", arch)
+ .Cases("v8", "v8a", arch)
+ .Default(Triple::UnknownArch);
}
static Triple::ArchType parseArch(StringRef ArchName) {
@@ -253,20 +275,10 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("powerpc", Triple::ppc)
.Cases("powerpc64", "ppu", Triple::ppc64)
.Case("powerpc64le", Triple::ppc64le)
- .Case("aarch64", Triple::aarch64)
- .Case("aarch64_be", Triple::aarch64_be)
- .Cases("arm", "xscale", Triple::arm)
- // FIXME: It would be good to replace these with explicit names for all the
- // various suffixes supported.
- .StartsWith("armv", Triple::arm)
- .Case("armeb", Triple::armeb)
- .StartsWith("armebv", Triple::armeb)
- .Case("thumb", Triple::thumb)
- .StartsWith("thumbv", Triple::thumb)
- .Case("thumbeb", Triple::thumbeb)
- .StartsWith("thumbebv", Triple::thumbeb)
- .Case("arm64", Triple::arm64)
- .Case("arm64_be", Triple::arm64_be)
+ .Case("xscale", Triple::arm)
+ .StartsWith("arm", parseARMArch(ArchName))
+ .StartsWith("thumb", parseARMArch(ArchName))
+ .StartsWith("aarch64", parseARMArch(ArchName))
.Case("msp430", Triple::msp430)
.Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -282,10 +294,14 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("nvptx", Triple::nvptx)
.Case("nvptx64", Triple::nvptx64)
.Case("le32", Triple::le32)
+ .Case("le64", Triple::le64)
.Case("amdil", Triple::amdil)
+ .Case("amdil64", Triple::amdil64)
+ .Case("hsail", Triple::hsail)
+ .Case("hsail64", Triple::hsail64)
.Case("spir", Triple::spir)
.Case("spir64", Triple::spir64)
- .Case("kalimba", Triple::kalimba)
+ .StartsWith("kalimba", Triple::kalimba)
.Default(Triple::UnknownArch);
}
@@ -299,6 +315,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("fsl", Triple::Freescale)
.Case("ibm", Triple::IBM)
.Case("img", Triple::ImaginationTechnologies)
+ .Case("mti", Triple::MipsTechnologies)
.Case("nvidia", Triple::NVIDIA)
.Case("csr", Triple::CSR)
.Default(Triple::UnknownVendor);
@@ -306,8 +323,6 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
static Triple::OSType parseOS(StringRef OSName) {
return StringSwitch<Triple::OSType>(OSName)
- .StartsWith("auroraux", Triple::AuroraUX)
- .StartsWith("cygwin", Triple::Cygwin)
.StartsWith("darwin", Triple::Darwin)
.StartsWith("dragonfly", Triple::DragonFly)
.StartsWith("freebsd", Triple::FreeBSD)
@@ -316,7 +331,6 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("linux", Triple::Linux)
.StartsWith("lv2", Triple::Lv2)
.StartsWith("macosx", Triple::MacOSX)
- .StartsWith("mingw32", Triple::MinGW32)
.StartsWith("netbsd", Triple::NetBSD)
.StartsWith("openbsd", Triple::OpenBSD)
.StartsWith("solaris", Triple::Solaris)
@@ -358,6 +372,31 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
.Default(Triple::UnknownObjectFormat);
}
+static Triple::SubArchType parseSubArch(StringRef SubArchName) {
+ return StringSwitch<Triple::SubArchType>(SubArchName)
+ .EndsWith("v8", Triple::ARMSubArch_v8)
+ .EndsWith("v8a", Triple::ARMSubArch_v8)
+ .EndsWith("v7", Triple::ARMSubArch_v7)
+ .EndsWith("v7a", Triple::ARMSubArch_v7)
+ .EndsWith("v7em", Triple::ARMSubArch_v7em)
+ .EndsWith("v7l", Triple::ARMSubArch_v7)
+ .EndsWith("v7m", Triple::ARMSubArch_v7m)
+ .EndsWith("v7r", Triple::ARMSubArch_v7)
+ .EndsWith("v7s", Triple::ARMSubArch_v7s)
+ .EndsWith("v6", Triple::ARMSubArch_v6)
+ .EndsWith("v6m", Triple::ARMSubArch_v6m)
+ .EndsWith("v6t2", Triple::ARMSubArch_v6t2)
+ .EndsWith("v5", Triple::ARMSubArch_v5)
+ .EndsWith("v5e", Triple::ARMSubArch_v5)
+ .EndsWith("v5t", Triple::ARMSubArch_v5)
+ .EndsWith("v5te", Triple::ARMSubArch_v5te)
+ .EndsWith("v4t", Triple::ARMSubArch_v4t)
+ .EndsWith("kalimba3", Triple::KalimbaSubArch_v3)
+ .EndsWith("kalimba4", Triple::KalimbaSubArch_v4)
+ .EndsWith("kalimba5", Triple::KalimbaSubArch_v5)
+ .Default(Triple::NoSubArch);
+}
+
static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
switch (Kind) {
case Triple::UnknownObjectFormat: return "";
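
[Editor's note] With parseARMArch and parseSubArch in place, "arm64" folds into the unified aarch64 architecture and the ARM sub-architecture is recovered from the arch component. A sketch, assuming the usual Triple accessors (getArch, getSubArch):

    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      Triple T1("arm64-apple-ios");
      outs() << "arm64 is aarch64: "
             << (T1.getArch() == Triple::aarch64 ? "yes" : "no") << "\n";

      Triple T2("thumbv7em-none-eabi");
      outs() << "sub-arch is v7em: "
             << (T2.getSubArch() == Triple::ARMSubArch_v7em ? "yes" : "no") << "\n";
      return 0;
    }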
@@ -383,6 +422,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
Triple::Triple(const Twine &Str)
: Data(Str.str()),
Arch(parseArch(getArchName())),
+ SubArch(parseSubArch(getArchName())),
Vendor(parseVendor(getVendorName())),
OS(parseOS(getOSName())),
Environment(parseEnvironment(getEnvironmentName())),
@@ -400,6 +440,7 @@ Triple::Triple(const Twine &Str)
Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr)
: Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr).str()),
Arch(parseArch(ArchStr.str())),
+ SubArch(parseSubArch(ArchStr.str())),
Vendor(parseVendor(VendorStr.str())),
OS(parseOS(OSStr.str())),
Environment(), ObjectFormat(Triple::UnknownObjectFormat) {
@@ -416,6 +457,7 @@ Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr,
: Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr + Twine('-') +
EnvironmentStr).str()),
Arch(parseArch(ArchStr.str())),
+ SubArch(parseSubArch(ArchStr.str())),
Vendor(parseVendor(VendorStr.str())),
OS(parseOS(OSStr.str())),
Environment(parseEnvironment(EnvironmentStr.str())),
@@ -425,6 +467,9 @@ Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr,
}
std::string Triple::normalize(StringRef Str) {
+ bool IsMinGW32 = false;
+ bool IsCygwin = false;
+
// Parse into components.
SmallVector<StringRef, 4> Components;
Str.split(Components, "-");
@@ -441,8 +486,11 @@ std::string Triple::normalize(StringRef Str) {
if (Components.size() > 1)
Vendor = parseVendor(Components[1]);
OSType OS = UnknownOS;
- if (Components.size() > 2)
+ if (Components.size() > 2) {
OS = parseOS(Components[2]);
+ IsCygwin = Components[2].startswith("cygwin");
+ IsMinGW32 = Components[2].startswith("mingw");
+ }
EnvironmentType Environment = UnknownEnvironment;
if (Components.size() > 3)
Environment = parseEnvironment(Components[3]);
@@ -485,7 +533,9 @@ std::string Triple::normalize(StringRef Str) {
break;
case 2:
OS = parseOS(Comp);
- Valid = OS != UnknownOS;
+ IsCygwin = Comp.startswith("cygwin");
+ IsMinGW32 = Comp.startswith("mingw");
+ Valid = OS != UnknownOS || IsCygwin || IsMinGW32;
break;
case 3:
Environment = parseEnvironment(Comp);
@@ -565,16 +615,16 @@ std::string Triple::normalize(StringRef Str) {
else
Components[3] = getObjectFormatTypeName(ObjectFormat);
}
- } else if (OS == Triple::MinGW32) {
+ } else if (IsMinGW32) {
Components.resize(4);
Components[2] = "windows";
Components[3] = "gnu";
- } else if (OS == Triple::Cygwin) {
+ } else if (IsCygwin) {
Components.resize(4);
Components[2] = "windows";
Components[3] = "cygnus";
}
- if (OS == Triple::MinGW32 || OS == Triple::Cygwin ||
+ if (IsMinGW32 || IsCygwin ||
(OS == Triple::Win32 && Environment != UnknownEnvironment)) {
if (ObjectFormat != UnknownObjectFormat && ObjectFormat != Triple::COFF) {
Components.resize(5);
@@ -716,7 +766,7 @@ void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
getOSVersion(Major, Minor, Micro);
// Default to 5.0 (or 7.0 for arm64).
if (Major == 0)
- Major = (getArch() == arm64) ? 7 : 5;
+ Major = (getArch() == aarch64) ? 7 : 5;
break;
}
}
@@ -789,7 +839,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::msp430:
return 16;
- case llvm::Triple::amdil:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::hexagon:
@@ -805,14 +854,15 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::thumbeb:
case llvm::Triple::x86:
case llvm::Triple::xcore:
+ case llvm::Triple::amdil:
+ case llvm::Triple::hsail:
case llvm::Triple::spir:
case llvm::Triple::kalimba:
return 32;
- case llvm::Triple::arm64:
- case llvm::Triple::arm64_be:
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
+ case llvm::Triple::le64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::nvptx64:
@@ -821,6 +871,8 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::sparcv9:
case llvm::Triple::systemz:
case llvm::Triple::x86_64:
+ case llvm::Triple::amdil64:
+ case llvm::Triple::hsail64:
case llvm::Triple::spir64:
return 64;
}
@@ -845,8 +897,6 @@ Triple Triple::get32BitArchVariant() const {
case Triple::UnknownArch:
case Triple::aarch64:
case Triple::aarch64_be:
- case Triple::arm64:
- case Triple::arm64_be:
case Triple::msp430:
case Triple::systemz:
case Triple::ppc64le:
@@ -854,6 +904,7 @@ Triple Triple::get32BitArchVariant() const {
break;
case Triple::amdil:
+ case Triple::hsail:
case Triple::spir:
case Triple::arm:
case Triple::armeb:
@@ -874,12 +925,15 @@ Triple Triple::get32BitArchVariant() const {
// Already 32-bit.
break;
+ case Triple::le64: T.setArch(Triple::le32); break;
case Triple::mips64: T.setArch(Triple::mips); break;
case Triple::mips64el: T.setArch(Triple::mipsel); break;
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::x86_64: T.setArch(Triple::x86); break;
+ case Triple::amdil64: T.setArch(Triple::amdil); break;
+ case Triple::hsail64: T.setArch(Triple::hsail); break;
case Triple::spir64: T.setArch(Triple::spir); break;
}
return T;
@@ -889,12 +943,10 @@ Triple Triple::get64BitArchVariant() const {
Triple T(*this);
switch (getArch()) {
case Triple::UnknownArch:
- case Triple::amdil:
case Triple::arm:
case Triple::armeb:
case Triple::hexagon:
case Triple::kalimba:
- case Triple::le32:
case Triple::msp430:
case Triple::r600:
case Triple::tce:
@@ -906,6 +958,9 @@ Triple Triple::get64BitArchVariant() const {
case Triple::aarch64:
case Triple::aarch64_be:
+ case Triple::le64:
+ case Triple::amdil64:
+ case Triple::hsail64:
case Triple::spir64:
case Triple::mips64:
case Triple::mips64el:
@@ -915,18 +970,102 @@ Triple Triple::get64BitArchVariant() const {
case Triple::sparcv9:
case Triple::systemz:
case Triple::x86_64:
- case Triple::arm64:
- case Triple::arm64_be:
// Already 64-bit.
break;
+ case Triple::le32: T.setArch(Triple::le64); break;
case Triple::mips: T.setArch(Triple::mips64); break;
case Triple::mipsel: T.setArch(Triple::mips64el); break;
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::x86: T.setArch(Triple::x86_64); break;
+ case Triple::amdil: T.setArch(Triple::amdil64); break;
+ case Triple::hsail: T.setArch(Triple::hsail64); break;
case Triple::spir: T.setArch(Triple::spir64); break;
}
return T;
}
+
+// FIXME: tblgen this.
+const char *Triple::getARMCPUForArch(StringRef MArch) const {
+ if (MArch.empty())
+ MArch = getArchName();
+
+ switch (getOS()) {
+ case llvm::Triple::FreeBSD:
+ case llvm::Triple::NetBSD:
+ if (MArch == "armv6")
+ return "arm1176jzf-s";
+ break;
+ case llvm::Triple::Win32:
+ // FIXME: this is invalid for WindowsCE
+ return "cortex-a9";
+ default:
+ break;
+ }
+
+ const char *result = nullptr;
+ size_t offset = StringRef::npos;
+ if (MArch.startswith("arm"))
+ offset = 3;
+ if (MArch.startswith("thumb"))
+ offset = 5;
+ if (offset != StringRef::npos && MArch.substr(offset, 2) == "eb")
+ offset += 2;
+ if (offset != StringRef::npos)
+ result = llvm::StringSwitch<const char *>(MArch.substr(offset))
+ .Cases("v2", "v2a", "arm2")
+ .Case("v3", "arm6")
+ .Case("v3m", "arm7m")
+ .Case("v4", "strongarm")
+ .Case("v4t", "arm7tdmi")
+ .Cases("v5", "v5t", "arm10tdmi")
+ .Cases("v5e", "v5te", "arm1022e")
+ .Case("v5tej", "arm926ej-s")
+ .Cases("v6", "v6k", "arm1136jf-s")
+ .Case("v6j", "arm1136j-s")
+ .Cases("v6z", "v6zk", "arm1176jzf-s")
+ .Case("v6t2", "arm1156t2-s")
+ .Cases("v6m", "v6-m", "cortex-m0")
+ .Cases("v7", "v7a", "v7-a", "v7l", "v7-l", "cortex-a8")
+ .Cases("v7s", "v7-s", "swift")
+ .Cases("v7r", "v7-r", "cortex-r4")
+ .Cases("v7m", "v7-m", "cortex-m3")
+ .Cases("v7em", "v7e-m", "cortex-m4")
+ .Cases("v8", "v8a", "v8-a", "cortex-a53")
+ .Default(nullptr);
+ else
+ result = llvm::StringSwitch<const char *>(MArch)
+ .Case("ep9312", "ep9312")
+ .Case("iwmmxt", "iwmmxt")
+ .Case("xscale", "xscale")
+ .Default(nullptr);
+
+ if (result)
+ return result;
+
+  // If all else failed, return the most basic CPU with Thumb interworking
+ // supported by LLVM.
+ // FIXME: Should warn once that we're falling back.
+ switch (getOS()) {
+ case llvm::Triple::NetBSD:
+ switch (getEnvironment()) {
+ case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::GNUEABI:
+ case llvm::Triple::EABIHF:
+ case llvm::Triple::EABI:
+ return "arm926ej-s";
+ default:
+ return "strongarm";
+ }
+ default:
+ switch (getEnvironment()) {
+ case llvm::Triple::EABIHF:
+ case llvm::Triple::GNUEABIHF:
+ return "arm1176jzf-s";
+ default:
+ return "arm7tdmi";
+ }
+ }
+}
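A hedged usage sketch of the new getARMCPUForArch() helper added above (assuming the matching declaration lands in Triple.h); with an empty MArch the CPU is derived from the triple's own arch component, per the StringSwitch table:

#include "llvm/ADT/Triple.h"
#include <cassert>
#include <cstring>

// Sketch: "armv7" on Linux falls through the OS switch and maps to
// "cortex-a8" via the "v7"/"v7a" entries in the table above.
static void checkARMCPU() {
  llvm::Triple T("armv7-unknown-linux-gnueabi");
  assert(std::strcmp(T.getARMCPUForArch(""), "cortex-a8") == 0);
}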
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index c5d36ff..fcb3638 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -1,4 +1,4 @@
- //===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
+//===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -22,7 +22,6 @@
#include <sys/utsname.h>
#include <cctype>
#include <string>
-#include <cstdlib> // ::getenv
using namespace llvm;
@@ -39,7 +38,8 @@ std::string sys::getDefaultTargetTriple() {
StringRef TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE);
std::pair<StringRef, StringRef> ArchSplit = TargetTripleString.split('-');
- // Normalize the arch, since the target triple may not actually match the target.
+ // Normalize the arch, since the target triple may not actually match the
+ // target.
std::string Arch = ArchSplit.first;
std::string Triple(Arch);
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 623547a..634d404 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -87,22 +87,6 @@ namespace {
};
}
-static std::error_code TempDir(SmallVectorImpl<char> &result) {
- // FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
- const char *dir = nullptr;
- (dir = std::getenv("TMPDIR")) || (dir = std::getenv("TMP")) ||
- (dir = std::getenv("TEMP")) || (dir = std::getenv("TEMPDIR")) ||
-#ifdef P_tmpdir
- (dir = P_tmpdir) ||
-#endif
- (dir = "/tmp");
-
- result.clear();
- StringRef d(dir);
- result.append(d.begin(), d.end());
- return std::error_code();
-}
-
namespace llvm {
namespace sys {
namespace fs {
@@ -272,19 +256,6 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
return std::error_code();
}
-std::error_code normalize_separators(SmallVectorImpl<char> &Path) {
- for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) {
- if (*PI == '\\') {
- auto PN = PI + 1;
- if (PN < PE && *PN == '\\')
- ++PI; // increment once, the for loop will move over the escaped slash
- else
- *PI = '/';
- }
- }
- return std::error_code();
-}
-
// Note that we are using symbolic link because hard links are not supported by
// all filesystems (SMB doesn't).
std::error_code create_link(const Twine &to, const Twine &from) {
@@ -350,38 +321,35 @@ std::error_code resize_file(const Twine &path, uint64_t size) {
return std::error_code();
}
-std::error_code exists(const Twine &path, bool &result) {
- SmallString<128> path_storage;
- StringRef p = path.toNullTerminatedStringRef(path_storage);
-
- if (::access(p.begin(), F_OK) == -1) {
- if (errno != ENOENT)
- return std::error_code(errno, std::generic_category());
- result = false;
- } else
- result = true;
-
- return std::error_code();
+static int convertAccessMode(AccessMode Mode) {
+ switch (Mode) {
+ case AccessMode::Exist:
+ return F_OK;
+ case AccessMode::Write:
+ return W_OK;
+ case AccessMode::Execute:
+ return R_OK | X_OK; // scripts also need R_OK.
+ }
+ llvm_unreachable("invalid enum");
}
-bool can_write(const Twine &Path) {
+std::error_code access(const Twine &Path, AccessMode Mode) {
SmallString<128> PathStorage;
StringRef P = Path.toNullTerminatedStringRef(PathStorage);
- return 0 == access(P.begin(), W_OK);
-}
-bool can_execute(const Twine &Path) {
- SmallString<128> PathStorage;
- StringRef P = Path.toNullTerminatedStringRef(PathStorage);
+ if (::access(P.begin(), convertAccessMode(Mode)) == -1)
+ return std::error_code(errno, std::generic_category());
- if (0 != access(P.begin(), R_OK | X_OK))
- return false;
- struct stat buf;
- if (0 != stat(P.begin(), &buf))
- return false;
- if (!S_ISREG(buf.st_mode))
- return false;
- return true;
+ if (Mode == AccessMode::Execute) {
+ // Don't say that directories are executable.
+ struct stat buf;
+ if (0 != stat(P.begin(), &buf))
+ return errc::permission_denied;
+ if (!S_ISREG(buf.st_mode))
+ return errc::permission_denied;
+ }
+
+ return std::error_code();
}
bool equivalent(file_status A, file_status B) {
@@ -678,6 +646,66 @@ bool home_directory(SmallVectorImpl<char> &result) {
return false;
}
+static const char *getEnvTempDir() {
+ // Check whether the temporary directory is specified by an environment
+ // variable.
+ const char *EnvironmentVariables[] = {"TMPDIR", "TMP", "TEMP", "TEMPDIR"};
+ for (const char *Env : EnvironmentVariables) {
+ if (const char *Dir = std::getenv(Env))
+ return Dir;
+ }
+
+ return nullptr;
+}
+
+static const char *getDefaultTempDir(bool ErasedOnReboot) {
+#ifdef P_tmpdir
+ if ((bool)P_tmpdir)
+ return P_tmpdir;
+#endif
+
+ if (ErasedOnReboot)
+ return "/tmp";
+ return "/var/tmp";
+}
+
+void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
+ Result.clear();
+
+ if (ErasedOnReboot) {
+ // There is no env variable for the cache directory.
+ if (const char *RequestedDir = getEnvTempDir()) {
+ Result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
+ return;
+ }
+ }
+
+#if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
+ // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+  // These macros are defined in <unistd.h> on Darwin >= 9.
+  int ConfName = ErasedOnReboot ? _CS_DARWIN_USER_TEMP_DIR
+ : _CS_DARWIN_USER_CACHE_DIR;
+ size_t ConfLen = confstr(ConfName, nullptr, 0);
+ if (ConfLen > 0) {
+ do {
+ Result.resize(ConfLen);
+ ConfLen = confstr(ConfName, Result.data(), Result.size());
+ } while (ConfLen > 0 && ConfLen != Result.size());
+
+ if (ConfLen > 0) {
+ assert(Result.back() == 0);
+ Result.pop_back();
+ return;
+ }
+
+ Result.clear();
+ }
+#endif
+
+ const char *RequestedDir = getDefaultTempDir(ErasedOnReboot);
+ Result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
+}
+
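A minimal sketch of the new system_temp_directory() entry point (assuming the matching declaration in llvm/Support/Path.h):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: ErasedOnReboot=true consults TMPDIR/TMP/TEMP/TEMPDIR and falls
// back to /tmp; ErasedOnReboot=false prefers the more persistent /var/tmp
// (or the Darwin user cache directory where confstr() provides one).
static void showTempDirs() {
  llvm::SmallString<128> Scratch, Cache;
  llvm::sys::path::system_temp_directory(/*ErasedOnReboot=*/true, Scratch);
  llvm::sys::path::system_temp_directory(/*ErasedOnReboot=*/false, Cache);
  llvm::outs() << Scratch << "\n" << Cache << "\n";
}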
} // end namespace path
} // end namespace sys
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index d2c5dbc..a429bb3 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -14,15 +14,25 @@
#include "Unix.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/TimeValue.h"
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
// DragonFlyBSD, OpenBSD, and Bitrig have deprecated <malloc.h> for
// <stdlib.h> instead. Unix.h includes this for us already.
#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) && \
@@ -198,6 +208,97 @@ Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut,
return std::error_code();
}
+namespace {
+class FDCloser {
+public:
+ FDCloser(int &FD) : FD(FD), KeepOpen(false) {}
+ void keepOpen() { KeepOpen = true; }
+ ~FDCloser() {
+ if (!KeepOpen && FD >= 0)
+ ::close(FD);
+ }
+
+private:
+ FDCloser(const FDCloser &) LLVM_DELETED_FUNCTION;
+ void operator=(const FDCloser &) LLVM_DELETED_FUNCTION;
+
+ int &FD;
+ bool KeepOpen;
+};
+}
+
+std::error_code Process::FixupStandardFileDescriptors() {
+ int NullFD = -1;
+ FDCloser FDC(NullFD);
+ const int StandardFDs[] = {STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO};
+ for (int StandardFD : StandardFDs) {
+ struct stat st;
+ errno = 0;
+ while (fstat(StandardFD, &st) < 0) {
+ assert(errno && "expected errno to be set if fstat failed!");
+ // fstat should return EBADF if the file descriptor is closed.
+ if (errno == EBADF)
+ break;
+ // retry fstat if we got EINTR, otherwise bubble up the failure.
+ if (errno != EINTR)
+ return std::error_code(errno, std::generic_category());
+ }
+ // if fstat succeeds, move on to the next FD.
+ if (!errno)
+ continue;
+ assert(errno == EBADF && "expected errno to have EBADF at this point!");
+
+ if (NullFD < 0) {
+ while ((NullFD = open("/dev/null", O_RDWR)) < 0) {
+ if (errno == EINTR)
+ continue;
+ return std::error_code(errno, std::generic_category());
+ }
+ }
+
+ if (NullFD == StandardFD)
+ FDC.keepOpen();
+ else if (dup2(NullFD, StandardFD) < 0)
+ return std::error_code(errno, std::generic_category());
+ }
+ return std::error_code();
+}
+
+std::error_code Process::SafelyCloseFileDescriptor(int FD) {
+ // Create a signal set filled with *all* signals.
+ sigset_t FullSet;
+ if (sigfillset(&FullSet) < 0)
+ return std::error_code(errno, std::generic_category());
+ // Atomically swap our current signal mask with a full mask.
+ sigset_t SavedSet;
+#if LLVM_ENABLE_THREADS
+ if (int EC = pthread_sigmask(SIG_SETMASK, &FullSet, &SavedSet))
+ return std::error_code(EC, std::generic_category());
+#else
+ if (sigprocmask(SIG_SETMASK, &FullSet, &SavedSet) < 0)
+ return std::error_code(errno, std::generic_category());
+#endif
+ // Attempt to close the file descriptor.
+ // We need to save the error, if one occurs, because our subsequent call to
+ // pthread_sigmask might tamper with errno.
+ int ErrnoFromClose = 0;
+ if (::close(FD) < 0)
+ ErrnoFromClose = errno;
+ // Restore the signal mask back to what we saved earlier.
+ int EC = 0;
+#if LLVM_ENABLE_THREADS
+ EC = pthread_sigmask(SIG_SETMASK, &SavedSet, nullptr);
+#else
+ if (sigprocmask(SIG_SETMASK, &SavedSet, nullptr) < 0)
+ EC = errno;
+#endif
+ // The error code from close takes precedence over the one from
+ // pthread_sigmask.
+ if (ErrnoFromClose)
+ return std::error_code(ErrnoFromClose, std::generic_category());
+ return std::error_code(EC, std::generic_category());
+}
+
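For context, a hedged sketch of how these two new Process helpers might be used from a tool's entry point (the error handling shown is just one possibility):

#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"
#include <fcntl.h>

// Sketch: repair any closed stdin/stdout/stderr before doing real work, and
// close other descriptors with all signals blocked so a signal handler cannot
// observe a half-closed (and possibly reused) descriptor.
static void toolStartup() {
  if (llvm::sys::Process::FixupStandardFileDescriptors())
    llvm::report_fatal_error("cannot fix up standard file descriptors");
  int FD = ::open("/dev/null", O_RDONLY);
  if (FD >= 0)
    (void)llvm::sys::Process::SafelyCloseFileDescriptor(FD);
}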
bool Process::StandardInIsUserInput() {
return FileDescriptorIsDisplayed(STDIN_FILENO);
}
@@ -263,11 +364,14 @@ extern "C" int del_curterm(struct term *termp);
extern "C" int tigetnum(char *capname);
#endif
+#ifdef HAVE_TERMINFO
+static ManagedStatic<sys::Mutex> TermColorMutex;
+#endif
+
static bool terminalHasColors(int fd) {
#ifdef HAVE_TERMINFO
// First, acquire a global lock because these C routines are thread hostile.
- static sys::Mutex M;
- MutexGuard G(M);
+ MutexGuard G(*TermColorMutex);
int errret = 0;
if (setupterm((char *)nullptr, fd, &errret) != 0)
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 06a33cd..0670ad3 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -17,8 +17,10 @@
//===----------------------------------------------------------------------===//
#include "Unix.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
#include <llvm/Config/config.h>
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
@@ -53,50 +55,31 @@ using namespace sys;
ProcessInfo::ProcessInfo() : Pid(0), ReturnCode(0) {}
-// This function just uses the PATH environment variable to find the program.
-std::string
-sys::FindProgramByName(const std::string& progName) {
-
- // Check some degenerate cases
- if (progName.length() == 0) // no program
- return "";
- std::string temp = progName;
+ErrorOr<std::string> sys::findProgramByName(StringRef Name,
+ ArrayRef<StringRef> Paths) {
+ assert(!Name.empty() && "Must have a name!");
// Use the given path verbatim if it contains any slashes; this matches
// the behavior of sh(1) and friends.
- if (progName.find('/') != std::string::npos)
- return temp;
-
- // At this point, the file name is valid and does not contain slashes. Search
- // for it through the directories specified in the PATH environment variable.
+ if (Name.find('/') != StringRef::npos)
+ return std::string(Name);
- // Get the path. If its empty, we can't do anything to find it.
- const char *PathStr = getenv("PATH");
- if (!PathStr)
- return "";
+ if (Paths.empty()) {
+ SmallVector<StringRef, 16> SearchPaths;
+ SplitString(std::getenv("PATH"), SearchPaths, ":");
+ return findProgramByName(Name, SearchPaths);
+ }
- // Now we have a colon separated list of directories to search; try them.
- size_t PathLen = strlen(PathStr);
- while (PathLen) {
- // Find the first colon...
- const char *Colon = std::find(PathStr, PathStr+PathLen, ':');
+ for (auto Path : Paths) {
+ if (Path.empty())
+ continue;
// Check to see if this first directory contains the executable...
- SmallString<128> FilePath(PathStr,Colon);
- sys::path::append(FilePath, progName);
- if (sys::fs::can_execute(Twine(FilePath)))
- return FilePath.str(); // Found the executable!
-
- // Nope it wasn't in this directory, check the next path in the list!
- PathLen -= Colon-PathStr;
- PathStr = Colon;
-
- // Advance past duplicate colons
- while (*PathStr == ':') {
- PathStr++;
- PathLen--;
- }
+ SmallString<128> FilePath(Path);
+ sys::path::append(FilePath, Name);
+ if (sys::fs::can_execute(FilePath.c_str()))
+ return std::string(FilePath.str()); // Found the executable!
}
- return "";
+ return std::errc::no_such_file_or_directory;
}
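A hedged sketch of the new ErrorOr-returning findProgramByName() (assuming the header declaration defaults Paths to an empty list, so the single-argument call below falls back to $PATH); the old FindProgramByName signalled failure with an empty string, callers now test the ErrorOr:

#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: search $PATH for an executable; on failure the ErrorOr carries
// errc::no_such_file_or_directory instead of an empty string.
static void findClang() {
  llvm::ErrorOr<std::string> Path = llvm::sys::findProgramByName("clang");
  if (!Path) {
    llvm::errs() << "not found: " << Path.getError().message() << "\n";
    return;
  }
  llvm::errs() << "found at " << *Path << "\n";
}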
static bool RedirectIO(const StringRef *Path, int FD, std::string* ErrMsg) {
@@ -334,7 +317,6 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
pid_t ChildPid = PI.Pid;
if (WaitUntilTerminates) {
SecondsToWait = 0;
- ChildPid = -1; // mimic a wait() using waitpid()
} else if (SecondsToWait) {
// Install a timeout handler. The handler itself does nothing, but the
// simple fact of having a handler at all causes the wait below to return
@@ -440,6 +422,23 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
return std::error_code();
}
+std::error_code
+llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
+ WindowsEncodingMethod Encoding /*unused*/) {
+ std::error_code EC;
+ llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::F_Text);
+
+ if (EC)
+ return EC;
+
+ OS << Contents;
+
+ if (OS.has_error())
+ return std::make_error_code(std::errc::io_error);
+
+ return EC;
+}
+
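A minimal hedged sketch of the new writeFileWithEncoding() (on Unix the Encoding argument is ignored, as the /*unused*/ comment above notes; the enum spelling WEM_UTF8 is assumed from the Windows-oriented declaration):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Program.h"

// Sketch: write a response file; the requested encoding only matters for the
// Windows implementation.
static std::error_code writeResponseFile(llvm::StringRef Text) {
  return llvm::sys::writeFileWithEncoding("args.rsp", Text,
                                          llvm::sys::WEM_UTF8);
}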
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
static long ArgMax = sysconf(_SC_ARG_MAX);
@@ -448,13 +447,13 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
return true;
// Conservatively account for space required by environment variables.
- ArgMax /= 2;
+ long HalfArgMax = ArgMax / 2;
size_t ArgLength = 0;
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
ArgLength += strlen(*I) + 1;
- if (ArgLength > size_t(ArgMax)) {
+ if (ArgLength > size_t(HalfArgMax)) {
return false;
}
}
diff --git a/lib/Support/Unix/RWMutex.inc b/lib/Support/Unix/RWMutex.inc
index edcbd52..85a1043 100644
--- a/lib/Support/Unix/RWMutex.inc
+++ b/lib/Support/Unix/RWMutex.inc
@@ -26,26 +26,26 @@ using namespace sys;
// will therefore deadlock if a thread tries to acquire a read lock
// multiple times.
-RWMutexImpl::RWMutexImpl() : data_(new Mutex(false)) { }
+RWMutexImpl::RWMutexImpl() : data_(new MutexImpl(false)) { }
RWMutexImpl::~RWMutexImpl() {
- delete static_cast<Mutex *>(data_);
+ delete static_cast<MutexImpl *>(data_);
}
bool RWMutexImpl::reader_acquire() {
- return static_cast<Mutex *>(data_)->acquire();
+ return static_cast<MutexImpl *>(data_)->acquire();
}
bool RWMutexImpl::reader_release() {
- return static_cast<Mutex *>(data_)->release();
+ return static_cast<MutexImpl *>(data_)->release();
}
bool RWMutexImpl::writer_acquire() {
- return static_cast<Mutex *>(data_)->acquire();
+ return static_cast<MutexImpl *>(data_)->acquire();
}
bool RWMutexImpl::writer_release() {
- return static_cast<Mutex *>(data_)->release();
+ return static_cast<MutexImpl *>(data_)->release();
}
}
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 1841fea..e8f4643 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -14,7 +14,14 @@
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/UniqueLock.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
#include <vector>
@@ -36,18 +43,22 @@
#if HAVE_MACH_MACH_H
#include <mach/mach.h>
#endif
+#if HAVE_LINK_H
+#include <link.h>
+#endif
using namespace llvm;
static RETSIGTYPE SignalHandler(int Sig); // defined below.
-static SmartMutex<true> SignalsMutex;
+static ManagedStatic<SmartMutex<true> > SignalsMutex;
/// InterruptFunction - The function to call if ctrl-c is pressed.
static void (*InterruptFunction)() = nullptr;
-static std::vector<std::string> FilesToRemove;
-static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
+static ManagedStatic<std::vector<std::string>> FilesToRemove;
+static ManagedStatic<std::vector<std::pair<void (*)(void *), void *>>>
+ CallBacksToRun;
// IntSigs - Signals that represent requested termination. There's no bug
// or failure, or if there is, it's not our direct responsibility. For whatever
@@ -55,7 +66,6 @@ static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
static const int IntSigs[] = {
SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
};
-static const int *const IntSigsEnd = std::end(IntSigs);
// KillSigs - Signals that represent that we have a bug, and our prompt
// termination has been ordered.
@@ -74,7 +84,6 @@ static const int KillSigs[] = {
, SIGEMT
#endif
};
-static const int *const KillSigsEnd = std::end(KillSigs);
static unsigned NumRegisteredSignals = 0;
static struct {
@@ -105,8 +114,8 @@ static void RegisterHandlers() {
// If the handlers are already registered, we're done.
if (NumRegisteredSignals != 0) return;
- std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
- std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
+ for (auto S : IntSigs) RegisterHandler(S);
+ for (auto S : KillSigs) RegisterHandler(S);
}
static void UnregisterHandlers() {
@@ -125,11 +134,12 @@ static void UnregisterHandlers() {
static void RemoveFilesToRemove() {
// We avoid iterators in case of debug iterators that allocate or release
// memory.
- for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) {
+ std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
+ for (unsigned i = 0, e = FilesToRemoveRef.size(); i != e; ++i) {
// We rely on a std::string implementation for which repeated calls to
// 'c_str()' don't allocate memory. We pre-call 'c_str()' on all of these
// strings to try to ensure this is safe.
- const char *path = FilesToRemove[i].c_str();
+ const char *path = FilesToRemoveRef[i].c_str();
// Get the status so we can determine if it's a file or directory. If we
// can't stat the file, ignore it.
@@ -162,28 +172,31 @@ static RETSIGTYPE SignalHandler(int Sig) {
sigfillset(&SigMask);
sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
- SignalsMutex.acquire();
- RemoveFilesToRemove();
-
- if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
- if (InterruptFunction) {
- void (*IF)() = InterruptFunction;
- SignalsMutex.release();
- InterruptFunction = nullptr;
- IF(); // run the interrupt function.
+ {
+ unique_lock<SmartMutex<true>> Guard(*SignalsMutex);
+ RemoveFilesToRemove();
+
+ if (std::find(std::begin(IntSigs), std::end(IntSigs), Sig)
+ != std::end(IntSigs)) {
+ if (InterruptFunction) {
+ void (*IF)() = InterruptFunction;
+ Guard.unlock();
+ InterruptFunction = nullptr;
+ IF(); // run the interrupt function.
+ return;
+ }
+
+ Guard.unlock();
+ raise(Sig); // Execute the default handler.
return;
- }
-
- SignalsMutex.release();
- raise(Sig); // Execute the default handler.
- return;
+ }
}
- SignalsMutex.release();
-
// Otherwise if it is a fault (like SEGV) run any handler.
- for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
- CallBacksToRun[i].first(CallBacksToRun[i].second);
+ std::vector<std::pair<void (*)(void *), void *>>& CallBacksToRunRef =
+ *CallBacksToRun;
+ for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
+ CallBacksToRunRef[i].first(CallBacksToRunRef[i].second);
#ifdef __s390__
// On S/390, certain signals are delivered with PSW Address pointing to
@@ -196,37 +209,39 @@ static RETSIGTYPE SignalHandler(int Sig) {
}
void llvm::sys::RunInterruptHandlers() {
- SignalsMutex.acquire();
+ sys::SmartScopedLock<true> Guard(*SignalsMutex);
RemoveFilesToRemove();
- SignalsMutex.release();
}
void llvm::sys::SetInterruptFunction(void (*IF)()) {
- SignalsMutex.acquire();
- InterruptFunction = IF;
- SignalsMutex.release();
+ {
+ sys::SmartScopedLock<true> Guard(*SignalsMutex);
+ InterruptFunction = IF;
+ }
RegisterHandlers();
}
// RemoveFileOnSignal - The public API
bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
std::string* ErrMsg) {
- SignalsMutex.acquire();
- std::string *OldPtr = FilesToRemove.empty() ? nullptr : &FilesToRemove[0];
- FilesToRemove.push_back(Filename);
-
- // We want to call 'c_str()' on every std::string in this vector so that if
- // the underlying implementation requires a re-allocation, it happens here
- // rather than inside of the signal handler. If we see the vector grow, we
- // have to call it on every entry. If it remains in place, we only need to
- // call it on the latest one.
- if (OldPtr == &FilesToRemove[0])
- FilesToRemove.back().c_str();
- else
- for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i)
- FilesToRemove[i].c_str();
-
- SignalsMutex.release();
+ {
+ sys::SmartScopedLock<true> Guard(*SignalsMutex);
+ std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
+ std::string *OldPtr =
+ FilesToRemoveRef.empty() ? nullptr : &FilesToRemoveRef[0];
+ FilesToRemoveRef.push_back(Filename);
+
+ // We want to call 'c_str()' on every std::string in this vector so that if
+ // the underlying implementation requires a re-allocation, it happens here
+ // rather than inside of the signal handler. If we see the vector grow, we
+ // have to call it on every entry. If it remains in place, we only need to
+ // call it on the latest one.
+ if (OldPtr == &FilesToRemoveRef[0])
+ FilesToRemoveRef.back().c_str();
+ else
+ for (unsigned i = 0, e = FilesToRemoveRef.size(); i != e; ++i)
+ FilesToRemoveRef[i].c_str();
+ }
RegisterHandlers();
return false;
@@ -234,31 +249,166 @@ bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
// DontRemoveFileOnSignal - The public API
void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) {
- SignalsMutex.acquire();
+ sys::SmartScopedLock<true> Guard(*SignalsMutex);
std::vector<std::string>::reverse_iterator RI =
- std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename);
- std::vector<std::string>::iterator I = FilesToRemove.end();
- if (RI != FilesToRemove.rend())
- I = FilesToRemove.erase(RI.base()-1);
+ std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+ std::vector<std::string>::iterator I = FilesToRemove->end();
+ if (RI != FilesToRemove->rend())
+ I = FilesToRemove->erase(RI.base()-1);
// We need to call c_str() on every element which would have been moved by
// the erase. These elements, in a C++98 implementation where c_str()
// requires a reallocation on the first call may have had the call to c_str()
// made on insertion become invalid by being copied down an element.
- for (std::vector<std::string>::iterator E = FilesToRemove.end(); I != E; ++I)
+ for (std::vector<std::string>::iterator E = FilesToRemove->end(); I != E; ++I)
I->c_str();
-
- SignalsMutex.release();
}
/// AddSignalHandler - Add a function to be called when a signal is delivered
/// to the process. The handler can have a cookie passed to it to identify
/// what instance of the handler it is.
void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
- CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie));
+ CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
RegisterHandlers();
}
+#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+
+#if HAVE_LINK_H && (defined(__linux__) || defined(__FreeBSD__) || \
+ defined(__FreeBSD_kernel__) || defined(__NetBSD__))
+struct DlIteratePhdrData {
+ void **StackTrace;
+ int depth;
+ bool first;
+ const char **modules;
+ intptr_t *offsets;
+ const char *main_exec_name;
+};
+
+static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
+ DlIteratePhdrData *data = (DlIteratePhdrData*)arg;
+ const char *name = data->first ? data->main_exec_name : info->dlpi_name;
+ data->first = false;
+ for (int i = 0; i < info->dlpi_phnum; i++) {
+ const auto *phdr = &info->dlpi_phdr[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+ intptr_t beg = info->dlpi_addr + phdr->p_vaddr;
+ intptr_t end = beg + phdr->p_memsz;
+ for (int j = 0; j < data->depth; j++) {
+ if (data->modules[j])
+ continue;
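For reference, a minimal sketch of how the new sub-architecture parsing surfaces through the public API (this assumes the accompanying Triple.h change adds the SubArch member and a getSubArch() accessor, which the constructors below initialize):

#include "llvm/ADT/Triple.h"
#include <cassert>

// Sketch: the arch component "armv7a" should now yield ARMSubArch_v7 from
// parseSubArch(), in addition to the usual Triple::arm architecture.
static void checkSubArch() {
  llvm::Triple T("armv7a-none-linux-gnueabi");
  assert(T.getArch() == llvm::Triple::arm);
  assert(T.getSubArch() == llvm::Triple::ARMSubArch_v7);
}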
+ intptr_t addr = (intptr_t)data->StackTrace[j];
+ if (beg <= addr && addr < end) {
+ data->modules[j] = name;
+ data->offsets[j] = addr - info->dlpi_addr;
+ }
+ }
+ }
+ return 0;
+}
+
+static bool findModulesAndOffsets(void **StackTrace, int Depth,
+ const char **Modules, intptr_t *Offsets,
+ const char *MainExecutableName) {
+ DlIteratePhdrData data = {StackTrace, Depth, true,
+ Modules, Offsets, MainExecutableName};
+ dl_iterate_phdr(dl_iterate_phdr_cb, &data);
+ return true;
+}
+#else
+static bool findModulesAndOffsets(void **StackTrace, int Depth,
+ const char **Modules, intptr_t *Offsets,
+ const char *MainExecutableName) {
+ return false;
+}
+#endif
+
+static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
+  // FIXME: Subtract the necessary number from StackTrace entries to turn
+  // return addresses into actual instruction addresses.
+ // Use llvm-symbolizer tool to symbolize the stack traces.
+ ErrorOr<std::string> LLVMSymbolizerPathOrErr =
+ sys::findProgramByName("llvm-symbolizer");
+ if (!LLVMSymbolizerPathOrErr)
+ return false;
+ const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
+ // We don't know argv0 or the address of main() at this point, but try
+ // to guess it anyway (it's possible on some platforms).
+ std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);
+ if (MainExecutableName.empty() ||
+ MainExecutableName.find("llvm-symbolizer") != std::string::npos)
+ return false;
+
+ std::vector<const char *> Modules(Depth, nullptr);
+ std::vector<intptr_t> Offsets(Depth, 0);
+ if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(),
+ MainExecutableName.c_str()))
+ return false;
+ int InputFD;
+ SmallString<32> InputFile, OutputFile;
+ sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
+ sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile);
+ FileRemover InputRemover(InputFile.c_str());
+ FileRemover OutputRemover(OutputFile.c_str());
+
+ {
+ raw_fd_ostream Input(InputFD, true);
+ for (int i = 0; i < Depth; i++) {
+ if (Modules[i])
+ Input << Modules[i] << " " << (void*)Offsets[i] << "\n";
+ }
+ }
+
+ StringRef InputFileStr(InputFile);
+ StringRef OutputFileStr(OutputFile);
+ StringRef StderrFileStr;
+ const StringRef *Redirects[] = {&InputFileStr, &OutputFileStr,
+ &StderrFileStr};
+ const char *Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining",
+ "--demangle", nullptr};
+ int RunResult =
+ sys::ExecuteAndWait(LLVMSymbolizerPath, Args, nullptr, Redirects);
+ if (RunResult != 0)
+ return false;
+
+ auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
+ if (!OutputBuf)
+ return false;
+ StringRef Output = OutputBuf.get()->getBuffer();
+ SmallVector<StringRef, 32> Lines;
+ Output.split(Lines, "\n");
+ auto CurLine = Lines.begin();
+ int frame_no = 0;
+ for (int i = 0; i < Depth; i++) {
+ if (!Modules[i]) {
+ fprintf(FD, "#%d %p\n", frame_no++, StackTrace[i]);
+ continue;
+ }
+ // Read pairs of lines (function name and file/line info) until we
+ // encounter empty line.
+ for (;;) {
+ if (CurLine == Lines.end())
+ return false;
+ StringRef FunctionName = *CurLine++;
+ if (FunctionName.empty())
+ break;
+ fprintf(FD, "#%d %p ", frame_no++, StackTrace[i]);
+ if (!FunctionName.startswith("??"))
+ fprintf(FD, "%s ", FunctionName.str().c_str());
+ if (CurLine == Lines.end())
+ return false;
+ StringRef FileLineInfo = *CurLine++;
+ if (!FileLineInfo.startswith("??"))
+ fprintf(FD, "%s", FileLineInfo.str().c_str());
+ else
+ fprintf(FD, "(%s+%p)", Modules[i], (void *)Offsets[i]);
+ fprintf(FD, "\n");
+ }
+ }
+ return true;
+}
+#endif // defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
// PrintStackTrace - In the case of a program crash or fault, print out a stack
// trace so that the user has an indication of why and where we died.
@@ -271,6 +421,8 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
// Use backtrace() to output a backtrace on Linux systems with glibc.
int depth = backtrace(StackTrace,
static_cast<int>(array_lengthof(StackTrace)));
+ if (printSymbolizedStackTrace(StackTrace, depth, FD))
+ return;
#if HAVE_DLFCN_H && __GNUG__
int width = 0;
for (int i = 0; i < depth; ++i) {
diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc
index 7d4acf7..042e0da 100644
--- a/lib/Support/Unix/TimeValue.inc
+++ b/lib/Support/Unix/TimeValue.inc
@@ -41,7 +41,7 @@ TimeValue TimeValue::now() {
// errors concern the timezone parameter which we're passing in as 0.
// In the unlikely case it does happen, just return MinTime, no error
// message needed.
- return MinTime;
+ return MinTime();
}
return TimeValue(
diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h
index ba688e3..e16a226 100644
--- a/lib/Support/Unix/Unix.h
+++ b/lib/Support/Unix/Unix.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_UNIX_UNIX_H
-#define LLVM_SYSTEM_UNIX_UNIX_H
+#ifndef LLVM_LIB_SUPPORT_UNIX_UNIX_H
+#define LLVM_LIB_SUPPORT_UNIX_UNIX_H
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only generic UNIX code that
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 5ed0b70..79d5f79 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -41,32 +41,11 @@ using namespace sys;
static DenseSet<HMODULE> *OpenedHandles;
-extern "C" {
-
- static BOOL CALLBACK ELM_Callback(WIN32_ELMCB_PCSTR ModuleName,
- ULONG_PTR ModuleBase,
- ULONG ModuleSize,
- PVOID UserContext)
- {
- // Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded
- // into the process.
- if (stricmp(ModuleName, "msvci70") != 0 &&
- stricmp(ModuleName, "msvcirt") != 0 &&
- stricmp(ModuleName, "msvcp50") != 0 &&
- stricmp(ModuleName, "msvcp60") != 0 &&
- stricmp(ModuleName, "msvcp70") != 0 &&
- stricmp(ModuleName, "msvcr70") != 0 &&
-#ifndef __MINGW32__
- // Mingw32 uses msvcrt.dll by default. Don't ignore it.
- // Otherwise the user should be aware what they are doing.
- stricmp(ModuleName, "msvcrt") != 0 &&
-#endif
- stricmp(ModuleName, "msvcrt20") != 0 &&
- stricmp(ModuleName, "msvcrt40") != 0) {
- OpenedHandles->insert((HMODULE)ModuleBase);
- }
- return TRUE;
- }
+static BOOL CALLBACK
+ELM_Callback(WIN32_ELMCB_PCSTR ModuleName, ULONG_PTR ModuleBase,
+ ULONG ModuleSize, PVOID UserContext) {
+ OpenedHandles->insert((HMODULE)ModuleBase);
+ return TRUE;
}
DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
@@ -115,10 +94,24 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
extern "C" { extern void *SYM; }
#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) EXPLICIT_SYMBOL(SYMTO)
+#ifdef _M_IX86
+// Win32 on x86 implements certain single-precision math functions as macros.
+// These functions are not exported by the DLL, but will still be needed
+// for symbol resolution by the JIT loader. Therefore, this Support library
+// provides helper functions with the same implementation.
+
+#define INLINE_DEF_SYMBOL1(TYP, SYM) \
+ extern "C" TYP inline_##SYM(TYP _X) { return SYM(_X); }
+#define INLINE_DEF_SYMBOL2(TYP, SYM) \
+ extern "C" TYP inline_##SYM(TYP _X, TYP _Y) { return SYM(_X, _Y); }
+#endif
+
#include "explicit_symbols.inc"
#undef EXPLICIT_SYMBOL
#undef EXPLICIT_SYMBOL2
+#undef INLINE_DEF_SYMBOL1
+#undef INLINE_DEF_SYMBOL2
void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
SmartScopedLock<true> Lock(*SymbolsMutex);
@@ -142,22 +135,32 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
}
}
- #define EXPLICIT_SYMBOL(SYM) \
- if (!strcmp(symbolName, #SYM)) return (void*)&SYM;
- #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \
- if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO;
+#define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) \
+ return (void *)&SYM;
+#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \
+ if (!strcmp(symbolName, #SYMFROM)) \
+ return (void *)&SYMTO;
+
+#ifdef _M_IX86
+#define INLINE_DEF_SYMBOL1(TYP, SYM) \
+ if (!strcmp(symbolName, #SYM)) \
+ return (void *)&inline_##SYM;
+#define INLINE_DEF_SYMBOL2(TYP, SYM) INLINE_DEF_SYMBOL1(TYP, SYM)
+#endif
{
- #include "explicit_symbols.inc"
+#include "explicit_symbols.inc"
}
- #undef EXPLICIT_SYMBOL
- #undef EXPLICIT_SYMBOL2
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+#undef INLINE_DEF_SYMBOL1
+#undef INLINE_DEF_SYMBOL2
return 0;
}
-
void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
if (!isValid())
return NULL;
@@ -166,5 +169,4 @@ void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
return (void *)(intptr_t)GetProcAddress((HMODULE)Data, symbolName);
}
-
}
diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc
index 0c02bf9..fe89fe0a 100644
--- a/lib/Support/Windows/Host.inc
+++ b/lib/Support/Windows/Host.inc
@@ -1,4 +1,4 @@
-//===- llvm/Support/Win32/Host.inc -------------------------------*- C++ -*-===//
+//===- llvm/Support/Win32/Host.inc ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 7a1bc04..365031c 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -49,23 +49,6 @@ static std::error_code windows_error(DWORD E) {
return mapWindowsError(E);
}
-static std::error_code TempDir(SmallVectorImpl<char> &Result) {
- SmallVector<wchar_t, 64> Res;
-retry_temp_dir:
- DWORD Len = ::GetTempPathW(Res.capacity(), Res.begin());
-
- if (Len == 0)
- return windows_error(::GetLastError());
-
- if (Len > Res.capacity()) {
- Res.reserve(Len);
- goto retry_temp_dir;
- }
-
- Res.set_size(Len);
- return UTF16ToUTF8(Res.begin(), Res.size(), Result);
-}
-
static bool is_separator(const wchar_t value) {
switch (value) {
case L'\\':
@@ -76,6 +59,59 @@ static bool is_separator(const wchar_t value) {
}
}
+// Convert a UTF-8 path to UTF-16. Also, if the absolute equivalent of the
+// path is longer than CreateDirectory can tolerate, make it absolute and
+// prefixed by '\\?\'.
+static std::error_code widenPath(const Twine &Path8,
+ SmallVectorImpl<wchar_t> &Path16) {
+ const size_t MaxDirLen = MAX_PATH - 12; // Must leave room for 8.3 filename.
+
+ // Several operations would convert Path8 to SmallString; more efficient to
+ // do it once up front.
+ SmallString<128> Path8Str;
+ Path8.toVector(Path8Str);
+
+ // If we made this path absolute, how much longer would it get?
+ size_t CurPathLen;
+ if (llvm::sys::path::is_absolute(Twine(Path8Str)))
+ CurPathLen = 0; // No contribution from current_path needed.
+ else {
+ CurPathLen = ::GetCurrentDirectoryW(0, NULL);
+ if (CurPathLen == 0)
+ return windows_error(::GetLastError());
+ }
+
+ // Would the absolute path be longer than our limit?
+ if ((Path8Str.size() + CurPathLen) >= MaxDirLen &&
+ !Path8Str.startswith("\\\\?\\")) {
+ SmallString<2*MAX_PATH> FullPath("\\\\?\\");
+ if (CurPathLen) {
+ SmallString<80> CurPath;
+ if (std::error_code EC = llvm::sys::fs::current_path(CurPath))
+ return EC;
+ FullPath.append(CurPath);
+ }
+ // Traverse the requested path, canonicalizing . and .. as we go (because
+ // the \\?\ prefix is documented to treat them as real components).
+ // The iterators don't report separators and append() always attaches
+ // preferred_separator so we don't need to call native() on the result.
+ for (llvm::sys::path::const_iterator I = llvm::sys::path::begin(Path8Str),
+ E = llvm::sys::path::end(Path8Str);
+ I != E; ++I) {
+ if (I->size() == 1 && *I == ".")
+ continue;
+ if (I->size() == 2 && *I == "..")
+ llvm::sys::path::remove_filename(FullPath);
+ else
+ llvm::sys::path::append(FullPath, *I);
+ }
+ return UTF8ToUTF16(FullPath, Path16);
+ }
+
+ // Just use the caller's original path.
+ return UTF8ToUTF16(Path8Str, Path16);
+}
+
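widenPath() is file-local, so the following is only an illustrative sketch (in C++ comment form) of the transformation it performs; the paths and working directory are hypothetical:

// Hypothetical: the current directory is C:\work\deeply\nested\tree and Path8
// is a long relative path such as "sub\..\out\very_long_name.obj".
//
//   absolute form would reach MAX_PATH - 12  -->  make absolute, canonicalize
//   "." and "..", and add the \\?\ prefix:
//       \\?\C:\work\deeply\nested\tree\out\very_long_name.obj
//
//   otherwise  -->  plain UTF-8 to UTF-16 conversion of the caller's original
//   path, with no prefix added.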
namespace llvm {
namespace sys {
namespace fs {
@@ -147,11 +183,9 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
}
std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
- SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
- if (std::error_code ec =
- UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
+ if (std::error_code ec = widenPath(path, path_utf16))
return ec;
if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
@@ -163,25 +197,14 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
return std::error_code();
}
-std::error_code normalize_separators(SmallVectorImpl<char> &Path) {
- (void) Path;
- return std::error_code();
-}
-
// We can't use symbolic links for windows.
std::error_code create_link(const Twine &to, const Twine &from) {
- // Get arguments.
- SmallString<128> from_storage;
- SmallString<128> to_storage;
- StringRef f = from.toStringRef(from_storage);
- StringRef t = to.toStringRef(to_storage);
-
// Convert to utf-16.
SmallVector<wchar_t, 128> wide_from;
SmallVector<wchar_t, 128> wide_to;
- if (std::error_code ec = UTF8ToUTF16(f, wide_from))
+ if (std::error_code ec = widenPath(from, wide_from))
return ec;
- if (std::error_code ec = UTF8ToUTF16(t, wide_to))
+ if (std::error_code ec = widenPath(to, wide_to))
return ec;
if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
@@ -191,7 +214,6 @@ std::error_code create_link(const Twine &to, const Twine &from) {
}
std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
- SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
file_status ST;
@@ -201,8 +223,7 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
return std::error_code();
}
- if (std::error_code ec =
- UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
+ if (std::error_code ec = widenPath(path, path_utf16))
return ec;
if (ST.type() == file_type::directory_file) {
@@ -222,18 +243,12 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
}
std::error_code rename(const Twine &from, const Twine &to) {
- // Get arguments.
- SmallString<128> from_storage;
- SmallString<128> to_storage;
- StringRef f = from.toStringRef(from_storage);
- StringRef t = to.toStringRef(to_storage);
-
// Convert to utf-16.
SmallVector<wchar_t, 128> wide_from;
SmallVector<wchar_t, 128> wide_to;
- if (std::error_code ec = UTF8ToUTF16(f, wide_from))
+ if (std::error_code ec = widenPath(from, wide_from))
return ec;
- if (std::error_code ec = UTF8ToUTF16(t, wide_to))
+ if (std::error_code ec = widenPath(to, wide_to))
return ec;
std::error_code ec = std::error_code();
@@ -254,11 +269,9 @@ std::error_code rename(const Twine &from, const Twine &to) {
}
std::error_code resize_file(const Twine &path, uint64_t size) {
- SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
- if (std::error_code ec =
- UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
+ if (std::error_code ec = widenPath(path, path_utf16))
return ec;
int fd = ::_wopen(path_utf16.begin(), O_BINARY | _O_RDWR, S_IWRITE);
@@ -273,49 +286,27 @@ std::error_code resize_file(const Twine &path, uint64_t size) {
return std::error_code(error, std::generic_category());
}
-std::error_code exists(const Twine &path, bool &result) {
- SmallString<128> path_storage;
- SmallVector<wchar_t, 128> path_utf16;
+std::error_code access(const Twine &Path, AccessMode Mode) {
+ SmallVector<wchar_t, 128> PathUtf16;
- if (std::error_code ec =
- UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
- return ec;
+ if (std::error_code EC = widenPath(Path, PathUtf16))
+ return EC;
- DWORD attributes = ::GetFileAttributesW(path_utf16.begin());
+ DWORD Attributes = ::GetFileAttributesW(PathUtf16.begin());
- if (attributes == INVALID_FILE_ATTRIBUTES) {
+ if (Attributes == INVALID_FILE_ATTRIBUTES) {
// See if the file didn't actually exist.
DWORD LastError = ::GetLastError();
if (LastError != ERROR_FILE_NOT_FOUND &&
LastError != ERROR_PATH_NOT_FOUND)
return windows_error(LastError);
- result = false;
- } else
- result = true;
- return std::error_code();
-}
-
-bool can_write(const Twine &Path) {
- // FIXME: take security attributes into account.
- SmallString<128> PathStorage;
- SmallVector<wchar_t, 128> PathUtf16;
-
- if (UTF8ToUTF16(Path.toStringRef(PathStorage), PathUtf16))
- return false;
-
- DWORD Attr = ::GetFileAttributesW(PathUtf16.begin());
- return (Attr != INVALID_FILE_ATTRIBUTES) && !(Attr & FILE_ATTRIBUTE_READONLY);
-}
+ return errc::no_such_file_or_directory;
+ }
-bool can_execute(const Twine &Path) {
- SmallString<128> PathStorage;
- SmallVector<wchar_t, 128> PathUtf16;
+ if (Mode == AccessMode::Write && (Attributes & FILE_ATTRIBUTE_READONLY))
+ return errc::permission_denied;
- if (UTF8ToUTF16(Path.toStringRef(PathStorage), PathUtf16))
- return false;
-
- DWORD Attr = ::GetFileAttributesW(PathUtf16.begin());
- return Attr != INVALID_FILE_ATTRIBUTES;
+ return std::error_code();
}
bool equivalent(file_status A, file_status B) {
@@ -424,7 +415,7 @@ std::error_code status(const Twine &path, file_status &result) {
return std::error_code();
}
- if (std::error_code ec = UTF8ToUTF16(path8, path_utf16))
+ if (std::error_code ec = widenPath(path8, path_utf16))
return ec;
DWORD attr = ::GetFileAttributesW(path_utf16.begin());
@@ -567,11 +558,10 @@ mapped_file_region::mapped_file_region(const Twine &path,
, FileDescriptor()
, FileHandle(INVALID_HANDLE_VALUE)
, FileMappingHandle() {
- SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
// Convert path to UTF-16.
- if ((ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)))
+ if ((ec = widenPath(path, path_utf16)))
return;
// Get file handle for creating a file mapping.
@@ -677,7 +667,7 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
StringRef path){
SmallVector<wchar_t, 128> path_utf16;
- if (std::error_code ec = UTF8ToUTF16(path, path_utf16))
+ if (std::error_code ec = widenPath(path, path_utf16))
return ec;
// Convert path to the format that Windows is happy with.
@@ -760,11 +750,9 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
}
std::error_code openFileForRead(const Twine &Name, int &ResultFD) {
- SmallString<128> PathStorage;
SmallVector<wchar_t, 128> PathUTF16;
- if (std::error_code EC =
- UTF8ToUTF16(Name.toStringRef(PathStorage), PathUTF16))
+ if (std::error_code EC = widenPath(Name, PathUTF16))
return EC;
HANDLE H = ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
@@ -799,11 +787,9 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) &&
"Cannot specify both 'excl' and 'append' file creation flags!");
- SmallString<128> PathStorage;
SmallVector<wchar_t, 128> PathUTF16;
- if (std::error_code EC =
- UTF8ToUTF16(Name.toStringRef(PathStorage), PathUTF16))
+ if (std::error_code EC = widenPath(Name, PathUTF16))
return EC;
DWORD CreationDisposition;
@@ -867,6 +853,51 @@ bool home_directory(SmallVectorImpl<char> &result) {
return true;
}
+static bool getTempDirEnvVar(const char *Var, SmallVectorImpl<char> &Res) {
+ SmallVector<wchar_t, 128> NameUTF16;
+ if (windows::UTF8ToUTF16(Var, NameUTF16))
+ return false;
+
+ SmallVector<wchar_t, 1024> Buf;
+ size_t Size = 1024;
+ do {
+ Buf.reserve(Size);
+ Size =
+ GetEnvironmentVariableW(NameUTF16.data(), Buf.data(), Buf.capacity());
+ if (Size == 0)
+ return false;
+
+ // Try again with larger buffer.
+ } while (Size > Buf.capacity());
+ Buf.set_size(Size);
+
+ if (windows::UTF16ToUTF8(Buf.data(), Size, Res))
+ return false;
+ return true;
+}
+
+static bool getTempDirEnvVar(SmallVectorImpl<char> &Res) {
+ const char *EnvironmentVariables[] = {"TMP", "TEMP", "USERPROFILE"};
+ for (const char *Env : EnvironmentVariables) {
+ if (getTempDirEnvVar(Env, Res))
+ return true;
+ }
+ return false;
+}
+
+void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
+ (void)ErasedOnReboot;
+ Result.clear();
+
+ // Check whether the temporary directory is specified by an environment
+ // variable.
+ if (getTempDirEnvVar(Result))
+ return;
+
+ // Fall back to a system default.
+ const char *DefaultResult = "C:\\TEMP";
+ Result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
+}
} // end namespace path
namespace windows {
@@ -896,11 +927,13 @@ std::error_code UTF8ToUTF16(llvm::StringRef utf8,
return std::error_code();
}
-std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
- llvm::SmallVectorImpl<char> &utf8) {
+static
+std::error_code UTF16ToCodePage(unsigned codepage, const wchar_t *utf16,
+ size_t utf16_len,
+ llvm::SmallVectorImpl<char> &utf8) {
if (utf16_len) {
// Get length.
- int len = ::WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_len, utf8.begin(),
+ int len = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len, utf8.begin(),
0, NULL, NULL);
if (len == 0)
@@ -910,7 +943,7 @@ std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
utf8.set_size(len);
// Now do the actual conversion.
- len = ::WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_len, utf8.data(),
+ len = ::WideCharToMultiByte(codepage, 0, utf16, utf16_len, utf8.data(),
utf8.size(), NULL, NULL);
if (len == 0)
@@ -923,6 +956,16 @@ std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
return std::error_code();
}
+
+std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ llvm::SmallVectorImpl<char> &utf8) {
+ return UTF16ToCodePage(CP_UTF8, utf16, utf16_len, utf8);
+}
+
+std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
+ llvm::SmallVectorImpl<char> &utf8) {
+ return UTF16ToCodePage(CP_ACP, utf16, utf16_len, utf8);
+}
} // end namespace windows
} // end namespace sys
} // end namespace llvm
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 81aee0e..3819e63 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -183,36 +183,103 @@ static std::error_code windows_error(DWORD E) {
return mapWindowsError(E);
}
+static void AllocateAndPush(const SmallVectorImpl<char> &S,
+ SmallVectorImpl<const char *> &Vector,
+ SpecificBumpPtrAllocator<char> &Allocator) {
+ char *Buffer = Allocator.Allocate(S.size() + 1);
+ ::memcpy(Buffer, S.data(), S.size());
+ Buffer[S.size()] = '\0';
+ Vector.push_back(Buffer);
+}
+
+/// Convert Arg from UTF-16 to UTF-8 and push it onto Args.
+static std::error_code
+ConvertAndPushArg(const wchar_t *Arg, SmallVectorImpl<const char *> &Args,
+ SpecificBumpPtrAllocator<char> &Allocator) {
+ SmallVector<char, MAX_PATH> ArgString;
+ if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), ArgString))
+ return ec;
+ AllocateAndPush(ArgString, Args, Allocator);
+ return std::error_code();
+}
+
+/// \brief Perform wildcard expansion of Arg, or just push it into Args if it
+/// doesn't have wildcards or doesn't match any files.
+static std::error_code
+WildcardExpand(const wchar_t *Arg, SmallVectorImpl<const char *> &Args,
+ SpecificBumpPtrAllocator<char> &Allocator) {
+ if (!wcspbrk(Arg, L"*?")) {
+ // Arg does not contain any wildcard characters. This is the common case.
+ return ConvertAndPushArg(Arg, Args, Allocator);
+ }
+
+ if (wcscmp(Arg, L"/?") == 0 || wcscmp(Arg, L"-?") == 0) {
+ // Don't wildcard expand /?. Always treat it as an option.
+ return ConvertAndPushArg(Arg, Args, Allocator);
+ }
+
+ // Extract any directory part of the argument.
+ SmallVector<char, MAX_PATH> Dir;
+ if (std::error_code ec = windows::UTF16ToUTF8(Arg, wcslen(Arg), Dir))
+ return ec;
+ sys::path::remove_filename(Dir);
+ const int DirSize = Dir.size();
+
+ // Search for matching files.
+ WIN32_FIND_DATAW FileData;
+ HANDLE FindHandle = FindFirstFileW(Arg, &FileData);
+ if (FindHandle == INVALID_HANDLE_VALUE) {
+ return ConvertAndPushArg(Arg, Args, Allocator);
+ }
+
+ std::error_code ec;
+ do {
+ SmallVector<char, MAX_PATH> FileName;
+ ec = windows::UTF16ToUTF8(FileData.cFileName, wcslen(FileData.cFileName),
+ FileName);
+ if (ec)
+ break;
+
+ // Push the filename onto Dir, and remove it afterwards.
+ llvm::sys::path::append(Dir, StringRef(FileName.data(), FileName.size()));
+ AllocateAndPush(Dir, Args, Allocator);
+ Dir.resize(DirSize);
+ } while (FindNextFileW(FindHandle, &FileData));
+
+ FindClose(FindHandle);
+ return ec;
+}
+
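WildcardExpand() is likewise file-local; a hedged illustration of its effect on the argument vector (file names are hypothetical):

// Hypothetical command line:   llvm-foo *.ll -o out.bc
// Directory contents:          a.ll  b.ll  readme.txt
//
// GetArgumentVector() then produces, after expansion:
//   {"llvm-foo", "a.ll", "b.ll", "-o", "out.bc"}
//
// Arguments without '*' or '?' (and the literal "/?" and "-?" options) are
// passed through unchanged, as are patterns that match no files.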
std::error_code
Process::GetArgumentVector(SmallVectorImpl<const char *> &Args,
ArrayRef<const char *>,
SpecificBumpPtrAllocator<char> &ArgAllocator) {
- int NewArgCount;
- std::error_code ec;
-
- wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(),
- &NewArgCount);
+ int ArgCount;
+ wchar_t **UnicodeCommandLine =
+ CommandLineToArgvW(GetCommandLineW(), &ArgCount);
if (!UnicodeCommandLine)
return windows_error(::GetLastError());
- Args.reserve(NewArgCount);
+ Args.reserve(ArgCount);
+ std::error_code ec;
- for (int i = 0; i < NewArgCount; ++i) {
- SmallVector<char, MAX_PATH> NewArgString;
- ec = windows::UTF16ToUTF8(UnicodeCommandLine[i],
- wcslen(UnicodeCommandLine[i]),
- NewArgString);
+ for (int i = 0; i < ArgCount; ++i) {
+ ec = WildcardExpand(UnicodeCommandLine[i], Args, ArgAllocator);
if (ec)
break;
-
- char *Buffer = ArgAllocator.Allocate(NewArgString.size() + 1);
- ::memcpy(Buffer, NewArgString.data(), NewArgString.size() + 1);
- Args.push_back(Buffer);
}
+
LocalFree(UnicodeCommandLine);
- if (ec)
- return ec;
+ return ec;
+}
+
+std::error_code Process::FixupStandardFileDescriptors() {
+ return std::error_code();
+}
+std::error_code Process::SafelyCloseFileDescriptor(int FD) {
+ if (::close(FD) < 0)
+ return std::error_code(errno, std::generic_category());
return std::error_code();
}
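
The Process.inc hunk above makes GetArgumentVector do UTF-16 conversion and wildcard expansion on Windows in one pass. A minimal, illustrative caller-side sketch, assuming a main(int argc, const char **argv) and the GetArgumentVector declaration in llvm/Support/Process.h at this revision:

    SmallVector<const char *, 256> Argv;
    SpecificBumpPtrAllocator<char> ArgAllocator;
    // On Windows this re-reads the real UTF-16 command line, converts it to
    // UTF-8 and expands * and ? against the filesystem; argv is ignored there.
    if (std::error_code EC = sys::Process::GetArgumentVector(
            Argv, makeArrayRef(argv, argc), ArgAllocator)) {
      errs() << "could not get arguments: " << EC.message() << '\n';
      return 1;
    }
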
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index b2f71ae..72c2a58 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -12,7 +12,11 @@
//===----------------------------------------------------------------------===//
#include "WindowsSupport.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/WindowsError.h"
#include <cstdio>
#include <fcntl.h>
#include <io.h>
@@ -28,43 +32,67 @@ using namespace sys;
ProcessInfo::ProcessInfo() : ProcessHandle(0), Pid(0), ReturnCode(0) {}
-// This function just uses the PATH environment variable to find the program.
-std::string sys::FindProgramByName(const std::string &progName) {
- // Check some degenerate cases
- if (progName.length() == 0) // no program
- return "";
- std::string temp = progName;
- // Return paths with slashes verbatim.
- if (progName.find('\\') != std::string::npos ||
- progName.find('/') != std::string::npos)
- return temp;
-
- // At this point, the file name is valid and does not contain slashes.
- // Let Windows search for it.
- SmallVector<wchar_t, MAX_PATH> progNameUnicode;
- if (windows::UTF8ToUTF16(progName, progNameUnicode))
- return "";
-
- SmallVector<wchar_t, MAX_PATH> buffer;
- DWORD len = MAX_PATH;
- do {
- buffer.reserve(len);
- len = ::SearchPathW(NULL, progNameUnicode.data(), L".exe",
- buffer.capacity(), buffer.data(), NULL);
-
- // See if it wasn't found.
- if (len == 0)
- return "";
-
- // Buffer was too small; grow and retry.
- } while (len > buffer.capacity());
-
- buffer.set_size(len);
- SmallVector<char, MAX_PATH> result;
- if (windows::UTF16ToUTF8(buffer.begin(), buffer.size(), result))
- return "";
-
- return std::string(result.data(), result.size());
+ErrorOr<std::string> sys::findProgramByName(StringRef Name,
+ ArrayRef<StringRef> Paths) {
+ assert(!Name.empty() && "Must have a name!");
+
+ if (Name.find_first_of("/\\") != StringRef::npos)
+ return std::string(Name);
+
+ const wchar_t *Path = nullptr;
+ std::wstring PathStorage;
+ if (!Paths.empty()) {
+ PathStorage.reserve(Paths.size() * MAX_PATH);
+ for (unsigned i = 0; i < Paths.size(); ++i) {
+ if (i)
+ PathStorage.push_back(L';');
+ StringRef P = Paths[i];
+ SmallVector<wchar_t, MAX_PATH> TmpPath;
+ if (std::error_code EC = windows::UTF8ToUTF16(P, TmpPath))
+ return EC;
+ PathStorage.append(TmpPath.begin(), TmpPath.end());
+ }
+ Path = PathStorage.c_str();
+ }
+
+ SmallVector<wchar_t, MAX_PATH> U16Name;
+ if (std::error_code EC = windows::UTF8ToUTF16(Name, U16Name))
+ return EC;
+
+ SmallVector<StringRef, 12> PathExts;
+ PathExts.push_back("");
+ PathExts.push_back(".exe"); // FIXME: This must be in %PATHEXT%.
+ SplitString(std::getenv("PATHEXT"), PathExts, ";");
+
+ SmallVector<wchar_t, MAX_PATH> U16Result;
+ DWORD Len = MAX_PATH;
+ for (StringRef Ext : PathExts) {
+ SmallVector<wchar_t, MAX_PATH> U16Ext;
+ if (std::error_code EC = windows::UTF8ToUTF16(Ext, U16Ext))
+ return EC;
+
+ do {
+ U16Result.reserve(Len);
+ Len = ::SearchPathW(Path, c_str(U16Name),
+ U16Ext.empty() ? nullptr : c_str(U16Ext),
+ U16Result.capacity(), U16Result.data(), nullptr);
+ } while (Len > U16Result.capacity());
+
+ if (Len != 0)
+ break; // Found it.
+ }
+
+ if (Len == 0)
+ return mapWindowsError(::GetLastError());
+
+ U16Result.set_size(Len);
+
+ SmallVector<char, MAX_PATH> U8Result;
+ if (std::error_code EC =
+ windows::UTF16ToUTF8(U16Result.data(), U16Result.size(), U8Result))
+ return EC;
+
+ return std::string(U8Result.begin(), U8Result.end());
}
static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) {
@@ -166,19 +194,7 @@ static unsigned int ArgLenWithQuotes(const char *Str) {
}
-static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
- const char **envp, const StringRef **redirects,
- unsigned memoryLimit, std::string *ErrMsg) {
- if (!sys::fs::can_execute(Program)) {
- if (ErrMsg)
- *ErrMsg = "program not executable";
- return false;
- }
-
- // Windows wants a command line, not an array of args, to pass to the new
- // process. We have to concatenate them all, while quoting the args that
- // have embedded spaces (or are empty).
-
+static std::unique_ptr<char[]> flattenArgs(const char **args) {
// First, determine the length of the command line.
unsigned len = 0;
for (unsigned i = 0; args[i]; i++) {
@@ -216,6 +232,22 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
}
*p = 0;
+ return command;
+}
+
+static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
+ const char **envp, const StringRef **redirects,
+ unsigned memoryLimit, std::string *ErrMsg) {
+ if (!sys::fs::can_execute(Program)) {
+ if (ErrMsg)
+ *ErrMsg = "program not executable";
+ return false;
+ }
+
+ // Windows wants a command line, not an array of args, to pass to the new
+ // process. We have to concatenate them all, while quoting the args that
+ // have embedded spaces (or are empty).
+ std::unique_ptr<char[]> command = flattenArgs(args);
// The pointer to the environment block for the new process.
std::vector<wchar_t> EnvBlock;
@@ -422,20 +454,64 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
return WaitResult;
}
- std::error_code sys::ChangeStdinToBinary(){
- int result = _setmode( _fileno(stdin), _O_BINARY );
+std::error_code sys::ChangeStdinToBinary() {
+ int result = _setmode(_fileno(stdin), _O_BINARY);
if (result == -1)
return std::error_code(errno, std::generic_category());
return std::error_code();
}
- std::error_code sys::ChangeStdoutToBinary(){
- int result = _setmode( _fileno(stdout), _O_BINARY );
+std::error_code sys::ChangeStdoutToBinary() {
+ int result = _setmode(_fileno(stdout), _O_BINARY);
if (result == -1)
return std::error_code(errno, std::generic_category());
return std::error_code();
}
+std::error_code
+llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
+ WindowsEncodingMethod Encoding) {
+ std::error_code EC;
+ llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::F_Text);
+ if (EC)
+ return EC;
+
+ if (Encoding == WEM_UTF8) {
+ OS << Contents;
+ } else if (Encoding == WEM_CurrentCodePage) {
+ SmallVector<wchar_t, 1> ArgsUTF16;
+ SmallVector<char, 1> ArgsCurCP;
+
+ if ((EC = windows::UTF8ToUTF16(Contents, ArgsUTF16)))
+ return EC;
+
+ if ((EC = windows::UTF16ToCurCP(
+ ArgsUTF16.data(), ArgsUTF16.size(), ArgsCurCP)))
+ return EC;
+
+ OS.write(ArgsCurCP.data(), ArgsCurCP.size());
+ } else if (Encoding == WEM_UTF16) {
+ SmallVector<wchar_t, 1> ArgsUTF16;
+
+ if ((EC = windows::UTF8ToUTF16(Contents, ArgsUTF16)))
+ return EC;
+
+ // Endianness guessing
+ char BOM[2];
+ uint16_t src = UNI_UTF16_BYTE_ORDER_MARK_NATIVE;
+ memcpy(BOM, &src, 2);
+ OS.write(BOM, 2);
+ OS.write((char *)ArgsUTF16.data(), ArgsUTF16.size() << 1);
+ } else {
+ llvm_unreachable("Unknown encoding");
+ }
+
+ if (OS.has_error())
+ return std::make_error_code(std::errc::io_error);
+
+ return EC;
+}
+
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
// The documented max length of the command line passed to CreateProcess.
static const size_t MaxCommandStringLength = 32768;
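
Program.inc now exposes two reworked entry points: findProgramByName reports failure through ErrorOr instead of an empty string, and writeFileWithEncoding transcodes UTF-8 contents before writing. A hedged usage sketch against the llvm/Support/Program.h declarations at this revision (the program and file names are made up):

    ErrorOr<std::string> Clang = sys::findProgramByName("clang");
    if (!Clang) {
      errs() << "clang not found: " << Clang.getError().message() << '\n';
      return 1;
    }
    // Handy for response files that will be read back using the local code page.
    if (std::error_code EC = sys::writeFileWithEncoding(
            "args.rsp", "-O2 -c foo.c", sys::WEM_CurrentCodePage))
      errs() << "could not write args.rsp: " << EC.message() << '\n';
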
diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc
index 00d0e93..2d1d25f 100644
--- a/lib/Support/Windows/RWMutex.inc
+++ b/lib/Support/Windows/RWMutex.inc
@@ -84,12 +84,10 @@ RWMutexImpl::RWMutexImpl() {
}
RWMutexImpl::~RWMutexImpl() {
- if (sHasSRW) {
- // Nothing to do in the case of slim reader/writers
- } else {
+ if (!sHasSRW)
DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
- free(data_);
- }
+ // Nothing to do in the case of slim reader/writers except free the memory.
+ free(data_);
}
bool RWMutexImpl::reader_acquire() {
diff --git a/lib/Support/Windows/ThreadLocal.inc b/lib/Support/Windows/ThreadLocal.inc
index 3914cf7..14ce619 100644
--- a/lib/Support/Windows/ThreadLocal.inc
+++ b/lib/Support/Windows/ThreadLocal.inc
@@ -23,7 +23,7 @@ namespace llvm {
using namespace sys;
ThreadLocalImpl::ThreadLocalImpl() : data() {
- typedef int SIZE_TOO_BIG[sizeof(DWORD) <= sizeof(data) ? 1 : -1];
+ static_assert(sizeof(DWORD) <= sizeof(data), "size too big");
DWORD* tls = reinterpret_cast<DWORD*>(&data);
*tls = TlsAlloc();
assert(*tls != TLS_OUT_OF_INDEXES);
diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h
index f68835b..6d9c5fb 100644
--- a/lib/Support/Windows/WindowsSupport.h
+++ b/lib/Support/Windows/WindowsSupport.h
@@ -166,6 +166,9 @@ namespace windows {
std::error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16);
std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
SmallVectorImpl<char> &utf8);
+/// Convert from UTF-16 to the current code page used by the system.
+std::error_code UTF16ToCurCP(const wchar_t *utf16, size_t utf16_len,
+ SmallVectorImpl<char> &utf8);
} // end namespace windows
} // end namespace sys
} // end namespace llvm.
diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc
index 379645d..cd56b13 100644
--- a/lib/Support/Windows/explicit_symbols.inc
+++ b/lib/Support/Windows/explicit_symbols.inc
@@ -63,4 +63,34 @@
/* msvcrt */
#if defined(_MSC_VER)
EXPLICIT_SYMBOL2(alloca, _alloca_probe)
+
+#ifdef _M_IX86
+#define INLINE_DEF_FLOAT_SYMBOL(SYM, ARGC) INLINE_DEF_SYMBOL##ARGC(float, SYM)
+ INLINE_DEF_FLOAT_SYMBOL(acosf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(asinf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(atanf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(atan2f, 2)
+ INLINE_DEF_FLOAT_SYMBOL(ceilf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(cosf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(coshf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(expf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(floorf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(fmodf, 2)
+ INLINE_DEF_FLOAT_SYMBOL(logf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(powf, 2)
+ INLINE_DEF_FLOAT_SYMBOL(sinf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(sinhf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(sqrtf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(tanf, 1)
+ INLINE_DEF_FLOAT_SYMBOL(tanhf, 1)
+
+ // These were added in VS 2013.
+#if (1800 <= _MSC_VER && _MSC_VER < 1900)
+ INLINE_DEF_FLOAT_SYMBOL(copysignf, 2)
+ INLINE_DEF_FLOAT_SYMBOL(fminf, 2)
+ INLINE_DEF_FLOAT_SYMBOL(fmaxf, 2)
+#endif
+#undef INLINE_DEF_FLOAT_SYMBOL
+#endif
+
#endif
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 3be02ee..4688ff1 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -259,8 +259,8 @@ namespace yaml {
/// @brief Scans YAML tokens from a MemoryBuffer.
class Scanner {
public:
- Scanner(const StringRef Input, SourceMgr &SM);
- Scanner(MemoryBuffer *Buffer, SourceMgr &SM_);
+ Scanner(StringRef Input, SourceMgr &SM);
+ Scanner(MemoryBufferRef Buffer, SourceMgr &SM_);
/// @brief Parse the next token and return it without popping it.
Token &peekNext();
@@ -294,6 +294,8 @@ public:
}
private:
+ void init(MemoryBufferRef Buffer);
+
StringRef currentInput() {
return StringRef(Current, End - Current);
}
@@ -469,7 +471,7 @@ private:
SourceMgr &SM;
/// @brief The original input.
- MemoryBuffer *InputBuffer;
+ MemoryBufferRef InputBuffer;
/// @brief The current position of the scanner.
StringRef::iterator Current;
@@ -699,34 +701,28 @@ std::string yaml::escape(StringRef Input) {
return EscapedInput;
}
-Scanner::Scanner(StringRef Input, SourceMgr &sm)
- : SM(sm)
- , Indent(-1)
- , Column(0)
- , Line(0)
- , FlowLevel(0)
- , IsStartOfStream(true)
- , IsSimpleKeyAllowed(true)
- , Failed(false) {
- InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
- SM.AddNewSourceBuffer(InputBuffer, SMLoc());
- Current = InputBuffer->getBufferStart();
- End = InputBuffer->getBufferEnd();
-}
-
-Scanner::Scanner(MemoryBuffer *Buffer, SourceMgr &SM_)
- : SM(SM_)
- , InputBuffer(Buffer)
- , Current(InputBuffer->getBufferStart())
- , End(InputBuffer->getBufferEnd())
- , Indent(-1)
- , Column(0)
- , Line(0)
- , FlowLevel(0)
- , IsStartOfStream(true)
- , IsSimpleKeyAllowed(true)
- , Failed(false) {
- SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+Scanner::Scanner(StringRef Input, SourceMgr &sm) : SM(sm) {
+ init(MemoryBufferRef(Input, "YAML"));
+}
+
+Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_) : SM(SM_) {
+ init(Buffer);
+}
+
+void Scanner::init(MemoryBufferRef Buffer) {
+ InputBuffer = Buffer;
+ Current = InputBuffer.getBufferStart();
+ End = InputBuffer.getBufferEnd();
+ Indent = -1;
+ Column = 0;
+ Line = 0;
+ FlowLevel = 0;
+ IsStartOfStream = true;
+ IsSimpleKeyAllowed = true;
+ Failed = false;
+ std::unique_ptr<MemoryBuffer> InputBufferOwner =
+ MemoryBuffer::getMemBuffer(Buffer);
+ SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
}
Token &Scanner::peekNext() {
@@ -1524,7 +1520,7 @@ bool Scanner::fetchMoreTokens() {
Stream::Stream(StringRef Input, SourceMgr &SM)
: scanner(new Scanner(Input, SM)), CurrentDoc() {}
-Stream::Stream(MemoryBuffer *InputBuffer, SourceMgr &SM)
+Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM)
: scanner(new Scanner(InputBuffer, SM)), CurrentDoc() {}
Stream::~Stream() {}
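
yaml::Scanner and yaml::Stream now borrow a MemoryBufferRef instead of taking ownership of a MemoryBuffer*, so the caller keeps the buffer alive. A minimal sketch, assuming llvm/Support/YAMLParser.h and llvm/Support/MemoryBuffer.h as of this revision (the file name is hypothetical):

    SourceMgr SM;
    ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
        MemoryBuffer::getFile("config.yaml");
    if (std::error_code EC = BufOrErr.getError())
      return EC;
    // The Stream only references the buffer; the unique_ptr remains the owner.
    yaml::Stream YS((*BufOrErr)->getMemBufferRef(), SM);
    for (yaml::document_iterator DI = YS.begin(), DE = YS.end(); DI != DE; ++DI)
      if (yaml::Node *Root = DI->getRoot())
        (void)Root; // walk the document here
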
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index 5212624..81edca2 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -63,6 +63,8 @@ std::error_code Input::error() { return EC; }
void Input::HNode::anchor() {}
void Input::EmptyHNode::anchor() {}
void Input::ScalarHNode::anchor() {}
+void Input::MapHNode::anchor() {}
+void Input::SequenceHNode::anchor() {}
bool Input::outputting() {
return false;
@@ -82,7 +84,7 @@ bool Input::setCurrentDocument() {
++DocIterator;
return setCurrentDocument();
}
- TopNode.reset(this->createHNodes(N));
+ TopNode = this->createHNodes(N);
CurrentNode = TopNode.get();
return true;
}
@@ -133,7 +135,7 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
return false;
}
MN->ValidKeys.push_back(Key);
- HNode *Value = MN->Mapping[Key];
+ HNode *Value = MN->Mapping[Key].get();
if (!Value) {
if (Required)
setError(CurrentNode, Twine("missing required key '") + Key + "'");
@@ -159,7 +161,7 @@ void Input::endMapping() {
return;
for (const auto &NN : MN->Mapping) {
if (!MN->isValidKey(NN.first())) {
- setError(NN.second, Twine("unknown key '") + NN.first() + "'");
+ setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'");
break;
}
}
@@ -180,7 +182,7 @@ bool Input::preflightElement(unsigned Index, void *&SaveInfo) {
return false;
if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
SaveInfo = CurrentNode;
- CurrentNode = SQ->Entries[Index];
+ CurrentNode = SQ->Entries[Index].get();
return true;
}
return false;
@@ -202,7 +204,7 @@ bool Input::preflightFlowElement(unsigned index, void *&SaveInfo) {
return false;
if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
SaveInfo = CurrentNode;
- CurrentNode = SQ->Entries[index];
+ CurrentNode = SQ->Entries[index].get();
return true;
}
return false;
@@ -253,8 +255,8 @@ bool Input::bitSetMatch(const char *Str, bool) {
return false;
if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
unsigned Index = 0;
- for (HNode *N : SQ->Entries) {
- if (ScalarHNode *SN = dyn_cast<ScalarHNode>(N)) {
+ for (auto &N : SQ->Entries) {
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(N.get())) {
if (SN->value().equals(Str)) {
BitValuesUsed[Index] = true;
return true;
@@ -277,7 +279,7 @@ void Input::endBitSetScalar() {
assert(BitValuesUsed.size() == SQ->Entries.size());
for (unsigned i = 0; i < SQ->Entries.size(); ++i) {
if (!BitValuesUsed[i]) {
- setError(SQ->Entries[i], "unknown bit value");
+ setError(SQ->Entries[i].get(), "unknown bit value");
return;
}
}
@@ -302,7 +304,7 @@ void Input::setError(Node *node, const Twine &message) {
EC = make_error_code(errc::invalid_argument);
}
-Input::HNode *Input::createHNodes(Node *N) {
+std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
SmallString<128> StringStorage;
if (ScalarNode *SN = dyn_cast<ScalarNode>(N)) {
StringRef KeyStr = SN->getValue(StringStorage);
@@ -313,20 +315,25 @@ Input::HNode *Input::createHNodes(Node *N) {
memcpy(Buf, &StringStorage[0], Len);
KeyStr = StringRef(Buf, Len);
}
- return new ScalarHNode(N, KeyStr);
+ return llvm::make_unique<ScalarHNode>(N, KeyStr);
} else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
- SequenceHNode *SQHNode = new SequenceHNode(N);
+ auto SQHNode = llvm::make_unique<SequenceHNode>(N);
for (Node &SN : *SQ) {
- HNode *Entry = this->createHNodes(&SN);
+ auto Entry = this->createHNodes(&SN);
if (EC)
break;
- SQHNode->Entries.push_back(Entry);
+ SQHNode->Entries.push_back(std::move(Entry));
}
- return SQHNode;
+ return std::move(SQHNode);
} else if (MappingNode *Map = dyn_cast<MappingNode>(N)) {
- MapHNode *mapHNode = new MapHNode(N);
+ auto mapHNode = llvm::make_unique<MapHNode>(N);
for (KeyValueNode &KVN : *Map) {
- ScalarNode *KeyScalar = dyn_cast<ScalarNode>(KVN.getKey());
+ Node *KeyNode = KVN.getKey();
+ ScalarNode *KeyScalar = dyn_cast<ScalarNode>(KeyNode);
+ if (!KeyScalar) {
+ setError(KeyNode, "Map key must be a scalar");
+ break;
+ }
StringStorage.clear();
StringRef KeyStr = KeyScalar->getValue(StringStorage);
if (!StringStorage.empty()) {
@@ -336,14 +343,14 @@ Input::HNode *Input::createHNodes(Node *N) {
memcpy(Buf, &StringStorage[0], Len);
KeyStr = StringRef(Buf, Len);
}
- HNode *ValueHNode = this->createHNodes(KVN.getValue());
+ auto ValueHNode = this->createHNodes(KVN.getValue());
if (EC)
break;
- mapHNode->Mapping[KeyStr] = ValueHNode;
+ mapHNode->Mapping[KeyStr] = std::move(ValueHNode);
}
- return mapHNode;
+ return std::move(mapHNode);
} else if (isa<NullNode>(N)) {
- return new EmptyHNode(N);
+ return llvm::make_unique<EmptyHNode>(N);
} else {
setError(N, "unknown node kind");
return nullptr;
@@ -366,18 +373,6 @@ bool Input::canElideEmptySequence() {
return false;
}
-Input::MapHNode::~MapHNode() {
- for (auto &N : Mapping)
- delete N.second;
-}
-
-Input::SequenceHNode::~SequenceHNode() {
- for (HNode *N : Entries)
- delete N;
-}
-
-
-
//===----------------------------------------------------------------------===//
// Output
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index f7c213a..bbbbe4a 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include <cctype>
@@ -394,6 +395,62 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
}
}
+raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
+ unsigned Len = FS.Str.size();
+ int PadAmount = FS.Width - Len;
+ if (FS.RightJustify && (PadAmount > 0))
+ this->indent(PadAmount);
+ this->operator<<(FS.Str);
+ if (!FS.RightJustify && (PadAmount > 0))
+ this->indent(PadAmount);
+ return *this;
+}
+
+raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) {
+ if (FN.Hex) {
+ unsigned Nibbles = (64 - countLeadingZeros(FN.HexValue)+3)/4;
+ unsigned Width = (FN.Width > Nibbles+2) ? FN.Width : Nibbles+2;
+
+ char NumberBuffer[20] = "0x0000000000000000";
+ char *EndPtr = NumberBuffer+Width;
+ char *CurPtr = EndPtr;
+ const char A = FN.Upper ? 'A' : 'a';
+ unsigned long long N = FN.HexValue;
+ while (N) {
+ uintptr_t x = N % 16;
+ *--CurPtr = (x < 10 ? '0' + x : A + x - 10);
+ N /= 16;
+ }
+
+ return write(NumberBuffer, Width);
+ } else {
+ // Zero is a special case.
+ if (FN.DecValue == 0) {
+ this->indent(FN.Width-1);
+ return *this << '0';
+ }
+ char NumberBuffer[32];
+ char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+ bool Neg = (FN.DecValue < 0);
+ uint64_t N = Neg ? -static_cast<uint64_t>(FN.DecValue) : FN.DecValue;
+ while (N) {
+ *--CurPtr = '0' + char(N % 10);
+ N /= 10;
+ }
+ int Len = EndPtr - CurPtr;
+ int Pad = FN.Width - Len;
+ if (Neg)
+ --Pad;
+ if (Pad > 0)
+ this->indent(Pad);
+ if (Neg)
+ *this << '-';
+ return write(CurPtr, Len);
+ }
+}
+
+
/// indent - Insert 'NumSpaces' spaces.
raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
static const char Spaces[] = " "
@@ -426,20 +483,14 @@ void format_object_base::home() {
// raw_fd_ostream
//===----------------------------------------------------------------------===//
-/// raw_fd_ostream - Open the specified file for writing. If an error
-/// occurs, information about the error is put into ErrorInfo, and the
-/// stream should be immediately destroyed; the string will be empty
-/// if no error occurred.
-raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
+raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
sys::fs::OpenFlags Flags)
: Error(false), UseAtomicWrites(false), pos(0) {
- assert(Filename && "Filename is null");
- ErrorInfo.clear();
-
+ EC = std::error_code();
// Handle "-" as stdout. Note that when we do this, we consider ourself
// the owner of stdout. This means that we can do things like close the
// file descriptor when we're done and set the "binary" flag globally.
- if (Filename[0] == '-' && Filename[1] == 0) {
+ if (Filename == "-") {
FD = STDOUT_FILENO;
// If user requested binary then put stdout into binary mode if
// possible.
@@ -450,11 +501,9 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
return;
}
- std::error_code EC = sys::fs::openFileForWrite(Filename, FD, Flags);
+ EC = sys::fs::openFileForWrite(Filename, FD, Flags);
if (EC) {
- ErrorInfo = "Error opening output file '" + std::string(Filename) + "': " +
- EC.message();
ShouldClose = false;
return;
}
@@ -487,12 +536,8 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
raw_fd_ostream::~raw_fd_ostream() {
if (FD >= 0) {
flush();
- if (ShouldClose)
- while (::close(FD) != 0)
- if (errno != EINTR) {
- error_detected();
- break;
- }
+ if (ShouldClose && sys::Process::SafelyCloseFileDescriptor(FD))
+ error_detected();
}
#ifdef __MINGW32__
@@ -566,11 +611,8 @@ void raw_fd_ostream::close() {
assert(ShouldClose);
ShouldClose = false;
flush();
- while (::close(FD) != 0)
- if (errno != EINTR) {
- error_detected();
- break;
- }
+ if (sys::Process::SafelyCloseFileDescriptor(FD))
+ error_detected();
FD = -1;
}
@@ -660,7 +702,7 @@ bool raw_fd_ostream::has_colors() const {
/// Use it like: outs() << "foo" << "bar";
raw_ostream &llvm::outs() {
// Set buffer settings to model stdout behavior.
- // Delete the file descriptor when the program exists, forcing error
+ // Delete the file descriptor when the program exits, forcing error
// detection. If you don't want this behavior, don't use outs().
static raw_fd_ostream S(STDOUT_FILENO, true);
return S;
@@ -729,24 +771,17 @@ void raw_svector_ostream::resync() {
}
void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
- // If we're writing bytes from the end of the buffer into the smallvector, we
- // don't need to copy the bytes, just commit the bytes because they are
- // already in the right place.
if (Ptr == OS.end()) {
- assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!");
- OS.set_size(OS.size() + Size);
+ // Grow the buffer to include the scratch area without copying.
+ size_t NewSize = OS.size() + Size;
+ assert(NewSize <= OS.capacity() && "Invalid write_impl() call!");
+ OS.set_size(NewSize);
} else {
- assert(GetNumBytesInBuffer() == 0 &&
- "Should be writing from buffer if some bytes in it");
- // Otherwise, do copy the bytes.
- OS.append(Ptr, Ptr+Size);
+ assert(!GetNumBytesInBuffer());
+ OS.append(Ptr, Ptr + Size);
}
- // Grow the vector if necessary.
- if (OS.capacity() - OS.size() < 64)
- OS.reserve(OS.capacity() * 2);
-
- // Update the buffer position.
+ OS.reserve(OS.size() + 64);
SetBuffer(OS.end(), OS.capacity() - OS.size());
}
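
The raw_ostream hunks above add column and fixed-width hex formatting operators and move raw_fd_ostream construction to std::error_code. A short sketch of both, assuming the right_justify and format_hex helpers in llvm/Support/Format.h at this revision (the output file name and address are invented):

    std::error_code EC;
    raw_fd_ostream OS("symbols.txt", EC, sys::fs::F_Text);
    if (EC)
      return EC; // no ErrorInfo string to parse any more
    // FormattedString pads to a column; FormattedNumber prints fixed-width hex.
    OS << right_justify("main", 20) << ' ' << format_hex(0x400570, 10) << '\n';
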
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index e317fbf..2578cc2 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -56,11 +56,11 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
errs() << argv0 << ": the option -d must be used together with -o\n";
return 1;
}
- std::string Error;
- tool_output_file DepOut(DependFilename.c_str(), Error, sys::fs::F_Text);
- if (!Error.empty()) {
- errs() << argv0 << ": error opening " << DependFilename
- << ":" << Error << "\n";
+ std::error_code EC;
+ tool_output_file DepOut(DependFilename, EC, sys::fs::F_Text);
+ if (EC) {
+ errs() << argv0 << ": error opening " << DependFilename << ":"
+ << EC.message() << "\n";
return 1;
}
DepOut.os() << OutputFilename << ":";
@@ -88,10 +88,9 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
<< "': " << EC.message() << "\n";
return 1;
}
- MemoryBuffer *F = FileOrErr.get().release();
// Tell SrcMgr about this buffer, which is what TGParser will pick up.
- SrcMgr.AddNewSourceBuffer(F, SMLoc());
+ SrcMgr.AddNewSourceBuffer(std::move(*FileOrErr), SMLoc());
// Record the location of the include directory so that the lexer can find
// it later.
@@ -102,11 +101,11 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
if (Parser.ParseFile())
return 1;
- std::string Error;
- tool_output_file Out(OutputFilename.c_str(), Error, sys::fs::F_Text);
- if (!Error.empty()) {
- errs() << argv0 << ": error opening " << OutputFilename
- << ":" << Error << "\n";
+ std::error_code EC;
+ tool_output_file Out(OutputFilename, EC, sys::fs::F_Text);
+ if (EC) {
+ errs() << argv0 << ": error opening " << OutputFilename << ":"
+ << EC.message() << "\n";
return 1;
}
if (!DependFilename.empty()) {
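
Other users of tool_output_file follow the same migration; a hedged sketch of the new constructor (declared in llvm/Support/ToolOutputFile.h; the output file name is hypothetical):

    std::error_code EC;
    tool_output_file Out("GenFoo.inc", EC, sys::fs::F_Text);
    if (EC) {
      errs() << "error opening GenFoo.inc: " << EC.message() << '\n';
      return 1;
    }
    Out.os() << "// Generated file, do not edit.\n";
    Out.keep(); // otherwise the file is removed when Out goes out of scope
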
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index f7843dc..34e3ab4 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -114,8 +114,21 @@ Init *BitRecTy::convertValue(IntInit *II) {
Init *BitRecTy::convertValue(TypedInit *VI) {
RecTy *Ty = VI->getType();
- if (isa<BitRecTy>(Ty) || isa<BitsRecTy>(Ty) || isa<IntRecTy>(Ty))
+ if (isa<BitRecTy>(Ty))
return VI; // Accept variable if it is already of bit type!
+ if (auto *BitsTy = dyn_cast<BitsRecTy>(Ty))
+ // Accept only bits<1> expression.
+ return BitsTy->getNumBits() == 1 ? VI : nullptr;
+ // Ternary !if can be converted to bit, but only if both sides are
+ // convertible to a bit.
+ if (TernOpInit *TOI = dyn_cast<TernOpInit>(VI)) {
+ if (TOI->getOpcode() != TernOpInit::TernaryOp::IF)
+ return nullptr;
+ if (!TOI->getMHS()->convertInitializerTo(BitRecTy::get()) ||
+ !TOI->getRHS()->convertInitializerTo(BitRecTy::get()))
+ return nullptr;
+ return TOI;
+ }
return nullptr;
}
@@ -952,17 +965,21 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
case ADD:
+ case AND:
case SHL:
case SRA:
case SRL: {
- IntInit *LHSi = dyn_cast<IntInit>(LHS);
- IntInit *RHSi = dyn_cast<IntInit>(RHS);
+ IntInit *LHSi =
+ dyn_cast_or_null<IntInit>(LHS->convertInitializerTo(IntRecTy::get()));
+ IntInit *RHSi =
+ dyn_cast_or_null<IntInit>(RHS->convertInitializerTo(IntRecTy::get()));
if (LHSi && RHSi) {
int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue();
int64_t Result;
switch (getOpcode()) {
default: llvm_unreachable("Bad opcode!");
case ADD: Result = LHSv + RHSv; break;
+ case AND: Result = LHSv & RHSv; break;
case SHL: Result = LHSv << RHSv; break;
case SRA: Result = LHSv >> RHSv; break;
case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
@@ -989,6 +1006,7 @@ std::string BinOpInit::getAsString() const {
switch (Opc) {
case CONCAT: Result = "!con"; break;
case ADD: Result = "!add"; break;
+ case AND: Result = "!and"; break;
case SHL: Result = "!shl"; break;
case SRA: Result = "!sra"; break;
case SRL: Result = "!srl"; break;
@@ -1690,13 +1708,6 @@ const std::string &Record::getName() const {
}
void Record::setName(Init *NewName) {
- if (TrackedRecords.getDef(Name->getAsUnquotedString()) == this) {
- TrackedRecords.removeDef(Name->getAsUnquotedString());
- TrackedRecords.addDef(this);
- } else if (TrackedRecords.getClass(Name->getAsUnquotedString()) == this) {
- TrackedRecords.removeClass(Name->getAsUnquotedString());
- TrackedRecords.addClass(this);
- } // Otherwise this isn't yet registered.
Name = NewName;
checkName();
// DO NOT resolve record values to the name at this point because
@@ -1996,16 +2007,14 @@ void RecordKeeper::dump() const { errs() << *this; }
raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
OS << "------------- Classes -----------------\n";
- const std::map<std::string, Record*> &Classes = RK.getClasses();
- for (std::map<std::string, Record*>::const_iterator I = Classes.begin(),
- E = Classes.end(); I != E; ++I)
- OS << "class " << *I->second;
+ const auto &Classes = RK.getClasses();
+ for (const auto &C : Classes)
+ OS << "class " << *C.second;
OS << "------------- Defs -----------------\n";
- const std::map<std::string, Record*> &Defs = RK.getDefs();
- for (std::map<std::string, Record*>::const_iterator I = Defs.begin(),
- E = Defs.end(); I != E; ++I)
- OS << "def " << *I->second;
+ const auto &Defs = RK.getDefs();
+ for (const auto &D : Defs)
+ OS << "def " << *D.second;
return OS;
}
@@ -2020,10 +2029,9 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
PrintFatalError("ERROR: Couldn't find the `" + ClassName + "' class!\n");
std::vector<Record*> Defs;
- for (std::map<std::string, Record*>::const_iterator I = getDefs().begin(),
- E = getDefs().end(); I != E; ++I)
- if (I->second->isSubClassOf(Class))
- Defs.push_back(I->second);
+ for (const auto &D : getDefs())
+ if (D.second->isSubClassOf(Class))
+ Defs.push_back(D.second.get());
return Defs;
}
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index fc1d3ca..63b8584 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -411,7 +411,7 @@ tgtok::TokKind TGLexer::LexNumber() {
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "Invalid binary number");
CurIntVal = strtoll(NumStart, nullptr, 2);
- return tgtok::IntVal;
+ return tgtok::BinaryIntVal;
}
}
@@ -471,6 +471,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("tail", tgtok::XTail)
.Case("con", tgtok::XConcat)
.Case("add", tgtok::XADD)
+ .Case("and", tgtok::XAND)
.Case("shl", tgtok::XSHL)
.Case("sra", tgtok::XSRA)
.Case("srl", tgtok::XSRL)
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index a2c95ca..1f750fc 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TGLEXER_H
-#define TGLEXER_H
+#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
+#define LLVM_LIB_TABLEGEN_TGLEXER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
@@ -47,11 +47,15 @@ namespace tgtok {
MultiClass, String,
// !keywords.
- XConcat, XADD, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast, XSubst,
- XForEach, XHead, XTail, XEmpty, XIf, XEq,
+ XConcat, XADD, XAND, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast,
+ XSubst, XForEach, XHead, XTail, XEmpty, XIf, XEq,
// Integer value.
IntVal,
+
+ // Binary constant. Note that these are sized according to the number of
+ // bits given.
+ BinaryIntVal,
// String valued tokens.
Id, StrVal, VarName, CodeFragment
@@ -105,6 +109,11 @@ public:
assert(CurCode == tgtok::IntVal && "This token isn't an integer");
return CurIntVal;
}
+ std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
+ assert(CurCode == tgtok::BinaryIntVal &&
+ "This token isn't a binary integer");
+ return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
+ }
SMLoc getLoc() const;
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 0550692..4d4bbe9 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -135,11 +135,18 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
V = BitsInit::get(NewBits);
}
- if (RV->setValue(V))
+ if (RV->setValue(V)) {
+ std::string InitType = "";
+ if (BitsInit *BI = dyn_cast<BitsInit>(V)) {
+ InitType = (Twine("' of type bit initializer with length ") +
+ Twine(BI->getNumBits())).str();
+ }
return Error(Loc, "Value '" + ValName->getAsUnquotedString() + "' of type '"
+ RV->getType()->getAsString() +
"' is incompatible with initializer '" + V->getAsString()
+ + InitType
+ "'");
+ }
return false;
}
@@ -225,14 +232,14 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
i != iend;
++i) {
// Clone the def and add it to the current multiclass
- Record *NewDef = new Record(**i);
+ auto NewDef = make_unique<Record>(**i);
// Add all of the values in the superclass into the current def.
for (unsigned i = 0, e = MCVals.size(); i != e; ++i)
- if (AddValue(NewDef, SubMultiClass.RefRange.Start, MCVals[i]))
+ if (AddValue(NewDef.get(), SubMultiClass.RefRange.Start, MCVals[i]))
return true;
- CurMC->DefPrototypes.push_back(NewDef);
+ CurMC->DefPrototypes.push_back(NewDef.release());
}
const std::vector<Init *> &SMCTArgs = SMC->Rec.getTemplateArgs();
@@ -341,6 +348,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
TypedInit *IVal = dyn_cast<TypedInit>(IterVals[i].IterValue);
if (!IVal) {
Error(Loc, "foreach iterator value is untyped");
+ delete IterRec;
return true;
}
@@ -349,6 +357,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
if (SetValue(IterRec, Loc, IterVar->getName(),
std::vector<unsigned>(), IVal)) {
Error(Loc, "when instantiating this def");
+ delete IterRec;
return true;
}
@@ -365,6 +374,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
IterRec->setName(GetNewAnonymousName());
else {
Error(Loc, "def already exists: " + IterRec->getNameInitAsString());
+ delete IterRec;
return true;
}
}
@@ -904,6 +914,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XConcat:
case tgtok::XADD:
+ case tgtok::XAND:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
@@ -921,6 +932,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
default: llvm_unreachable("Unhandled code!");
case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break;
case tgtok::XADD: Code = BinOpInit::ADD; Type = IntRecTy::get(); break;
+ case tgtok::XAND: Code = BinOpInit::AND; Type = IntRecTy::get(); break;
case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break;
case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break;
case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break;
@@ -1173,6 +1185,15 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
Lex.Lex(); // Skip '#'.
return ParseSimpleValue(CurRec, ItemType, Mode);
case tgtok::IntVal: R = IntInit::get(Lex.getCurIntVal()); Lex.Lex(); break;
+ case tgtok::BinaryIntVal: {
+ auto BinaryVal = Lex.getCurBinaryIntVal();
+ SmallVector<Init*, 16> Bits(BinaryVal.second);
+ for (unsigned i = 0, e = BinaryVal.second; i != e; ++i)
+ Bits[i] = BitInit::get(BinaryVal.first & (1LL << i));
+ R = BitsInit::get(Bits);
+ Lex.Lex();
+ break;
+ }
case tgtok::StrVal: {
std::string Val = Lex.getCurStrVal();
Lex.Lex();
@@ -1233,12 +1254,17 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
SCRef.Rec = Class;
SCRef.TemplateArgs = ValueList;
// Add info about the subclass to NewRec.
- if (AddSubClass(NewRec, SCRef))
+ if (AddSubClass(NewRec, SCRef)) {
+ delete NewRec;
return nullptr;
+ }
if (!CurMultiClass) {
NewRec->resolveReferences();
Records.addDef(NewRec);
} else {
+ // This needs to be resolved once the multiclass template arguments are
+ // known, before any use.
+ NewRec->setResolveFirst(true);
// Otherwise, we're inside a multiclass, add it to the multiclass.
CurMultiClass->DefPrototypes.push_back(NewRec);
@@ -1284,17 +1310,40 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
}
Lex.Lex(); // eat the '}'
- SmallVector<Init *, 16> NewBits(Vals.size());
+ SmallVector<Init *, 16> NewBits;
+ // As we parse { a, b, ... }, 'a' is the highest bit, but we parse it
+ // first. We'll first read everything into a vector, then we can reverse
+ // it to get the bits in the correct order for the BitsInit value.
for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+ // FIXME: The following two loops would not be duplicated
+ // if the API was a little more orthogonal.
+
+ // bits<n> values are allowed to initialize n bits.
+ if (BitsInit *BI = dyn_cast<BitsInit>(Vals[i])) {
+ for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i)
+ NewBits.push_back(BI->getBit((e - i) - 1));
+ continue;
+ }
+ // bits<n> can also come from variable initializers.
+ if (VarInit *VI = dyn_cast<VarInit>(Vals[i])) {
+ if (BitsRecTy *BitsRec = dyn_cast<BitsRecTy>(VI->getType())) {
+ for (unsigned i = 0, e = BitsRec->getNumBits(); i != e; ++i)
+ NewBits.push_back(VI->getBit((e - i) - 1));
+ continue;
+ }
+ // Fallthrough to try convert this to a bit.
+ }
+ // All other values must be convertible to just a single bit.
Init *Bit = Vals[i]->convertInitializerTo(BitRecTy::get());
if (!Bit) {
Error(BraceLoc, "Element #" + utostr(i) + " (" + Vals[i]->getAsString()+
") is not convertable to a bit");
return nullptr;
}
- NewBits[Vals.size()-i-1] = Bit;
+ NewBits.push_back(Bit);
}
+ std::reverse(NewBits.begin(), NewBits.end());
return BitsInit::get(NewBits);
}
case tgtok::l_square: { // Value ::= '[' ValueList ']'
@@ -1439,6 +1488,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XCast: // Value ::= !unop '(' Value ')'
case tgtok::XConcat:
case tgtok::XADD:
+ case tgtok::XAND:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
@@ -1727,7 +1777,10 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
Init *Val = ParseValue(CurRec, Type);
if (!Val ||
SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
- return nullptr;
+ // Return the name, even if an error is thrown. This is so that we can
+ // continue to make some progress, even without the value having been
+ // initialized.
+ return DeclName;
}
return DeclName;
@@ -1984,6 +2037,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
// Parse ObjectName and make a record for it.
Record *CurRec;
+ bool CurRecOwnershipTransferred = false;
Init *Name = ParseObjectName(CurMultiClass);
if (Name)
CurRec = new Record(Name, DefLoc, Records);
@@ -1998,9 +2052,11 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
if (Records.getDef(CurRec->getNameInitAsString())) {
Error(DefLoc, "def '" + CurRec->getNameInitAsString()
+ "' already defined");
+ delete CurRec;
return true;
}
Records.addDef(CurRec);
+ CurRecOwnershipTransferred = true;
if (ParseObjectBody(CurRec))
return true;
@@ -2010,8 +2066,10 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
// before this object, instantiated prior to defs derived from this object,
// and this available for indirect name resolution when defs derived from
// this object are instantiated.
- if (ParseObjectBody(CurRec))
+ if (ParseObjectBody(CurRec)) {
+ delete CurRec;
return true;
+ }
// Otherwise, a def inside a multiclass, add it to the multiclass.
for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i)
@@ -2019,11 +2077,15 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
== CurRec->getNameInit()) {
Error(DefLoc, "def '" + CurRec->getNameInitAsString() +
"' already defined in this multiclass!");
+ delete CurRec;
return true;
}
CurMultiClass->DefPrototypes.push_back(CurRec);
- } else if (ParseObjectBody(CurRec))
+ CurRecOwnershipTransferred = true;
+ } else if (ParseObjectBody(CurRec)) {
+ delete CurRec;
return true;
+ }
if (!CurMultiClass) // Def's in multiclasses aren't really defs.
// See Record::setName(). This resolve step will see any new name
@@ -2049,9 +2111,13 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
if (ProcessForeachDefs(CurRec, DefLoc)) {
Error(DefLoc,
"Could not process loops for def" + CurRec->getNameInitAsString());
+ if (!CurRecOwnershipTransferred)
+ delete CurRec;
return true;
}
+ if (!CurRecOwnershipTransferred)
+ delete CurRec;
return false;
}
@@ -2196,7 +2262,7 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
// Add this entry to the let stack.
std::vector<LetRecord> LetInfo = ParseLetList();
if (LetInfo.empty()) return true;
- LetStack.push_back(LetInfo);
+ LetStack.push_back(std::move(LetInfo));
if (Lex.getCode() != tgtok::In)
return TokError("expected 'in' at end of top-level 'let'");
@@ -2365,6 +2431,7 @@ InstantiateMulticlassDef(MultiClass &MC,
Error(DefmPrefixRange.Start, "Could not resolve "
+ CurRec->getNameInitAsString() + ":NAME to '"
+ DefmPrefix->getAsUnquotedString() + "'");
+ delete CurRec;
return nullptr;
}
@@ -2396,6 +2463,7 @@ InstantiateMulticlassDef(MultiClass &MC,
Error(DefmPrefixRange.Start, "def '" + CurRec->getNameInitAsString() +
"' already defined, instantiating defm with subdef '" +
DefProto->getNameInitAsString() + "'");
+ delete CurRec;
return nullptr;
}
@@ -2535,6 +2603,12 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmLoc))
return Error(SubClassLoc, "could not instantiate def");
+ // Defs that can be used by other definitions should be fully resolved
+ // before any use.
+ if (DefProto->isResolveFirst() && !CurMultiClass) {
+ CurRec->resolveReferences();
+ CurRec->setResolveFirst(false);
+ }
NewRecDefs.push_back(CurRec);
}
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index 9f4b7e9..79994cb 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TGPARSER_H
-#define TGPARSER_H
+#ifndef LLVM_LIB_TABLEGEN_TGPARSER_H
+#define LLVM_LIB_TABLEGEN_TGPARSER_H
#include "TGLexer.h"
#include "llvm/ADT/Twine.h"
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index 7b52e55..e96d18b 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_AArch64_H
-#define TARGET_AArch64_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64_H
-#include "Utils/AArch64BaseInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -36,6 +36,7 @@ FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
ModulePass *createAArch64PromoteConstantPass();
+FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64AddressTypePromotionPass();
FunctionPass *createAArch64A57FPLoadBalancing();
FunctionPass *createAArch64A53Fix835769();
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 7742fea..2503764 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -102,7 +102,9 @@ namespace {
/// A "color", which is either even or odd. Yes, these aren't really colors
/// but the algorithm is conceptually doing two-color graph coloring.
enum class Color { Even, Odd };
+#ifndef NDEBUG
static const char *ColorNames[2] = { "Even", "Odd" };
+#endif
class Chain;
@@ -350,7 +352,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
for (auto I = EC.begin(), E = EC.end(); I != E; ++I) {
std::vector<Chain*> Cs(EC.member_begin(I), EC.member_end());
if (Cs.empty()) continue;
- V.push_back(Cs);
+ V.push_back(std::move(Cs));
}
// Now we have a set of sets, order them by start address so
@@ -375,7 +377,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
int Parity = 0;
for (auto &I : V)
- Changed |= colorChainSet(I, MBB, Parity);
+ Changed |= colorChainSet(std::move(I), MBB, Parity);
return Changed;
}
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index ab2c4b7..287989f 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -19,7 +19,7 @@
// a = add nsw i64 f, 3
// e = getelementptr ..., i64 a
//
-// This is legal to do so if the computations are markers with either nsw or nuw
+// This is legal to do if the computations are marked with either nsw or nuw
// markers.
// Moreover, the current heuristic is simple: it does not create new sext
// operations, i.e., it gives up when a sext would have forked (e.g., if
@@ -223,7 +223,7 @@ AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
}
// Input:
-// - SExtInsts contains all the sext instructions that are use direclty in
+// - SExtInsts contains all the sext instructions that are used directly in
// GetElementPtrInst, i.e., access to memory.
// Algorithm:
// - For each sext operation in SExtInsts:
@@ -353,7 +353,7 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
// If the use is already of the right type, connect its uses to its argument
// and delete it.
- // This can happen for an Instruction which all uses are sign extended.
+ // This can happen for an Instruction all uses of which are sign extended.
if (!ToRemove.count(SExt) &&
SExt->getType() == SExt->getOperand(0)->getType()) {
DEBUG(dbgs() << "Sign extension is useless, attach its use to "
diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 734fb21..5afe0f4 100644
--- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -36,9 +36,10 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -166,6 +167,12 @@ static int getTransformOpcode(unsigned Opc) {
return AArch64::ADDv1i64;
case AArch64::SUBXrr:
return AArch64::SUBv1i64;
+ case AArch64::ANDXrr:
+ return AArch64::ANDv8i8;
+ case AArch64::EORXrr:
+ return AArch64::EORv8i8;
+ case AArch64::ORRXrr:
+ return AArch64::ORRv8i8;
}
// No AdvSIMD equivalent, so just return the original opcode.
return Opc;
@@ -371,7 +378,8 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
const TargetMachine &TM = mf.getTarget();
MRI = &mf.getRegInfo();
- TII = static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
+ TII = static_cast<const AArch64InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index cd94e24..8bee4f5 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64MachineFunctionInfo.h"
#include "AArch64MCInstLower.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
@@ -23,8 +23,8 @@
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -54,7 +54,7 @@ public:
AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer),
Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
- MCInstLowering(OutContext, *Mang, *this), SM(*this), AArch64FI(nullptr),
+ MCInstLowering(OutContext, *this), SM(*this), AArch64FI(nullptr),
LOHLabelCounter(0) {}
const char *getPassName() const override {
@@ -145,7 +145,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
@@ -252,8 +252,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterClass *RC,
bool isVector, raw_ostream &O) {
assert(MO.isReg() && "Should only get here with a register!");
- const AArch64RegisterInfo *RI =
- static_cast<const AArch64RegisterInfo *>(TM.getRegisterInfo());
+ const AArch64RegisterInfo *RI = static_cast<const AArch64RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
unsigned Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
assert(RI->regsOverlap(RegToPrint, Reg));
@@ -518,7 +518,5 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
extern "C" void LLVMInitializeAArch64AsmPrinter() {
RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
-
- RegisterAsmPrinter<AArch64AsmPrinter> Z(TheARM64leTarget);
- RegisterAsmPrinter<AArch64AsmPrinter> W(TheARM64beTarget);
+ RegisterAsmPrinter<AArch64AsmPrinter> Z(TheARM64Target);
}
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index 484e7e8..e2b6367 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -12,15 +12,16 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-branch-relax"
@@ -136,7 +137,7 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) {
if (NextBB == MBB->getParent()->end())
return false;
- for (MachineBasicBlock *S : MBB->successors())
+ for (MachineBasicBlock *S : MBB->successors())
if (S == NextBB)
return true;
@@ -475,7 +476,9 @@ bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
- TII = (const AArch64InstrInfo *)MF->getTarget().getInstrInfo();
+ TII = (const AArch64InstrInfo *)MF->getTarget()
+ .getSubtargetImpl()
+ ->getInstrInfo();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 8e8bd3d..9e707e4 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -16,7 +16,7 @@ class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfBigEndian - Match only if we're in big endian mode.
class CCIfBigEndian<CCAction A> :
- CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>;
+ CCIf<"State.getMachineFunction().getSubtarget().getDataLayout()->isBigEndian()", A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
@@ -54,22 +54,24 @@ def CC_AArch64_AAPCS : CallingConv<[
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
- CCIfType<[i1, i8, i16], CCAssignToStack<8, 8>>,
+ CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
- CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
+ CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
CCAssignToStack<8, 8>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToStack<16, 16>>
]>;
@@ -88,14 +90,16 @@ def RetCC_AArch64_AAPCS : CallingConv<[
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;
@@ -129,23 +133,26 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
- CCIf<"ValVT == MVT::i16", CCAssignToStack<2, 2>>,
+ CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
- CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
+ CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
CCAssignToStack<8, 8>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
]>;
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
@@ -154,13 +161,15 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
// Handle all scalar types as either i64 or f64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
- CCIfType<[f32], CCPromoteToType<f64>>,
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
// Everything is on the stack.
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
- CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], CCAssignToStack<8, 8>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
+ CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToStack<8, 8>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
]>;
// The WebKit_JS calling convention only passes the first argument (the callee)
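The calling-convention hunks above teach AAPCS64 and the Darwin variant to assign half-precision scalars (f16) and half vectors (v4f16, v8f16) to the FP/SIMD argument registers, and to stack slots of the matching size once the registers run out. A minimal C++ illustration of where such arguments would be expected to land, assuming a Clang/AArch64 toolchain that allows __fp16 in argument position (__ARM_FP16_ARGS); the register annotations are expectations for this example, not output produced by the patch:

    // Illustration only; not part of the patch.
    extern "C" float scale(__fp16 a,   // expected in h0 (shadowing q0)
                           float  b,   // expected in s1 (shadowing q1)
                           double c);  // expected in d2 (shadowing q2)

    float call_scale() {
      // With the entries above, the f16 argument is passed directly in an H
      // register rather than being rejected or forced through memory.
      return scale(static_cast<__fp16>(1.5f), 2.0f, 3.0);
    }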
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 4d23dc5..aab8e38 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -94,7 +94,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineFunction *MF = I->getParent()->getParent();
const AArch64TargetMachine *TM =
static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getInstrInfo();
+ const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
// code sequence assumes the address will be.
@@ -114,7 +114,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineFunction *MF = I->getParent()->getParent();
const AArch64TargetMachine *TM =
static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getInstrInfo();
+ const AArch64InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
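This hunk, and many of the ones below, apply the same API migration: per-function code now obtains TargetInstrInfo and TargetRegisterInfo from the subtarget rather than from the TargetMachine. A minimal sketch of the new-style lookup inside a machine-function pass; the helper name is hypothetical and only illustrates the accessor change:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    #include "llvm/Target/TargetSubtargetInfo.h"

    // Hypothetical helper: ask the subtarget, not the TargetMachine.
    static void queryTargetInfo(llvm::MachineFunction &MF) {
      // Old style, removed throughout this commit:
      //   const llvm::TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
      const llvm::TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
      const llvm::TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
      (void)TII; (void)TRI; // silence unused-variable warnings in this sketch
    }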
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 6b1f096..87b545b 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -101,25 +101,26 @@
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-collect-loh"
@@ -194,12 +195,14 @@ typedef SetVector<const MachineInstr *> SetOfMachineInstr;
/// Map a basic block to a set of instructions per register.
/// This is used to represent the exposed uses of a basic block
/// per register.
-typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *>
+typedef MapVector<const MachineBasicBlock *,
+ std::unique_ptr<SetOfMachineInstr[]>>
BlockToSetOfInstrsPerColor;
/// Map a basic block to an instruction per register.
/// This is used to represent the live-out definitions of a basic block
/// per register.
-typedef MapVector<const MachineBasicBlock *, const MachineInstr **>
+typedef MapVector<const MachineBasicBlock *,
+ std::unique_ptr<const MachineInstr *[]>>
BlockToInstrPerColor;
/// Map an instruction to a set of instructions. Used to represent the mapping
/// from a def to its reachable uses or from a use to its definitions.
@@ -236,9 +239,9 @@ static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
SetOfMachineInstr *result;
BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
if (it != sets.end())
- result = it->second;
+ result = it->second.get();
else
- result = sets[&MBB] = new SetOfMachineInstr[nbRegs];
+ result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
return result[reg];
}
@@ -283,14 +286,14 @@ static void initReachingDef(MachineFunction &MF,
const MapRegToId &RegToId,
const MachineInstr *DummyOp, bool ADRPMode) {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
unsigned NbReg = RegToId.size();
for (MachineBasicBlock &MBB : MF) {
- const MachineInstr **&BBGen = Gen[&MBB];
- BBGen = new const MachineInstr *[NbReg];
- memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg);
+ auto &BBGen = Gen[&MBB];
+ BBGen = make_unique<const MachineInstr *[]>(NbReg);
+ std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
BitVector &BBKillSet = Kill[&MBB];
BBKillSet.resize(NbReg);
@@ -421,22 +424,6 @@ static void reachingDefAlgorithm(MachineFunction &MF,
} while (HasChanged);
}
-/// Release all memory dynamically allocated during the reaching
-/// definition algorithm.
-static void finitReachingDef(BlockToSetOfInstrsPerColor &In,
- BlockToSetOfInstrsPerColor &Out,
- BlockToInstrPerColor &Gen,
- BlockToSetOfInstrsPerColor &ReachableUses) {
- for (auto &IT : Out)
- delete[] IT.second;
- for (auto &IT : In)
- delete[] IT.second;
- for (auto &IT : ReachableUses)
- delete[] IT.second;
- for (auto &IT : Gen)
- delete[] IT.second;
-}
-
/// Reaching definition algorithm.
/// \param MF function on which the algorithm will operate.
/// \param[out] ColorOpToReachedUses will contain the result of the reaching
@@ -473,9 +460,6 @@ static void reachingDef(MachineFunction &MF,
if (!DummyOp)
reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
ReachableUses, RegToId.size());
-
- // finit.
- finitReachingDef(In, Out, Gen, ReachableUses);
}
#ifndef NDEBUG
@@ -1043,7 +1027,7 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
MapRegToId RegToId;
@@ -1059,8 +1043,8 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *DummyOp = nullptr;
if (BasicBlockScopeOnly) {
- const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
+ const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
// For local analysis, create a dummy operation to record uses that are not
// local.
DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
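The CollectLOH hunks above replace the manually new[]/delete[]-managed per-block arrays with std::unique_ptr<T[]> owned by the maps themselves, which is why finitReachingDef() can be removed outright. A reduced sketch of the ownership pattern, with hypothetical names:

    #include "llvm/ADT/MapVector.h"
    #include "llvm/ADT/STLExtras.h" // llvm::make_unique (this LLVM predates C++14)
    #include <memory>

    // Hypothetical reduced example: the map owns one heap-allocated array per
    // key and frees everything when it is destroyed, so no explicit cleanup
    // pass is required.
    typedef llvm::MapVector<int, std::unique_ptr<unsigned[]>> PerKeyArrays;

    static unsigned &getSlot(PerKeyArrays &M, int Key, unsigned NumSlots,
                             unsigned Idx) {
      std::unique_ptr<unsigned[]> &Arr = M[Key];
      if (!Arr)
        Arr = llvm::make_unique<unsigned[]>(NumSlots); // value-initialized to 0
      return Arr[Idx];
    }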
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
new file mode 100644
index 0000000..0fbd3c6
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -0,0 +1,422 @@
+//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to make consecutive compares of values use the same
+// operands so that the CSE pass can remove duplicated instructions. To do
+// so, it analyzes branches and adjusts comparisons with immediate values by
+// converting:
+// * GE -> GT
+// * GT -> GE
+// * LT -> LE
+// * LE -> LT
+// and adjusting immediate values appropriately. It basically corrects two
+// immediate values towards each other to make them equal.
+//
+// Consider the following example in C:
+//
+// if ((a < 5 && ...) || (a > 5 && ...)) {
+// ~~~~~ ~~~~~
+// ^ ^
+// x y
+//
+// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates
+// to "false", "y" can just check flags set by the first comparison. As a
+// result of the canonicalization employed by
+// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
+// code, the assembly ends up in a form that is not CSE-friendly:
+//
+// ...
+// cmp w8, #4
+// b.gt .LBB0_3
+// ...
+// .LBB0_3:
+// cmp w8, #6
+// b.lt .LBB0_6
+// ...
+//
+// Same assembly after the pass:
+//
+// ...
+// cmp w8, #5
+// b.ge .LBB0_3
+// ...
+// .LBB0_3:
+// cmp w8, #5 // <-- CSE pass removes this instruction
+// b.le .LBB0_6
+// ...
+//
+// Currently only SUBS and ADDS followed by b.?? are supported.
+//
+// TODO: maybe handle TBNZ/TBZ when they are used in place of CMP for "a < 0"
+// TODO: handle other conditional instructions (e.g. CSET)
+// TODO: allow the second branch to be anything if it doesn't require adjusting
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdlib>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-condopt"
+
+STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
+
+namespace {
+class AArch64ConditionOptimizer : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DomTree;
+
+public:
+ // Stores immediate, compare instruction opcode and branch condition (in this
+ // order) of adjusted comparison.
+ typedef std::tuple<int, int, AArch64CC::CondCode> CmpInfo;
+
+ static char ID;
+ AArch64ConditionOptimizer() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);
+ CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
+ void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);
+ bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
+ int ToImm);
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const char *getPassName() const override {
+ return "AArch64 Condition Optimizer";
+ }
+};
+} // end anonymous namespace
+
+char AArch64ConditionOptimizer::ID = 0;
+
+namespace llvm {
+void initializeAArch64ConditionOptimizerPass(PassRegistry &);
+}
+
+INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
+ "AArch64 CondOpt Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
+ "AArch64 CondOpt Pass", false, false)
+
+FunctionPass *llvm::createAArch64ConditionOptimizerPass() {
+ return new AArch64ConditionOptimizer();
+}
+
+void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// Finds the compare instruction that controls a supported conditional branch.
+// Returns the instruction, or nullptr on failure or when an unsupported
+// instruction is encountered.
+MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator I = MBB->getFirstTerminator();
+ if (I == MBB->end())
+ return nullptr;
+
+ if (I->getOpcode() != AArch64::Bcc)
+ return nullptr;
+
+ // Now find the instruction controlling the terminator.
+ for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
+ --I;
+ assert(!I->isTerminator() && "Spurious terminator");
+ switch (I->getOpcode()) {
+ // cmp is an alias for subs with a dead destination register.
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
+ // cmn is an alias for adds with a dead destination register.
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ if (I->getOperand(0).isDead())
+ return I;
+
+ DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
+ return nullptr;
+
+  // Prevent false-positive cases like:
+ // cmp w19, #0
+ // cinc w0, w19, gt
+ // ...
+ // fcmp d8, #0.0
+ // b.gt .LBB0_5
+ case AArch64::FCMPDri:
+ case AArch64::FCMPSri:
+ case AArch64::FCMPESri:
+ case AArch64::FCMPEDri:
+
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::FCMPSrr:
+ case AArch64::FCMPDrr:
+ case AArch64::FCMPESrr:
+ case AArch64::FCMPEDrr:
+ // Skip comparison instructions without immediate operands.
+ return nullptr;
+ }
+ }
+ DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
+ return nullptr;
+}
+
+// Swaps the opcode between ADDS and SUBS, preserving register operand width.
+static int getComplementOpc(int Opc) {
+ switch (Opc) {
+ case AArch64::ADDSWri: return AArch64::SUBSWri;
+ case AArch64::ADDSXri: return AArch64::SUBSXri;
+ case AArch64::SUBSWri: return AArch64::ADDSWri;
+ case AArch64::SUBSXri: return AArch64::ADDSXri;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
+// Switches the comparison between its inclusive and exclusive forms.
+static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
+ switch (Cmp) {
+ case AArch64CC::GT: return AArch64CC::GE;
+ case AArch64CC::GE: return AArch64CC::GT;
+ case AArch64CC::LT: return AArch64CC::LE;
+ case AArch64CC::LE: return AArch64CC::LT;
+ default:
+ llvm_unreachable("Unexpected condition code");
+ }
+}
+
+// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating comparison
+// operator and condition code.
+AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
+ MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
+ int Opc = CmpMI->getOpcode();
+
+  // CMN (compare with negative immediate) is an alias for ADDS (since
+  // "operand - negative" == "operand + positive").
+ bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
+
+ int Correction = (Cmp == AArch64CC::GT) ? 1 : -1;
+ // Negate Correction value for comparison with negative immediate (CMN).
+ if (Negative) {
+ Correction = -Correction;
+ }
+
+ const int OldImm = (int)CmpMI->getOperand(2).getImm();
+ const int NewImm = std::abs(OldImm + Correction);
+
+ // Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
+ // adjusting compare instruction opcode.
+ if (OldImm == 0 && ((Negative && Correction == 1) ||
+ (!Negative && Correction == -1))) {
+ Opc = getComplementOpc(Opc);
+ }
+
+ return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));
+}
+
+// Applies changes to comparison instruction suggested by adjustCmp().
+void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
+ const CmpInfo &Info) {
+ int Imm;
+ int Opc;
+ AArch64CC::CondCode Cmp;
+ std::tie(Imm, Opc, Cmp) = Info;
+
+ MachineBasicBlock *const MBB = CmpMI->getParent();
+
+ // Change immediate in comparison instruction (ADDS or SUBS).
+ BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
+ .addOperand(CmpMI->getOperand(0))
+ .addOperand(CmpMI->getOperand(1))
+ .addImm(Imm)
+ .addOperand(CmpMI->getOperand(3));
+ CmpMI->eraseFromParent();
+
+ // The fact that this comparison was picked ensures that it's related to the
+ // first terminator instruction.
+ MachineInstr *BrMI = MBB->getFirstTerminator();
+
+ // Change condition in branch instruction.
+ BuildMI(*MBB, BrMI, BrMI->getDebugLoc(), TII->get(AArch64::Bcc))
+ .addImm(Cmp)
+ .addOperand(BrMI->getOperand(1));
+ BrMI->eraseFromParent();
+
+ MBB->updateTerminator();
+
+ ++NumConditionsAdjusted;
+}
+
+// Parses a condition code returned by AnalyzeBranch and computes the CondCode
+// corresponding to TBB.
+// Returns true if parsing was successful; otherwise returns false.
+static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
+ // A normal br.cond simply has the condition code.
+ if (Cond[0].getImm() != -1) {
+ assert(Cond.size() == 1 && "Unknown Cond array format");
+ CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
+ return true;
+ }
+ return false;
+}
+
+// Adjusts one cmp instruction towards another if the result of the adjustment
+// allows CSE. Returns true if the compare instruction was changed; otherwise
+// returns false.
+bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
+ AArch64CC::CondCode Cmp, MachineInstr *To, int ToImm)
+{
+ CmpInfo Info = adjustCmp(CmpMI, Cmp);
+ if (std::get<0>(Info) == ToImm && std::get<1>(Info) == To->getOpcode()) {
+ modifyCmp(CmpMI, Info);
+ return true;
+ }
+ return false;
+}
+
+bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** AArch64 Condition Optimizer **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ TII = MF.getTarget().getSubtargetImpl()->getInstrInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+
+ bool Changed = false;
+
+ // Visit blocks in dominator tree pre-order. The pre-order enables multiple
+ // cmp-conversions from the same head block.
+ // Note that updateDomTree() modifies the children of the DomTree node
+ // currently being visited. The df_iterator supports that; it doesn't look at
+ // child_begin() / child_end() until after a node has been visited.
+ for (MachineDomTreeNode *I : depth_first(DomTree)) {
+ MachineBasicBlock *HBB = I->getBlock();
+
+ SmallVector<MachineOperand, 4> HeadCond;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ if (TII->AnalyzeBranch(*HBB, TBB, FBB, HeadCond)) {
+ continue;
+ }
+
+    // The TBB == HBB check skips single-block loops.
+ if (!TBB || TBB == HBB) {
+ continue;
+ }
+
+ SmallVector<MachineOperand, 4> TrueCond;
+ MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
+ if (TII->AnalyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
+ continue;
+ }
+
+ MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
+ if (!HeadCmpMI) {
+ continue;
+ }
+
+ MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
+ if (!TrueCmpMI) {
+ continue;
+ }
+
+ AArch64CC::CondCode HeadCmp;
+ if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
+ continue;
+ }
+
+ AArch64CC::CondCode TrueCmp;
+ if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
+ continue;
+ }
+
+ const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
+ const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
+
+ DEBUG(dbgs() << "Head branch:\n");
+ DEBUG(dbgs() << "\tcondition: "
+ << AArch64CC::getCondCodeName(HeadCmp) << '\n');
+ DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
+
+ DEBUG(dbgs() << "True branch:\n");
+ DEBUG(dbgs() << "\tcondition: "
+ << AArch64CC::getCondCodeName(TrueCmp) << '\n');
+ DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
+
+ if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
+ (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
+ std::abs(TrueImm - HeadImm) == 2) {
+ // This branch transforms machine instructions that correspond to
+ //
+ // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
+ // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
+ //
+ // into
+ //
+ // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
+ // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
+
+ CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
+ CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
+ if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
+ std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
+ modifyCmp(HeadCmpMI, HeadCmpInfo);
+ modifyCmp(TrueCmpMI, TrueCmpInfo);
+ Changed = true;
+ }
+ } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
+ (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
+ std::abs(TrueImm - HeadImm) == 1) {
+ // This branch transforms machine instructions that correspond to
+ //
+ // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
+ // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
+ //
+ // into
+ //
+ // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
+ // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
+
+      // The GT -> GE transformation increases the immediate value, so pick
+      // the smaller one; LT -> LE decreases the immediate value, so invert
+      // the choice.
+ bool adjustHeadCond = (HeadImm < TrueImm);
+ if (HeadCmp == AArch64CC::LT) {
+ adjustHeadCond = !adjustHeadCond;
+ }
+
+ if (adjustHeadCond) {
+ Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
+ } else {
+ Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
+ }
+ }
+ // Other transformation cases almost never occur due to generation of < or >
+ // comparisons instead of <= and >=.
+ }
+
+ return Changed;
+}
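A stand-alone model (not code from the patch) of the adjustment that adjustCmp() performs may help make the idea concrete: GT/GE and LT/LE are exchanged while the immediate moves by one in the compensating direction, so each predicate keeps its meaning and the two compares can be steered onto the same immediate.

    #include <cassert>
    #include <cstdlib>
    #include <utility>

    enum Cond { GT, GE, LT, LE };

    // Hypothetical scalar model of adjustCmp(): exchange the condition with
    // its inclusive/exclusive counterpart and compensate the immediate by one.
    static std::pair<Cond, int> adjust(Cond C, int Imm) {
      switch (C) {
      case GT: return std::make_pair(GE, Imm + 1); // a > 4   ==  a >= 5
      case GE: return std::make_pair(GT, Imm - 1); // a >= 5  ==  a > 4
      case LT: return std::make_pair(LE, Imm - 1); // a < 6   ==  a <= 5
      case LE: return std::make_pair(LT, Imm + 1); // a <= 5  ==  a < 6
      }
      std::abort();
    }

    int main() {
      // "(a > 4 && ...) || (a < 6 && ...)": after adjusting both sides the two
      // compares share the immediate 5, and the second cmp becomes CSE-able.
      assert(adjust(GT, 4) == std::make_pair(GE, 5));
      assert(adjust(LT, 6) == std::make_pair(LE, 5));
      return 0;
    }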
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 452cdec..54f53dc 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -191,8 +191,8 @@ public:
/// runOnMachineFunction - Initialize per-function data structures.
void runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
}
@@ -723,7 +723,7 @@ namespace {
class AArch64ConditionalCompares : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const MCSchedModel *SchedModel;
+ MCSchedModel SchedModel;
  // Whether the function being processed has the Oz attribute.
bool MinSize;
MachineRegisterInfo *MRI;
@@ -845,7 +845,7 @@ bool AArch64ConditionalCompares::shouldConvert() {
// the cost of a misprediction.
//
// Set a limit on the delay we will accept.
- unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4;
+ unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
// Instruction depths can be computed for all trace instructions above CmpBB.
unsigned HeadDepth =
@@ -891,8 +891,8 @@ bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
SchedModel =
MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
MRI = &MF.getRegInfo();
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index a2d853c..74fc167 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -14,11 +14,12 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-dead-defs"
@@ -36,11 +37,11 @@ public:
static char ID; // Pass identification, replacement for typeid.
explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &F) override;
+ bool runOnMachineFunction(MachineFunction &F) override;
const char *getPassName() const override { return "Dead register definitions"; }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -119,7 +120,7 @@ bool AArch64DeadRegisterDefinitions::processMachineBasicBlock(
// Scan the function for instructions that have a dead definition of a
// register. Replace that register with the zero register when possible.
bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
bool Changed = false;
DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index a76fd76..c850680 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -16,6 +16,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
@@ -634,19 +635,6 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return true;
}
- case AArch64::FCVTSHpseudo: {
- MachineOperand Src = MI.getOperand(1);
- Src.setImplicit();
- unsigned SrcH =
- TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub);
- auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr))
- .addOperand(MI.getOperand(0))
- .addReg(SrcH, RegState::Undef)
- .addOperand(Src);
- transferImpOps(MI, MIB, MIB);
- MI.eraseFromParent();
- return true;
- }
case AArch64::LOADgot: {
// Expand into ADRP + LDR.
unsigned DstReg = MI.getOperand(0).getReg();
@@ -735,7 +723,7 @@ bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
}
bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
bool Modified = false;
for (auto &MBB : MF)
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 2164d77..612cb00 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,9 +14,10 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64TargetMachine.h"
#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -39,8 +40,7 @@ using namespace llvm;
namespace {
-class AArch64FastISel : public FastISel {
-
+class AArch64FastISel final : public FastISel {
class Address {
public:
typedef enum {
@@ -50,16 +50,23 @@ class AArch64FastISel : public FastISel {
private:
BaseKind Kind;
+ AArch64_AM::ShiftExtendType ExtType;
union {
unsigned Reg;
int FI;
} Base;
+ unsigned OffsetReg;
+ unsigned Shift;
int64_t Offset;
+ const GlobalValue *GV;
public:
- Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
+ Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
+ OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
+ void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
+ AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
@@ -70,6 +77,12 @@ class AArch64FastISel : public FastISel {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
+ void setOffsetReg(unsigned Reg) {
+ OffsetReg = Reg;
+ }
+ unsigned getOffsetReg() const {
+ return OffsetReg;
+ }
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
@@ -80,8 +93,11 @@ class AArch64FastISel : public FastISel {
}
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
+ void setShift(unsigned S) { Shift = S; }
+ unsigned getShift() { return Shift; }
- bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
+ void setGlobalValue(const GlobalValue *G) { GV = G; }
+ const GlobalValue *getGlobalValue() { return GV; }
};
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
@@ -89,74 +105,152 @@ class AArch64FastISel : public FastISel {
const AArch64Subtarget *Subtarget;
LLVMContext *Context;
+ bool fastLowerArguments() override;
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
+ bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
+
private:
// Selection routines.
- bool SelectLoad(const Instruction *I);
- bool SelectStore(const Instruction *I);
- bool SelectBranch(const Instruction *I);
- bool SelectIndirectBr(const Instruction *I);
- bool SelectCmp(const Instruction *I);
- bool SelectSelect(const Instruction *I);
- bool SelectFPExt(const Instruction *I);
- bool SelectFPTrunc(const Instruction *I);
- bool SelectFPToInt(const Instruction *I, bool Signed);
- bool SelectIntToFP(const Instruction *I, bool Signed);
- bool SelectRem(const Instruction *I, unsigned ISDOpcode);
- bool SelectCall(const Instruction *I, const char *IntrMemName);
- bool SelectIntrinsicCall(const IntrinsicInst &I);
- bool SelectRet(const Instruction *I);
- bool SelectTrunc(const Instruction *I);
- bool SelectIntExt(const Instruction *I);
- bool SelectMul(const Instruction *I);
+ bool selectAddSub(const Instruction *I);
+ bool selectLogicalOp(const Instruction *I);
+ bool selectLoad(const Instruction *I);
+ bool selectStore(const Instruction *I);
+ bool selectBranch(const Instruction *I);
+ bool selectIndirectBr(const Instruction *I);
+ bool selectCmp(const Instruction *I);
+ bool selectSelect(const Instruction *I);
+ bool selectFPExt(const Instruction *I);
+ bool selectFPTrunc(const Instruction *I);
+ bool selectFPToInt(const Instruction *I, bool Signed);
+ bool selectIntToFP(const Instruction *I, bool Signed);
+ bool selectRem(const Instruction *I, unsigned ISDOpcode);
+ bool selectRet(const Instruction *I);
+ bool selectTrunc(const Instruction *I);
+ bool selectIntExt(const Instruction *I);
+ bool selectMul(const Instruction *I);
+ bool selectShift(const Instruction *I);
+ bool selectBitCast(const Instruction *I);
+ bool selectFRem(const Instruction *I);
+ bool selectSDiv(const Instruction *I);
+ bool selectGetElementPtr(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
- bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
- bool ComputeAddress(const Value *Obj, Address &Addr);
- bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
- bool UseUnscaled);
- void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled);
- bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
- bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+ bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
+ bool isValueAvailable(const Value *V) const;
+ bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
+ bool computeCallAddress(const Value *V, Address &Addr);
+ bool simplifyAddress(Address &Addr, MVT VT);
+ void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
+ unsigned Flags, unsigned ScaleFactor,
+ MachineMemOperand *MMO);
+ bool isMemCpySmall(uint64_t Len, unsigned Alignment);
+ bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
- // Emit functions.
- bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
- bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled = false);
- bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled = false);
- unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
- unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
+ bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
+ const Value *Cond);
+ bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
+ bool optimizeSelect(const SelectInst *SI);
+ std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
+
+ // Emit helper routines.
+ unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
+ const Value *RHS, bool SetFlags = false,
+ bool WantResult = true, bool IsZExt = false);
+ unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ bool SetFlags = false, bool WantResult = true);
+ unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
+ bool WantResult = true);
+ unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType,
+ uint64_t ShiftImm, bool SetFlags = false,
+ bool WantResult = true);
+ unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ExtType,
+ uint64_t ShiftImm, bool SetFlags = false,
+ bool WantResult = true);
- unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
- unsigned AArch64MaterializeGV(const GlobalValue *GV);
+ // Emit functions.
+ bool emitCompareAndBranch(const BranchInst *BI);
+ bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
+ bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
+ bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
+ bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
+ unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
+ MachineMemOperand *MMO = nullptr);
+ bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
+ MachineMemOperand *MMO = nullptr);
+ unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool SetFlags = false, bool WantResult = true,
+ bool IsZExt = false);
+ unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
+ unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool SetFlags = false, bool WantResult = true,
+ bool IsZExt = false);
+ unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+ unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
+ unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+ unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
+ bool WantResult = true);
+ unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
+ const Value *RHS);
+ unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, uint64_t Imm);
+ unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ uint64_t ShiftImm);
+ unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
+ unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = true);
+ unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = true);
+ unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = false);
+
+ unsigned materializeInt(const ConstantInt *CI, MVT VT);
+ unsigned materializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned materializeGV(const GlobalValue *GV);
// Call handling routines.
private:
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
- bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
- SmallVectorImpl<unsigned> &ArgRegs,
- SmallVectorImpl<MVT> &ArgVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
+ bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
unsigned &NumBytes);
- bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
+ bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
public:
// Backend specific FastISel code.
- unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
- explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo)
- : FastISel(funcInfo, libInfo) {
+ explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo)
+ : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
- Context = &funcInfo.Fn->getContext();
+ Context = &FuncInfo.Fn->getContext();
}
- bool TargetSelectInstruction(const Instruction *I) override;
+ bool fastSelectInstruction(const Instruction *I) override;
#include "AArch64GenFastISel.inc"
};
@@ -165,13 +259,52 @@ public:
#include "AArch64GenCallingConv.inc"
+/// \brief Check if the sign-/zero-extend will be a noop.
+static bool isIntExtFree(const Instruction *I) {
+ assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ "Unexpected integer extend instruction.");
+ assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
+ "Unexpected value type.");
+ bool IsZExt = isa<ZExtInst>(I);
+
+ if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (LI->hasOneUse())
+ return true;
+
+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
+ if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
+ return true;
+
+ return false;
+}
+
+/// \brief Determine the implicit scale factor that is applied by a memory
+/// operation for a given value type.
+static unsigned getImplicitScaleFactor(MVT VT) {
+ switch (VT.SimpleTy) {
+ default:
+ return 0; // invalid
+ case MVT::i1: // fall-through
+ case MVT::i8:
+ return 1;
+ case MVT::i16:
+ return 2;
+ case MVT::i32: // fall-through
+ case MVT::f32:
+ return 4;
+ case MVT::i64: // fall-through
+ case MVT::f64:
+ return 8;
+ }
+}
+
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
if (CC == CallingConv::WebKit_JS)
return CC_AArch64_WebKit_JS;
return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}
-unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
"Alloca should always return a pointer.");
@@ -183,7 +316,7 @@ unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addFrameIndex(SI->second)
@@ -195,29 +328,42 @@ unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
return 0;
}
-unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
+unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
+ if (VT > MVT::i64)
+ return 0;
+
+ if (!CI->isZero())
+ return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+
+ // Create a copy from the zero register to materialize a "0" value.
+ const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
+ : &AArch64::GPR32RegClass;
+ unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(ZeroReg, getKillRegState(true));
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
+ // Positive zero (+0.0) has to be materialized with a fmov from the zero
+ // register, because the immediate version of fmov cannot encode zero.
+ if (CFP->isNullValue())
+ return fastMaterializeFloatZero(CFP);
+
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
const APFloat Val = CFP->getValueAPF();
- bool is64bit = (VT == MVT::f64);
-
+ bool Is64Bit = (VT == MVT::f64);
// This checks to see if we can use FMOV instructions to materialize
// a constant, otherwise we have to materialize via the constant pool.
if (TLI.isFPImmLegal(Val, VT)) {
- int Imm;
- unsigned Opc;
- if (is64bit) {
- Imm = AArch64_AM::getFP64Imm(Val);
- Opc = AArch64::FMOVDi;
- } else {
- Imm = AArch64_AM::getFP32Imm(Val);
- Opc = AArch64::FMOVSi;
- }
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(Imm);
- return ResultReg;
+ int Imm =
+ Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
+ assert((Imm != -1) && "Cannot encode floating-point constant.");
+ unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
+ return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
// Materialize via constant pool. MachineConstantPool wants an explicit
@@ -226,20 +372,20 @@ unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
if (Align == 0)
Align = DL.getTypeAllocSize(CFP->getType());
- unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
+ ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
- unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
+ unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(ADRPReg)
- .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
return ResultReg;
}
-unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
+unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
// We can't handle thread-local variables quickly yet.
if (GV->isThreadLocal())
return 0;
@@ -262,30 +408,34 @@ unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
// ADRP + LDRX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
+ // We can't handle addresses loaded from a constant pool quickly yet.
+ return 0;
} else {
// ADRP + ADDX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
+ ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
- .addImm(0);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+ .addImm(0);
}
return ResultReg;
}
-unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
@@ -293,17 +443,48 @@ unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
MVT VT = CEVT.getSimpleVT();
- // FIXME: Handle ConstantInt.
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
- return AArch64MaterializeFP(CFP, VT);
+ if (const auto *CI = dyn_cast<ConstantInt>(C))
+ return materializeInt(CI, VT);
+ else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return materializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return AArch64MaterializeGV(GV);
+ return materializeGV(GV);
return 0;
}
+unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
+ assert(CFP->isNullValue() &&
+ "Floating-point constant is not a positive zero.");
+ MVT VT;
+ if (!isTypeLegal(CFP->getType(), VT))
+ return 0;
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return 0;
+
+ bool Is64Bit = (VT == MVT::f64);
+ unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
+ return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
+}
+
+/// \brief Check if the multiply is by a power-of-2 constant.
+static bool isMulPowOf2(const Value *I) {
+ if (const auto *MI = dyn_cast<MulOperator>(I)) {
+ if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
+ if (C->getValue().isPowerOf2())
+ return true;
+ if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
+ if (C->getValue().isPowerOf2())
+ return true;
+ }
+ return false;
+}
+
// Computes the address to get to an object.
-bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
+bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
+{
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -330,18 +511,18 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
break;
case Instruction::BitCast: {
// Look through bitcasts.
- return ComputeAddress(U->getOperand(0), Addr);
+ return computeAddress(U->getOperand(0), Addr, Ty);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr);
+ return computeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr);
+ return computeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::GetElementPtr: {
@@ -383,7 +564,7 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
- if (ComputeAddress(U->getOperand(0), Addr))
+ if (computeAddress(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
@@ -403,14 +584,301 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
}
break;
}
+ case Instruction::Add: {
+ // Adds of constants are common and easy enough.
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ if (isa<ConstantInt>(LHS))
+ std::swap(LHS, RHS);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
+ return computeAddress(LHS, Addr, Ty);
+ }
+
+ Address Backup = Addr;
+ if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
+ return true;
+ Addr = Backup;
+
+ break;
+ }
+ case Instruction::Sub: {
+ // Subs of constants are common and easy enough.
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
+ return computeAddress(LHS, Addr, Ty);
+ }
+ break;
+ }
+ case Instruction::Shl: {
+ if (Addr.getOffsetReg())
+ break;
+
+ const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
+ if (!CI)
+ break;
+
+ unsigned Val = CI->getZExtValue();
+ if (Val < 1 || Val > 3)
+ break;
+
+ uint64_t NumBytes = 0;
+ if (Ty && Ty->isSized()) {
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+ NumBytes = NumBits / 8;
+ if (!isPowerOf2_64(NumBits))
+ NumBytes = 0;
+ }
+
+ if (NumBytes != (1ULL << Val))
+ break;
+
+ Addr.setShift(Val);
+ Addr.setExtendType(AArch64_AM::LSL);
+
+ const Value *Src = U->getOperand(0);
+ if (const auto *I = dyn_cast<Instruction>(Src))
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ Src = I;
+
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
+
+ if (const auto *AI = dyn_cast<BinaryOperator>(Src))
+ if (AI->getOpcode() == Instruction::And) {
+ const Value *LHS = AI->getOperand(0);
+ const Value *RHS = AI->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->getValue() == 0xffffffff)
+ std::swap(LHS, RHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 0xffffffff) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ unsigned Reg = getRegForValue(LHS);
+ if (!Reg)
+ return false;
+ bool RegIsKill = hasTrivialKill(LHS);
+ Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
+ AArch64::sub_32);
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ }
+
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ case Instruction::Mul: {
+ if (Addr.getOffsetReg())
+ break;
+
+ if (!isMulPowOf2(U))
+ break;
+
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ // Canonicalize power-of-2 value to the RHS.
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(LHS, RHS);
+
+      assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
+ const auto *C = cast<ConstantInt>(RHS);
+ unsigned Val = C->getValue().logBase2();
+ if (Val < 1 || Val > 3)
+ break;
+
+ uint64_t NumBytes = 0;
+ if (Ty && Ty->isSized()) {
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+ NumBytes = NumBits / 8;
+ if (!isPowerOf2_64(NumBits))
+ NumBytes = 0;
+ }
+
+ if (NumBytes != (1ULL << Val))
+ break;
+
+ Addr.setShift(Val);
+ Addr.setExtendType(AArch64_AM::LSL);
+
+ const Value *Src = LHS;
+ if (const auto *I = dyn_cast<Instruction>(Src))
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ Src = I;
+
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
+
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ case Instruction::And: {
+ if (Addr.getOffsetReg())
+ break;
+
+ if (DL.getTypeSizeInBits(Ty) != 8)
+ break;
+
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->getValue() == 0xffffffff)
+ std::swap(LHS, RHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 0xffffffff) {
+ Addr.setShift(0);
+ Addr.setExtendType(AArch64_AM::LSL);
+ Addr.setExtendType(AArch64_AM::UXTW);
+
+ unsigned Reg = getRegForValue(LHS);
+ if (!Reg)
+ return false;
+ bool RegIsKill = hasTrivialKill(LHS);
+ Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
+ AArch64::sub_32);
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ break;
+ }
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ if (!Addr.getReg() || Addr.getOffsetReg())
+ break;
+
+ const Value *Src = nullptr;
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
+
+ if (!Src)
+ break;
+
+ Addr.setShift(0);
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ } // end switch
+
+ if (Addr.isRegBase() && !Addr.getReg()) {
+ unsigned Reg = getRegForValue(Obj);
+ if (!Reg)
+ return false;
+ Addr.setReg(Reg);
+ return true;
+ }
+
+ if (!Addr.getOffsetReg()) {
+ unsigned Reg = getRegForValue(Obj);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+
+ return false;
+}
+
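The new Add, Sub, Shl, Mul, And, SExt and ZExt cases above let computeAddress() fold a scaled and possibly extended index into the load/store addressing mode instead of materializing the address with separate extend and shift instructions. A minimal C++ illustration (not from the patch) of the kind of access this targets; the instruction shown in the comment is the expected selection, not verified output:

    #include <cstdint>

    // The GEP for "P[Idx]" computes "P + zext(Idx) * 4". With the Shl/Mul/ZExt
    // handling above, fast-isel can fold the scaled, zero-extended index into
    // the load itself, e.g. "ldr w0, [x0, w1, uxtw #2]", rather than extending
    // and shifting Idx into a separate register first.
    extern "C" std::int32_t load_elem(const std::int32_t *P, std::uint32_t Idx) {
      return P[Idx];
    }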
+bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ bool InMBB = true;
+
+ if (const auto *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
+ } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+    // Look past a bitcast if its operand is in the same BB.
+ if (InMBB)
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::IntToPtr:
+    // Look past a no-op inttoptr if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+    // Look past a no-op ptrtoint if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return computeCallAddress(U->getOperand(0), Addr);
+ break;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Addr.setGlobalValue(GV);
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!Addr.getGlobalValue()) {
+ Addr.setReg(getRegForValue(V));
+ return Addr.getReg() != 0;
}
- // Try to get this in a register if nothing else has worked.
- if (!Addr.isValid())
- Addr.setReg(getRegForValue(Obj));
- return Addr.isValid();
+ return false;
}
+
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
@@ -428,62 +896,122 @@ bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
return TLI.isTypeLegal(VT);
}
-bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
+/// \brief Determine if the value type is supported by FastISel.
+///
+/// FastISel for AArch64 can handle more value types than are legal. This adds
+/// simple value types such as i1, i8, and i16.
+bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
+ if (Ty->isVectorTy() && !IsVectorAllowed)
+ return false;
+
if (isTypeLegal(Ty, VT))
return true;
  // If this is a type that can be sign- or zero-extended to a basic operation
- // go ahead and accept it now. For stores, this reflects truncation.
+ // go ahead and accept it now.
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
return true;
return false;
}
-bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
- int64_t ScaleFactor, bool UseUnscaled) {
- bool needsLowering = false;
- int64_t Offset = Addr.getOffset();
- switch (VT.SimpleTy) {
- default:
+bool AArch64FastISel::isValueAvailable(const Value *V) const {
+ if (!isa<Instruction>(V))
+ return true;
+
+ const auto *I = cast<Instruction>(V);
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ return true;
+
+ return false;
+}
+
+bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
return false;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::f32:
- case MVT::f64:
- if (!UseUnscaled)
- // Using scaled, 12-bit, unsigned immediate offsets.
- needsLowering = ((Offset & 0xfff) != Offset);
- else
- // Using unscaled, 9-bit, signed immediate offsets.
- needsLowering = (Offset > 256 || Offset < -256);
- break;
- }
- //If this is a stack pointer and the offset needs to be simplified then put
+ bool ImmediateOffsetNeedsLowering = false;
+ bool RegisterOffsetNeedsLowering = false;
+ int64_t Offset = Addr.getOffset();
+ if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
+ ImmediateOffsetNeedsLowering = true;
+ else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
+ !isUInt<12>(Offset / ScaleFactor))
+ ImmediateOffsetNeedsLowering = true;
+
+ // Cannot encode an offset register and an immediate offset in the same
+ // instruction. Fold the immediate offset into the load/store instruction and
+ // emit an additional add to take care of the offset register.
+ if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
+ RegisterOffsetNeedsLowering = true;
+
+ // Cannot encode zero register as base.
+ if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
+ RegisterOffsetNeedsLowering = true;
+
+ // If this is a stack pointer and the offset needs to be simplified then put
// the alloca address into a register, set the base type back to register and
// continue. This should almost never happen.
- if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
- unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) {
+ unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
- .addFrameIndex(Addr.getFI())
- .addImm(0)
- .addImm(0);
+ .addFrameIndex(Addr.getFI())
+ .addImm(0)
+ .addImm(0);
Addr.setKind(Address::RegBase);
Addr.setReg(ResultReg);
}
+ if (RegisterOffsetNeedsLowering) {
+ unsigned ResultReg = 0;
+ if (Addr.getReg()) {
+ if (Addr.getExtendType() == AArch64_AM::SXTW ||
+ Addr.getExtendType() == AArch64_AM::UXTW )
+ ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+ /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+ /*TODO:IsKill=*/false, Addr.getExtendType(),
+ Addr.getShift());
+ else
+ ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+ /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+ /*TODO:IsKill=*/false, AArch64_AM::LSL,
+ Addr.getShift());
+ } else {
+ if (Addr.getExtendType() == AArch64_AM::UXTW)
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift(),
+ /*IsZExt=*/true);
+ else if (Addr.getExtendType() == AArch64_AM::SXTW)
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift(),
+ /*IsZExt=*/false);
+ else
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift());
+ }
+ if (!ResultReg)
+ return false;
+
+ Addr.setReg(ResultReg);
+ Addr.setOffsetReg(0);
+ Addr.setShift(0);
+ Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
+ }
+
// Since the offset is too large for the load/store instruction, get the
// reg+offset into a register.
- if (needsLowering) {
- uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
- unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
- UnscaledOffset, MVT::i64);
- if (ResultReg == 0)
+ if (ImmediateOffsetNeedsLowering) {
+ unsigned ResultReg;
+ if (Addr.getReg())
+ // Try to fold the immediate into the add instruction.
+ ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
+ else
+ ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
+
+ if (!ResultReg)
return false;
Addr.setReg(ResultReg);
Addr.setOffset(0);
@@ -491,222 +1019,1021 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
return true;
}
-void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
+void AArch64FastISel::addLoadStoreOperands(Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled) {
- int64_t Offset = Addr.getOffset();
+ unsigned Flags,
+ unsigned ScaleFactor,
+ MachineMemOperand *MMO) {
+ int64_t Offset = Addr.getOffset() / ScaleFactor;
// Frame base works a bit differently. Handle it separately.
- if (Addr.getKind() == Address::FrameIndexBase) {
+ if (Addr.isFIBase()) {
int FI = Addr.getFI();
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
- MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
- MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
+ MIB.addFrameIndex(FI).addImm(Offset);
} else {
- // Now add the rest of the operands.
- MIB.addReg(Addr.getReg());
- MIB.addImm(Offset);
+ assert(Addr.isRegBase() && "Unexpected address kind.");
+ const MCInstrDesc &II = MIB->getDesc();
+ unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
+ Addr.setReg(
+ constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
+ Addr.setOffsetReg(
+ constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
+ if (Addr.getOffsetReg()) {
+ assert(Addr.getOffset() == 0 && "Unexpected offset");
+ bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTX;
+ MIB.addReg(Addr.getReg());
+ MIB.addReg(Addr.getOffsetReg());
+ MIB.addImm(IsSigned);
+ MIB.addImm(Addr.getShift() != 0);
+ } else
+ MIB.addReg(Addr.getReg()).addImm(Offset);
}
+
+ if (MMO)
+ MIB.addMemOperand(MMO);
}
-bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled) {
- // Negative offsets require unscaled, 9-bit, signed immediate offsets.
- // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
- if (!UseUnscaled && Addr.getOffset() < 0)
- UseUnscaled = true;
+unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
+ const Value *RHS, bool SetFlags,
+ bool WantResult, bool IsZExt) {
+ AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
+ bool NeedExtend = false;
+ switch (RetVT.SimpleTy) {
+ default:
+ return 0;
+ case MVT::i1:
+ NeedExtend = true;
+ break;
+ case MVT::i8:
+ NeedExtend = true;
+ ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
+ break;
+ case MVT::i16:
+ NeedExtend = true;
+ ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
+ break;
+ case MVT::i32: // fall-through
+ case MVT::i64:
+ break;
+ }
+ MVT SrcVT = RetVT;
+ RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
+
+ // Canonicalize immediates to the RHS first.
+ if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
+ std::swap(LHS, RHS);
+
+ // Canonicalize mul by power of 2 to the RHS.
+ if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
+ if (isMulPowOf2(LHS))
+ std::swap(LHS, RHS);
+
+ // Canonicalize shift immediate to the RHS.
+ if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
+ if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
+ if (isa<ConstantInt>(SI->getOperand(1)))
+ if (SI->getOpcode() == Instruction::Shl ||
+ SI->getOpcode() == Instruction::LShr ||
+ SI->getOpcode() == Instruction::AShr )
+ std::swap(LHS, RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return 0;
+ bool LHSIsKill = hasTrivialKill(LHS);
- unsigned Opc;
+ if (NeedExtend)
+ LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
+
+ unsigned ResultReg = 0;
+ if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+ uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
+ if (C->isNegative())
+ ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
+ SetFlags, WantResult);
+ else
+ ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
+ WantResult);
+ } else if (const auto *C = dyn_cast<Constant>(RHS))
+ if (C->isNullValue())
+ ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
+ WantResult);
+
+ if (ResultReg)
+ return ResultReg;
+
+ // Only extend the RHS within the instruction if there is a valid extend type.
+ if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
+ isValueAvailable(RHS)) {
+ if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
+ if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
+ if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
+ unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+ return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ExtendType, C->getZExtValue(),
+ SetFlags, WantResult);
+ }
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(RHS);
+ return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ ExtendType, 0, SetFlags, WantResult);
+ }
+
+ // Check if the mul can be folded into the instruction.
+ if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (isMulPowOf2(RHS)) {
+ const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
+ const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(MulLHS, MulRHS);
+
+ assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
+ uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
+ unsigned RHSReg = getRegForValue(MulLHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(MulLHS);
+ return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
+ }
+
+ // Check if the shift can be folded into the instruction.
+ if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
+ if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
+ switch (SI->getOpcode()) {
+ default: break;
+ case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
+ case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
+ case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
+ }
+ uint64_t ShiftVal = C->getZExtValue();
+ if (ShiftType != AArch64_AM::InvalidShiftExtend) {
+ unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+ return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftType, ShiftVal, SetFlags,
+ WantResult);
+ }
+ }
+ }
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (NeedExtend)
+ RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
+
+ return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ SetFlags, WantResult);
+}
+
+unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill, bool SetFlags,
+ bool WantResult) {
+ assert(LHSReg && RHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
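+ // Opcode table indexed as [SetFlags][UseAdd][Is64Bit].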
+ static const unsigned OpcTable[2][2][2] = {
+ { { AArch64::SUBWrr, AArch64::SUBXrr },
+ { AArch64::ADDWrr, AArch64::ADDXrr } },
+ { { AArch64::SUBSWrr, AArch64::SUBSXrr },
+ { AArch64::ADDSWrr, AArch64::ADDSXrr } }
+ };
+ bool Is64Bit = RetVT == MVT::i64;
+ unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned ResultReg;
+ if (WantResult)
+ ResultReg = createResultReg(RC);
+ else
+ ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill));
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, uint64_t Imm,
+ bool SetFlags, bool WantResult) {
+ assert(LHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
+ unsigned ShiftImm;
+ if (isUInt<12>(Imm))
+ ShiftImm = 0;
+ else if ((Imm & 0xfff000) == Imm) {
+ ShiftImm = 12;
+ Imm >>= 12;
+ } else
+ return 0;
+
+ static const unsigned OpcTable[2][2][2] = {
+ { { AArch64::SUBWri, AArch64::SUBXri },
+ { AArch64::ADDWri, AArch64::ADDXri } },
+ { { AArch64::SUBSWri, AArch64::SUBSXri },
+ { AArch64::ADDSWri, AArch64::ADDSXri } }
+ };
+ bool Is64Bit = RetVT == MVT::i64;
+ unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
const TargetRegisterClass *RC;
- bool VTIsi1 = false;
- int64_t ScaleFactor = 0;
+ if (SetFlags)
+ RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ else
+ RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
+ unsigned ResultReg;
+ if (WantResult)
+ ResultReg = createResultReg(RC);
+ else
+ ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addImm(Imm)
+ .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType,
+ uint64_t ShiftImm, bool SetFlags,
+ bool WantResult) {
+ assert(LHSReg && RHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
+ static const unsigned OpcTable[2][2][2] = {
+ { { AArch64::SUBWrs, AArch64::SUBXrs },
+ { AArch64::ADDWrs, AArch64::ADDXrs } },
+ { { AArch64::SUBSWrs, AArch64::SUBSXrs },
+ { AArch64::ADDSWrs, AArch64::ADDSXrs } }
+ };
+ bool Is64Bit = RetVT == MVT::i64;
+ unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned ResultReg;
+ if (WantResult)
+ ResultReg = createResultReg(RC);
+ else
+ ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill))
+ .addImm(getShifterImm(ShiftType, ShiftImm));
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ExtType,
+ uint64_t ShiftImm, bool SetFlags,
+ bool WantResult) {
+ assert(LHSReg && RHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
+ static const unsigned OpcTable[2][2][2] = {
+ { { AArch64::SUBWrx, AArch64::SUBXrx },
+ { AArch64::ADDWrx, AArch64::ADDXrx } },
+ { { AArch64::SUBSWrx, AArch64::SUBSXrx },
+ { AArch64::ADDSWrx, AArch64::ADDSXrx } }
+ };
+ bool Is64Bit = RetVT == MVT::i64;
+ unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
+ const TargetRegisterClass *RC = nullptr;
+ if (SetFlags)
+ RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ else
+ RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
+ unsigned ResultReg;
+ if (WantResult)
+ ResultReg = createResultReg(RC);
+ else
+ ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill))
+ .addImm(getArithExtendImm(ExtType, ShiftImm));
+ return ResultReg;
+}
+
+bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
+ Type *Ty = LHS->getType();
+ EVT EVT = TLI.getValueType(Ty, true);
+ if (!EVT.isSimple())
+ return false;
+ MVT VT = EVT.getSimpleVT();
+
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
- VTIsi1 = true;
- // Intentional fall-through.
case MVT::i8:
- Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ return emitICmp(VT, LHS, RHS, IsZExt);
+ case MVT::f32:
+ case MVT::f64:
+ return emitFCmp(VT, LHS, RHS);
+ }
+}
+
+bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool IsZExt) {
+ return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
+ IsZExt) != 0;
+}
+
+bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+ uint64_t Imm) {
+ return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
+ /*SetFlags=*/true, /*WantResult=*/false) != 0;
+}
+
+bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
+ if (RetVT != MVT::f32 && RetVT != MVT::f64)
+ return false;
+
+ // Check to see if the 2nd operand is a constant that we can encode directly
+ // in the compare.
+ bool UseImm = false;
+ if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->isZero() && !CFP->isNegative())
+ UseImm = true;
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ if (UseImm) {
+ unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ return true;
+ }
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill));
+ return true;
+}
+
+unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool SetFlags, bool WantResult, bool IsZExt) {
+ return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
+ IsZExt);
+}
+
+/// \brief This method is a wrapper to simplify add emission.
+///
+/// First try to emit an add with an immediate operand using emitAddSub_ri. If
+/// that fails, then try to materialize the immediate into a register and use
+/// emitAddSub_rr instead.
+unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
+ int64_t Imm) {
+ unsigned ResultReg;
+ if (Imm < 0)
+ ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
+ else
+ ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
+
+ if (ResultReg)
+ return ResultReg;
+
+ unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
+ if (!CReg)
+ return 0;
+
+ ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
+ bool SetFlags, bool WantResult, bool IsZExt) {
+ return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
+ IsZExt);
+}
+
+unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill, bool WantResult) {
+ return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, /*SetFlags=*/true, WantResult);
+}
+
+unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg,
+ bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType,
+ uint64_t ShiftImm, bool WantResult) {
+ return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
+ WantResult);
+}
+
+unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
+ const Value *LHS, const Value *RHS) {
+ // Canonicalize immediates to the RHS first.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
+ std::swap(LHS, RHS);
+
+ // Canonicalize mul by power-of-2 to the RHS.
+ if (LHS->hasOneUse() && isValueAvailable(LHS))
+ if (isMulPowOf2(LHS))
+ std::swap(LHS, RHS);
+
+ // Canonicalize shift immediate to the RHS.
+ if (LHS->hasOneUse() && isValueAvailable(LHS))
+ if (const auto *SI = dyn_cast<ShlOperator>(LHS))
+ if (isa<ConstantInt>(SI->getOperand(1)))
+ std::swap(LHS, RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return 0;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned ResultReg = 0;
+ if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+ uint64_t Imm = C->getZExtValue();
+ ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
+ }
+ if (ResultReg)
+ return ResultReg;
+
+ // Check if the mul can be folded into the instruction.
+ if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (isMulPowOf2(RHS)) {
+ const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
+ const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(MulLHS, MulRHS);
+
+ assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
+ uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
+
+ unsigned RHSReg = getRegForValue(MulLHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(MulLHS);
+ return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftVal);
+ }
+
+ // Check if the shift can be folded into the instruction.
+ if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (const auto *SI = dyn_cast<ShlOperator>(RHS))
+ if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ uint64_t ShiftVal = C->getZExtValue();
+ unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+ return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftVal);
+ }
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
+ ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
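+ // The logical operation was performed on the full 32-bit register; mask the
+ // result back down to the narrow type for i8/i16.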
+ if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
+ uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ }
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
+ unsigned LHSReg, bool LHSIsKill,
+ uint64_t Imm) {
+ assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
+ "ISD nodes are not consecutive!");
+ static const unsigned OpcTable[3][2] = {
+ { AArch64::ANDWri, AArch64::ANDXri },
+ { AArch64::ORRWri, AArch64::ORRXri },
+ { AArch64::EORWri, AArch64::EORXri }
+ };
+ const TargetRegisterClass *RC;
+ unsigned Opc;
+ unsigned RegSize;
+ switch (RetVT.SimpleTy) {
+ default:
+ return 0;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32: {
+ unsigned Idx = ISDOpc - ISD::AND;
+ Opc = OpcTable[Idx][0];
+ RC = &AArch64::GPR32spRegClass;
+ RegSize = 32;
+ break;
+ }
+ case MVT::i64:
+ Opc = OpcTable[ISDOpc - ISD::AND][1];
+ RC = &AArch64::GPR64spRegClass;
+ RegSize = 64;
+ break;
+ }
+
+ if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
+ return 0;
+
+ unsigned ResultReg =
+ fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
+ AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
+ if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
+ uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ }
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
+ unsigned LHSReg, bool LHSIsKill,
+ unsigned RHSReg, bool RHSIsKill,
+ uint64_t ShiftImm) {
+ assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
+ "ISD nodes are not consecutive!");
+ static const unsigned OpcTable[3][2] = {
+ { AArch64::ANDWrs, AArch64::ANDXrs },
+ { AArch64::ORRWrs, AArch64::ORRXrs },
+ { AArch64::EORWrs, AArch64::EORXrs }
+ };
+ const TargetRegisterClass *RC;
+ unsigned Opc;
+ switch (RetVT.SimpleTy) {
+ default:
+ return 0;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ Opc = OpcTable[ISDOpc - ISD::AND][0];
RC = &AArch64::GPR32RegClass;
+ break;
+ case MVT::i64:
+ Opc = OpcTable[ISDOpc - ISD::AND][1];
+ RC = &AArch64::GPR64RegClass;
+ break;
+ }
+ unsigned ResultReg =
+ fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
+ if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
+ uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ }
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
+ uint64_t Imm) {
+ return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
+}
+
+unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
+ bool WantZExt, MachineMemOperand *MMO) {
+ // Simplify this down to something we can handle.
+ if (!simplifyAddress(Addr, VT))
+ return 0;
+
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
+ llvm_unreachable("Unexpected value type.");
+
+ // Negative offsets require unscaled, 9-bit, signed immediate offsets.
+ // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
+ bool UseScaled = true;
+ if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+ UseScaled = false;
ScaleFactor = 1;
+ }
+
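+ // Opcode table indexed as [WantZExt][2 * Idx + IsRet64Bit][size]: extension
+ // kind first, then the addressing mode (unscaled, scaled, reg+reg X, reg+reg
+ // W) paired with the result width, then the access size.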
+ static const unsigned GPOpcTable[2][8][4] = {
+ // Sign-extend.
+ { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
+ AArch64::LDURXi },
+ { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
+ AArch64::LDRXui },
+ { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
+ AArch64::LDRXroW },
+ { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
+ AArch64::LDRXroW }
+ },
+ // Zero-extend.
+ { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+ AArch64::LDRXroW },
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+ AArch64::LDRXroW }
+ }
+ };
+
+ static const unsigned FPOpcTable[4][2] = {
+ { AArch64::LDURSi, AArch64::LDURDi },
+ { AArch64::LDRSui, AArch64::LDRDui },
+ { AArch64::LDRSroX, AArch64::LDRDroX },
+ { AArch64::LDRSroW, AArch64::LDRDroW }
+ };
+
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+ Addr.getOffsetReg();
+ unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+ if (Addr.getExtendType() == AArch64_AM::UXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTW)
+ Idx++;
+
+ bool IsRet64Bit = RetVT == MVT::i64;
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected value type.");
+ case MVT::i1: // Intentional fall-through.
+ case MVT::i8:
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i16:
- Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
- RC = &AArch64::GPR32RegClass;
- ScaleFactor = 2;
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i32:
- Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
- RC = &AArch64::GPR32RegClass;
- ScaleFactor = 4;
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
break;
case MVT::i64:
- Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
RC = &AArch64::GPR64RegClass;
- ScaleFactor = 8;
break;
case MVT::f32:
- Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
- RC = TLI.getRegClassFor(VT);
- ScaleFactor = 4;
+ Opc = FPOpcTable[Idx][0];
+ RC = &AArch64::FPR32RegClass;
break;
case MVT::f64:
- Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
- RC = TLI.getRegClassFor(VT);
- ScaleFactor = 8;
+ Opc = FPOpcTable[Idx][1];
+ RC = &AArch64::FPR64RegClass;
break;
}
- // Scale the offset.
- if (!UseUnscaled) {
- int64_t Offset = Addr.getOffset();
- if (Offset & (ScaleFactor - 1))
- // Retry using an unscaled, 9-bit, signed immediate offset.
- return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
-
- Addr.setOffset(Offset / ScaleFactor);
- }
-
- // Simplify this down to something we can handle.
- if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
- return false;
// Create the base instruction, then add the operands.
- ResultReg = createResultReg(RC);
+ unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
- AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
+ addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
// Loading an i1 requires special handling.
- if (VTIsi1) {
- MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(ResultReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ if (VT == MVT::i1) {
+ unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
+ assert(ANDReg && "Unexpected AND instruction emission failure.");
ResultReg = ANDReg;
}
+
+ // For zero-extending loads to 64bit we emit a 32bit load and then convert
+ // the 32bit reg to a 64bit reg.
+ if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
+ unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), Reg64)
+ .addImm(0)
+ .addReg(ResultReg, getKillRegState(true))
+ .addImm(AArch64::sub_32);
+ ResultReg = Reg64;
+ }
+ return ResultReg;
+}
+
+bool AArch64FastISel::selectAddSub(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
+ return false;
+
+ if (VT.isVector())
+ return selectOperator(I, I->getOpcode());
+
+ unsigned ResultReg;
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::Add:
+ ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Sub:
+ ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
+ break;
+ }
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
+ return false;
+
+ if (VT.isVector())
+ return selectOperator(I, I->getOpcode());
+
+ unsigned ResultReg;
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::And:
+ ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Or:
+ ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Xor:
+ ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ }
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectLoad(const Instruction *I) {
+bool AArch64FastISel::selectLoad(const Instruction *I) {
MVT VT;
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
- if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
+ if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
+ cast<LoadInst>(I)->isAtomic())
return false;
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(0), Addr))
+ if (!computeAddress(I->getOperand(0), Addr, I->getType()))
return false;
- unsigned ResultReg;
- if (!EmitLoad(VT, ResultReg, Addr))
+ // Fold the following sign-/zero-extend into the load instruction.
+ bool WantZExt = true;
+ MVT RetVT = VT;
+ const Value *IntExtVal = nullptr;
+ if (I->hasOneUse()) {
+ if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
+ if (isTypeSupported(ZE->getType(), RetVT))
+ IntExtVal = ZE;
+ else
+ RetVT = VT;
+ } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
+ if (isTypeSupported(SE->getType(), RetVT))
+ IntExtVal = SE;
+ else
+ RetVT = VT;
+ WantZExt = false;
+ }
+ }
+
+ unsigned ResultReg =
+ emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
+ if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ // There are a few different cases we have to handle, because the load or the
+ // sign-/zero-extend might not be selected by FastISel if we fall back to
+ // SelectionDAG. There is also an ordering issue when both instructions are in
+ // different basic blocks.
+ // 1.) The load instruction is selected by FastISel, but the integer extend
+ // is not. This usually happens when the integer extend is in a different
+ // basic block and SelectionDAG took over for that basic block.
+ // 2.) The load instruction is selected before the integer extend. This only
+ // happens when the integer extend is in a different basic block.
+ // 3.) The load instruction is selected by SelectionDAG and the integer extend
+ // by FastISel. This happens if there are instructions between the load
+ // and the integer extend that couldn't be selected by FastISel.
+ if (IntExtVal) {
+ // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
+ // could select it. Emit a copy to subreg if necessary. FastISel will remove
+ // it when it selects the integer extend.
+ unsigned Reg = lookUpRegForValue(IntExtVal);
+ if (!Reg) {
+ if (RetVT == MVT::i64 && VT <= MVT::i32) {
+ if (WantZExt) {
+ // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
+ std::prev(FuncInfo.InsertPt)->eraseFromParent();
+ ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
+ } else
+ ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
+ /*IsKill=*/true,
+ AArch64::sub_32);
+ }
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // The integer extend has already been emitted - delete all the instructions
+ // that have been emitted by the integer extend lowering code and use the
+ // result from the load instruction directly.
+ while (Reg) {
+ auto *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI)
+ break;
+ Reg = 0;
+ for (auto &Opnd : MI->uses()) {
+ if (Opnd.isReg()) {
+ Reg = Opnd.getReg();
+ break;
+ }
+ }
+ MI->eraseFromParent();
+ }
+ updateValueMap(IntExtVal, ResultReg);
+ return true;
+ }
+
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled) {
+bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
+ MachineMemOperand *MMO) {
+ // Simplify this down to something we can handle.
+ if (!simplifyAddress(Addr, VT))
+ return false;
+
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
+ llvm_unreachable("Unexpected value type.");
+
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
- if (!UseUnscaled && Addr.getOffset() < 0)
- UseUnscaled = true;
-
- unsigned StrOpc;
- bool VTIsi1 = false;
- int64_t ScaleFactor = 0;
- // Using scaled, 12-bit, unsigned immediate offsets.
- switch (VT.SimpleTy) {
- default:
- return false;
- case MVT::i1:
- VTIsi1 = true;
- case MVT::i8:
- StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
+ bool UseScaled = true;
+ if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+ UseScaled = false;
ScaleFactor = 1;
- break;
- case MVT::i16:
- StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
- ScaleFactor = 2;
- break;
- case MVT::i32:
- StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
- ScaleFactor = 4;
- break;
- case MVT::i64:
- StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
- ScaleFactor = 8;
- break;
- case MVT::f32:
- StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
- ScaleFactor = 4;
- break;
- case MVT::f64:
- StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
- ScaleFactor = 8;
- break;
}
- // Scale the offset.
- if (!UseUnscaled) {
- int64_t Offset = Addr.getOffset();
- if (Offset & (ScaleFactor - 1))
- // Retry using an unscaled, 9-bit, signed immediate offset.
- return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
- Addr.setOffset(Offset / ScaleFactor);
- }
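+ // Store opcode table: rows select the addressing mode (unscaled, scaled,
+ // reg+reg X, reg+reg W); columns select the stored type.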
+ static const unsigned OpcTable[4][6] = {
+ { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
+ AArch64::STURSi, AArch64::STURDi },
+ { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
+ AArch64::STRSui, AArch64::STRDui },
+ { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
+ AArch64::STRSroX, AArch64::STRDroX },
+ { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
+ AArch64::STRSroW, AArch64::STRDroW }
+ };
- // Simplify this down to something we can handle.
- if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
- return false;
+ unsigned Opc;
+ bool VTIsi1 = false;
+ bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+ Addr.getOffsetReg();
+ unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+ if (Addr.getExtendType() == AArch64_AM::UXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTW)
+ Idx++;
+
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i1: VTIsi1 = true; // Intentional fall-through.
+ case MVT::i8: Opc = OpcTable[Idx][0]; break;
+ case MVT::i16: Opc = OpcTable[Idx][1]; break;
+ case MVT::i32: Opc = OpcTable[Idx][2]; break;
+ case MVT::i64: Opc = OpcTable[Idx][3]; break;
+ case MVT::f32: Opc = OpcTable[Idx][4]; break;
+ case MVT::f64: Opc = OpcTable[Idx][5]; break;
+ }
// Storing an i1 requires special handling.
- if (VTIsi1) {
- MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(SrcReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ if (VTIsi1 && SrcReg != AArch64::WZR) {
+ unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
+ assert(ANDReg && "Unexpected AND instruction emission failure.");
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(StrOpc)).addReg(SrcReg);
- AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
+ const MCInstrDesc &II = TII.get(Opc);
+ SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
+ addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
+
return true;
}
-bool AArch64FastISel::SelectStore(const Instruction *I) {
+bool AArch64FastISel::selectStore(const Instruction *I) {
MVT VT;
- Value *Op0 = I->getOperand(0);
+ const Value *Op0 = I->getOperand(0);
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
- if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
+ if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
cast<StoreInst>(I)->isAtomic())
return false;
- // Get the value to be stored into a register.
- unsigned SrcReg = getRegForValue(Op0);
- if (SrcReg == 0)
+ // Get the value to be stored into a register. Use the zero register directly
+ // when possible to avoid an unnecessary copy and a wasted register.
+ unsigned SrcReg = 0;
+ if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
+ if (CI->isZero())
+ SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
+ if (CF->isZero() && !CF->isNegative()) {
+ VT = MVT::getIntegerVT(VT.getSizeInBits());
+ SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ }
+ }
+
+ if (!SrcReg)
+ SrcReg = getRegForValue(Op0);
+
+ if (!SrcReg)
return false;
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(1), Addr))
+ if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
return false;
- if (!EmitStore(VT, SrcReg, Addr))
+ if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
return false;
return true;
}
@@ -757,58 +2084,234 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
}
}
-bool AArch64FastISel::SelectBranch(const Instruction *I) {
+/// \brief Try to emit a combined compare-and-branch instruction.
+bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+ assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
+ const CmpInst *CI = cast<CmpInst>(BI->getCondition());
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ const Value *LHS = CI->getOperand(0);
+ const Value *RHS = CI->getOperand(1);
+
+ MVT VT;
+ if (!isTypeSupported(LHS->getType(), VT))
+ return false;
+
+ unsigned BW = VT.getSizeInBits();
+ if (BW > 64)
+ return false;
+
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Try to take advantage of fallthrough opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ int TestBit = -1;
+ bool IsCmpNE;
+ if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
+ if (const auto *C = dyn_cast<Constant>(LHS))
+ if (C->isNullValue())
+ std::swap(LHS, RHS);
+
+ if (!isa<Constant>(RHS))
+ return false;
+
+ if (!cast<Constant>(RHS)->isNullValue())
+ return false;
+
+ if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
+ if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
+ const Value *AndLHS = AI->getOperand(0);
+ const Value *AndRHS = AI->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(AndLHS, AndRHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
+ if (C->getValue().isPowerOf2()) {
+ TestBit = C->getValue().logBase2();
+ LHS = AndLHS;
+ }
+ }
+
+ if (VT == MVT::i1)
+ TestBit = 0;
+
+ IsCmpNE = Predicate == CmpInst::ICMP_NE;
+ } else if (Predicate == CmpInst::ICMP_SLT) {
+ if (!isa<Constant>(RHS))
+ return false;
+
+ if (!cast<Constant>(RHS)->isNullValue())
+ return false;
+
+ TestBit = BW - 1;
+ IsCmpNE = true;
+ } else if (Predicate == CmpInst::ICMP_SGT) {
+ if (!isa<ConstantInt>(RHS))
+ return false;
+
+ if (cast<ConstantInt>(RHS)->getValue() != -1)
+ return false;
+
+ TestBit = BW - 1;
+ IsCmpNE = false;
+ } else
+ return false;
+
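+ // Opcode table indexed as [IsBitTest][IsCmpNE][Is64Bit]: compare-and-branch
+ // (CBZ/CBNZ) vs. test-and-branch (TBZ/TBNZ).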
+ static const unsigned OpcTable[2][2][2] = {
+ { {AArch64::CBZW, AArch64::CBZX },
+ {AArch64::CBNZW, AArch64::CBNZX} },
+ { {AArch64::TBZW, AArch64::TBZX },
+ {AArch64::TBNZW, AArch64::TBNZX} }
+ };
+
+ bool IsBitTest = TestBit != -1;
+ bool Is64Bit = BW == 64;
+ if (TestBit < 32 && TestBit >= 0)
+ Is64Bit = false;
+
+ unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
+ const MCInstrDesc &II = TII.get(Opc);
+
+ unsigned SrcReg = getRegForValue(LHS);
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(LHS);
+
+ if (BW == 64 && !Is64Bit)
+ SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+ AArch64::sub_32);
+
+ if ((BW < 32) && !IsBitTest)
+ SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
+
+ // Emit the combined compare and branch instruction.
+ SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ if (IsBitTest)
+ MIB.addImm(TestBit);
+ MIB.addMBB(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+ fastEmitBranch(FBB, DbgLoc);
+
+ return true;
+}
+
+bool AArch64FastISel::selectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
+ if (BI->isUnconditional()) {
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ fastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+ AArch64CC::CondCode CC = AArch64CC::NE;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
- // We may not handle every CC for now.
- AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
- if (CC == AArch64CC::AL)
- return false;
+ if (CI->hasOneUse() && isValueAvailable(CI)) {
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_FALSE:
+ fastEmitBranch(FBB, DbgLoc);
+ return true;
+ case CmpInst::FCMP_TRUE:
+ fastEmitBranch(TBB, DbgLoc);
+ return true;
+ }
+
+ // Try to emit a combined compare-and-branch first.
+ if (emitCompareAndBranch(BI))
+ return true;
+
+ // Try to take advantage of fallthrough opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
// Emit the cmp.
- if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
+ // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
+ // instruction.
+ CC = getCompareCC(Predicate);
+ AArch64CC::CondCode ExtraCC = AArch64CC::AL;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_UEQ:
+ ExtraCC = AArch64CC::EQ;
+ CC = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_ONE:
+ ExtraCC = AArch64CC::MI;
+ CC = AArch64CC::GT;
+ break;
+ }
+ assert((CC != AArch64CC::AL) && "Unexpected condition code.");
+
+ // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
+ if (ExtraCC != AArch64CC::AL) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(ExtraCC)
+ .addMBB(TBB);
+ }
+
// Emit the branch.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
- FastEmitBranch(FBB, DbgLoc);
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
MVT SrcVT;
- if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
- (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
+ if (TI->hasOneUse() && isValueAvailable(TI) &&
+ isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
unsigned CondReg = getRegForValue(TI->getOperand(0));
- if (CondReg == 0)
+ if (!CondReg)
return false;
+ bool CondIsKill = hasTrivialKill(TI->getOperand(0));
// Issue an extract_subreg to get the lower 32-bits.
- if (SrcVT == MVT::i64)
- CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
+ if (SrcVT == MVT::i64) {
+ CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
AArch64::sub_32);
+ CondIsKill = true;
+ }
- MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::ANDWri), ANDReg)
- .addReg(CondReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::SUBSWri))
- .addReg(ANDReg)
- .addReg(ANDReg)
- .addImm(0)
- .addImm(0);
+ unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
+ assert(ANDReg && "Unexpected AND instruction emission failure.");
+ emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
- unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = AArch64CC::EQ;
@@ -816,23 +2319,57 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
- FastEmitBranch(FBB, DbgLoc);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
return true;
}
- } else if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(BI->getCondition())) {
+ } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
.addMBB(Target);
- FuncInfo.MBB->addSuccessor(Target);
+
+ // Obtain the branch weight and add the target to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ Target->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(Target, BranchWeight);
+ return true;
+ } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
+ // Fake-request the condition; otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (!CondReg)
+ return false;
+
+ // Emit the branch.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(CC)
+ .addMBB(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
return true;
}
unsigned CondReg = getRegForValue(BI->getCondition());
if (CondReg == 0)
return false;
+ bool CondRegIsKill = hasTrivialKill(BI->getCondition());
// We've been divorced from our compare! Our block was split, and
// now our compare lives in a predecessor block. We mustn't
@@ -841,13 +2378,8 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) {
// Regardless, the compare has been done in the predecessor block,
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
- AArch64::WZR)
- .addReg(CondReg)
- .addImm(0)
- .addImm(0);
+ emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
- unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = AArch64CC::EQ;
@@ -856,20 +2388,28 @@ bool AArch64FastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
- FastEmitBranch(FBB, DbgLoc);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ fastEmitBranch(FBB, DbgLoc);
return true;
}
-bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
+bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
unsigned AddrReg = getRegForValue(BI->getOperand(0));
if (AddrReg == 0)
return false;
// Emit the indirect branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
- .addReg(AddrReg);
+ const MCInstrDesc &II = TII.get(AArch64::BR);
+ AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
// Make sure the CFG is up-to-date.
for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
@@ -878,211 +2418,271 @@ bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
return true;
}
-bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
- Type *Ty = Src1Value->getType();
- EVT SrcEVT = TLI.getValueType(Ty, true);
- if (!SrcEVT.isSimple())
- return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
-
- // Check to see if the 2nd operand is a constant that we can encode directly
- // in the compare.
- uint64_t Imm;
- bool UseImm = false;
- bool isNegativeImm = false;
- if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
- if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
- SrcVT == MVT::i8 || SrcVT == MVT::i1) {
- const APInt &CIVal = ConstInt->getValue();
-
- Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
- if (CIVal.isNegative()) {
- isNegativeImm = true;
- Imm = -Imm;
- }
- // FIXME: We can handle more immediates using shifts.
- UseImm = ((Imm & 0xfff) == Imm);
- }
- } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
- if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
- if (ConstFP->isZero() && !ConstFP->isNegative())
- UseImm = true;
- }
+bool AArch64FastISel::selectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
- unsigned ZReg;
- unsigned CmpOpc;
- bool isICmp = true;
- bool needsExt = false;
- switch (SrcVT.SimpleTy) {
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ unsigned ResultReg = 0;
+ switch (Predicate) {
default:
- return false;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- needsExt = true;
- // Intentional fall-through.
- case MVT::i32:
- ZReg = AArch64::WZR;
- if (UseImm)
- CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
- else
- CmpOpc = AArch64::SUBSWrr;
break;
- case MVT::i64:
- ZReg = AArch64::XZR;
- if (UseImm)
- CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
- else
- CmpOpc = AArch64::SUBSXrr;
- break;
- case MVT::f32:
- isICmp = false;
- CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
+ case CmpInst::FCMP_FALSE:
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(AArch64::WZR, getKillRegState(true));
break;
- case MVT::f64:
- isICmp = false;
- CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
+ case CmpInst::FCMP_TRUE:
+ ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
break;
}
- unsigned SrcReg1 = getRegForValue(Src1Value);
- if (SrcReg1 == 0)
- return false;
-
- unsigned SrcReg2;
- if (!UseImm) {
- SrcReg2 = getRegForValue(Src2Value);
- if (SrcReg2 == 0)
- return false;
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
+ return true;
}
- // We have i1, i8, or i16, we need to either zero extend or sign extend.
- if (needsExt) {
- SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
- if (SrcReg1 == 0)
- return false;
- if (!UseImm) {
- SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
- if (SrcReg2 == 0)
- return false;
- }
- }
+ // Emit the cmp.
+ if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
- if (isICmp) {
- if (UseImm)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
- .addReg(ZReg)
- .addReg(SrcReg1)
- .addImm(Imm)
- .addImm(0);
- else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
- .addReg(ZReg)
- .addReg(SrcReg1)
- .addReg(SrcReg2);
- } else {
- if (UseImm)
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
- .addReg(SrcReg1);
- else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
- .addReg(SrcReg1)
- .addReg(SrcReg2);
- }
- return true;
-}
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
-bool AArch64FastISel::SelectCmp(const Instruction *I) {
- const CmpInst *CI = cast<CmpInst>(I);
+  // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. The
+  // condition codes in the table are inverted because CSINC produces 1 when
+  // its condition is false.
+ static unsigned CondCodeTable[2][2] = {
+ { AArch64CC::NE, AArch64CC::VC },
+ { AArch64CC::PL, AArch64CC::LE }
+ };
+ unsigned *CondCodes = nullptr;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_UEQ:
+ CondCodes = &CondCodeTable[0][0];
+ break;
+ case CmpInst::FCMP_ONE:
+ CondCodes = &CondCodeTable[1][0];
+ break;
+ }
- // We may not handle every CC for now.
- AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
- if (CC == AArch64CC::AL)
- return false;
+ if (CondCodes) {
+ unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
+ TmpReg1)
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addImm(CondCodes[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
+ ResultReg)
+ .addReg(TmpReg1, getKillRegState(true))
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addImm(CondCodes[1]);
- // Emit the cmp.
- if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
- return false;
+ updateValueMap(I, ResultReg);
+ return true;
+ }
// Now set a register based on the comparison.
+ AArch64CC::CondCode CC = getCompareCC(Predicate);
+ assert((CC != AArch64CC::AL) && "Unexpected condition code.");
AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
- unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
ResultReg)
- .addReg(AArch64::WZR)
- .addReg(AArch64::WZR)
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addReg(AArch64::WZR, getKillRegState(true))
.addImm(invertedCC);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
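
The CSINC-based composition above is easier to see with plain arithmetic. A minimal standalone sketch (ordinary C++, not the FastISel code itself) of the FCMP_UEQ case, where each CSINC of WZR/WZR yields 1 exactly when its condition fails:

    #include <cmath>

    // Sketch only: models "CSINC wd, wzr, wzr, cond" as (cond ? 0 : 1).
    int ueqViaCSINC(double a, double b) {
      bool unordered = std::isnan(a) || std::isnan(b);
      int tmp = (a == b) ? 1 : 0;   // CSINC ..., NE: NE fails only on ordered-equal
      return unordered ? 1 : tmp;   // CSINC tmp, wzr, VC: VC fails when unordered
    }
    // The result is 1 for "unordered or equal", i.e. FCMP_UEQ.
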
-bool AArch64FastISel::SelectSelect(const Instruction *I) {
- const SelectInst *SI = cast<SelectInst>(I);
-
- EVT DestEVT = TLI.getValueType(SI->getType(), true);
- if (!DestEVT.isSimple())
+/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
+/// value.
+bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
+ if (!SI->getType()->isIntegerTy(1))
return false;
- MVT DestVT = DestEVT.getSimpleVT();
- if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
- DestVT != MVT::f64)
- return false;
+ const Value *Src1Val, *Src2Val;
+ unsigned Opc = 0;
+ bool NeedExtraOp = false;
+ if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
+ if (CI->isOne()) {
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getFalseValue();
+ Opc = AArch64::ORRWrr;
+ } else {
+ assert(CI->isZero());
+ Src1Val = SI->getFalseValue();
+ Src2Val = SI->getCondition();
+ Opc = AArch64::BICWrr;
+ }
+ } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
+ if (CI->isOne()) {
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getTrueValue();
+ Opc = AArch64::ORRWrr;
+ NeedExtraOp = true;
+ } else {
+ assert(CI->isZero());
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getTrueValue();
+ Opc = AArch64::ANDWrr;
+ }
+ }
- unsigned CondReg = getRegForValue(SI->getCondition());
- if (CondReg == 0)
+ if (!Opc)
return false;
- unsigned TrueReg = getRegForValue(SI->getTrueValue());
- if (TrueReg == 0)
- return false;
- unsigned FalseReg = getRegForValue(SI->getFalseValue());
- if (FalseReg == 0)
+
+ unsigned Src1Reg = getRegForValue(Src1Val);
+ if (!Src1Reg)
return false;
+ bool Src1IsKill = hasTrivialKill(Src1Val);
+ unsigned Src2Reg = getRegForValue(Src2Val);
+ if (!Src2Reg)
+ return false;
+ bool Src2IsKill = hasTrivialKill(Src2Val);
- MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(CondReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ if (NeedExtraOp) {
+ Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
+ Src1IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
+ Src1IsKill, Src2Reg, Src2IsKill);
+ updateValueMap(SI, ResultReg);
+ return true;
+}
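
The rewrites above rely on simple boolean identities for i1 selects. A small standalone check of the four cases, using plain integer logic in place of the ORR/BIC/EOR+ORR/AND instruction sequences (a sketch of the intent, not the emitted code):

    #include <cassert>

    int main() {
      for (int c = 0; c <= 1; ++c)
        for (int v = 0; v <= 1; ++v) {
          assert((c ? 1 : v) == (c | v));        // true value 1  -> ORRWrr
          assert((c ? 0 : v) == (v & ~c & 1));   // true value 0  -> BICWrr
          assert((c ? v : 1) == ((c ^ 1) | v));  // false value 1 -> EOR #1, then ORRWrr
          assert((c ? v : 0) == (c & v));        // false value 0 -> ANDWrr
        }
      return 0;
    }
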
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
- .addReg(ANDReg)
- .addReg(ANDReg)
- .addImm(0)
- .addImm(0);
+bool AArch64FastISel::selectSelect(const Instruction *I) {
+ assert(isa<SelectInst>(I) && "Expected a select instruction.");
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT))
+ return false;
- unsigned SelectOpc;
- switch (DestVT.SimpleTy) {
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
default:
return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
case MVT::i32:
- SelectOpc = AArch64::CSELWr;
+ Opc = AArch64::CSELWr;
+ RC = &AArch64::GPR32RegClass;
break;
case MVT::i64:
- SelectOpc = AArch64::CSELXr;
+ Opc = AArch64::CSELXr;
+ RC = &AArch64::GPR64RegClass;
break;
case MVT::f32:
- SelectOpc = AArch64::FCSELSrrr;
+ Opc = AArch64::FCSELSrrr;
+ RC = &AArch64::FPR32RegClass;
break;
case MVT::f64:
- SelectOpc = AArch64::FCSELDrrr;
+ Opc = AArch64::FCSELDrrr;
+ RC = &AArch64::FPR64RegClass;
break;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
- ResultReg)
- .addReg(TrueReg)
- .addReg(FalseReg)
- .addImm(AArch64CC::NE);
+ const SelectInst *SI = cast<SelectInst>(I);
+ const Value *Cond = SI->getCondition();
+ AArch64CC::CondCode CC = AArch64CC::NE;
+ AArch64CC::CondCode ExtraCC = AArch64CC::AL;
+
+ if (optimizeSelect(SI))
+ return true;
+
+  // Try to pick up the flags, so we don't have to emit another compare.
+ if (foldXALUIntrinsic(CC, I, Cond)) {
+    // Request the condition register anyway; this forces the XALU intrinsic
+    // to be emitted so its flags are available.
+ unsigned CondReg = getRegForValue(Cond);
+ if (!CondReg)
+ return false;
+ } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
+ isValueAvailable(Cond)) {
+ const auto *Cmp = cast<CmpInst>(Cond);
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
+ const Value *FoldSelect = nullptr;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_FALSE:
+ FoldSelect = SI->getFalseValue();
+ break;
+ case CmpInst::FCMP_TRUE:
+ FoldSelect = SI->getTrueValue();
+ break;
+ }
- UpdateValueMap(I, ResultReg);
+ if (FoldSelect) {
+ unsigned SrcReg = getRegForValue(FoldSelect);
+ if (!SrcReg)
+ return false;
+ unsigned UseReg = lookUpRegForValue(SI);
+ if (UseReg)
+ MRI.clearKillFlags(UseReg);
+
+ updateValueMap(I, SrcReg);
+ return true;
+ }
+
+ // Emit the cmp.
+ if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
+ return false;
+
+ // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
+ CC = getCompareCC(Predicate);
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_UEQ:
+ ExtraCC = AArch64CC::EQ;
+ CC = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_ONE:
+ ExtraCC = AArch64CC::MI;
+ CC = AArch64CC::GT;
+ break;
+ }
+ assert((CC != AArch64CC::AL) && "Unexpected condition code.");
+ } else {
+ unsigned CondReg = getRegForValue(Cond);
+ if (!CondReg)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ // Emit a TST instruction (ANDS wzr, reg, #imm).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
+ AArch64::WZR)
+ .addReg(CondReg, getKillRegState(CondIsKill))
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ }
+
+ unsigned Src1Reg = getRegForValue(SI->getTrueValue());
+ bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
+
+ unsigned Src2Reg = getRegForValue(SI->getFalseValue());
+ bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
+
+ if (!Src1Reg || !Src2Reg)
+ return false;
+
+ if (ExtraCC != AArch64CC::AL) {
+ Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+ Src2IsKill, ExtraCC);
+ Src2IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+ Src2IsKill, CC);
+ updateValueMap(I, ResultReg);
return true;
}
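
For FCMP_UEQ and FCMP_ONE the function chains two conditional selects through the ExtraCC/CC pair. A brief sketch of the FCMP_ONE case with ordinary comparisons (again only illustrating the intent, not the emitted CSEL/FCSEL instructions):

    // ONE = "ordered and not equal": select on MI (a < b), then on GT (a > b).
    double selectONE(double a, double b, double t, double f) {
      double inner = (a < b) ? t : f;   // first select, ExtraCC = MI
      return (a > b) ? t : inner;       // second select, CC = GT
    }
    // Equal or unordered inputs fall through both selects and yield f.
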
-bool AArch64FastISel::SelectFPExt(const Instruction *I) {
+bool AArch64FastISel::selectFPExt(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
return false;
@@ -1094,11 +2694,11 @@ bool AArch64FastISel::SelectFPExt(const Instruction *I) {
unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
ResultReg).addReg(Op);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
+bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
return false;
@@ -1110,12 +2710,12 @@ bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
ResultReg).addReg(Op);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// FPToUI and FPToSI
-bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
+bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
@@ -1144,11 +2744,11 @@ bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
+bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
@@ -1156,22 +2756,21 @@ bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
"Unexpected value type.");
unsigned SrcReg = getRegForValue(I->getOperand(0));
- if (SrcReg == 0)
+ if (!SrcReg)
return false;
+ bool SrcIsKill = hasTrivialKill(I->getOperand(0));
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
SrcReg =
- EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
- if (SrcReg == 0)
+ emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
+ if (!SrcReg)
return false;
+ SrcIsKill = true;
}
- MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
- : &AArch64::GPR32RegClass);
-
unsigned Opc;
if (SrcVT == MVT::i64) {
if (Signed)
@@ -1185,21 +2784,128 @@ bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(SrcReg);
- UpdateValueMap(I, ResultReg);
+ unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
+ SrcIsKill);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::fastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+ // Only handle simple cases of up to 8 GPR and FPR each.
+ unsigned GPRCnt = 0;
+ unsigned FPRCnt = 0;
+ unsigned Idx = 0;
+ for (auto const &Arg : F->args()) {
+ // The first argument is at index 1.
+ ++Idx;
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = Arg.getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple())
+ return false;
+
+ MVT VT = ArgVT.getSimpleVT().SimpleTy;
+ if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
+ return false;
+
+ if (VT.isVector() &&
+ (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
+ return false;
+
+ if (VT >= MVT::i1 && VT <= MVT::i64)
+ ++GPRCnt;
+ else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
+ VT.is128BitVector())
+ ++FPRCnt;
+ else
+ return false;
+
+ if (GPRCnt > 8 || FPRCnt > 8)
+ return false;
+ }
+
+ static const MCPhysReg Registers[6][8] = {
+ { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
+ AArch64::W5, AArch64::W6, AArch64::W7 },
+ { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
+ AArch64::X5, AArch64::X6, AArch64::X7 },
+ { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
+ AArch64::H5, AArch64::H6, AArch64::H7 },
+ { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
+ AArch64::S5, AArch64::S6, AArch64::S7 },
+ { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
+ AArch64::D5, AArch64::D6, AArch64::D7 },
+ { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
+ AArch64::Q5, AArch64::Q6, AArch64::Q7 }
+ };
+
+ unsigned GPRIdx = 0;
+ unsigned FPRIdx = 0;
+ for (auto const &Arg : F->args()) {
+ MVT VT = TLI.getSimpleValueType(Arg.getType());
+ unsigned SrcReg;
+ const TargetRegisterClass *RC;
+ if (VT >= MVT::i1 && VT <= MVT::i32) {
+ SrcReg = Registers[0][GPRIdx++];
+ RC = &AArch64::GPR32RegClass;
+ VT = MVT::i32;
+ } else if (VT == MVT::i64) {
+ SrcReg = Registers[1][GPRIdx++];
+ RC = &AArch64::GPR64RegClass;
+ } else if (VT == MVT::f16) {
+ SrcReg = Registers[2][FPRIdx++];
+ RC = &AArch64::FPR16RegClass;
+ } else if (VT == MVT::f32) {
+ SrcReg = Registers[3][FPRIdx++];
+ RC = &AArch64::FPR32RegClass;
+ } else if ((VT == MVT::f64) || VT.is64BitVector()) {
+ SrcReg = Registers[4][FPRIdx++];
+ RC = &AArch64::FPR64RegClass;
+ } else if (VT.is128BitVector()) {
+ SrcReg = Registers[5][FPRIdx++];
+ RC = &AArch64::FPR128RegClass;
+ } else
+ llvm_unreachable("Unexpected value type.");
+
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(DstReg, getKillRegState(true));
+ updateValueMap(&Arg, ResultReg);
+ }
return true;
}
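
As a worked example of the register table and the two independent counters (the signature is hypothetical, chosen only to illustrate the assignment):

    // define i32 @f(i32 %a, i64 %b, float %c, double %d)
    //   %a -> W0  (GPRIdx 0, copied from the live-in into a GPR32 vreg)
    //   %b -> X1  (GPRIdx 1)
    //   %c -> S0  (FPRIdx 0)
    //   %d -> D1  (FPRIdx 1)
    // GPR and FPR arguments advance separate indices, so up to 8 of each fit.
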
-bool AArch64FastISel::ProcessCallArgs(
- SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
- SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
- unsigned &NumBytes) {
+bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
+ SmallVectorImpl<MVT> &OutVTs,
+ unsigned &NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+ CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
@@ -1207,13 +2913,17 @@ bool AArch64FastISel::ProcessCallArgs(
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(NumBytes);
+ .addImm(NumBytes);
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- unsigned Arg = ArgRegs[VA.getValNo()];
- MVT ArgVT = ArgVTs[VA.getValNo()];
+ const Value *ArgVal = CLI.OutVals[VA.getValNo()];
+ MVT ArgVT = OutVTs[VA.getValNo()];
+
+ unsigned ArgReg = getRegForValue(ArgVal);
+ if (!ArgReg)
+ return false;
// Handle arg promotion: SExt, ZExt, AExt.
switch (VA.getLocInfo()) {
@@ -1222,8 +2932,8 @@ bool AArch64FastISel::ProcessCallArgs(
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
- Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
- if (Arg == 0)
+ ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
+ if (!ArgReg)
return false;
break;
}
@@ -1232,8 +2942,8 @@ bool AArch64FastISel::ProcessCallArgs(
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
- Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
- if (Arg == 0)
+ ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
+ if (!ArgReg)
return false;
break;
}
@@ -1244,14 +2954,18 @@ bool AArch64FastISel::ProcessCallArgs(
// Now copy/store arg to correct locations.
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
- RegArgs.push_back(VA.getLocReg());
+ TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
+ CLI.OutRegs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// FIXME: Handle custom args.
return false;
} else {
assert(VA.isMemLoc() && "Assuming store on stack.");
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
// Need to store on the stack.
unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
@@ -1264,26 +2978,31 @@ bool AArch64FastISel::ProcessCallArgs(
Addr.setReg(AArch64::SP);
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
- if (!EmitStore(ArgVT, Arg, Addr))
+ unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getStack(Addr.getOffset()),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+
+ if (!emitStore(ArgVT, ArgReg, Addr, MMO))
return false;
}
}
return true;
}
-bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes) {
+bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
+ unsigned NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
- .addImm(NumBytes)
- .addImm(0);
+ .addImm(NumBytes).addImm(0);
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
// Only handle a single return value.
@@ -1294,147 +3013,147 @@ bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
MVT CopyVT = RVLocs[0].getValVT();
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY),
- ResultReg).addReg(RVLocs[0].getLocReg());
- UsedRegs.push_back(RVLocs[0].getLocReg());
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(RVLocs[0].getLocReg());
+ CLI.InRegs.push_back(RVLocs[0].getLocReg());
- // Finally update the result.
- UpdateValueMap(I, ResultReg);
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = 1;
}
return true;
}
-bool AArch64FastISel::SelectCall(const Instruction *I,
- const char *IntrMemName = nullptr) {
- const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = CI->getCalledValue();
+bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
+ CallingConv::ID CC = CLI.CallConv;
+ bool IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ const char *SymName = CLI.SymName;
- // Don't handle inline asm or intrinsics.
- if (isa<InlineAsm>(Callee))
+ if (!Callee && !SymName)
return false;
- // Only handle global variable Callees.
- const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV)
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
return false;
- // Check the calling convention.
- ImmutableCallSite CS(CI);
- CallingConv::ID CC = CS.getCallingConv();
+ CodeModel::Model CM = TM.getCodeModel();
+ // Only support the small and large code model.
+ if (CM != CodeModel::Small && CM != CodeModel::Large)
+ return false;
+
+ // FIXME: Add large code model support for ELF.
+ if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
+ return false;
// Let SDISel handle vararg functions.
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- if (FTy->isVarArg())
+ if (IsVarArg)
return false;
- // Handle *simple* calls for now.
+ // FIXME: Only handle *simple* calls for now.
MVT RetVT;
- Type *RetTy = I->getType();
- if (RetTy->isVoidTy())
+ if (CLI.RetTy->isVoidTy())
RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT))
+ else if (!isTypeLegal(CLI.RetTy, RetVT))
return false;
- // Set up the argument vectors.
- SmallVector<Value *, 8> Args;
- SmallVector<unsigned, 8> ArgRegs;
- SmallVector<MVT, 8> ArgVTs;
- SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgRegs.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
-
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
- // If we're lowering a memory intrinsic instead of a regular call, skip the
- // last two arguments, which shouldn't be passed to the underlying function.
- if (IntrMemName && e - i <= 2)
- break;
-
- unsigned Arg = getRegForValue(*i);
- if (Arg == 0)
+ for (auto Flag : CLI.OutFlags)
+ if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
return false;
- ISD::ArgFlagsTy Flags;
- unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
- Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
- Flags.setZExt();
-
- // FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
- return false;
+ // Set up the argument vectors.
+ SmallVector<MVT, 16> OutVTs;
+ OutVTs.reserve(CLI.OutVals.size());
- MVT ArgVT;
- Type *ArgTy = (*i)->getType();
- if (!isTypeLegal(ArgTy, ArgVT) &&
- !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
+ for (auto *Val : CLI.OutVals) {
+ MVT VT;
+ if (!isTypeLegal(Val->getType(), VT) &&
+ !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
return false;
// We don't handle vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ if (VT.isVector() || VT.getSizeInBits() > 64)
return false;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- Flags.setOrigAlign(OriginalAlignment);
-
- Args.push_back(*i);
- ArgRegs.push_back(Arg);
- ArgVTs.push_back(ArgVT);
- ArgFlags.push_back(Flags);
+ OutVTs.push_back(VT);
}
+ Address Addr;
+ if (Callee && !computeCallAddress(Callee, Addr))
+ return false;
+
// Handle the arguments now that we've gotten them.
- SmallVector<unsigned, 4> RegArgs;
unsigned NumBytes;
- if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ if (!processCallArgs(CLI, OutVTs, NumBytes))
return false;
// Issue the call.
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
- if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
- else
- MIB.addExternalSymbol(IntrMemName, 0);
+ if (CM == CodeModel::Small) {
+ const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
+ if (SymName)
+ MIB.addExternalSymbol(SymName, 0);
+ else if (Addr.getGlobalValue())
+ MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
+ else if (Addr.getReg()) {
+ unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
+ MIB.addReg(Reg);
+ } else
+ return false;
+ } else {
+ unsigned CallReg = 0;
+ if (SymName) {
+ unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+ ADRPReg)
+ .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+
+ CallReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
+ CallReg)
+ .addReg(ADRPReg)
+ .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else if (Addr.getGlobalValue())
+ CallReg = materializeGV(Addr.getGlobalValue());
+ else if (Addr.getReg())
+ CallReg = Addr.getReg();
+
+ if (!CallReg)
+ return false;
+
+ const MCInstrDesc &II = TII.get(AArch64::BLR);
+ CallReg = constrainOperandRegClass(II, CallReg, 0);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
+ }
// Add implicit physical register uses to the call.
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i], RegState::Implicit);
+ for (auto Reg : CLI.OutRegs)
+ MIB.addReg(Reg, RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
- // Finish off the call including any return values.
- SmallVector<unsigned, 4> UsedRegs;
- if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
- return false;
-
- // Set all unused physreg defs as dead.
- static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+ CLI.Call = MIB;
- return true;
+ // Finish off the call including any return values.
+ return finishCall(CLI, RetVT, NumBytes);
}
-bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
+bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
if (Alignment)
return Len / Alignment <= 4;
else
return Len < 32;
}
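
In other words, an aligned copy is inlined only if it fits in at most four naturally aligned chunks; otherwise a 32-byte cap applies. A couple of hypothetical values to make the threshold concrete (the actual chunking is done by tryEmitSmallMemCpy below):

    // Len = 16, Alignment = 8 -> 16 / 8 = 2 <= 4      : inlined
    // Len = 64, Alignment = 8 -> 64 / 8 = 8 >  4      : falls back to a call
    // Len = 24, Alignment = 0 -> unknown alignment, 24 < 32 : still inlined
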
-bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
+bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
uint64_t Len, unsigned Alignment) {
// Make sure we don't bloat code by inlining very large memcpy's.
- if (!IsMemCpySmall(Len, Alignment))
+ if (!isMemCpySmall(Len, Alignment))
return false;
int64_t UnscaledOffset = 0;
@@ -1464,14 +3183,11 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
}
}
- bool RV;
- unsigned ResultReg;
- RV = EmitLoad(VT, ResultReg, Src);
- if (!RV)
+ unsigned ResultReg = emitLoad(VT, VT, Src);
+ if (!ResultReg)
return false;
- RV = EmitStore(VT, ResultReg, Dest);
- if (!RV)
+ if (!emitStore(VT, ResultReg, Dest))
return false;
int64_t Size = VT.getSizeInBits() / 8;
@@ -1486,73 +3202,430 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
return true;
}
-bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
- // FIXME: Handle more intrinsics.
- switch (I.getIntrinsicID()) {
+/// \brief Check if it is possible to fold the condition from the XALU intrinsic
+/// into the user. The condition code will only be updated on success.
+bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
+ const Instruction *I,
+ const Value *Cond) {
+ if (!isa<ExtractValueInst>(Cond))
+ return false;
+
+ const auto *EV = cast<ExtractValueInst>(Cond);
+ if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
+ return false;
+
+ const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
+ MVT RetVT;
+ const Function *Callee = II->getCalledFunction();
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
+ if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return false;
+
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(II))
+ std::swap(LHS, RHS);
+
+ // Simplify multiplies.
+ unsigned IID = II->getIntrinsicID();
+ switch (IID) {
default:
+ break;
+ case Intrinsic::smul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2)
+ IID = Intrinsic::sadd_with_overflow;
+ break;
+ case Intrinsic::umul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2)
+ IID = Intrinsic::uadd_with_overflow;
+ break;
+ }
+
+ AArch64CC::CondCode TmpCC;
+ switch (IID) {
+ default:
+ return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ TmpCC = AArch64CC::VS;
+ break;
+ case Intrinsic::uadd_with_overflow:
+ TmpCC = AArch64CC::HS;
+ break;
+ case Intrinsic::usub_with_overflow:
+ TmpCC = AArch64CC::LO;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ TmpCC = AArch64CC::NE;
+ break;
+ }
+
+ // Check if both instructions are in the same basic block.
+ if (!isValueAvailable(II))
return false;
+
+  // Make sure nothing is in the way between the intrinsic and its user.
+ BasicBlock::const_iterator Start = I;
+ BasicBlock::const_iterator End = II;
+ for (auto Itr = std::prev(Start); Itr != End; --Itr) {
+ // We only expect extractvalue instructions between the intrinsic and the
+ // instruction to be selected.
+ if (!isa<ExtractValueInst>(Itr))
+ return false;
+
+ // Check that the extractvalue operand comes from the intrinsic.
+ const auto *EVI = cast<ExtractValueInst>(Itr);
+ if (EVI->getAggregateOperand() != II)
+ return false;
+ }
+
+ CC = TmpCC;
+ return true;
+}
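
A sketch of the IR shape this helper looks for (the value names are made up; only the structure matters). The flags left by the overflow arithmetic are then reused by the user instead of re-materializing the i1:

    // %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
    // %val = extractvalue { i32, i1 } %res, 0
    // %ovf = extractvalue { i32, i1 } %res, 1
    // %sel = select i1 %ovf, i32 %x, i32 %y   ; the user the flags fold into (CC = VS)
    // Only extractvalues may sit between the intrinsic and the user, and both
    // must be in the same block, which is what the loop above verifies.
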
+
+bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
+ // FIXME: Handle more intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::frameaddress: {
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
+ // Recursively load frame address
+ // ldr x0, [fp]
+ // ldr x0, [x0]
+ // ldr x0, [x0]
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
+ SrcReg, /*IsKill=*/true, 0);
+ assert(DestReg && "Unexpected LDR instruction emission failure.");
+ SrcReg = DestReg;
+ }
+
+ updateValueMap(II, SrcReg);
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove: {
- const MemTransferInst &MTI = cast<MemTransferInst>(I);
+ const auto *MTI = cast<MemTransferInst>(II);
// Don't handle volatile.
- if (MTI.isVolatile())
+ if (MTI->isVolatile())
return false;
// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
- bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
- if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+ bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
// Small memcpy's are common enough that we want to do them without a call
// if possible.
- uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
- unsigned Alignment = MTI.getAlignment();
- if (IsMemCpySmall(Len, Alignment)) {
+ uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
+ unsigned Alignment = MTI->getAlignment();
+ if (isMemCpySmall(Len, Alignment)) {
Address Dest, Src;
- if (!ComputeAddress(MTI.getRawDest(), Dest) ||
- !ComputeAddress(MTI.getRawSource(), Src))
+ if (!computeAddress(MTI->getRawDest(), Dest) ||
+ !computeAddress(MTI->getRawSource(), Src))
return false;
- if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
+ if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
- if (!MTI.getLength()->getType()->isIntegerTy(64))
+ if (!MTI->getLength()->getType()->isIntegerTy(64))
return false;
- if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+ if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
- const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
- return SelectCall(&I, IntrMemName);
+ const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
+ return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
}
case Intrinsic::memset: {
- const MemSetInst &MSI = cast<MemSetInst>(I);
+ const MemSetInst *MSI = cast<MemSetInst>(II);
// Don't handle volatile.
- if (MSI.isVolatile())
+ if (MSI->isVolatile())
return false;
- if (!MSI.getLength()->getType()->isIntegerTy(64))
+ if (!MSI->getLength()->getType()->isIntegerTy(64))
return false;
- if (MSI.getDestAddressSpace() > 255)
+ if (MSI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
- return SelectCall(&I, "memset");
+ return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
+ }
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::pow: {
+ MVT RetVT;
+ if (!isTypeLegal(II->getType(), RetVT))
+ return false;
+
+ if (RetVT != MVT::f32 && RetVT != MVT::f64)
+ return false;
+
+ static const RTLIB::Libcall LibCallTable[3][2] = {
+ { RTLIB::SIN_F32, RTLIB::SIN_F64 },
+ { RTLIB::COS_F32, RTLIB::COS_F64 },
+ { RTLIB::POW_F32, RTLIB::POW_F64 }
+ };
+ RTLIB::Libcall LC;
+ bool Is64Bit = RetVT == MVT::f64;
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic.");
+ case Intrinsic::sin:
+ LC = LibCallTable[0][Is64Bit];
+ break;
+ case Intrinsic::cos:
+ LC = LibCallTable[1][Is64Bit];
+ break;
+ case Intrinsic::pow:
+ LC = LibCallTable[2][Is64Bit];
+ break;
+ }
+
+ ArgListTy Args;
+ Args.reserve(II->getNumArgOperands());
+
+ // Populate the argument list.
+ for (auto &Arg : II->arg_operands()) {
+ ArgListEntry Entry;
+ Entry.Val = Arg;
+ Entry.Ty = Arg->getType();
+ Args.push_back(Entry);
+ }
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
+ TLI.getLibcallName(LC), std::move(Args));
+ if (!lowerCallTo(CLI))
+ return false;
+ updateValueMap(II, CLI.ResultReg);
+ return true;
+ }
+ case Intrinsic::fabs: {
+ MVT VT;
+ if (!isTypeLegal(II->getType(), VT))
+ return false;
+
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::f32:
+ Opc = AArch64::FABSSr;
+ break;
+ case MVT::f64:
+ Opc = AArch64::FABSDr;
+ break;
+ }
+ unsigned SrcReg = getRegForValue(II->getOperand(0));
+ if (!SrcReg)
+ return false;
+ bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(SrcReg, getKillRegState(SrcRegIsKill));
+ updateValueMap(II, ResultReg);
+ return true;
}
case Intrinsic::trap: {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
}
+ case Intrinsic::sqrt: {
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(II->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(II->getOperand(0));
+
+ unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ // This implements the basic lowering of the xalu with overflow intrinsics.
+ const Function *Callee = II->getCalledFunction();
+ auto *Ty = cast<StructType>(Callee->getReturnType());
+ Type *RetTy = Ty->getTypeAtIndex(0U);
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return false;
+
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(II))
+ std::swap(LHS, RHS);
+
+ // Simplify multiplies.
+ unsigned IID = II->getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::smul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2) {
+ IID = Intrinsic::sadd_with_overflow;
+ RHS = LHS;
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2) {
+ IID = Intrinsic::uadd_with_overflow;
+ RHS = LHS;
+ }
+ break;
+ }
+
+ unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
+ AArch64CC::CondCode CC = AArch64CC::Invalid;
+ switch (IID) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::sadd_with_overflow:
+ ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
+ CC = AArch64CC::VS;
+ break;
+ case Intrinsic::uadd_with_overflow:
+ ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
+ CC = AArch64CC::HS;
+ break;
+ case Intrinsic::ssub_with_overflow:
+ ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
+ CC = AArch64CC::VS;
+ break;
+ case Intrinsic::usub_with_overflow:
+ ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
+ CC = AArch64CC::LO;
+ break;
+ case Intrinsic::smul_with_overflow: {
+ CC = AArch64CC::NE;
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (VT == MVT::i32) {
+ MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
+ /*IsKill=*/false, 32);
+ MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
+ AArch64_AM::ASR, 31, /*WantResult=*/false);
+ } else {
+ assert(VT == MVT::i64 && "Unexpected value type.");
+ MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
+ AArch64_AM::ASR, 63, /*WantResult=*/false);
+ }
+ break;
+ }
+ case Intrinsic::umul_with_overflow: {
+ CC = AArch64CC::NE;
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (VT == MVT::i32) {
+ MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
+ /*IsKill=*/false, AArch64_AM::LSR, 32,
+ /*WantResult=*/false);
+ MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ } else {
+ assert(VT == MVT::i64 && "Unexpected value type.");
+ MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
+ /*IsKill=*/false, /*WantResult=*/false);
+ }
+ break;
+ }
+ }
+
+ if (MulReg) {
+ ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
+ }
+
+ ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
+ AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
+ /*IsKill=*/true, getInvertedCondCode(CC));
+ (void)ResultReg2;
+ assert((ResultReg1 + 1) == ResultReg2 &&
+ "Nonconsecutive result registers.");
+ updateValueMap(II, ResultReg1, 2);
+ return true;
+ }
}
return false;
}
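
The multiply-with-overflow checks above compare the high half of the widened product against what a non-overflowing result would produce. The same checks written with plain 64-bit arithmetic, a standalone sketch assuming 32-bit operands (not the emitted MIR):

    #include <cstdint>

    bool smul32Overflows(int32_t a, int32_t b) {
      int64_t p = (int64_t)a * (int64_t)b;     // SMULL
      // Emitted code: SUBS (p >> 32), (low32(p) ASR #31); NE means the high
      // half is not just the sign of the low half, i.e. the product overflowed.
      return p != (int64_t)(int32_t)p;
    }

    bool umul32Overflows(uint32_t a, uint32_t b) {
      uint64_t p = (uint64_t)a * (uint64_t)b;  // UMULL
      // Emitted code: SUBS XZR, (p LSR #32); NE means some high bit is set.
      return (p >> 32) != 0;
    }
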
-bool AArch64FastISel::SelectRet(const Instruction *I) {
+bool AArch64FastISel::selectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
@@ -1572,8 +3645,7 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
- I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
CCInfo.AnalyzeReturn(Outs, RetCC);
@@ -1586,11 +3658,14 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
const Value *RV = Ret->getOperand(0);
// Don't bother handling odd stuff for now.
- if (VA.getLocInfo() != CCValAssign::Full)
+ if ((VA.getLocInfo() != CCValAssign::Full) &&
+ (VA.getLocInfo() != CCValAssign::BCvt))
return false;
+
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
+
unsigned Reg = getRegForValue(RV);
if (Reg == 0)
return false;
@@ -1606,12 +3681,14 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
return false;
// Vectors (of > 1 lane) in big endian need tricky handling.
- if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
+ if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
+ !Subtarget->isLittleEndian())
return false;
MVT RVVT = RVEVT.getSimpleVT();
if (RVVT == MVT::f128)
return false;
+
MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
@@ -1621,8 +3698,8 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
return false;
- bool isZExt = Outs[0].Flags.isZExt();
- SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
+ bool IsZExt = Outs[0].Flags.isZExt();
+ SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
if (SrcReg == 0)
return false;
}
@@ -1642,7 +3719,7 @@ bool AArch64FastISel::SelectRet(const Instruction *I) {
return true;
}
-bool AArch64FastISel::SelectTrunc(const Instruction *I) {
+bool AArch64FastISel::selectTrunc(const Instruction *I) {
Type *DestTy = I->getType();
Value *Op = I->getOperand(0);
Type *SrcTy = Op->getType();
@@ -1667,10 +3744,14 @@ bool AArch64FastISel::SelectTrunc(const Instruction *I) {
unsigned SrcReg = getRegForValue(Op);
if (!SrcReg)
return false;
+ bool SrcIsKill = hasTrivialKill(Op);
// If we're truncating from i64 to a smaller non-legal type then generate an
- // AND. Otherwise, we know the high bits are undefined and a truncate doesn't
- // generate any code.
+  // AND. Otherwise, we know the high bits are undefined and a truncate only
+  // generates a COPY. We cannot also mark the source register as the result
+  // register, because this can incorrectly transfer the kill flag onto the
+  // source register.
+ unsigned ResultReg;
if (SrcVT == MVT::i64) {
uint64_t Mask = 0;
switch (DestVT.SimpleTy) {
@@ -1688,23 +3769,23 @@ bool AArch64FastISel::SelectTrunc(const Instruction *I) {
break;
}
// Issue an extract_subreg to get the lower 32-bits.
- unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
+ unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
AArch64::sub_32);
- MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
// Create the AND instruction which performs the actual truncation.
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(Reg32)
- .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
- SrcReg = ANDReg;
+ ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
+ assert(ResultReg && "Unexpected AND instruction emission failure.");
+ } else {
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
}
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, ResultReg);
return true;
}
-unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
+unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
DestVT == MVT::i64) &&
"Unexpected value type.");
@@ -1712,14 +3793,9 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
if (DestVT == MVT::i8 || DestVT == MVT::i16)
DestVT = MVT::i32;
- if (isZExt) {
- MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
- unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ResultReg)
- .addReg(SrcReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
-
+ if (IsZExt) {
+ unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
+ assert(ResultReg && "Unexpected AND instruction emission failure.");
if (DestVT == MVT::i64) {
// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
// upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
@@ -1737,18 +3813,389 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
// FIXME: We're SExt i1 to i64.
return 0;
}
- unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
- ResultReg)
- .addReg(SrcReg)
+ return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
+ /*TODO:IsKill=*/false, 0, 0);
+ }
+}
+
+unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned Opc, ZReg;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ RetVT = MVT::i32;
+ Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
+ case MVT::i64:
+ Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
+ /*IsKill=*/ZReg, true);
+}
+
+unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ if (RetVT != MVT::i64)
+ return 0;
+
+ return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
+ Op0, Op0IsKill, Op1, Op1IsKill,
+ AArch64::XZR, /*IsKill=*/true);
+}
+
+unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ if (RetVT != MVT::i64)
+ return 0;
+
+ return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
+ Op0, Op0IsKill, Op1, Op1IsKill,
+ AArch64::XZR, /*IsKill=*/true);
+}
+
+unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::LSLVWr; break;
+ case MVT::i64: Opc = AArch64::LSLVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (NeedTrunc) {
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op1IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<32+s-r,32-r> = Wn<s:0> when r > s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 4
+ // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
+ // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
+ // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
+ // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 8
+ // Wd<32+7-24,32-24> = Wn<7:0>
+ // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
+ // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
+ // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 12
+ // Wd<32+3-20,32-20> = Wn<3:0>
+ // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
+ // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
+
+ unsigned ImmR = RegSize - Shift;
+ // Limit the width to the length of the source type.
+ unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
.addImm(0)
- .addImm(0);
- return ResultReg;
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
+ }
+ return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+}
+
+unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::LSRVWr; break;
+ case MVT::i64: Opc = AArch64::LSRVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (NeedTrunc) {
+ Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op0IsKill = Op1IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<s-r:0> = Wn<s:r> when r <= s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 4
+ // Wd<7-4:0> = Wn<7:4>
+ // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
+ // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 8
+ // Wd<7-7,0> = Wn<7:7>
+ // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 12
+ // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
+ // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ if (Shift >= SrcBits && IsZExt)
+ return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
+
+ // It is not possible to fold a sign-extend into the LShr instruction. In this
+ // case emit a sign-extend.
+ if (!IsZExt) {
+ Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ if (!Op0)
+ return 0;
+ Op0IsKill = true;
+ SrcVT = RetVT;
+ SrcBits = SrcVT.getSizeInBits();
+ IsZExt = true;
+ }
+
+ unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
+ unsigned ImmS = SrcBits - 1;
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
+ .addImm(0)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
+ }
+ return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+}
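
A quick standalone check of the folding described in the comment block above, for the zero-extend case: UBFM with ImmR = min(SrcBits-1, Shift) and ImmS = SrcBits-1 extracts exactly the bits a zext-then-lshr would leave. This is a sketch; UBFM is modelled here only in its r <= s bitfield-extract form:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // UBFM Wd, Wn, #r, #s with r <= s: place bits s..r of Wn at bit 0 of Wd.
    static uint32_t ubfx(uint32_t wn, unsigned r, unsigned s) {
      unsigned width = s - r + 1;
      uint32_t mask = (width >= 32) ? ~0u : ((1u << width) - 1u);
      return (wn >> r) & mask;
    }

    int main() {
      const unsigned SrcBits = 8;        // i8 source
      uint8_t v = 0x55;                  // 0b0101_0101
      for (unsigned Shift = 1; Shift < SrcBits; ++Shift) {
        unsigned ImmR = std::min(SrcBits - 1, Shift);
        unsigned ImmS = SrcBits - 1;
        uint32_t viaZextLshr = (uint32_t)(uint16_t)v >> Shift;  // zext i8->i16, lshr
        assert(viaZextLshr == ubfx(v, ImmR, ImmS));
      }
      return 0;
    }
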
+
+unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::ASRVWr; break;
+ case MVT::i64: Opc = AArch64::ASRVXr; break;
+ }
+
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (NeedTrunc) {
+ Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
+ Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op0IsKill = Op1IsKill = true;
}
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
}
-unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
- bool isZExt) {
+unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<s-r:0> = Wn<s:r> when r <= s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 4
+ // Wd<7-4:0> = Wn<7:4>
+ // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
+ // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 8
+ // Wd<7-7:0> = Wn<7:7>
+ // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 12
+ // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
+ // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ if (Shift >= SrcBits && IsZExt)
+ return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
+
+ unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
+ unsigned ImmS = SrcBits - 1;
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
+ .addImm(0)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
+ }
+ return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+}
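For the arithmetic-shift case the interesting detail is the clamp of ImmR to SrcBits - 1: shifting a sign-extended i8 right by 12 must still produce a value made entirely of copies of the sign bit, and SBFM with r = s = 7 does exactly that. A minimal check of the equivalence follows; sbfmBit7 is a hypothetical helper, and the example assumes the usual arithmetic behaviour of >> on negative values.

#include <cassert>
#include <cstdint>

// Model of SBFM on a 32-bit register with r == s == 7: bit 7 of Wn lands in
// bit 0 of Wd and is then sign-replicated through the rest of the register.
static int32_t sbfmBit7(uint8_t Wn) {
  return (Wn & 0x80) ? -1 : 0;
}

int main() {
  // %1 = sext i8 0b1010_1010 to i16 ; %2 = ashr i16 %1, 12
  int8_t Src = (int8_t)0xAA;
  int16_t Extended = (int16_t)Src;                // 0xFFAA
  // Assumes arithmetic right shift of negative values (true on common targets).
  int16_t Shifted = (int16_t)(Extended >> 12);    // all sign bits: 0xFFFF
  assert(Shifted == (int16_t)sbfmBit7((uint8_t)Src));
  return 0;
}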
+
+unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ bool IsZExt) {
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
// FastISel does not have plumbing to deal with extensions where the SrcVT or
@@ -1768,24 +4215,24 @@ unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
default:
return 0;
case MVT::i1:
- return Emiti1Ext(SrcReg, DestVT, isZExt);
+ return emiti1Ext(SrcReg, DestVT, IsZExt);
case MVT::i8:
if (DestVT == MVT::i64)
- Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
+ Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
else
- Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
+ Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
Imm = 7;
break;
case MVT::i16:
if (DestVT == MVT::i64)
- Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
+ Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
else
- Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
+ Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
Imm = 15;
break;
case MVT::i32:
assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
- Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
+ Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
Imm = 31;
break;
}
@@ -1803,45 +4250,167 @@ unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
SrcReg = Src64;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(SrcReg)
- .addImm(0)
- .addImm(Imm);
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
+}
- return ResultReg;
+static bool isZExtLoad(const MachineInstr *LI) {
+ switch (LI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURWi:
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRWui:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRWroX:
+ case AArch64::LDRBBroW:
+ case AArch64::LDRHHroW:
+ case AArch64::LDRWroW:
+ return true;
+ }
}
-bool AArch64FastISel::SelectIntExt(const Instruction *I) {
- // On ARM, in general, integer casts don't involve legal types; this code
- // handles promotable integers. The high bits for a type smaller than
- // the register size are assumed to be undefined.
- Type *DestTy = I->getType();
- Value *Src = I->getOperand(0);
- Type *SrcTy = Src->getType();
+static bool isSExtLoad(const MachineInstr *LI) {
+ switch (LI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSWi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSWui:
+ case AArch64::LDRSBWroX:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRSBWroW:
+ case AArch64::LDRSHWroW:
+ case AArch64::LDRSBXroW:
+ case AArch64::LDRSHXroW:
+ case AArch64::LDRSWroW:
+ return true;
+ }
+}
- bool isZExt = isa<ZExtInst>(I);
- unsigned SrcReg = getRegForValue(Src);
- if (!SrcReg)
+bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
+ MVT SrcVT) {
+ const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
+ if (!LI || !LI->hasOneUse())
return false;
- EVT SrcEVT = TLI.getValueType(SrcTy, true);
- EVT DestEVT = TLI.getValueType(DestTy, true);
- if (!SrcEVT.isSimple())
+ // Check if the load instruction has already been selected.
+ unsigned Reg = lookUpRegForValue(LI);
+ if (!Reg)
return false;
- if (!DestEVT.isSimple())
+
+ MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI)
return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
- MVT DestVT = DestEVT.getSimpleVT();
- unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
- if (ResultReg == 0)
+ // Check if the correct load instruction has been emitted - SelectionDAG might
+ // have emitted a zero-extending load, but we need a sign-extending load.
+ bool IsZExt = isa<ZExtInst>(I);
+ const auto *LoadMI = MI;
+ if (LoadMI->getOpcode() == TargetOpcode::COPY &&
+ LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
+ unsigned LoadReg = MI->getOperand(1).getReg();
+ LoadMI = MRI.getUniqueVRegDef(LoadReg);
+ assert(LoadMI && "Expected valid instruction");
+ }
+ if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
+ return false;
+
+ // Nothing to be done.
+ if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ if (IsZExt) {
+ unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), Reg64)
+ .addImm(0)
+ .addReg(Reg, getKillRegState(true))
+ .addImm(AArch64::sub_32);
+ Reg = Reg64;
+ } else {
+ assert((MI->getOpcode() == TargetOpcode::COPY &&
+ MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
+ "Expected copy instruction");
+ Reg = MI->getOperand(1).getReg();
+ MI->eraseFromParent();
+ }
+ updateValueMap(I, Reg);
+ return true;
+}
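The check above relies on the W-register loads listed in isZExtLoad already zeroing the upper bits, and on the LDRS* variants already producing the sign-extended value, so the IR-level extend can usually be folded away; only widening to i64 needs the implicitly zero upper half spelled out with SUBREG_TO_REG. A small illustration of that invariant in plain C++, not patch code:

#include <cassert>
#include <cstdint>

int main() {
  // An AArch64 byte load writes the zero-extended byte into a full W register,
  // so a later zext i8 -> i32 of that load needs no extra instruction.
  uint8_t Byte = 0xAB;
  uint32_t W = Byte;                     // what the LDRB already produced
  assert(W == 0x000000ABu);
  // Widening further to i64 only needs the upper 32 bits declared zero, which
  // SUBREG_TO_REG expresses without emitting a real machine instruction.
  uint64_t X = W;
  assert(X == 0x00000000000000ABull);
  return 0;
}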
+
+bool AArch64FastISel::selectIntExt(const Instruction *I) {
+ assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ "Unexpected integer extend instruction.");
+ MVT RetVT;
+ MVT SrcVT;
+ if (!isTypeSupported(I->getType(), RetVT))
+ return false;
+
+ if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
+ return false;
+
+ // Try to optimize already sign-/zero-extended values from load instructions.
+ if (optimizeIntExtLoad(I, RetVT, SrcVT))
+ return true;
+
+ unsigned SrcReg = getRegForValue(I->getOperand(0));
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Try to optimize already sign-/zero-extended values from function arguments.
+ bool IsZExt = isa<ZExtInst>(I);
+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
+ if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
+ if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(AArch64::sub_32);
+ SrcReg = ResultReg;
+ }
+ // Conservatively clear all kill flags from all uses, because we are
+ // replacing a sign-/zero-extend instruction at IR level with a nop at MI
+ // level. The result of the instruction at IR level might have been
+ // trivially dead, which is no longer true.
+ unsigned UseReg = lookUpRegForValue(I);
+ if (UseReg)
+ MRI.clearKillFlags(UseReg);
+
+ updateValueMap(I, SrcReg);
+ return true;
+ }
+ }
+
+ unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
+ if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+
+ updateValueMap(I, ResultReg);
return true;
}
-bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
+bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
EVT DestEVT = TLI.getValueType(I->getType(), true);
if (!DestEVT.isSimple())
return false;
@@ -1851,144 +4420,529 @@ bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
return false;
unsigned DivOpc;
- bool is64bit = (DestVT == MVT::i64);
+ bool Is64bit = (DestVT == MVT::i64);
switch (ISDOpcode) {
default:
return false;
case ISD::SREM:
- DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
+ DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
break;
case ISD::UREM:
- DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
+ DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
break;
}
- unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
+ unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
+ bool Src0IsKill = hasTrivialKill(I->getOperand(0));
unsigned Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
+ bool Src1IsKill = hasTrivialKill(I->getOperand(1));
- unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
- .addReg(Src0Reg)
- .addReg(Src1Reg);
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
+ Src1Reg, /*IsKill=*/false);
+ assert(QuotReg && "Unexpected DIV instruction emission failure.");
// The remainder is computed as numerator - (quotient * denominator) using the
// MSUB instruction.
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
- .addReg(QuotReg)
- .addReg(Src1Reg)
- .addReg(Src0Reg);
- UpdateValueMap(I, ResultReg);
+ unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
+ Src1Reg, Src1IsKill, Src0Reg,
+ Src0IsKill);
+ updateValueMap(I, ResultReg);
return true;
}
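Since AArch64 has no hardware remainder instruction, the code above rebuilds SREM/UREM from a divide plus MSUB (Rd = Ra - Rn * Rm). A small C++ model of that identity, purely illustrative and not part of the patch:

#include <cassert>
#include <cstdint>

// remainder = numerator - (quotient * denominator), i.e. SDIV/UDIV followed by
// MSUB ResultReg, QuotReg, Src1Reg, Src0Reg as emitted above.
static int64_t sremViaMsub(int64_t Num, int64_t Den) {
  int64_t Quot = Num / Den; // SDIVXr
  return Num - Quot * Den;  // MSUBXrrr
}

int main() {
  assert(sremViaMsub(23, 7) == 23 % 7);
  assert(sremViaMsub(-23, 7) == -23 % 7);
  assert(sremViaMsub(23, -7) == 23 % -7);
  return 0;
}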
-bool AArch64FastISel::SelectMul(const Instruction *I) {
- EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
- if (!SrcEVT.isSimple())
+bool AArch64FastISel::selectMul(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
- // Must be simple value type. Don't handle vectors.
- if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
- SrcVT != MVT::i8)
+ if (VT.isVector())
+ return selectBinaryOp(I, ISD::MUL);
+
+ const Value *Src0 = I->getOperand(0);
+ const Value *Src1 = I->getOperand(1);
+ if (const auto *C = dyn_cast<ConstantInt>(Src0))
+ if (C->getValue().isPowerOf2())
+ std::swap(Src0, Src1);
+
+ // Try to simplify to a shift instruction.
+ if (const auto *C = dyn_cast<ConstantInt>(Src1))
+ if (C->getValue().isPowerOf2()) {
+ uint64_t ShiftVal = C->getValue().logBase2();
+ MVT SrcVT = VT;
+ bool IsZExt = true;
+ if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
+ if (!isIntExtFree(ZExt)) {
+ MVT VT;
+ if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
+ SrcVT = VT;
+ IsZExt = true;
+ Src0 = ZExt->getOperand(0);
+ }
+ }
+ } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
+ if (!isIntExtFree(SExt)) {
+ MVT VT;
+ if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
+ SrcVT = VT;
+ IsZExt = false;
+ Src0 = SExt->getOperand(0);
+ }
+ }
+ }
+
+ unsigned Src0Reg = getRegForValue(Src0);
+ if (!Src0Reg)
+ return false;
+ bool Src0IsKill = hasTrivialKill(Src0);
+
+ unsigned ResultReg =
+ emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
+
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Src0Reg = getRegForValue(I->getOperand(0));
+ if (!Src0Reg)
+ return false;
+ bool Src0IsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned Src1Reg = getRegForValue(I->getOperand(1));
+ if (!Src1Reg)
+ return false;
+ bool Src1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
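The power-of-two path above turns a multiply into a left shift, and it also tries to look through a free zext/sext so the shift can extend from the narrower source width. A hedged sketch of just the arithmetic; the variable names are made up:

#include <cassert>
#include <cstdint>

int main() {
  // %res = mul i32 %x, 8  -->  emitLSL_ri(..., ShiftVal = log2(8) = 3, ...)
  uint32_t X = 0x1234;
  uint64_t C = 8;
  unsigned ShiftVal = 3;          // C.logBase2() for a power of two
  assert((C & (C - 1)) == 0);     // C.isPowerOf2()
  assert(X * C == ((uint64_t)X << ShiftVal));
  return 0;
}

The swap of Src0 and Src1 earlier in selectMul simply ensures the power-of-two constant ends up on the right-hand side so this shortcut can fire.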
+
+bool AArch64FastISel::selectShift(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
+ return false;
+
+ if (RetVT.isVector())
+ return selectOperator(I, I->getOpcode());
+
+ if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = 0;
+ uint64_t ShiftVal = C->getZExtValue();
+ MVT SrcVT = RetVT;
+ bool IsZExt = (I->getOpcode() != Instruction::AShr);
+ const Value *Op0 = I->getOperand(0);
+ if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
+ if (!isIntExtFree(ZExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = true;
+ Op0 = ZExt->getOperand(0);
+ }
+ }
+ } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
+ if (!isIntExtFree(SExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = false;
+ Op0 = SExt->getOperand(0);
+ }
+ }
+ }
+
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(Op0);
+
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ case Instruction::AShr:
+ ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ case Instruction::LShr:
+ ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ }
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (!Op1Reg)
+ return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ case Instruction::AShr:
+ ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ case Instruction::LShr:
+ ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ }
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectBitCast(const Instruction *I) {
+ MVT RetVT, SrcVT;
+
+ if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
+ return false;
+ if (!isTypeLegal(I->getType(), RetVT))
return false;
unsigned Opc;
- unsigned ZReg;
- switch (SrcVT.SimpleTy) {
+ if (RetVT == MVT::f32 && SrcVT == MVT::i32)
+ Opc = AArch64::FMOVWSr;
+ else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
+ Opc = AArch64::FMOVXDr;
+ else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
+ Opc = AArch64::FMOVSWr;
+ else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
+ Opc = AArch64::FMOVDXr;
+ else
+ return false;
+
+ const TargetRegisterClass *RC = nullptr;
+ switch (RetVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i32: RC = &AArch64::GPR32RegClass; break;
+ case MVT::i64: RC = &AArch64::GPR64RegClass; break;
+ case MVT::f32: RC = &AArch64::FPR32RegClass; break;
+ case MVT::f64: RC = &AArch64::FPR64RegClass; break;
+ }
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+ unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectFRem(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
+ RTLIB::Libcall LC;
+ switch (RetVT.SimpleTy) {
default:
return false;
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- ZReg = AArch64::WZR;
- Opc = AArch64::MADDWrrr;
- SrcVT = MVT::i32;
+ case MVT::f32:
+ LC = RTLIB::REM_F32;
break;
- case MVT::i64:
- ZReg = AArch64::XZR;
- Opc = AArch64::MADDXrrr;
+ case MVT::f64:
+ LC = RTLIB::REM_F64;
break;
}
+ ArgListTy Args;
+ Args.reserve(I->getNumOperands());
+
+ // Populate the argument list.
+ for (auto &Arg : I->operands()) {
+ ArgListEntry Entry;
+ Entry.Val = Arg;
+ Entry.Ty = Arg->getType();
+ Args.push_back(Entry);
+ }
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
+ TLI.getLibcallName(LC), std::move(Args));
+ if (!lowerCallTo(CLI))
+ return false;
+ updateValueMap(I, CLI.ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectSDiv(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ if (!isa<ConstantInt>(I->getOperand(1)))
+ return selectBinaryOp(I, ISD::SDIV);
+
+ const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
+ !(C.isPowerOf2() || (-C).isPowerOf2()))
+ return selectBinaryOp(I, ISD::SDIV);
+
+ unsigned Lg2 = C.countTrailingZeros();
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
+ bool Src0IsKill = hasTrivialKill(I->getOperand(0));
- unsigned Src1Reg = getRegForValue(I->getOperand(1));
- if (!Src1Reg)
+ if (cast<BinaryOperator>(I)->isExact()) {
+ unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
+ if (!ResultReg)
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
+ unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
+ if (!AddReg)
return false;
- // Create the base instruction, then add the operands.
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Src0Reg)
- .addReg(Src1Reg)
- .addReg(ZReg);
- UpdateValueMap(I, ResultReg);
+ // (Src0 < 0) ? Pow2 - 1 : 0;
+ if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
+ return false;
+
+ unsigned SelectOpc;
+ const TargetRegisterClass *RC;
+ if (VT == MVT::i64) {
+ SelectOpc = AArch64::CSELXr;
+ RC = &AArch64::GPR64RegClass;
+ } else {
+ SelectOpc = AArch64::CSELWr;
+ RC = &AArch64::GPR32RegClass;
+ }
+ unsigned SelectReg =
+ fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
+ Src0IsKill, AArch64CC::LT);
+ if (!SelectReg)
+ return false;
+
+ // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
+ // negate the result.
+ unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ unsigned ResultReg;
+ if (C.isNegative())
+ ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
+ SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
+ else
+ ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
return true;
}
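The signed-divide-by-power-of-two sequence above (add Pow2 - 1, compare against zero, CSEL on LT, then ASR, with an optional negate for negative divisors) rounds toward zero the way sdiv requires. A compact C++ model of the sequence, assuming 32-bit operands and arithmetic right shift of negative values; the names are illustrative only:

#include <cassert>
#include <cstdint>

// Mirrors: AddReg = Src0 + (2^Lg2 - 1); CSEL picks AddReg when Src0 < 0;
// the selected value is ASR'd by Lg2 and negated if the divisor was negative
// (the SUB-from-XZR-with-ASR-shift form emitted above).
static int32_t sdivByPow2(int32_t Src0, unsigned Lg2, bool NegativeDivisor) {
  int32_t AddReg = Src0 + ((1 << Lg2) - 1);        // emitAdd_ri_
  int32_t SelectReg = (Src0 < 0) ? AddReg : Src0;  // CSELWr ..., LT
  int32_t Res = SelectReg >> Lg2;                  // emitASR_ri
  return NegativeDivisor ? -Res : Res;
}

int main() {
  assert(sdivByPow2(-7, 2, false) == -7 / 4);  // -1, rounds toward zero
  assert(sdivByPow2(7, 2, false) == 7 / 4);    //  1
  assert(sdivByPow2(-7, 2, true) == -7 / -4);  //  1
  return 0;
}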
-bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
+/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
+/// have to duplicate it for AArch64, because otherwise we would fail during the
+/// sign-extend emission.
+std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
+ IdxNIsKill = true;
+ } else if (IdxVT.bitsGT(PtrVT))
+ llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+/// This is mostly a copy of the existing FastISel GEP code, but we have to
+/// duplicate it for AArch64, because otherwise we would bail out even for
+/// simple cases. This is because the standard fastEmit functions don't cover
+/// MUL at all and ADD is lowered very inefficiently.
+bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (!N)
+ return false;
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (auto *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ // N = N + Offset
+ if (Field)
+ TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+ // If this is a constant subscript, handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ // N = N + Offset
+ TotalOffs +=
+ DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+ continue;
+ }
+ if (TotalOffs) {
+ N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+ if (!N)
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (!IdxN)
+ return false;
+
+ if (ElementSize != 1) {
+ unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
+ if (!C)
+ return false;
+ IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
+ if (!IdxN)
+ return false;
+ IdxNIsKill = true;
+ }
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (!N)
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+ if (!N)
+ return false;
+ }
+ updateValueMap(I, N);
+ return true;
+}
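The GEP lowering above folds all constant indices into a single running TotalOffs and materializes only one ADD for them, while each variable index is scaled by the element size with a multiply. A small model of the resulting address arithmetic, with a hypothetical struct layout standing in for what DataLayout would report:

#include <cassert>
#include <cstdint>

int main() {
  // %p = getelementptr {i32, i32, [8 x i64]}, ptr %base, i64 0, i32 2, i64 %i
  uint64_t Base = 0x1000;
  uint64_t FieldOffset = 8;  // assumed offset of the [8 x i64] member
  uint64_t ElementSize = 8;  // sizeof(i64)
  uint64_t I = 3;            // a variable index
  // Constant parts accumulate in TotalOffs; the variable index needs MUL + ADD.
  uint64_t TotalOffs = FieldOffset;
  uint64_t Addr = Base + TotalOffs + I * ElementSize;
  assert(Addr == 0x1000 + 8 + 3 * 8);
  return 0;
}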
+
+bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
break;
- case Instruction::Load:
- return SelectLoad(I);
- case Instruction::Store:
- return SelectStore(I);
+ case Instruction::Add:
+ case Instruction::Sub:
+ return selectAddSub(I);
+ case Instruction::Mul:
+ return selectMul(I);
+ case Instruction::SDiv:
+ return selectSDiv(I);
+ case Instruction::SRem:
+ if (!selectBinaryOp(I, ISD::SREM))
+ return selectRem(I, ISD::SREM);
+ return true;
+ case Instruction::URem:
+ if (!selectBinaryOp(I, ISD::UREM))
+ return selectRem(I, ISD::UREM);
+ return true;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return selectShift(I);
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return selectLogicalOp(I);
case Instruction::Br:
- return SelectBranch(I);
+ return selectBranch(I);
case Instruction::IndirectBr:
- return SelectIndirectBr(I);
- case Instruction::FCmp:
- case Instruction::ICmp:
- return SelectCmp(I);
- case Instruction::Select:
- return SelectSelect(I);
- case Instruction::FPExt:
- return SelectFPExt(I);
- case Instruction::FPTrunc:
- return SelectFPTrunc(I);
+ return selectIndirectBr(I);
+ case Instruction::BitCast:
+ if (!FastISel::selectBitCast(I))
+ return selectBitCast(I);
+ return true;
case Instruction::FPToSI:
- return SelectFPToInt(I, /*Signed=*/true);
+ if (!selectCast(I, ISD::FP_TO_SINT))
+ return selectFPToInt(I, /*Signed=*/true);
+ return true;
case Instruction::FPToUI:
- return SelectFPToInt(I, /*Signed=*/false);
+ return selectFPToInt(I, /*Signed=*/false);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return selectIntExt(I);
+ case Instruction::Trunc:
+ if (!selectCast(I, ISD::TRUNCATE))
+ return selectTrunc(I);
+ return true;
+ case Instruction::FPExt:
+ return selectFPExt(I);
+ case Instruction::FPTrunc:
+ return selectFPTrunc(I);
case Instruction::SIToFP:
- return SelectIntToFP(I, /*Signed=*/true);
+ if (!selectCast(I, ISD::SINT_TO_FP))
+ return selectIntToFP(I, /*Signed=*/true);
+ return true;
case Instruction::UIToFP:
- return SelectIntToFP(I, /*Signed=*/false);
- case Instruction::SRem:
- return SelectRem(I, ISD::SREM);
- case Instruction::URem:
- return SelectRem(I, ISD::UREM);
- case Instruction::Call:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- return SelectIntrinsicCall(*II);
- return SelectCall(I);
+ return selectIntToFP(I, /*Signed=*/false);
+ case Instruction::Load:
+ return selectLoad(I);
+ case Instruction::Store:
+ return selectStore(I);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ return selectCmp(I);
+ case Instruction::Select:
+ return selectSelect(I);
case Instruction::Ret:
- return SelectRet(I);
- case Instruction::Trunc:
- return SelectTrunc(I);
- case Instruction::ZExt:
- case Instruction::SExt:
- return SelectIntExt(I);
- case Instruction::Mul:
- // FIXME: This really should be handled by the target-independent selector.
- return SelectMul(I);
+ return selectRet(I);
+ case Instruction::FRem:
+ return selectFRem(I);
+ case Instruction::GetElementPtr:
+ return selectGetElementPtr(I);
}
- return false;
+
+ // fall-back to target-independent instruction selection.
+ return selectOperator(I, I->getOpcode());
// Silence warnings.
(void)&CC_AArch64_DarwinPCS_VarArg;
}
namespace llvm {
-llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo) {
- return new AArch64FastISel(funcInfo, libInfo);
+llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) {
+ return new AArch64FastISel(FuncInfo, LibInfo);
}
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 9c33717..a7779d6 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -17,16 +17,16 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -86,13 +86,14 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
#ifndef NDEBUG
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
assert(!RegInfo->needsStackRealignment(MF) &&
"No stack realignment on AArch64!");
#endif
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
+ MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
+ MFI->hasPatchPoint());
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -109,13 +110,13 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc DL = I->getDebugLoc();
int Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (!TFI->hasReservedCallFrame(MF)) {
unsigned Align = getStackAlignment();
@@ -131,7 +132,7 @@ void AArch64FrameLowering::eliminateCallFramePseudoInstr(
// FIXME: in-function stack adjustment for calls is limited to 24-bits
// because there's no guaranteed temporary register available.
//
- // ADD/SUB (immediate) has only LSL #0 and LSL #12 avaiable.
+ // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
// 1) For offset <= 12-bit, we use LSL #0
// 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
// LSL #0, and the other uses LSL #12.
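The 24-bit split described in this comment amounts to peeling the offset into a low 12-bit chunk (LSL #0) and a high 12-bit chunk (LSL #12). A quick illustration of the decomposition, not taken from the patch itself:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Offset = 0x123456;       // fits in 24 bits
  uint32_t Lo = Offset & 0xfff;     // ADD sp, sp, #Lo
  uint32_t Hi = Offset & 0xfff000;  // ADD sp, sp, #(Hi >> 12), LSL #12
  assert(Offset < (1u << 24) && Lo + Hi == Offset);
  return 0;
}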
@@ -158,7 +159,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
// Add callee saved registers to move list.
@@ -166,7 +167,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
if (CSI.empty())
return;
- const DataLayout *TD = MF.getTarget().getDataLayout();
+ const DataLayout *TD = MF.getSubtarget().getDataLayout();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
@@ -205,8 +206,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getTarget().getRegisterInfo());
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MF.getSubtarget().getRegisterInfo());
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
@@ -300,7 +301,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
if (needsFrameMoves) {
- const DataLayout *TD = MF.getTarget().getDataLayout();
+ const DataLayout *TD = MF.getSubtarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -435,9 +436,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
MachineFrameInfo *MFI = MF.getFrameInfo();
const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getTarget().getRegisterInfo());
+ MF.getSubtarget().getRegisterInfo());
DebugLoc DL = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
@@ -548,7 +549,7 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
bool PreferFP) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getTarget().getRegisterInfo());
+ MF.getSubtarget().getRegisterInfo());
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
int FPOffset = MFI->getObjectOffset(FI) + 16;
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
@@ -617,7 +618,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned Count = CSI.size();
DebugLoc DL;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
@@ -693,7 +694,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned Count = CSI.size();
DebugLoc DL;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
@@ -761,7 +762,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
MachineFunction &MF, RegScavenger *RS) const {
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getTarget().getRegisterInfo());
+ MF.getSubtarget().getRegisterInfo());
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineRegisterInfo *MRI = &MF.getRegInfo();
SmallVector<unsigned, 4> UnspilledCSGPRs;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 7686e6f..df3875f 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64_FRAMELOWERING_H
-#define AArch64_FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 3f49fab..87a6d80 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -303,7 +303,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
/// \brief Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // it hurts if the a value is used at least twice, unless we are optimizing
+ // it hurts if the value is used at least twice, unless we are optimizing
// for code size.
if (ForCodeSize || V.hasOneUse())
return true;
@@ -777,6 +777,21 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
return false;
}
+// Check if the given immediate is preferred by ADD. Return true if the
+// immediate can be encoded in an ADD, or if it can be encoded in an
+// "ADD LSL #12" and cannot be encoded by a single MOVZ.
+static bool isPreferredADD(int64_t ImmOff) {
+ // Constant in [0x0, 0xfff] can be encoded in ADD.
+ if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+ return true;
+ // Check if it can be encoded in an "ADD LSL #12".
+ if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
+ // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
+ return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+ (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+ return false;
+}
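A few concrete values make the predicate above easier to read: immediates up to 0xfff go straight into an ADD, immediates that occupy only bits 12-15 or only bits 16-23 are better served by a single MOVZ, and immediates that straddle both ranges prefer the ADD LSL #12 form. Below is a hedged restatement of the same checks in standalone C++ so the example values can be verified; it is not part of the patch.

#include <cassert>
#include <cstdint>

// Same logic as isPreferredADD above, restated outside the patch.
static bool preferredADD(int64_t ImmOff) {
  if ((ImmOff & 0xfffffffffffff000LL) == 0)         // plain ADD immediate
    return true;
  if ((ImmOff & 0xffffffffff000fffLL) == 0)         // "ADD LSL #12" range
    return (ImmOff & 0xffffffffff00ffffLL) != 0 &&  // not a MOVZ ..., LSL #16
           (ImmOff & 0xffffffffffff0fffLL) != 0;    // not a MOVZ ..., LSL #0
  return false;
}

int main() {
  assert(preferredADD(0x123));     // encodable in a single ADD
  assert(preferredADD(0x123000));  // uses both bit ranges -> ADD LSL #12 wins
  assert(!preferredADD(0x30000));  // MOVZ #3, LSL #16 is enough
  assert(!preferredADD(0x3000));   // MOVZ #0x3000 is enough
  return 0;
}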
+
bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
SDValue &Base, SDValue &Offset,
SDValue &SignExtend,
@@ -786,11 +801,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
SDValue LHS = N.getOperand(0);
SDValue RHS = N.getOperand(1);
- // We don't want to match immediate adds here, because they are better lowered
- // to the register-immediate addressing modes.
- if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
- return false;
-
// Check if this particular node is reused in any non-memory related
// operation. If yes, do not try to fold this node into the address
// computation, since the computation will be kept.
@@ -800,6 +810,36 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
return false;
}
+ // Watch out if RHS is a wide immediate: it cannot be selected into the
+ // [BaseReg+Imm] addressing mode, and it may not be encodable in ADD/SUB
+ // either. In that case the compiler would use the [BaseReg + 0] addressing
+ // mode and generate instructions like:
+ // MOV X0, WideImmediate
+ // ADD X1, BaseReg, X0
+ // LDR X2, [X1, 0]
+ // In such situations, using the [BaseReg, XReg] addressing mode saves one
+ // ADD/SUB:
+ // MOV X0, WideImmediate
+ // LDR X2, [BaseReg, X0]
+ if (isa<ConstantSDNode>(RHS)) {
+ int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue();
+ unsigned Scale = Log2_32(Size);
+ // Skip immediates that can be selected by the load/store addressing mode.
+ // Also skip immediates that can be encoded by a single ADD (SUB is also
+ // checked by using -ImmOff).
+ if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
+ isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+ return false;
+
+ SDLoc DL(N.getNode());
+ SDValue Ops[] = { RHS };
+ SDNode *MOVI =
+ CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
+ SDValue MOVIV = SDValue(MOVI, 0);
+ // This ADD of two X registers will be selected into the [Reg+Reg] mode.
+ N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
+ }
+
// Remember if it is worth folding N when it produces extended register.
bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
@@ -1381,20 +1421,21 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
return true;
}
-static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB) {
- // We are looking for the following pattern which basically extracts a single
- // bit from the source value and places it in the LSB of the destination
- // value, all other bits of the destination value or set to zero:
+static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
+ SDValue &Opd0, unsigned &LSB,
+ unsigned &MSB) {
+ // We are looking for the following pattern, which basically extracts several
+ // contiguous bits from the source value and places them at the LSB of the
+ // destination value; all other bits of the destination value are set to zero:
//
// Value2 = AND Value, MaskImm
// SRL Value2, ShiftImm
//
- // with MaskImm >> ShiftImm == 1.
+ // where MaskImm >> ShiftImm is a contiguous mask that gives the extracted bit width.
//
// This gets selected into a single UBFM:
//
- // UBFM Value, ShiftImm, ShiftImm
+ // UBFM Value, ShiftImm, BitWide + Srl_imm - 1
//
if (N->getOpcode() != ISD::SRL)
@@ -1410,15 +1451,16 @@ static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
if (!isIntImmediate(N->getOperand(1), Srl_imm))
return false;
- // Check whether we really have a one bit extract here.
- if (And_mask >> Srl_imm == 0x1) {
+ // Check whether we really have a contiguous multi-bit extract here.
+ unsigned BitWide = 64 - CountLeadingOnes_64(~(And_mask >> Srl_imm));
+ if (BitWide && isMask_64(And_mask >> Srl_imm)) {
if (N->getValueType(0) == MVT::i32)
Opc = AArch64::UBFMWri;
else
Opc = AArch64::UBFMXri;
- LSB = MSB = Srl_imm;
-
+ LSB = Srl_imm;
+ MSB = BitWide + Srl_imm - 1;
return true;
}
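A worked instance of the pattern handled above: (Value & 0xff0) >> 4 extracts eight contiguous bits, so BitWide is 8 and the UBFM copies bits 11..4 (LSB = Srl_imm = 4, MSB = BitWide + Srl_imm - 1 = 11). The following self-contained check of that arithmetic is only a sketch and uses __builtin_clzll in place of CountLeadingOnes_64:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t AndMask = 0xff0, SrlImm = 4, Value = 0xdeadbeef;
  uint64_t Field = AndMask >> SrlImm;             // 0xff, a low contiguous mask
  // BitWide = 64 - CountLeadingOnes_64(~Field); for a low mask this equals the
  // number of set bits, i.e. 64 - clz(Field).
  unsigned BitWide = 64 - __builtin_clzll(Field); // 8
  unsigned LSB = SrlImm;                          // 4
  unsigned MSB = BitWide + SrlImm - 1;            // 11
  // UBFM Value, #LSB, #MSB extracts bits MSB..LSB into the low bits.
  uint64_t Ubfm = (Value >> LSB) & ((1ull << BitWide) - 1);
  assert(Ubfm == ((Value & AndMask) >> SrlImm));
  assert(BitWide == 8 && LSB == 4 && MSB == 11);
  return 0;
}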
@@ -1439,8 +1481,8 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
assert((VT == MVT::i32 || VT == MVT::i64) &&
"Type checking must have been done before calling this function");
- // Check for AND + SRL doing a one bit extract.
- if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
+ // Check for AND + SRL doing several bits extract.
+ if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
return true;
// we're looking for a shift of a shift
@@ -2116,7 +2158,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case 32:
SubReg = AArch64::ssub;
break;
- case 16: // FALLTHROUGH
+ case 16:
+ SubReg = AArch64::hsub;
+ break;
case 8:
llvm_unreachable("unexpected zext-requiring extract element!");
}
@@ -2204,9 +2248,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
@@ -2222,9 +2266,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
@@ -2240,9 +2284,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
@@ -2258,9 +2302,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
@@ -2276,9 +2320,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
@@ -2294,9 +2338,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
@@ -2312,9 +2356,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
@@ -2330,9 +2374,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
@@ -2348,9 +2392,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
@@ -2364,7 +2408,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_ld2lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 2, AArch64::LD2i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 2, AArch64::LD2i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2376,7 +2421,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_ld3lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 3, AArch64::LD3i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 3, AArch64::LD3i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2388,7 +2434,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_ld4lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 4, AArch64::LD4i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 4, AArch64::LD4i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2448,9 +2495,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 2, AArch64::ST1Twov8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 2, AArch64::ST1Twov16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 2, AArch64::ST1Twov4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 2, AArch64::ST1Twov8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 2, AArch64::ST1Twov2s);
@@ -2467,9 +2514,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 3, AArch64::ST1Threev8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 3, AArch64::ST1Threev16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 3, AArch64::ST1Threev4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 3, AArch64::ST1Threev8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 3, AArch64::ST1Threev2s);
@@ -2486,9 +2533,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 4, AArch64::ST1Fourv8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 4, AArch64::ST1Fourv16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 4, AArch64::ST1Fourv4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 4, AArch64::ST1Fourv8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 4, AArch64::ST1Fourv2s);
@@ -2505,9 +2552,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 2, AArch64::ST2Twov8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 2, AArch64::ST2Twov16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 2, AArch64::ST2Twov4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 2, AArch64::ST2Twov8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 2, AArch64::ST2Twov2s);
@@ -2524,9 +2571,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 3, AArch64::ST3Threev8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 3, AArch64::ST3Threev16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 3, AArch64::ST3Threev4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 3, AArch64::ST3Threev8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 3, AArch64::ST3Threev2s);
@@ -2543,9 +2590,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectStore(Node, 4, AArch64::ST4Fourv8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 4, AArch64::ST4Fourv16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 4, AArch64::ST4Fourv4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 4, AArch64::ST4Fourv8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 4, AArch64::ST4Fourv2s);
@@ -2560,7 +2607,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_st2lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 2, AArch64::ST2i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 2, AArch64::ST2i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2573,7 +2621,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_st3lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 3, AArch64::ST3i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 3, AArch64::ST3i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2586,7 +2635,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_neon_st4lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 4, AArch64::ST4i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 4, AArch64::ST4i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2603,9 +2653,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
@@ -2622,9 +2672,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
@@ -2641,9 +2691,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
@@ -2660,9 +2710,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
@@ -2679,9 +2729,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
@@ -2698,9 +2748,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
@@ -2717,9 +2767,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
@@ -2736,9 +2786,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
@@ -2755,9 +2805,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
@@ -2774,9 +2824,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
@@ -2791,7 +2841,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64ISD::LD1LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2804,7 +2855,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64ISD::LD2LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2817,7 +2869,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64ISD::LD3LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2830,7 +2883,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64ISD::LD4LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2846,9 +2900,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
@@ -2866,9 +2920,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
@@ -2886,9 +2940,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
@@ -2906,9 +2960,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
@@ -2926,9 +2980,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
@@ -2946,9 +3000,9 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
@@ -2964,7 +3018,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2978,7 +3033,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
@@ -2992,7 +3048,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7c33423..7c94d83 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
-#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -38,10 +38,12 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
+namespace {
enum AlignMode {
StrictAlign,
NoStrictAlign
};
+}
static cl::opt<AlignMode>
Align(cl::desc("Load/store alignment support"),
@@ -64,18 +66,9 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
cl::desc("Allow AArch64 SLI/SRI formation"),
cl::init(false));
-//===----------------------------------------------------------------------===//
-// AArch64 Lowering public interface.
-//===----------------------------------------------------------------------===//
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- if (TT.isOSBinFormatMachO())
- return new AArch64_MachoTargetObjectFile();
- return new AArch64_ELFTargetObjectFile();
-}
-
-AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
+AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
@@ -106,6 +99,7 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
+ addDRTypeForNEON(MVT::v4f16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
@@ -113,6 +107,7 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
+ addQRTypeForNEON(MVT::v8f16);
}
// Compute derived properties from the register classes
@@ -278,6 +273,94 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ // f16 is storage-only, so we promote operations to f32 if we know this is
+ // valid, and ignore them otherwise. The operations not mentioned here will
+ // fail to select, but this is not a major problem as no source language
+ // should be emitting native f16 operations yet.
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+
+ // v4f16 is also a storage-only type, so promote it to v4f32 when that is
+ // known to be safe.
+ setOperationAction(ISD::FADD, MVT::v4f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
+ AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
+ AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
+
+ // Expand all other v4f16 operations.
+ // FIXME: We could generate better code by promoting some operations to
+ // a pair of v4f32s
+ setOperationAction(ISD::FABS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
+ setOperationAction(ISD::FREM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
+
+
+ // v8f16 is also a storage-only type, so expand it.
+ setOperationAction(ISD::FABS, MVT::v8f16, Expand);
+ setOperationAction(ISD::FADD, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
+ setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMA, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
+ setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
+ setOperationAction(ISD::FREM, MVT::v8f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
+ setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
+ setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
+
// AArch64 has implementations of a lot of rounding-like FP operations.
static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
@@ -305,6 +388,7 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
// AArch64 does not have floating-point extending loads, i1 sign-extending
// load, floating-point truncating stores, or v2i32->v2i16 truncating store.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
@@ -316,6 +400,10 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -434,6 +522,11 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // Custom handling for some quad-vector types to detect MULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
@@ -479,13 +572,13 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
}
void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
- if (VT == MVT::v2f32) {
+ if (VT == MVT::v2f32 || VT == MVT::v4f16) {
setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32);
setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32);
- } else if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64);
@@ -726,6 +819,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
+ case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
@@ -755,6 +849,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
+ case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
+ case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
}
}
@@ -773,7 +869,8 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
@@ -1019,6 +1116,8 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
+ SDValue Cmp;
+ AArch64CC::CondCode AArch64CC;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
@@ -1050,9 +1149,9 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
case ISD::SETLE:
case ISD::SETGT:
- if ((VT == MVT::i32 && C != 0x7fffffff &&
+ if ((VT == MVT::i32 && C != INT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
- (VT == MVT::i64 && C != 0x7ffffffffffffffULL &&
+ (VT == MVT::i64 && C != INT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
@@ -1061,9 +1160,9 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
case ISD::SETULE:
case ISD::SETUGT:
- if ((VT == MVT::i32 && C != 0xffffffff &&
+ if ((VT == MVT::i32 && C != UINT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
- (VT == MVT::i64 && C != 0xfffffffffffffffULL &&
+ (VT == MVT::i64 && C != UINT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
@@ -1073,9 +1172,45 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
-
- SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
- AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+ // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
+ // For the i8 operand, the largest immediate is 255, so this can be easily
+ // encoded in the compare instruction. For the i16 operand, however, the
+ // largest immediate cannot be encoded in the compare.
+ // Therefore, use a sign extending load and cmn to avoid materializing the -1
+ // constant. For example,
+ // movz w1, #65535
+ // ldrh w0, [x0, #0]
+ // cmp w0, w1
+ // >
+ // ldrsh w0, [x0, #0]
+ // cmn w0, #1
+ // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
+ // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
+ // both the LHS and RHS are truly zero extended and to make sure the
+ // transformation is profitable.
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
+ if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
+ isa<LoadSDNode>(LHS)) {
+ if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+ LHS.getNode()->hasNUsesOfValue(1, 0)) {
+ int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+ DAG.getValueType(MVT::i16));
+ Cmp = emitComparison(SExt,
+ DAG.getConstant(ValueofRHS, RHS.getValueType()),
+ CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
+ AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ return Cmp;
+ }
+ }
+ }
+ }
+ Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
return Cmp;
}
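For reference, here is a minimal source-level sketch (illustrative only, not part of this patch) of the kind of compare the cmn rewrite above improves, assuming a zero-extending 16-bit load tested for equality against a constant whose 16-bit truncation is a small negative value:

// Illustrative only: *p is a zero-extending i16 load compared against 65535.
// With the change above the backend can select ldrsh + cmn #1 instead of
// materializing 65535 with movz and doing a cmp.
#include <cstdint>
bool is_all_ones(const uint16_t *p) {
  return *p == 0xffffu;   // (zext i16) == 65535  <=>  (sext i16) == -1
}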
@@ -1332,8 +1467,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- // The data thing is not used.
- // unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
bool IsStream = !Locality;
// When the locality number is set
@@ -1348,6 +1482,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
// built the mask value encoding the expected behavior.
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
+ (!IsData << 3) | // IsDataCache bit
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
@@ -1399,7 +1534,10 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
if (VT.getSizeInBits() > InVT.getSizeInBits()) {
SDLoc dl(Op);
- SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0));
+ MVT ExtVT =
+ MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
+ VT.getVectorNumElements());
+ SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
@@ -1504,7 +1642,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
(ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
- StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0);
@@ -1513,12 +1651,221 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
return CallResult.first;
}
+static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getValueType() != MVT::f16)
+ return SDValue();
+
+ assert(Op.getOperand(0).getValueType() == MVT::i16);
+ SDLoc DL(Op);
+
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
+ return SDValue(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
+ DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+ 0);
+}
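A hedged aside, not part of the patch: the i16-to-f16 bitcast handled above typically arises from reinterpreting a 16-bit pattern as a half-precision value, assuming a compiler that provides the __fp16 storage extension:

// Illustrative only: the memcpy becomes a bitcast of i16 to half in IR,
// which LowerBITCAST expands via an i32 any_extend, an f32 bitcast and an
// EXTRACT_SUBREG of the hsub sub-register.
#include <cstdint>
#include <cstring>
__fp16 bits_to_half(uint16_t bits) {
  __fp16 h;
  std::memcpy(&h, &bits, sizeof(h));
  return h;
}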
+
+static EVT getExtensionTo64Bits(const EVT &OrigVT) {
+ if (OrigVT.getSizeInBits() >= 64)
+ return OrigVT;
+
+ assert(OrigVT.isSimple() && "Expecting a simple value type");
+
+ MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ return MVT::v2i32;
+ case MVT::v4i8:
+ return MVT::v4i16;
+ }
+}
+
+static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
+ const EVT &OrigTy,
+ const EVT &ExtTy,
+ unsigned ExtOpcode) {
+ // The vector originally had a size of OrigTy. It was then extended to ExtTy.
+ // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
+ // 64-bits we need to insert a new extension so that it will be 64-bits.
+ assert(ExtTy.is128BitVector() && "Unexpected extension size");
+ if (OrigTy.getSizeInBits() >= 64)
+ return N;
+
+ // Must extend size to at least 64 bits to be used as an operand for VMULL.
+ EVT NewVT = getExtensionTo64Bits(OrigTy);
+
+ return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
+}
+
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ EVT VT = N->getValueType(0);
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ if (!isIntN(HalfSize, C->getSExtValue()))
+ return false;
+ } else {
+ if (!isUIntN(HalfSize, C->getZExtValue()))
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
+ N->getOperand(0)->getValueType(0),
+ N->getValueType(0),
+ N->getOpcode());
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ // Element types smaller than 32 bits are not legal, so use i32 elements.
+ // The values are implicitly truncated so sext vs. zext doesn't matter.
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ MVT::getVectorVT(TruncVT, NumElts), Ops);
+}
+
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
+}
+
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && VT.isInteger() &&
+ "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
+ NewOpc = AArch64ISD::SMULL;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = AArch64ISD::UMULL;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = AArch64ISD::SMULL;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = AArch64ISD::UMULL;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = AArch64ISD::UMULL;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
+
+ // Legalize to a S/UMULL instruction
+ SDLoc DL(Op);
+ SDValue Op0;
+ SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
+ if (!isMLA) {
+ Op0 = skipExtensionForVectorMULL(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+ // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
+ // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
+ // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
+ SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+}
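As an illustration only (not part of the patch), the widening-multiply shape this lowering looks for comes from code such as the following, assuming the loop is vectorized into a v4i32 multiply of sign-extended v4i16 operands:

// Illustrative only: sext(i16) * sext(i16) producing i32 lanes is the
// pattern LowerMUL selects as AArch64ISD::SMULL; the unsigned variant
// maps to UMULL in the same way.
#include <cstdint>
void widening_mul(const int16_t *a, const int16_t *b, int32_t *out) {
  for (int i = 0; i < 4; ++i)
    out[i] = (int32_t)a[i] * (int32_t)b[i];
}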
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
llvm_unreachable("unimplemented operand");
return SDValue();
+ case ISD::BITCAST:
+ return LowerBITCAST(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
@@ -1610,6 +1957,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFP_TO_INT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
+ case ISD::MUL:
+ return LowerMUL(Op, DAG);
}
}
@@ -1624,8 +1973,7 @@ unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const {
#include "AArch64GenCallingConv.inc"
-/// Selects the correct CCAssignFn for a the given CallingConvention
-/// value.
+/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
@@ -1650,8 +1998,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
@@ -1715,6 +2063,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
+ else if (RegVT == MVT::f16)
+ RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
else if (RegVT == MVT::f64 || RegVT.is64BitVector())
@@ -1753,7 +2103,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
uint32_t BEAlign = 0;
if (ArgSize < 8 && !Subtarget->isLittleEndian())
@@ -1788,7 +2138,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- MemVT, false, false, false, nullptr);
+ MemVT, false, false, false, 0);
InVals.push_back(ArgValue);
}
@@ -1920,8 +2270,8 @@ SDValue AArch64TargetLowering::LowerCallResult(
: RetCC_AArch64_AAPCS;
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
// Copy all of the result registers out of their specified physreg.
@@ -1990,6 +2340,19 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return false;
}
+ // Externally-defined functions with weak linkage should not be
+ // tail-called on AArch64 when the OS does not support dynamic
+ // pre-emption of symbols, as the AAELF spec requires normal calls
+ // to undefined weak functions to be replaced with a NOP or jump to the
+ // next instruction. The behaviour of branch instructions in this
+ // situation (as used for tail calls) is implementation-defined, so we
+ // cannot rely on the linker replacing the tail call with a return.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ if (GV->hasExternalWeakLinkage())
+ return false;
+ }
+
// Now we search for cases where we can use a tail call without changing the
// ABI. Sibcall is used in some places (particularly gcc) to refer to this
// concept.
@@ -2007,8 +2370,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
@@ -2020,13 +2383,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
+ *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForCall(CalleeCC, isVarArg));
SmallVector<CCValAssign, 16> RVLocs2;
- CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
+ *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForCall(CallerCC, isVarArg));
if (RVLocs1.size() != RVLocs2.size())
@@ -2051,8 +2414,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
return true;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
@@ -2149,8 +2512,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
@@ -2295,7 +2658,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// common case. It should also work for fundamental types too.
uint32_t BEAlign = 0;
unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
- : VA.getLocVT().getSizeInBits();
+ : VA.getValVT().getSizeInBits();
OpSize = (OpSize + 7) / 8;
if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
if (OpSize < 8)
@@ -2329,8 +2692,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
- /*isVolatile = */ false,
- /*alwaysInline = */ false, DstInfo, MachinePointerInfo());
+ /*isVol = */ false,
+ /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
@@ -2419,7 +2782,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const AArch64RegisterInfo *ARI =
static_cast<const AArch64RegisterInfo *>(TRI);
if (IsThisReturn) {
@@ -2473,7 +2837,7 @@ bool AArch64TargetLowering::CanLowerReturn(
? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
@@ -2487,8 +2851,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
@@ -2539,7 +2903,8 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
SDLoc DL(Op);
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
@@ -2554,6 +2919,25 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
}
+ if ((OpFlags & AArch64II::MO_CONSTPOOL) != 0) {
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
+ "use of MO_CONSTPOOL only supported on small model");
+ SDValue Hi = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, AArch64II::MO_PAGE);
+ SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+ unsigned char LoFlags = AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
+ SDValue Lo = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, LoFlags);
+ SDValue PoolAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+ SDValue GlobalAddr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /*isVolatile=*/ false,
+ /*isNonTemporal=*/ true,
+ /*isInvariant=*/ true, 8);
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+ return GlobalAddr;
+ }
+
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
const unsigned char MO_NC = AArch64II::MO_NC;
return DAG.getNode(
@@ -2630,7 +3014,8 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const AArch64RegisterInfo *ARI =
static_cast<const AArch64RegisterInfo *>(TRI);
const uint32_t *Mask = ARI->getTLSCallPreservedMask();
@@ -2680,7 +3065,8 @@ SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const AArch64RegisterInfo *ARI =
static_cast<const AArch64RegisterInfo *>(TRI);
const uint32_t *Mask = ARI->getTLSCallPreservedMask();
@@ -2895,11 +3281,6 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
-
- // TBZ only operates on i64's, but the ext should be free.
- if (Test.getValueType() == MVT::i32)
- Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64);
-
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
}
@@ -2915,18 +3296,29 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
-
- // TBNZ only operates on i64's, but the ext should be free.
- if (Test.getValueType() == MVT::i32)
- Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64);
-
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
+ } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
+ // Don't combine AND since emitComparison converts the AND to an ANDS
+ // (a.k.a. TST) and the test in the test bit and branch instruction
+ // becomes redundant. This would also increase register pressure.
+ uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
+ return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
+ DAG.getConstant(Mask, MVT::i64), Dest);
}
}
+ if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
+ LHS.getOpcode() != ISD::AND) {
+ // Don't combine AND since emitComparison converts the AND to an ANDS
+ // (a.k.a. TST) and the test in the test bit and branch instruction
+ // becomes redundant. This would also increase register pressure.
+ uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
+ return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
+ DAG.getConstant(Mask, MVT::i64), Dest);
+ }
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
@@ -3041,6 +3433,9 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
return SDValue();
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
@@ -3992,8 +4387,10 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
return;
case 'J': {
uint64_t NVal = -C->getSExtValue();
- if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal))
+ if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
+ CVal = C->getSExtValue();
break;
+ }
return;
}
// The K and L constraints apply *only* to logical immediates, including
@@ -4117,10 +4514,30 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
- SmallVector<SDValue, 2> SourceVecs;
- SmallVector<unsigned, 2> MinElts;
- SmallVector<unsigned, 2> MaxElts;
+ struct ShuffleSourceInfo {
+ SDValue Vec;
+ unsigned MinElt;
+ unsigned MaxElt;
+
+ // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
+ // be compatible with the shuffle we intend to construct. As a result
+ // ShuffleVec will be some sliding window into the original Vec.
+ SDValue ShuffleVec;
+
+ // Code should guarantee that element i in Vec starts at element "WindowBase
+ // + i * WindowScale" in ShuffleVec.
+ int WindowBase;
+ int WindowScale;
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
+ ShuffleSourceInfo(SDValue Vec)
+ : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
+ WindowScale(1) {}
+ };
+ // First gather all vectors used as an immediate source for this BUILD_VECTOR
+ // node.
+ SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
@@ -4131,133 +4548,153 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
return SDValue();
}
- // Record this extraction against the appropriate vector if possible...
+ // Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
- bool FoundSource = false;
- for (unsigned j = 0; j < SourceVecs.size(); ++j) {
- if (SourceVecs[j] == SourceVec) {
- if (MinElts[j] > EltNo)
- MinElts[j] = EltNo;
- if (MaxElts[j] < EltNo)
- MaxElts[j] = EltNo;
- FoundSource = true;
- break;
- }
- }
+ auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ if (Source == Sources.end())
+ Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
- // Or record a new source if not...
- if (!FoundSource) {
- SourceVecs.push_back(SourceVec);
- MinElts.push_back(EltNo);
- MaxElts.push_back(EltNo);
- }
+ // Update the minimum and maximum lane number seen.
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ Source->MinElt = std::min(Source->MinElt, EltNo);
+ Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// Currently only do something sane when at most two source vectors
- // involved.
- if (SourceVecs.size() > 2)
+ // are involved.
+ if (Sources.size() > 2)
return SDValue();
- SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
- int VEXTOffsets[2] = { 0, 0 };
- int OffsetMultipliers[2] = { 1, 1 };
-
- // This loop extracts the usage patterns of the source vectors
- // and prepares appropriate SDValues for a shuffle if possible.
- for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
- SDValue CurSource = SourceVecs[i];
- if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType()) {
- // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
- // Then bitcast it to the vector which holds asserted element type,
- // and record the multiplier of element width between SourceVecs and
- // Build_vector which is needed to extract the correct lanes later.
- EVT CastVT =
- EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- SourceVecs[i].getValueSizeInBits() /
- VT.getVectorElementType().getSizeInBits());
-
- CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
- OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
- NumSrcElts *= OffsetMultipliers[i];
- MaxElts[i] *= OffsetMultipliers[i];
- MinElts[i] *= OffsetMultipliers[i];
+ // Find out the smallest element size among result and two sources, and use
+ // it as element size to build the shuffle_vector.
+ EVT SmallestEltTy = VT.getVectorElementType();
+ for (auto &Source : Sources) {
+ EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
+ if (SrcEltTy.bitsLT(SmallestEltTy)) {
+ SmallestEltTy = SrcEltTy;
}
+ }
+ unsigned ResMultiplier =
+ VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
- if (CurSource.getValueType() == VT) {
- // No VEXT necessary
- ShuffleSrcs[i] = CurSource;
- VEXTOffsets[i] = 0;
+ // If the source vector is too wide or too narrow, we may nevertheless be able
+ // to construct a compatible shuffle either by concatenating it with UNDEF or
+ // extracting a suitable range of elements.
+ for (auto &Src : Sources) {
+ EVT SrcVT = Src.ShuffleVec.getValueType();
+
+ if (SrcVT.getSizeInBits() == VT.getSizeInBits())
continue;
- } else if (NumSrcElts < NumElts) {
+
+ // This stage of the search produces a source with the same element type as
+ // the original, but with a total width matching the BUILD_VECTOR output.
+ EVT EltVT = SrcVT.getVectorElementType();
+ unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
+
+ if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
+ assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
- ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
- DAG.getUNDEF(CurSource.getValueType()));
+ Src.ShuffleVec =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
+ DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
- // Since only 64-bit and 128-bit vectors are legal on ARM and
- // we've eliminated the other cases...
- assert(NumSrcElts == 2 * NumElts &&
- "unexpected vector sizes in ReconstructShuffle");
+ assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
- if (MaxElts[i] - MinElts[i] >= NumElts) {
+ if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
// Span too large for a VEXT to cope
return SDValue();
}
- if (MinElts[i] >= NumElts) {
+ if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
- VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(NumElts));
- } else if (MaxElts[i] < NumElts) {
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getIntPtrConstant(NumSrcElts));
+ Src.WindowBase = -NumSrcElts;
+ } else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
- VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(0));
+ Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+ Src.ShuffleVec, DAG.getIntPtrConstant(0));
} else {
// An actual VEXT is needed
- VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
- DAG.getIntPtrConstant(NumElts));
- unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
- ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
- DAG.getConstant(Imm, MVT::i32));
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+ Src.ShuffleVec, DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getIntPtrConstant(NumSrcElts));
+ unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
+
+ Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
+ VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
+ Src.WindowBase = -Src.MinElt;
}
}
- SmallVector<int, 8> Mask;
-
- for (unsigned i = 0; i < NumElts; ++i) {
+ // Another possible incompatibility occurs from the vector element types. We
+ // can fix this by bitcasting the source vectors to the same type we intend
+ // for the shuffle.
+ for (auto &Src : Sources) {
+ EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
+ if (SrcEltTy == SmallestEltTy)
+ continue;
+ assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ Src.WindowBase *= Src.WindowScale;
+ }
+
+ // Final sanity check before we try to actually produce a shuffle.
+ DEBUG(
+ for (auto Src : Sources)
+ assert(Src.ShuffleVec.getValueType() == ShuffleVT);
+ );
+
+ // The stars all align, our next step is to produce the mask for the shuffle.
+ SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
+ int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF) {
- Mask.push_back(-1);
+ if (Entry.getOpcode() == ISD::UNDEF)
continue;
- }
- SDValue ExtractVec = Entry.getOperand(0);
- int ExtractElt =
- cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
- if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
- } else {
- Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
- VEXTOffsets[1]);
- }
+ auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+
+ // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
+ // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
+ // segment.
+ EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
+ int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
+ VT.getVectorElementType().getSizeInBits());
+ int LanesDefined = BitsDefined / BitsPerShuffleLane;
+
+ // This source is expected to fill ResMultiplier lanes of the final shuffle,
+ // starting at the appropriate offset.
+ int *LaneMask = &Mask[i * ResMultiplier];
+
+ int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
+ ExtractBase += NumElts * (Src - Sources.begin());
+ for (int j = 0; j < LanesDefined; ++j)
+ LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
- if (isShuffleMaskLegal(Mask, VT))
- return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
- &Mask[0]);
+ if (!isShuffleMaskLegal(Mask, ShuffleVT))
+ return SDValue();
- return SDValue();
+ SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
+ for (unsigned i = 0; i < Sources.size(); ++i)
+ ShuffleOps[i] = Sources[i].ShuffleVec;
+
+ SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
+ ShuffleOps[1], &Mask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
// check if an EXT instruction can handle the shuffle mask when the
@@ -4586,7 +5023,8 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
// vrev <4 x i16> -> REV32
- if (VT.getVectorElementType() == MVT::i16)
+ if (VT.getVectorElementType() == MVT::i16 ||
+ VT.getVectorElementType() == MVT::f16)
return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
// vrev <4 x i8> -> REV16
assert(VT.getVectorElementType() == MVT::i8);
@@ -4706,7 +5144,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
static unsigned getDUPLANEOp(EVT EltType) {
if (EltType == MVT::i8)
return AArch64ISD::DUPLANE8;
- if (EltType == MVT::i16)
+ if (EltType == MVT::i16 || EltType == MVT::f16)
return AArch64ISD::DUPLANE16;
if (EltType == MVT::i32 || EltType == MVT::f32)
return AArch64ISD::DUPLANE32;
@@ -4836,7 +5274,8 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
- if (ScalarVT.getSizeInBits() < 32)
+
+ if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
@@ -4924,7 +5363,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -4933,7 +5372,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -4942,7 +5381,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -4951,7 +5390,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -4960,7 +5399,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -4969,7 +5408,7 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5124,7 +5563,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -5133,7 +5572,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -5142,7 +5581,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -5151,7 +5590,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -5160,7 +5599,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -5169,7 +5608,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5242,13 +5681,13 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (VT.getSizeInBits() == 128) {
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// Support the V64 version via subregister insertion.
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
@@ -5257,7 +5696,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -5266,7 +5705,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -5275,7 +5714,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -5284,7 +5723,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -5293,7 +5732,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -5302,7 +5741,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
@@ -5311,7 +5750,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(264, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
@@ -5320,7 +5759,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(272, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
@@ -5328,7 +5767,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// The few faces of FMOV...
@@ -5337,7 +5776,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
@@ -5345,7 +5784,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// The many faces of MVNI...
@@ -5356,7 +5795,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -5365,7 +5804,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -5374,7 +5813,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -5383,7 +5822,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -5392,7 +5831,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -5401,7 +5840,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
@@ -5410,7 +5849,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(264, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
@@ -5419,7 +5858,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(272, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
@@ -5586,19 +6025,21 @@ SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
- // Check for non-constant lane.
- if (!isa<ConstantSDNode>(Op.getOperand(2)))
+ // Check for non-constant or out of range lane.
+ EVT VT = Op.getOperand(0).getValueType();
+ ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
- EVT VT = Op.getOperand(0).getValueType();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
+ VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
+ VT == MVT::v8f16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32)
+ VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
return SDValue();
// For V64 types, we perform insertion by expanding the value
@@ -5618,19 +6059,21 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
- // Check for non-constant lane.
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ // Check for non-constant or out of range lane.
+ EVT VT = Op.getOperand(0).getValueType();
+ ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
- EVT VT = Op.getOperand(0).getValueType();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
+ VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
+ VT == MVT::v8f16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32)
+ VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
return SDValue();
// For V64 types, we perform extraction by expanding the value
@@ -6164,7 +6607,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat) &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsUnalignedMemoryAccesses(MVT::f128, 0, &Fast) && Fast)))
+ (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
return Size >= 8 ? MVT::i64 : MVT::i32;
@@ -6359,6 +6802,48 @@ static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
return performIntegerAbsCombine(N, DAG);
}
+SDValue
+AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ // fold (sdiv X, pow2)
+ EVT VT = N->getValueType(0);
+ if ((VT != MVT::i32 && VT != MVT::i64) ||
+ !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ unsigned Lg2 = Divisor.countTrailingZeros();
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);
+
+ // Add (N0 < 0) ? Pow2 - 1 : 0;
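+ // For example (illustrative values, assuming Divisor == 8 so Lg2 == 3):
+ // N0 == -9 is biased to -9 + 7 == -2, and -2 >> 3 == -1, matching C's
+ // truncating signed division, while N0 == 9 keeps its unbiased value and
+ // 9 >> 3 == 1.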
+ SDValue CCVal;
+ SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
+
+ if (Created) {
+ Created->push_back(Cmp.getNode());
+ Created->push_back(Add.getNode());
+ Created->push_back(CSel.getNode());
+ }
+
+ // Divide by pow2.
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (Divisor.isNonNegative())
+ return SRA;
+
+ if (Created)
+ Created->push_back(SRA.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
+}
+
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -6417,10 +6902,63 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
+ SelectionDAG &DAG) {
+ // Take advantage of vector comparisons producing 0 or -1 in each lane to
+ // optimize away the operation when it's from a constant.
+ //
+ // The general transformation is:
+ // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
+ // AND(VECTOR_CMP(x,y), constant2)
+ // constant2 = UNARYOP(constant)
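+ //
+ // For example (an illustrative instance of the transformation):
+ // sint_to_fp(and(setcc(x, y), splat(1))) becomes
+ // bitcast(and(setcc(x, y), bitcast(splat(1.0)))), so the int-to-fp
+ // conversion of the constant is folded at compile time.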
+
+ // Early exit if this isn't a vector operation, the operand of the
+ // unary operation isn't a bitwise AND, or if the sizes of the operations
+ // aren't the same.
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
+ N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
+ VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
+ return SDValue();
+
+ // Now check that the other operand of the AND is a constant. We could
+ // make the transformation for non-constant splats as well, but it's unclear
+ // that would be a benefit as it would not eliminate any operations, just
+ // perform one more step in scalar code before moving to the vector unit.
+ if (BuildVectorSDNode *BV =
+ dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+ // Bail out if the vector isn't a constant.
+ if (!BV->isConstant())
+ return SDValue();
+
+ // Everything checks out. Build up the new and improved node.
+ SDLoc DL(N);
+ EVT IntVT = BV->getValueType(0);
+ // Create a new constant of the appropriate type for the transformed
+ // DAG.
+ SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
+ // The AND node needs bitcasts to/from an integer vector type around it.
+ SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
+ N->getOperand(0)->getOperand(0), MaskConst);
+ SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+ return Res;
+ }
+
+ return SDValue();
+}
+
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+ // First try to optimize away the conversion when it's conditionally from
+ // a constant. Vectors only.
+ SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
+ if (Res != SDValue())
+ return Res;
+
EVT VT = N->getValueType(0);
if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
+
// Only optimize when the source and destination types have the same width.
if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
return SDValue();
@@ -7190,11 +7728,11 @@ static SDValue performExtendCombine(SDNode *N,
// If the vector type isn't a simple VT, it's beyond the scope of what
// we're worried about here. Let legalization do its thing and hope for
// the best.
- if (!ResVT.isSimple())
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src->getValueType(0);
+ if (!ResVT.isSimple() || !SrcVT.isSimple())
return SDValue();
- SDValue Src = N->getOperand(0);
- MVT SrcVT = Src->getValueType(0).getSimpleVT();
// If the source VT is a 64-bit vector, we can play games and get the
// better results we want.
if (SrcVT.getSizeInBits() != 64)
@@ -7428,7 +7966,7 @@ static SDValue performPostLD1Combine(SDNode *N,
Ops.push_back(Inc);
EVT Tys[3] = { VT, MVT::i64, MVT::Other };
- SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, 3));
+ SDVTList SDTys = DAG.getVTList(Tys);
unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
MemVT,
@@ -7558,7 +8096,7 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
Tys[n] = VecTy;
Tys[n++] = MVT::i64; // Type of write back register
Tys[n] = MVT::Other; // Type of the chain
- SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2));
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
@@ -7579,10 +8117,272 @@ static SDValue performNEONPostLDSTCombine(SDNode *N,
return SDValue();
}
+// Checks to see if the value is the prescribed width and returns information
+// about its extension mode.
+static
+bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
+ ExtType = ISD::NON_EXTLOAD;
+ switch(V.getNode()->getOpcode()) {
+ default:
+ return false;
+ case ISD::LOAD: {
+ LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
+ if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
+ || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
+ ExtType = LoadNode->getExtensionType();
+ return true;
+ }
+ return false;
+ }
+ case ISD::AssertSext: {
+ VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+ if ((TypeNode->getVT() == MVT::i8 && width == 8)
+ || (TypeNode->getVT() == MVT::i16 && width == 16)) {
+ ExtType = ISD::SEXTLOAD;
+ return true;
+ }
+ return false;
+ }
+ case ISD::AssertZext: {
+ VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+ if ((TypeNode->getVT() == MVT::i8 && width == 8)
+ || (TypeNode->getVT() == MVT::i16 && width == 16)) {
+ ExtType = ISD::ZEXTLOAD;
+ return true;
+ }
+ return false;
+ }
+ case ISD::Constant:
+ case ISD::TargetConstant: {
+ if (std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
+ 1LL << (width - 1))
+ return true;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// This function does a whole lot of voodoo to determine if the tests are
+// equivalent without and with a mask. Essentially what happens is that given a
+// DAG resembling:
+//
+// +-------------+ +-------------+ +-------------+ +-------------+
+// | Input | | AddConstant | | CompConstant| | CC |
+// +-------------+ +-------------+ +-------------+ +-------------+
+// | | | |
+// V V | +----------+
+// +-------------+ +----+ | |
+// | ADD | |0xff| | |
+// +-------------+ +----+ | |
+// | | | |
+// V V | |
+// +-------------+ | |
+// | AND | | |
+// +-------------+ | |
+// | | |
+// +-----+ | |
+// | | |
+// V V V
+// +-------------+
+// | CMP |
+// +-------------+
+//
+// The AND node may be safely removed for some combinations of inputs. In
+// particular we need to take into account the extension type of the Input,
+// the exact values of AddConstant, CompConstant, and CC, along with the nominal
+// width of the input (this can work for any width inputs, the above graph is
+// specific to 8 bits).
+//
+// The specific equations were worked out by generating output tables for each
+// AArch64CC value in terms of the AddConstant (w1) and CompConstant (w2). The
+// problem was simplified by working with 4-bit inputs, which means we only
+// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
+// extension (8,15), 8 patterns unique to sign extension (-8,-1), and 8
+// patterns present in both extensions (0,7). For every distinct pair of
+// AddConstant and CompConstant bit patterns we can consider the masked and
+// unmasked versions to be equivalent if the result of this function is true for
+// all 16 distinct bit patterns of the current extension type of Input (w0).
+//
+// sub w8, w0, w1
+// and w10, w8, #0x0f
+// cmp w8, w2
+// cset w9, AArch64CC
+// cmp w10, w2
+// cset w11, AArch64CC
+// cmp w9, w11
+// cset w0, eq
+// ret
+//
+// Since the above function shows when the outputs are equivalent, it defines
+// when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
+// would be expensive to run during compiles. The equations below were written
+// in a test harness that confirmed they gave outputs equivalent to the above
+// function for all inputs, so they can be used to determine whether the
+// removal is legal instead.
+//
+// isEquivalentMaskless() is the code that tests whether the AND can be
+// removed, factored out of the DAG recognition because the DAG can take
+// several forms.
+
+static
+bool isEquivalentMaskless(unsigned CC, unsigned width,
+ ISD::LoadExtType ExtType, signed AddConstant,
+ signed CompConstant) {
+ // By being careful about our equations and writing them only in terms of
+ // symbolic values and well-known constants (0, 1, -1, MaxUInt), we can
+ // make them generally applicable to all bit widths.
+ signed MaxUInt = (1 << width);
+
+ // For the purposes of these comparisons sign extending the type is
+ // equivalent to zero extending the add and displacing it by half the integer
+ // width. Provided we are careful and make sure our equations are valid over
+ // the whole range we can just adjust the input and avoid writing equations
+ // for sign extended inputs.
+ if (ExtType == ISD::SEXTLOAD)
+ AddConstant -= (1 << (width-1));
+
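+ // For example (an illustrative check of the EQ/NE case below): with
+ // width == 8, a zero-extended input and AddConstant == 0, the masked value
+ // (x + 0) & 0xff equals the unmasked value x + 0 for every 8-bit x, so both
+ // compares against CompConstant give the same answer and the AND is
+ // removable.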
+ switch(CC) {
+ case AArch64CC::LE:
+ case AArch64CC::GT: {
+ if ((AddConstant == 0) ||
+ (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
+ (AddConstant >= 0 && CompConstant < 0) ||
+ (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
+ return true;
+ } break;
+ case AArch64CC::LT:
+ case AArch64CC::GE: {
+ if ((AddConstant == 0) ||
+ (AddConstant >= 0 && CompConstant <= 0) ||
+ (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
+ return true;
+ } break;
+ case AArch64CC::HI:
+ case AArch64CC::LS: {
+ if ((AddConstant >= 0 && CompConstant < 0) ||
+ (AddConstant <= 0 && CompConstant >= -1 &&
+ CompConstant < AddConstant + MaxUInt))
+ return true;
+ } break;
+ case AArch64CC::PL:
+ case AArch64CC::MI: {
+ if ((AddConstant == 0) ||
+ (AddConstant > 0 && CompConstant <= 0) ||
+ (AddConstant < 0 && CompConstant <= AddConstant))
+ return true;
+ } break;
+ case AArch64CC::LO:
+ case AArch64CC::HS: {
+ if ((AddConstant >= 0 && CompConstant <= 0) ||
+ (AddConstant <= 0 && CompConstant >= 0 &&
+ CompConstant <= AddConstant + MaxUInt))
+ return true;
+ } break;
+ case AArch64CC::EQ:
+ case AArch64CC::NE: {
+ if ((AddConstant > 0 && CompConstant < 0) ||
+ (AddConstant < 0 && CompConstant >= 0 &&
+ CompConstant < AddConstant + MaxUInt) ||
+ (AddConstant >= 0 && CompConstant >= 0 &&
+ CompConstant >= AddConstant) ||
+ (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
+
+ return true;
+ } break;
+ case AArch64CC::VS:
+ case AArch64CC::VC:
+ case AArch64CC::AL:
+ case AArch64CC::NV:
+ return true;
+ case AArch64CC::Invalid:
+ break;
+ }
+
+ return false;
+}
+
+static
+SDValue performCONDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG, unsigned CCIndex,
+ unsigned CmpIndex) {
+ unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
+ SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
+ unsigned CondOpcode = SubsNode->getOpcode();
+
+ if (CondOpcode != AArch64ISD::SUBS)
+ return SDValue();
+
+ // There is a SUBS feeding this condition. Is it fed by a mask we can
+ // use?
+
+ SDNode *AndNode = SubsNode->getOperand(0).getNode();
+ unsigned MaskBits = 0;
+
+ if (AndNode->getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
+ uint32_t CNV = CN->getZExtValue();
+ if (CNV == 255)
+ MaskBits = 8;
+ else if (CNV == 65535)
+ MaskBits = 16;
+ }
+
+ if (!MaskBits)
+ return SDValue();
+
+ SDValue AddValue = AndNode->getOperand(0);
+
+ if (AddValue.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // The basic DAG structure is correct; grab the inputs and validate them.
+
+ SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
+ SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
+ SDValue SubsInputValue = SubsNode->getOperand(1);
+
+ // The mask is present and the provenance of all the values is a smaller type,
+ // so let's see if the mask is superfluous.
+
+ if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
+ !isa<ConstantSDNode>(SubsInputValue.getNode()))
+ return SDValue();
+
+ ISD::LoadExtType ExtType;
+
+ if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
+ !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
+ !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
+ return SDValue();
+
+ if(!isEquivalentMaskless(CC, MaskBits, ExtType,
+ cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
+ cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
+ return SDValue();
+
+ // The AND is not necessary; remove it.
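+ // For example (illustrative): a CSEL whose flags come from
+ // SUBS(AND(ADD(x, c1), 0xff), c2) is rewired to take its flags from
+ // SUBS(ADD(x, c1), c2) once isEquivalentMaskless has proven the 8-bit mask
+ // cannot change the condition.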
+
+ SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
+ SubsNode->getValueType(1));
+ SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
+
+ SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
+ DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
+
+ return SDValue(N, 0);
+}
+
// Optimize compare with zero and branch.
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3);
+ if (NV.getNode())
+ N = NV.getNode();
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue CCVal = N->getOperand(2);
@@ -7671,21 +8471,23 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
- if (!N->getOperand(1).getValueType().isVector())
- return SDValue();
-
if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
return SDValue();
- SDLoc DL(N0);
-
+ // If NumMaskElts == 0, the comparison is larger than the select result. The
+ // largest real NEON comparison is 64 bits per lane, which means the result is
+ // at most 32 bits and an illegal vector. Just bail out for now.
EVT SrcVT = N0.getOperand(0).getValueType();
- SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT,
- ResVT.getSizeInBits() / SrcVT.getSizeInBits());
+ int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
+ if (!ResVT.isVector() || NumMaskElts == 0)
+ return SDValue();
+
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
+ SDLoc DL(N0);
SDValue LHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
SDValue RHS =
@@ -7695,8 +8497,8 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask.data());
- Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(),
- Mask);
+ Mask = DAG.getNode(ISD::BITCAST, DL,
+ ResVT.changeVectorElementTypeToInteger(), Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
@@ -7737,6 +8539,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
+ case AArch64ISD::CSEL:
+ return performCONDCombine(N, DCI, DAG, 2, 3);
case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);
case ISD::INSERT_VECTOR_ELT:
@@ -7890,11 +8694,32 @@ bool AArch64TargetLowering::getPostIndexedAddressParts(
return true;
}
+static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
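+ // i16 is not a legal type on AArch64, so an i16-valued BITCAST (i.e. from
+ // f16) has to be expanded by hand: insert the f16 operand into the h
+ // subregister of an f32, bitcast that to i32, and truncate down to i16.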
+ if (N->getValueType(0) != MVT::i16)
+ return;
+
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+ assert(Op.getValueType() == MVT::f16 &&
+ "Inconsistent bitcast? Only 16-bit types should be i16 or f16");
+ Op = SDValue(
+ DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
+ DAG.getUNDEF(MVT::i32), Op,
+ DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+ 0);
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
+}
+
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this");
+ case ISD::BITCAST:
+ ReplaceBITCASTResults(N, Results, DAG);
+ return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
@@ -7903,17 +8728,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
}
}
-bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
- // Loads and stores less than 128-bits are already atomic; ones above that
- // are doomed anyway, so defer to the default libcall and blame the OS when
- // things go wrong:
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType()->getPrimitiveSizeInBits() == 128;
-
- // For the real atomic operations, we have ldxr/stxr up to 128 bits.
- return Inst->getType()->getPrimitiveSizeInBits() <= 128;
+bool AArch64TargetLowering::useLoadStackGuardNode() const {
+ return true;
}
TargetLoweringBase::LegalizeTypeAction
@@ -7928,12 +8744,37 @@ AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+// Loads and stores less than 128-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong.
+bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+ return Size == 128;
+}
+
+// Loads and stores less than 128-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong.
+bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ unsigned Size = LI->getType()->getPrimitiveSizeInBits();
+ return Size == 128;
+}
+
+// For the real atomic operations, we have ldxr/stxr up to 128 bits.
+bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ return Size <= 128;
+}
+
+bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
+ return true;
+}
+
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
- bool IsAcquire =
- Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+ bool IsAcquire = isAtLeastAcquire(Ord);
// Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i64, i64} and we have to recombine them into a
@@ -7968,8 +8809,7 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- bool IsRelease =
- Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+ bool IsRelease = isAtLeastRelease(Ord);
// Since the intrinsics must have legal type, the i128 intrinsics take two
// parameters: "i64, i64". We must marshal Val into the appropriate form
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index cb0b9ef..2f5708d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64_ISELLOWERING_H
-#define LLVM_TARGET_AArch64_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -162,6 +162,16 @@ enum {
SITOF,
UITOF,
+ /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
+ /// world w.r.t. vectors, which causes additional REV instructions to be
+ /// generated to compensate for the byte-swapping. But sometimes we do
+ /// need to re-interpret the data in SIMD vector registers in big-endian
+ /// mode without emitting such REV instructions.
+ NVCAST,
+
+ SMULL,
+ UMULL,
+
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
@@ -197,10 +207,9 @@ class AArch64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
- explicit AArch64TargetLowering(TargetMachine &TM);
+ explicit AArch64TargetLowering(const TargetMachine &TM);
- /// Selects the correct CCAssignFn for a the given CallingConvention
- /// value.
+ /// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
@@ -212,10 +221,11 @@ public:
MVT getScalarShiftAmountTy(EVT LHSTy) const override;
- /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// allowsMisalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
- bool *Fast = nullptr) const override {
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
+ unsigned Align = 1,
+ bool *Fast = nullptr) const override {
if (RequireStrictAlign)
return false;
// FIXME: True for Cyclone, but not necessarily for others.
@@ -317,13 +327,17 @@ public:
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool hasLoadLinkedStoreConditional() const override;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
- bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+ bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
@@ -424,6 +438,9 @@ private:
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const override;
+
ConstraintType
getConstraintType(const std::string &Constraint) const override;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -464,4 +481,4 @@ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
} // end namespace llvm
-#endif // LLVM_TARGET_AArch64_ISELLOWERING_H
+#endif
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
index 3b9e3c6..4923a11 100644
--- a/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -29,8 +29,7 @@ def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- assert(Ordering != AcquireRelease && "unexpected load ordering");
- return Ordering == Acquire || Ordering == SequentiallyConsistent;
+ return isAtLeastAcquire(Ordering);
}]>;
// An atomic load operation that does not need either acquire or release
@@ -38,7 +37,7 @@ class acquiring_load<PatFrag base>
class relaxed_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Monotonic || Ordering == Unordered;
+ return !isAtLeastAcquire(Ordering);
}]>;
// 8-bit loads
@@ -114,14 +113,14 @@ class releasing_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
assert(Ordering != AcquireRelease && "unexpected store ordering");
- return Ordering == Release || Ordering == SequentiallyConsistent;
+ return isAtLeastRelease(Ordering);
}]>;
// An atomic store operation that doesn't actually need to be atomic on AArch64.
class relaxed_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Monotonic || Ordering == Unordered;
+ return !isAtLeastRelease(Ordering);
}]>;
// 8-bit stores
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 4876c7d..2b0f5d2 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -539,6 +539,11 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_7Operand;
}
+// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
+def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 16;
+}]>;
+
// An arithmetic shifter operand:
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
// {5-0} - imm6
@@ -776,15 +781,17 @@ def simdimmtype10 : Operand<i32>,
// Base encoding for system instruction operands.
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands>
- : I<oops, iops, asm, operands, "", []> {
+class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands,
+ list<dag> pattern = []>
+ : I<oops, iops, asm, operands, "", pattern> {
let Inst{31-22} = 0b1101010100;
let Inst{21} = L;
}
// System instructions which do not have an Rt register.
-class SimpleSystemI<bit L, dag iops, string asm, string operands>
- : BaseSystemI<L, (outs), iops, asm, operands> {
+class SimpleSystemI<bit L, dag iops, string asm, string operands,
+ list<dag> pattern = []>
+ : BaseSystemI<L, (outs), iops, asm, operands, pattern> {
let Inst{4-0} = 0b11111;
}
@@ -797,13 +804,17 @@ class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
}
// Hint instructions that take both a CRm and a 3-bit immediate.
-class HintI<string mnemonic>
- : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "">,
- Sched<[WriteHint]> {
- bits <7> imm;
- let Inst{20-12} = 0b000110010;
- let Inst{11-5} = imm;
-}
+// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
+// model patterns with sufficiently fine granularity.
+let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in
+ class HintI<string mnemonic>
+ : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "",
+ [(int_aarch64_hint imm0_127:$imm)]>,
+ Sched<[WriteHint]> {
+ bits <7> imm;
+ let Inst{20-12} = 0b000110010;
+ let Inst{11-5} = imm;
+ }
// System instructions taking a single literal operand which encodes into
// CRm. op2 differentiates the opcodes.
@@ -815,8 +826,9 @@ def barrier_op : Operand<i32> {
let PrintMethod = "printBarrierOption";
let ParserMatchClass = BarrierAsmOperand;
}
-class CRmSystemI<Operand crmtype, bits<3> opc, string asm>
- : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm">,
+class CRmSystemI<Operand crmtype, bits<3> opc, string asm,
+ list<dag> pattern = []>
+ : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm", pattern>,
Sched<[WriteBarrier]> {
bits<4> CRm;
let Inst{20-12} = 0b000110011;
@@ -831,7 +843,7 @@ def MRSSystemRegisterOperand : AsmOperandClass {
let ParserMethod = "tryParseSysReg";
let DiagnosticType = "MRS";
}
-// concatenation of 1, op0, op1, CRn, CRm, op2. 16-bit immediate.
+// concatenation of op0, op1, CRn, CRm, op2. 16-bit immediate.
def mrs_sysreg_op : Operand<i32> {
let ParserMatchClass = MRSSystemRegisterOperand;
let DecoderMethod = "DecodeMRSSystemRegister";
@@ -851,9 +863,8 @@ def msr_sysreg_op : Operand<i32> {
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
- bits<15> systemreg;
- let Inst{20} = 1;
- let Inst{19-5} = systemreg;
+ bits<16> systemreg;
+ let Inst{20-5} = systemreg;
}
// FIXME: Some of these def NZCV, others don't. Best way to model that?
@@ -861,9 +872,8 @@ class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
// would do it, but feels like overkill at this point.
class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
"msr", "\t$systemreg, $Rt"> {
- bits<15> systemreg;
- let Inst{20} = 1;
- let Inst{19-5} = systemreg;
+ bits<16> systemreg;
+ let Inst{20-5} = systemreg;
}
def SystemPStateFieldOperand : AsmOperandClass {
@@ -1339,14 +1349,15 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
}
multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
+ // MADD/MSUB generation is decided by MachineCombiner.cpp
def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
- [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
+ [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>,
Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 0;
}
def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
- [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
+ [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>,
Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
@@ -2985,7 +2996,7 @@ class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
: BaseLoadStorePreIdx<sz, V, opc,
(outs GPR64sp:$wback, regtype:$Rt),
(ins GPR64sp:$Rn, simm9:$offset), asm,
- "$Rn = $wback", []>,
+ "$Rn = $wback,@earlyclobber $wback", []>,
Sched<[WriteLD, WriteAdr]>;
let mayStore = 1, mayLoad = 0 in
@@ -2994,7 +3005,7 @@ class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
: BaseLoadStorePreIdx<sz, V, opc,
(outs GPR64sp:$wback),
(ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
- asm, "$Rn = $wback",
+ asm, "$Rn = $wback,@earlyclobber $wback",
[(set GPR64sp:$wback,
(storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
Sched<[WriteAdr, WriteST]>;
@@ -3004,7 +3015,6 @@ class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
// Load/store post-indexed
//---
-// (pre-index) load/stores.
class BaseLoadStorePostIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
string asm, string cstr, list<dag> pat>
: I<oops, iops, asm, "\t$Rt, [$Rn], $offset", cstr, pat> {
@@ -3032,7 +3042,7 @@ class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
: BaseLoadStorePostIdx<sz, V, opc,
(outs GPR64sp:$wback, regtype:$Rt),
(ins GPR64sp:$Rn, simm9:$offset),
- asm, "$Rn = $wback", []>,
+ asm, "$Rn = $wback,@earlyclobber $wback", []>,
Sched<[WriteLD, WriteI]>;
let mayStore = 1, mayLoad = 0 in
@@ -3041,7 +3051,7 @@ class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
: BaseLoadStorePostIdx<sz, V, opc,
(outs GPR64sp:$wback),
(ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
- asm, "$Rn = $wback",
+ asm, "$Rn = $wback,@earlyclobber $wback",
[(set GPR64sp:$wback,
(storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
Sched<[WriteAdr, WriteST, ReadAdrBase]>;
@@ -3105,7 +3115,7 @@ multiclass StorePairOffset<bits<2> opc, bit V, RegisterClass regtype,
// (pre-indexed)
class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
string asm>
- : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]!", "$Rn = $wback", []> {
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]!", "$Rn = $wback,@earlyclobber $wback", []> {
bits<5> Rt;
bits<5> Rt2;
bits<5> Rn;
@@ -3146,7 +3156,7 @@ class StorePairPreIdx<bits<2> opc, bit V, RegisterClass regtype,
class BaseLoadStorePairPostIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
string asm>
- : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn], $offset", "$Rn = $wback", []> {
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn], $offset", "$Rn = $wback,@earlyclobber $wback", []> {
bits<5> Rt;
bits<5> Rt2;
bits<5> Rn;
@@ -5250,6 +5260,10 @@ multiclass SIMDZipVector<bits<3>opc, string asm,
def v2i64 : BaseSIMDZipVector<0b111, opc, V128,
asm, ".2d", OpNode, v2i64>;
+ def : Pat<(v4f16 (OpNode V64:$Rn, V64:$Rm)),
+ (!cast<Instruction>(NAME#"v4i16") V64:$Rn, V64:$Rm)>;
+ def : Pat<(v8f16 (OpNode V128:$Rn, V128:$Rm)),
+ (!cast<Instruction>(NAME#"v8i16") V128:$Rn, V128:$Rm)>;
def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)),
(!cast<Instruction>(NAME#"v2i32") V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)),
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index b702275..2dbb31c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -14,6 +14,7 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "AArch64MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -260,8 +261,9 @@ void AArch64InstrInfo::instantiateCondBranch(
BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
} else {
// Folded compare-and-branch
+ // Note that we use addOperand instead of addReg to keep the flags.
const MachineInstrBuilder MIB =
- BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg());
+ BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
if (Cond.size() > 3)
MIB.addImm(Cond[3].getImm());
MIB.addMBB(TBB);
@@ -606,6 +608,42 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
}
}
+bool
+AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb,
+ AliasAnalysis *AA) const {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned BaseRegA = 0, BaseRegB = 0;
+ int OffsetA = 0, OffsetB = 0;
+ int WidthA = 0, WidthB = 0;
+
+ assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
+ "MIa must be a store or a load");
+ assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
+ "MIb must be a store or a load");
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+ MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+ // Retrieve the base register, offset from the base register and width. Width
+ // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
+ // the base registers are identical, and the offset of the lower memory access
+ // plus its width does not extend past the offset of the higher memory access,
+ // then the two accesses are trivially disjoint.
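+ // For example (illustrative): an 8-byte store at [x0, #0] and a 4-byte load
+ // at [x0, #8] share a base register but 0 + 8 <= 8, so they cannot alias.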
+ if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
+ getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
+ if (BaseRegA == BaseRegB) {
+ int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+ int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowOffset + LowWidth <= HighOffset)
+ return true;
+ }
+ }
+ return false;
+}
+
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
@@ -640,7 +678,8 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
SrcReg = MI->getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- CmpValue = MI->getOperand(2).getImm();
+ // FIXME: CmpValue is only compared against zero, so normalize it to 0 or 1.
+ CmpValue = (MI->getOperand(2).getImm() != 0);
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
@@ -649,9 +688,14 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
SrcReg = MI->getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- CmpValue = AArch64_AM::decodeLogicalImmediate(
- MI->getOperand(2).getImm(),
- MI->getOpcode() == AArch64::ANDSWri ? 32 : 64);
+ // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
+ // while the type of CmpValue is int. When converting uint64_t to int,
+ // the high 32 bits of the uint64_t will be lost.
+ // In fact it causes a bug in spec2006-483.xalancbmk.
+ // CmpValue is only used to compare against zero in OptimizeCompareInstr.
+ CmpValue = (AArch64_AM::decodeLogicalImmediate(
+ MI->getOperand(2).getImm(),
+ MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
return true;
}
@@ -664,8 +708,8 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
const TargetMachine *TM = &MF->getTarget();
- const TargetInstrInfo *TII = TM->getInstrInfo();
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ const TargetInstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
+ const TargetRegisterInfo *TRI = TM->getSubtargetImpl()->getRegisterInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
@@ -697,6 +741,87 @@ static bool UpdateOperandRegClass(MachineInstr *Instr) {
return true;
}
+/// \brief Return the opcode that does not set flags when possible; otherwise
+/// return the original opcode. The caller is responsible for doing the actual
+/// substitution and legality checking.
+static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
+ // Don't convert all compare instructions, because for some the zero register
+ // encoding becomes the sp register.
+ bool MIDefinesZeroReg = false;
+ if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
+ MIDefinesZeroReg = true;
+
+ switch (MI->getOpcode()) {
+ default:
+ return MI->getOpcode();
+ case AArch64::ADDSWrr:
+ return AArch64::ADDWrr;
+ case AArch64::ADDSWri:
+ return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
+ case AArch64::ADDSWrs:
+ return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
+ case AArch64::ADDSWrx:
+ return AArch64::ADDWrx;
+ case AArch64::ADDSXrr:
+ return AArch64::ADDXrr;
+ case AArch64::ADDSXri:
+ return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
+ case AArch64::ADDSXrs:
+ return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
+ case AArch64::ADDSXrx:
+ return AArch64::ADDXrx;
+ case AArch64::SUBSWrr:
+ return AArch64::SUBWrr;
+ case AArch64::SUBSWri:
+ return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
+ case AArch64::SUBSWrs:
+ return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
+ case AArch64::SUBSWrx:
+ return AArch64::SUBWrx;
+ case AArch64::SUBSXrr:
+ return AArch64::SUBXrr;
+ case AArch64::SUBSXri:
+ return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
+ case AArch64::SUBSXrs:
+ return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
+ case AArch64::SUBSXrx:
+ return AArch64::SUBXrx;
+ }
+}
+
+/// Return true when the condition code could be modified on the instruction
+/// trace starting at \p From and ending at \p To.
+static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
+ const bool CheckOnlyCCWrites,
+ const TargetRegisterInfo *TRI) {
+  // We iterate backward starting at \p To until we hit \p From.
+ MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();
+
+ // Early exit if To is at the beginning of the BB.
+ if (I == B)
+ return true;
+
+  // Check whether \p From and \p To belong to the same basic block. If not,
+  // assume the condition code gets modified on some path between them.
+ if (To->getParent() != From->getParent())
+ return true;
+
+ // Check that NZCV isn't set on the trace.
+ for (--I; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
+ (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
+ // This instruction modifies or uses NZCV after the one we want to
+ // change.
+ return true;
+ if (I == B)
+ // We currently don't allow the instruction trace to cross basic
+ // block boundaries
+ return true;
+ }
+ return false;
+}
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register.
bool AArch64InstrInfo::optimizeCompareInstr(
@@ -706,28 +831,15 @@ bool AArch64InstrInfo::optimizeCompareInstr(
// Replace SUBSWrr with SUBWrr if NZCV is not used.
int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
if (Cmp_NZCV != -1) {
- unsigned NewOpc;
- switch (CmpInstr->getOpcode()) {
- default:
- return false;
- case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break;
- case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break;
- case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break;
- case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break;
- case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break;
- case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break;
- case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break;
- case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break;
- case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break;
- case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break;
- case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break;
- case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break;
- case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break;
- case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break;
- case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break;
- case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break;
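+    // If the compare only defines the zero register (its NZCV def is dead
+    // here), it has no visible effect and can simply be erased.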
+ if (CmpInstr->definesRegister(AArch64::WZR) ||
+ CmpInstr->definesRegister(AArch64::XZR)) {
+ CmpInstr->eraseFromParent();
+ return true;
}
-
+ unsigned Opc = CmpInstr->getOpcode();
+ unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
+ if (NewOpc == Opc)
+ return false;
const MCInstrDesc &MCID = get(NewOpc);
CmpInstr->setDesc(MCID);
CmpInstr->RemoveOperand(Cmp_NZCV);
@@ -738,6 +850,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
}
// Continue only if we have a "ri" where immediate is zero.
+  // FIXME: CmpValue has already been converted to 0 or 1 by analyzeCompare.
+ assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
if (CmpValue != 0 || SrcReg2 != 0)
return false;
@@ -750,36 +865,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MI)
return false;
- // We iterate backward, starting from the instruction before CmpInstr and
- // stop when reaching the definition of the source register or done with the
- // basic block, to check whether NZCV is used or modified in between.
- MachineBasicBlock::iterator I = CmpInstr, E = MI,
- B = CmpInstr->getParent()->begin();
-
- // Early exit if CmpInstr is at the beginning of the BB.
- if (I == B)
- return false;
-
- // Check whether the definition of SrcReg is in the same basic block as
- // Compare. If not, we can't optimize away the Compare.
- if (MI->getParent() != CmpInstr->getParent())
- return false;
-
- // Check that NZCV isn't set between the comparison instruction and the one we
- // want to change.
+ bool CheckOnlyCCWrites = false;
const TargetRegisterInfo *TRI = &getRegisterInfo();
- for (--I; I != E; --I) {
- const MachineInstr &Instr = *I;
-
- if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
- Instr.readsRegister(AArch64::NZCV, TRI))
- // This instruction modifies or uses NZCV after the one we want to
- // change. We can't do this transformation.
- return false;
- if (I == B)
- // The 'and' is below the comparison instruction.
- return false;
- }
+ if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
+ return false;
unsigned NewOpc = MI->getOpcode();
switch (MI->getOpcode()) {
@@ -893,6 +982,56 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return true;
}
+bool
+AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
+ return false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Reg = MI->getOperand(0).getReg();
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
+ const unsigned char MO_NC = AArch64II::MO_NC;
+
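+  // Expand the pseudo into a load of the stack guard global. The exact
+  // sequence depends on how the global reference is classified: a GOT load,
+  // a large-code-model MOVZ/MOVK address, or an ADRP + LDR page/offset pair.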
+ if ((OpFlags & AArch64II::MO_GOT) != 0) {
+ BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill).addImm(0)
+ .addMemOperand(*MI->memoperands_begin());
+ } else if (TM.getCodeModel() == CodeModel::Large) {
+ BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
+ BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill).addImm(0)
+ .addMemOperand(*MI->memoperands_begin());
+ } else {
+ BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
+ .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+ unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, LoFlags)
+ .addMemOperand(*MI->memoperands_begin());
+ }
+
+ MBB.erase(MI);
+
+ return true;
+}
+
/// Return true if this instruction has a non-zero immediate.
bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
@@ -1008,12 +1147,14 @@ bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
return true;
}
+ break;
case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
if (MI->getOperand(2).getImm() == 0) {
assert(MI->getDesc().getNumOperands() == 4 &&
MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
return true;
}
+ break;
}
return false;
}
@@ -1036,6 +1177,7 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
"invalid ORRv16i8 operands");
return true;
}
+ break;
}
return false;
}
@@ -1197,6 +1339,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
};
}
+bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
+ MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
+ const TargetRegisterInfo *TRI) const {
+ // Handle only loads/stores with base register followed by immediate offset.
+ if (LdSt->getNumOperands() != 3)
+ return false;
+ if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+ return false;
+
+  // Offset is calculated as the immediate operand multiplied by the scaling
+  // factor. Unscaled instructions have a scaling factor of 1.
+ int Scale = 0;
+ switch (LdSt->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ Width = 16;
+ Scale = 1;
+ break;
+ case AArch64::LDURXi:
+ case AArch64::LDURDi:
+ case AArch64::STURXi:
+ case AArch64::STURDi:
+ Width = 8;
+ Scale = 1;
+ break;
+ case AArch64::LDURWi:
+ case AArch64::LDURSi:
+ case AArch64::LDURSWi:
+ case AArch64::STURWi:
+ case AArch64::STURSi:
+ Width = 4;
+ Scale = 1;
+ break;
+ case AArch64::LDURHi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSHWi:
+ case AArch64::STURHi:
+ case AArch64::STURHHi:
+ Width = 2;
+ Scale = 1;
+ break;
+ case AArch64::LDURBi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSBWi:
+ case AArch64::STURBi:
+ case AArch64::STURBBi:
+ Width = 1;
+ Scale = 1;
+ break;
+ case AArch64::LDRXui:
+ case AArch64::STRXui:
+ Scale = Width = 8;
+ break;
+ case AArch64::LDRWui:
+ case AArch64::STRWui:
+ Scale = Width = 4;
+ break;
+ case AArch64::LDRBui:
+ case AArch64::STRBui:
+ Scale = Width = 1;
+ break;
+ case AArch64::LDRHui:
+ case AArch64::STRHui:
+ Scale = Width = 2;
+ break;
+ case AArch64::LDRSui:
+ case AArch64::STRSui:
+ Scale = Width = 4;
+ break;
+ case AArch64::LDRDui:
+ case AArch64::STRDui:
+ Scale = Width = 8;
+ break;
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ Scale = Width = 16;
+ break;
+ case AArch64::LDRBBui:
+ case AArch64::STRBBui:
+ Scale = Width = 1;
+ break;
+ case AArch64::LDRHHui:
+ case AArch64::STRHHui:
+ Scale = Width = 2;
+ break;
+  }
+
+ BaseReg = LdSt->getOperand(1).getReg();
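+  // e.g. LDRXui (Scale = 8) with an immediate operand of 3 addresses
+  // [BaseReg, #24].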
+ Offset = LdSt->getOperand(2).getImm() * Scale;
+ return true;
+}
+
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
@@ -1239,16 +1477,15 @@ bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
}
}
-MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx,
- uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const {
+MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
+ MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
+ const MDNode *Expr, DebugLoc DL) const {
MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
.addFrameIndex(FrameIx)
.addImm(0)
.addImm(Offset)
- .addMetadata(MDPtr);
+ .addMetadata(Var)
+ .addMetadata(Expr);
return &*MIB;
}
@@ -2132,3 +2369,592 @@ void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(AArch64::HINT);
NopInst.addOperand(MCOperand::CreateImm(0));
}
+/// useMachineCombiner - return true when a target supports MachineCombiner
+bool AArch64InstrInfo::useMachineCombiner() const {
+ // AArch64 supports the combiner
+ return true;
+}
+//
+// True when Opc sets the flags
+static bool isCombineInstrSettingFlag(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSXri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+//
+// 32b Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate32(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::ADDWrr:
+ case AArch64::ADDWri:
+ case AArch64::SUBWrr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSWri:
+ case AArch64::SUBSWrr:
+ // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
+ case AArch64::SUBWri:
+ case AArch64::SUBSWri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+//
+// 64b Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate64(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::ADDXrr:
+ case AArch64::ADDXri:
+ case AArch64::SUBXrr:
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSXri:
+ case AArch64::SUBSXrr:
+ // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
+ case AArch64::SUBXri:
+ case AArch64::SUBSXri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+//
+// Opcodes that can be combined with a MUL
+static bool isCombineInstrCandidate(unsigned Opc) {
+ return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
+}
+
+static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
+ unsigned MulOpc, unsigned ZeroReg) {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineInstr *MI = nullptr;
+ // We need a virtual register definition.
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ MI = MRI.getUniqueVRegDef(MO.getReg());
+ // And it needs to be in the trace (otherwise, it won't have a depth).
+ if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
+ return false;
+
+ assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
+         MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
+
+ // The third input reg must be zero.
+ if (MI->getOperand(3).getReg() != ZeroReg)
+ return false;
+
+  // Must only be used by the user we combine with.
+ if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ return false;
+
+ return true;
+}
+
+/// hasPattern - Return true when there is potentially a faster code sequence
+/// for an instruction chain ending in \p Root. All potential patterns are
+/// listed in the \p Pattern vector. Patterns should be sorted in priority
+/// order since the pattern evaluator stops checking as soon as it finds a
+/// faster sequence.
+bool AArch64InstrInfo::hasPattern(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
+ unsigned Opc = Root.getOpcode();
+ MachineBasicBlock &MBB = *Root.getParent();
+ bool Found = false;
+
+  if (!isCombineInstrCandidate(Opc))
+    return false;
+  if (isCombineInstrSettingFlag(Opc)) {
+    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
+    // When NZCV is live, bail out.
+    if (Cmp_NZCV == -1)
+      return false;
+    unsigned NewOpc = convertFlagSettingOpcode(&Root);
+    // When the opcode can't change, bail out.
+    // CHECKME: do we miss any cases for opcode conversion?
+    if (NewOpc == Opc)
+      return false;
+    Opc = NewOpc;
+ }
+
+ switch (Opc) {
+ default:
+ break;
+ case AArch64::ADDWrr:
+ assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
+ "ADDWrr does not have register operands");
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
+ Found = true;
+ }
+ if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::ADDXrr:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
+ Found = true;
+ }
+ if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::SUBWrr:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
+ Found = true;
+ }
+ if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::SUBXrr:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
+ Found = true;
+ }
+ if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
+ Found = true;
+ }
+ break;
+ case AArch64::ADDWri:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
+ Found = true;
+ }
+ break;
+ case AArch64::ADDXri:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
+ Found = true;
+ }
+ break;
+ case AArch64::SUBWri:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
+ AArch64::WZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
+ Found = true;
+ }
+ break;
+ case AArch64::SUBXri:
+ if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
+ AArch64::XZR)) {
+ Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
+ Found = true;
+ }
+ break;
+ }
+ return Found;
+}
+
+/// genMadd - Generate madd instruction and combine mul and add.
+/// Example:
+/// MUL I=A,B,0
+/// ADD R,I,C
+/// ==> MADD R,A,B,C
+/// \param Root is the ADD instruction
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated madd instruction
+/// \param IdxMulOpd is the index of the operand in Root that is the result of
+/// the MUL. In the example above IdxMulOpd is 1.
+/// \param MaddOpc the opcode of the madd instruction
+static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxMulOpd, unsigned MaddOpc,
+ const TargetRegisterClass *RC) {
+ assert(IdxMulOpd == 1 || IdxMulOpd == 2);
+
+ unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
+ MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
+ unsigned ResultReg = Root.getOperand(0).getReg();
+ unsigned SrcReg0 = MUL->getOperand(1).getReg();
+ bool Src0IsKill = MUL->getOperand(1).isKill();
+ unsigned SrcReg1 = MUL->getOperand(2).getReg();
+ bool Src1IsKill = MUL->getOperand(2).isKill();
+ unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
+ bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
+
+ if (TargetRegisterInfo::isVirtualRegister(ResultReg))
+ MRI.constrainRegClass(ResultReg, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
+ MRI.constrainRegClass(SrcReg0, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
+ MRI.constrainRegClass(SrcReg1, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
+ MRI.constrainRegClass(SrcReg2, RC);
+
+ MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
+ ResultReg)
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill))
+ .addReg(SrcReg2, getKillRegState(Src2IsKill));
+ // Insert the MADD
+ InsInstrs.push_back(MIB);
+ return MUL;
+}
+
+/// genMaddR - Generate madd instruction and combine mul and add using
+/// an extra virtual register
+/// Example - an ADD intermediate needs to be stored in a register:
+/// MUL I=A,B,0
+/// ADD R,I,Imm
+/// ==> ORR V, ZR, Imm
+/// ==> MADD R,A,B,V
+/// \param Root is the ADD instruction
+/// \param [out] InsInstrs is a vector of machine instructions and will
+/// contain the generated madd instruction
+/// \param IdxMulOpd is the index of the operand in Root that is the result of
+/// the MUL. In the example above IdxMulOpd is 1.
+/// \param MaddOpc the opcode of the madd instruction
+/// \param VR is a virtual register that holds the value of an ADD operand
+/// (V in the example above).
+static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxMulOpd, unsigned MaddOpc,
+ unsigned VR, const TargetRegisterClass *RC) {
+ assert(IdxMulOpd == 1 || IdxMulOpd == 2);
+
+ MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
+ unsigned ResultReg = Root.getOperand(0).getReg();
+ unsigned SrcReg0 = MUL->getOperand(1).getReg();
+ bool Src0IsKill = MUL->getOperand(1).isKill();
+ unsigned SrcReg1 = MUL->getOperand(2).getReg();
+ bool Src1IsKill = MUL->getOperand(2).isKill();
+
+ if (TargetRegisterInfo::isVirtualRegister(ResultReg))
+ MRI.constrainRegClass(ResultReg, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
+ MRI.constrainRegClass(SrcReg0, RC);
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
+ MRI.constrainRegClass(SrcReg1, RC);
+ if (TargetRegisterInfo::isVirtualRegister(VR))
+ MRI.constrainRegClass(VR, RC);
+
+ MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
+ ResultReg)
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill))
+ .addReg(VR);
+ // Insert the MADD
+ InsInstrs.push_back(MIB);
+ return MUL;
+}
+
+/// genAlternativeCodeSequence - When hasPattern() finds a pattern, this
+/// function generates the instructions that could replace the original
+/// code sequence.
+void AArch64InstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineBasicBlock &MBB = *Root.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+ MachineInstr *MUL;
+ const TargetRegisterClass *RC;
+ unsigned Opc;
+ switch (Pattern) {
+ default:
+ // signal error.
+ break;
+ case MachineCombinerPattern::MC_MULADDW_OP1:
+ case MachineCombinerPattern::MC_MULADDX_OP1:
+ // MUL I=A,B,0
+ // ADD R,I,C
+ // ==> MADD R,A,B,C
+ // --- Create(MADD);
+ if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) {
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+ break;
+ case MachineCombinerPattern::MC_MULADDW_OP2:
+ case MachineCombinerPattern::MC_MULADDX_OP2:
+ // MUL I=A,B,0
+ // ADD R,C,I
+ // ==> MADD R,A,B,C
+ // --- Create(MADD);
+ if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) {
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MC_MULADDWI_OP1:
+ case MachineCombinerPattern::MC_MULADDXI_OP1: {
+ // MUL I=A,B,0
+ // ADD R,I,Imm
+ // ==> ORR V, ZR, Imm
+ // ==> MADD R,A,B,V
+ // --- Create(MADD);
+ const TargetRegisterClass *OrrRC;
+ unsigned BitSize, OrrOpc, ZeroReg;
+ if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
+ OrrOpc = AArch64::ORRWri;
+ OrrRC = &AArch64::GPR32spRegClass;
+ BitSize = 32;
+ ZeroReg = AArch64::WZR;
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ OrrOpc = AArch64::ORRXri;
+ OrrRC = &AArch64::GPR64spRegClass;
+ BitSize = 64;
+ ZeroReg = AArch64::XZR;
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ unsigned NewVR = MRI.createVirtualRegister(OrrRC);
+ uint64_t Imm = Root.getOperand(2).getImm();
+
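+      // The add-immediate form carries an optional shift in operand 3; fold
+      // it into the immediate before trying to encode it as a logical
+      // immediate for the ORR.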
+ if (Root.getOperand(3).isImm()) {
+ unsigned Val = Root.getOperand(3).getImm();
+ Imm = Imm << Val;
+ }
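+      // Truncate the immediate to the low BitSize bits before encoding it.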
+ uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t Encoding;
+ if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
+ }
+ break;
+ }
+ case MachineCombinerPattern::MC_MULSUBW_OP1:
+ case MachineCombinerPattern::MC_MULSUBX_OP1: {
+ // MUL I=A,B,0
+ // SUB R,I, C
+ // ==> SUB V, 0, C
+ // ==> MADD R,A,B,V // = -C + A*B
+ // --- Create(MADD);
+ const TargetRegisterClass *SubRC;
+ unsigned SubOpc, ZeroReg;
+ if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
+ SubOpc = AArch64::SUBWrr;
+ SubRC = &AArch64::GPR32spRegClass;
+ ZeroReg = AArch64::WZR;
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ SubOpc = AArch64::SUBXrr;
+ SubRC = &AArch64::GPR64spRegClass;
+ ZeroReg = AArch64::XZR;
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ unsigned NewVR = MRI.createVirtualRegister(SubRC);
+ // SUB NewVR, 0, C
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
+ .addReg(ZeroReg)
+ .addOperand(Root.getOperand(2));
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
+ break;
+ }
+ case MachineCombinerPattern::MC_MULSUBW_OP2:
+ case MachineCombinerPattern::MC_MULSUBX_OP2:
+ // MUL I=A,B,0
+ // SUB R,C,I
+ // ==> MSUB R,A,B,C (computes C - A*B)
+ // --- Create(MSUB);
+ if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) {
+ Opc = AArch64::MSUBWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ Opc = AArch64::MSUBXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
+ break;
+ case MachineCombinerPattern::MC_MULSUBWI_OP1:
+ case MachineCombinerPattern::MC_MULSUBXI_OP1: {
+ // MUL I=A,B,0
+ // SUB R,I, Imm
+ // ==> ORR V, ZR, -Imm
+ // ==> MADD R,A,B,V // = -Imm + A*B
+ // --- Create(MADD);
+ const TargetRegisterClass *OrrRC;
+ unsigned BitSize, OrrOpc, ZeroReg;
+ if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
+ OrrOpc = AArch64::ORRWri;
+ OrrRC = &AArch64::GPR32spRegClass;
+ BitSize = 32;
+ ZeroReg = AArch64::WZR;
+ Opc = AArch64::MADDWrrr;
+ RC = &AArch64::GPR32RegClass;
+ } else {
+ OrrOpc = AArch64::ORRXri;
+ OrrRC = &AArch64::GPR64spRegClass;
+ BitSize = 64;
+ ZeroReg = AArch64::XZR;
+ Opc = AArch64::MADDXrrr;
+ RC = &AArch64::GPR64RegClass;
+ }
+ unsigned NewVR = MRI.createVirtualRegister(OrrRC);
+ int Imm = Root.getOperand(2).getImm();
+ if (Root.getOperand(3).isImm()) {
+ unsigned Val = Root.getOperand(3).getImm();
+ Imm = Imm << Val;
+ }
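+      // Negate the immediate and truncate it to BitSize bits; the subtract
+      // then becomes a multiply-add with the negated constant materialized
+      // by the ORR from the zero register.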
+ uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t Encoding;
+ if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
+ }
+ break;
+ }
+ } // end switch (Pattern)
+ // Record MUL and ADD/SUB for deletion
+ DelInstrs.push_back(MUL);
+ DelInstrs.push_back(&Root);
+
+ return;
+}
+
+/// \brief Replace a csinc-branch sequence by a simple conditional branch
+///
+/// Examples:
+/// 1.
+/// csinc w9, wzr, wzr, <condition code>
+/// tbnz w9, #0, 0x44
+/// to
+/// b.<inverted condition code>
+///
+/// 2.
+/// csinc w9, wzr, wzr, <condition code>
+/// tbz w9, #0, 0x44
+/// to
+/// b.<condition code>
+///
+/// \param MI Conditional Branch
+/// \return True when the simple conditional branch is generated
+///
+bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
+ bool IsNegativeBranch = false;
+ bool IsTestAndBranch = false;
+ unsigned TargetBBInMI = 0;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown branch instruction?");
+ case AArch64::Bcc:
+ return false;
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ TargetBBInMI = 1;
+ break;
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ TargetBBInMI = 1;
+ IsNegativeBranch = true;
+ break;
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ TargetBBInMI = 2;
+ IsTestAndBranch = true;
+ break;
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ TargetBBInMI = 2;
+ IsNegativeBranch = true;
+ IsTestAndBranch = true;
+ break;
+ }
+ // So we increment a zero register and test for bits other
+ // than bit 0? Conservatively bail out in case the verifier
+ // missed this case.
+ if (IsTestAndBranch && MI->getOperand(1).getImm())
+ return false;
+
+ // Find Definition.
+  assert(MI->getParent() && "Incomplete machine instruction\n");
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ unsigned VReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(VReg))
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+
+ // Look for CSINC
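+  // CSINC Wd, WZR, WZR, cc materializes !cc as 0/1 (0 when cc holds, 1
+  // otherwise), so branching on Wd's low bit is equivalent to branching on
+  // cc directly, inverted for the CBNZ/TBNZ forms.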
+ if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
+ DefMI->getOperand(1).getReg() == AArch64::WZR &&
+ DefMI->getOperand(2).getReg() == AArch64::WZR) &&
+ !(DefMI->getOpcode() == AArch64::CSINCXr &&
+ DefMI->getOperand(1).getReg() == AArch64::XZR &&
+ DefMI->getOperand(2).getReg() == AArch64::XZR))
+ return false;
+
+ if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
+ return false;
+
+ AArch64CC::CondCode CC =
+ (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
+ bool CheckOnlyCCWrites = true;
+ // Convert only when the condition code is not modified between
+ // the CSINC and the branch. The CC may be used by other
+ // instructions in between.
+ if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
+ return false;
+ MachineBasicBlock &RefToMBB = *MBB;
+ MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
+ DebugLoc DL = MI->getDebugLoc();
+ if (IsNegativeBranch)
+ CC = AArch64CC::getInvertedCondCode(CC);
+ BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
+ MI->eraseFromParent();
+ return true;
+}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index b27565e..30bf650 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64INSTRINFO_H
-#define LLVM_TARGET_AArch64INSTRINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRINFO_H
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"
#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"
@@ -51,6 +52,10 @@ public:
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DstReg, unsigned &SubIdx) const override;
+ bool
+ areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr) const override;
+
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const override;
unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -89,6 +94,10 @@ public:
unsigned &Offset,
const TargetRegisterInfo *TRI) const override;
+ bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
+ int &Offset, int &Width,
+ const TargetRegisterInfo *TRI) const;
+
bool enableClusterLoads() const override { return true; }
bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
@@ -98,8 +107,8 @@ public:
MachineInstr *Second) const override;
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
- uint64_t Offset, const MDNode *MDPtr,
- DebugLoc DL) const;
+ uint64_t Offset, const MDNode *Var,
+ const MDNode *Expr, DebugLoc DL) const;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc, unsigned Opcode,
@@ -119,6 +128,7 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ using TargetInstrInfo::foldMemoryOperandImpl;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
@@ -155,7 +165,27 @@ public:
bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
-
+ bool optimizeCondBranch(MachineInstr *MI) const override;
+  /// hasPattern - Return true when there is potentially a faster code
+  /// sequence for an instruction chain ending in <Root>. All potential
+  /// patterns are listed in the <Pattern> array.
+ bool hasPattern(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern)
+ const override;
+
+  /// genAlternativeCodeSequence - When hasPattern() finds a pattern, this
+  /// function generates the instructions that could replace the original
+  /// code sequence.
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+ /// useMachineCombiner - AArch64 supports MachineCombiner
+ bool useMachineCombiner() const override;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
private:
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
MachineBasicBlock *TBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 1211fba..252ed40 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -24,6 +24,7 @@ def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
+def IsCyclone : Predicate<"Subtarget->isCyclone()">;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
@@ -236,6 +237,12 @@ def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
+def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
+
+def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
+def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
//===----------------------------------------------------------------------===//
@@ -331,13 +338,23 @@ def : InstAlias<"wfi", (HINT 0b011)>;
def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
- // As far as LLVM is concerned this writes to the system's exclusive monitors.
+// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
-def DMB : CRmSystemI<barrier_op, 0b101, "dmb">;
-def DSB : CRmSystemI<barrier_op, 0b100, "dsb">;
-def ISB : CRmSystemI<barrier_op, 0b110, "isb">;
+// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
+// model patterns with sufficiently fine granularity.
+let mayLoad = ?, mayStore = ? in {
+def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
+ [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
+
+def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
+ [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
+
+def ISB : CRmSystemI<barrier_op, 0b110, "isb",
+ [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
+}
+
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
@@ -1163,6 +1180,9 @@ defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;
+
defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;
@@ -1203,6 +1223,7 @@ let Predicates = [IsLE] in {
defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
}
defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
@@ -1216,6 +1237,7 @@ let Predicates = [IsLE] in {
defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10
@@ -1345,6 +1367,8 @@ let Predicates = [IsLE] in {
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
@@ -1366,6 +1390,8 @@ let Predicates = [IsLE] in {
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
@@ -1502,6 +1528,8 @@ let Predicates = [IsLE] in {
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
@@ -1522,6 +1550,8 @@ let Predicates = [IsLE] in {
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
// anyext -> zext
@@ -1818,6 +1848,7 @@ let Predicates = [IsLE] in {
defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
}
defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
@@ -1832,6 +1863,7 @@ let Predicates = [IsLE] in {
defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
@@ -1882,6 +1914,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v2i32 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v4f16 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
@@ -1911,6 +1946,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v2i64 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v8f16 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(store (f128 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
@@ -1973,6 +2011,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v2i32 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4f16 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
@@ -2003,6 +2044,9 @@ let Predicates = [IsLE] in {
def : Pat<(store (v2f64 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8f16 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
// unscaled i64 truncating stores
@@ -2079,6 +2123,8 @@ def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
@@ -2092,6 +2138,8 @@ def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//---
// (immediate post-indexed)
@@ -2129,6 +2177,8 @@ def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
@@ -2142,6 +2192,8 @@ def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
@@ -2234,89 +2286,6 @@ def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
defm FCVT : FPConversion<"fcvt">;
-def : Pat<(f32_to_f16 FPR32:$Rn),
- (i32 (COPY_TO_REGCLASS
- (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
- GPR32))>;
-
-def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
- [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
-
-// When converting from f16 coming directly from a load, make sure we
-// load into the FPR16 registers rather than going through the GPRs.
-// f16->f32
-def : Pat<(f32 (f16_to_f32 (i32
- (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend))))),
- (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f32 (f16_to_f32 (i32
- (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend))))),
- (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f32 (f16_to_f32 (i32
- (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
- (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f32 (f16_to_f32 (i32
- (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
- (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
-
-// f16->f64
-def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
- (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend))))))),
- (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
- (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend))))))),
- (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
- (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
- (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
- (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
- (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
-
-// When converting to f16 going directly to a store, make sure we use the
-// appropriate direct conversion instructions and store via the FPR16
-// registers rather than going through the GPRs.
-let AddedComplexity = 10 in {
-// f32->f16
-def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
- (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend)),
- (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
- (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)),
- (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
- (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
- (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
- (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
- (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
-// f64->f16
-def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
- (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend)),
- (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
- (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)),
- (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
- (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
- (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
- (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
- (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
-}
-
-
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
@@ -2457,6 +2426,28 @@ defm FMOV : FPMoveImmediate<"fmov">;
//===----------------------------------------------------------------------===//
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
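+// Match the common integer abs idiom (x + (x >>s bits-1)) ^ (x >>s bits-1)
+// directly to the vector ABS instructions.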
+def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))),
+ (v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))),
+ (ABSv8i8 V64:$src)>;
+def : Pat<(xor (v4i16 (AArch64vashr V64:$src, (i32 15))),
+ (v4i16 (add V64:$src, (AArch64vashr V64:$src, (i32 15))))),
+ (ABSv4i16 V64:$src)>;
+def : Pat<(xor (v2i32 (AArch64vashr V64:$src, (i32 31))),
+ (v2i32 (add V64:$src, (AArch64vashr V64:$src, (i32 31))))),
+ (ABSv2i32 V64:$src)>;
+def : Pat<(xor (v16i8 (AArch64vashr V128:$src, (i32 7))),
+ (v16i8 (add V128:$src, (AArch64vashr V128:$src, (i32 7))))),
+ (ABSv16i8 V128:$src)>;
+def : Pat<(xor (v8i16 (AArch64vashr V128:$src, (i32 15))),
+ (v8i16 (add V128:$src, (AArch64vashr V128:$src, (i32 15))))),
+ (ABSv8i16 V128:$src)>;
+def : Pat<(xor (v4i32 (AArch64vashr V128:$src, (i32 31))),
+ (v4i32 (add V128:$src, (AArch64vashr V128:$src, (i32 31))))),
+ (ABSv4i32 V128:$src)>;
+def : Pat<(xor (v2i64 (AArch64vashr V128:$src, (i32 63))),
+ (v2i64 (add V128:$src, (AArch64vashr V128:$src, (i32 63))))),
+ (ABSv2i64 V128:$src)>;
+
defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
@@ -2485,6 +2476,11 @@ def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
(i64 2))))),
(FCVTLv4i32 V128:$Rn)>;
+def : Pat<(v4f32 (fextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (fextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
+ (i64 4))))),
+ (FCVTLv8i16 V128:$Rn)>;
+
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
@@ -2496,6 +2492,7 @@ def : Pat<(concat_vectors V64:$Rd,
(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
(FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(v4f16 (fround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
(FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
@@ -2578,6 +2575,10 @@ defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
+def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
+def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
+def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
+def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
@@ -3174,6 +3175,46 @@ defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
+// Additional patterns for SMULL and UMULL
+multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+ (INST8B V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+ (INST4H V64:$Rn, V64:$Rm)>;
+ def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+ (INST2S V64:$Rn, V64:$Rm)>;
+}
+
+defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
+ SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
+defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
+ UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
+
+// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
+multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+ (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+ (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
+ def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+ (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
+}
+
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
+ SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
+ UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
+ SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
+ UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
+
// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
(PMULLv1i64 V64:$Rn, V64:$Rm)>;
@@ -3256,6 +3297,10 @@ def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
(EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
(EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
// We use EXT to handle extract_subvector to copy the upper 64-bits of a
// 128-bit vector.
@@ -3267,6 +3312,8 @@ def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
@@ -3379,6 +3426,19 @@ def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
(v2f64 (DUPv2i64lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
(i64 0)))>;
+def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
+ (v4f16 (DUPv4i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
+def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
+ (v8f16 (DUPv8i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
+
+def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
+def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
(DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
@@ -3500,6 +3560,23 @@ def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
+def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
+ (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi16lane
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexS:$imm,
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0)),
+ dsub)>;
+
+def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
+ (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
+ (INSvi16lane
+ V128:$Rn, VectorIndexH:$imm,
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0))>;
+
def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
(f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
(EXTRACT_SUBREG
@@ -3580,6 +3657,7 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
dsub)>;
}
+defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
@@ -3595,6 +3673,8 @@ def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
+def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
+ (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
(f64 (EXTRACT_SUBREG
(INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
@@ -3605,6 +3685,11 @@ def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
(INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
V128:$Rn, VectorIndexS:$idx),
ssub))>;
+def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
+ (f16 (EXTRACT_SUBREG
+ (INSvi16lane (v8f16 (IMPLICIT_DEF)), 0,
+ V128:$Rn, VectorIndexH:$idx),
+ hsub))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
@@ -3619,6 +3704,7 @@ def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
+def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v16i8, v8i8>;
// If the high lanes are undef, though, we can just ignore them:
@@ -4459,7 +4545,7 @@ class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
0),
dsub)),
0),
- ssub)))>, Requires<[NotForCodeSize]>;
+ ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -4512,8 +4598,8 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
0),
dsub)),
0),
- dsub)))>, Requires<[NotForCodeSize]>;
-
+ dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
+
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
@@ -4636,6 +4722,10 @@ def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
(LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
(LD1Rv1d GPR64sp:$Rn)>;
+def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
@@ -4649,6 +4739,7 @@ def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
+def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>;
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
@@ -4663,6 +4754,7 @@ def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
+def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>;
defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
@@ -4690,6 +4782,7 @@ def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
+def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
let AddedComplexity = 15 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
@@ -4704,6 +4797,7 @@ def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
+def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>;
multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
@@ -4728,6 +4822,7 @@ defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
+defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
@@ -4751,6 +4846,7 @@ defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
+defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
let mayStore = 1, neverHasSideEffects = 1 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
@@ -4929,10 +5025,77 @@ def : Pat<(trap), (BRK 1)>;
// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//
+// Natural vector casts (64 bit)
+def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+// Natural vector casts (128 bit)
+def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
+
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
@@ -4941,6 +5104,8 @@ def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
@@ -4953,6 +5118,8 @@ def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
(REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
+ (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
@@ -4962,6 +5129,8 @@ def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
(REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
+ (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
@@ -4990,6 +5159,7 @@ let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
@@ -4999,6 +5169,8 @@ def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
(v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
(v1i64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
+ (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
(v1i64 (REV64v2i32 FPR64:$src))>;
}
@@ -5011,6 +5183,7 @@ def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
@@ -5023,6 +5196,8 @@ def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
+ (v2i32 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
@@ -5031,6 +5206,7 @@ def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
@@ -5043,6 +5219,8 @@ def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
(v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
(v4i16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))),
+ (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
(v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
@@ -5050,12 +5228,41 @@ def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
}
let Predicates = [IsLE] in {
+def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
+ (v4f16 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+}
+
+
+
+let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
@@ -5070,6 +5277,8 @@ def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
(v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
(v8i8 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
+ (v8i8 (REV16v8i8 FPR64:$src))>;
}
let Predicates = [IsLE] in {
@@ -5077,6 +5286,7 @@ def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
@@ -5087,6 +5297,8 @@ def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
(f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
(f64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
+ (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
@@ -5096,6 +5308,7 @@ def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
@@ -5106,6 +5319,8 @@ def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
(v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
(v1f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
+ (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
@@ -5116,6 +5331,7 @@ def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
@@ -5128,6 +5344,8 @@ def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
+ (v2f32 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
@@ -5137,6 +5355,7 @@ def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
@@ -5148,6 +5367,9 @@ def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
(f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
(f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
@@ -5162,6 +5384,7 @@ let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
@@ -5173,6 +5396,8 @@ def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
(v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
(v2f64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
+ (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
(v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
@@ -5183,6 +5408,7 @@ def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
@@ -5193,6 +5419,8 @@ def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
(REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
(v4f32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
+ (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
(v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
@@ -5208,6 +5436,7 @@ def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
@@ -5221,6 +5450,8 @@ def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
(v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
(v2i64 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
+ (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
@@ -5230,6 +5461,7 @@ def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
@@ -5244,6 +5476,8 @@ def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
(v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
(v4i32 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
+ (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
@@ -5254,6 +5488,7 @@ def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
@@ -5270,6 +5505,36 @@ def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
(v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
(v8i16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))),
+ (v8i16 (REV32v8i16 FPR128:$src))>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
+ (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
+ (v8f16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
+ (v8f16 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
+ (v8f16 (REV32v8i16 FPR128:$src))>;
}
let Predicates = [IsLE] in {
@@ -5279,6 +5544,7 @@ def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
@@ -5295,6 +5561,8 @@ def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
(v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
(v16i8 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
+ (v16i8 (REV16v16i8 FPR128:$src))>;
}
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
@@ -5318,6 +5586,8 @@ def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 3df9c4f..8157981 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -13,20 +13,21 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-ldst-opt"
@@ -108,7 +109,7 @@ private:
int getMemSize(MachineInstr *MemMI);
};
char AArch64LoadStoreOpt::ID = 0;
-}
+} // namespace
static bool isUnscaledLdst(unsigned Opc) {
switch (Opc) {
@@ -931,8 +932,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
- TII = static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
- TRI = TM.getRegisterInfo();
+ TII = static_cast<const AArch64InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
bool Modified = false;
for (auto &MBB : Fn)
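The two replaced lines in this hunk follow the LLVM-wide migration of per-function codegen hooks from TargetMachine onto TargetSubtargetInfo. A minimal standalone sketch of the new accessor chain, using the accessors that appear in the patch plus MachineFunction::getTarget() (illustrative only, not part of the patch):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

// Fetch codegen hooks through the subtarget instead of the TargetMachine.
static void collectTargetHooks(const MachineFunction &Fn) {
  const TargetSubtargetInfo *ST = Fn.getTarget().getSubtargetImpl();
  const TargetInstrInfo *TII = ST->getInstrInfo();       // was TM.getInstrInfo()
  const TargetRegisterInfo *TRI = ST->getRegisterInfo(); // was TM.getRegisterInfo()
  (void)TII;
  (void)TRI;
}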
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 75a17b9..e57b0f4 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -25,8 +25,7 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, Mangler &mang,
- AsmPrinter &printer)
+AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
MCSymbol *
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index ba50ba9..1e29b80 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64_MCINSTLOWER_H
-#define AArch64_MCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MCINSTLOWER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MCINSTLOWER_H
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Compiler.h"
@@ -33,7 +33,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64MCInstLower {
Triple TargetTriple;
public:
- AArch64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer);
+ AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/lib/Target/AArch64/AArch64MachineCombinerPattern.h b/lib/Target/AArch64/AArch64MachineCombinerPattern.h
new file mode 100644
index 0000000..4164b33
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineCombinerPattern.h
@@ -0,0 +1,42 @@
+//===- AArch64MachineCombinerPattern.h -===//
+//===- AArch64 instruction patterns supported by the combiner -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the instruction patterns supported by the machine combiner.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H
+
+namespace llvm {
+
+/// Enumeration of the instruction patterns supported by the machine combiner.
+namespace MachineCombinerPattern {
+enum MC_PATTERN : int {
+ MC_NONE = 0,
+ MC_MULADDW_OP1 = 1,
+ MC_MULADDW_OP2 = 2,
+ MC_MULSUBW_OP1 = 3,
+ MC_MULSUBW_OP2 = 4,
+ MC_MULADDWI_OP1 = 5,
+ MC_MULSUBWI_OP1 = 6,
+ MC_MULADDX_OP1 = 7,
+ MC_MULADDX_OP2 = 8,
+ MC_MULSUBX_OP1 = 9,
+ MC_MULSUBX_OP2 = 10,
+ MC_MULADDXI_OP1 = 11,
+ MC_MULSUBXI_OP1 = 12
+};
+} // end namespace MachineCombinerPattern
+} // end namespace llvm
+
+#endif
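Reading the enumerator names: the W/X suffix distinguishes 32-bit from 64-bit multiplies, the trailing I marks immediate variants, and OP1/OP2 appears to record which operand of the add/sub the multiply feeds (an interpretation of the naming; the authoritative mapping lives in the AArch64 combiner code, not in this header). A small sketch of dispatching on the enum, assuming the header above is on the include path:

#include <cstdio>
#include "AArch64MachineCombinerPattern.h"

// Print a rough, name-based description of a combiner pattern. The wording of
// each case is an assumption drawn from the enumerator name only.
static const char *describe(llvm::MachineCombinerPattern::MC_PATTERN P) {
  using namespace llvm::MachineCombinerPattern;
  switch (P) {
  case MC_MULADDW_OP1: return "32-bit add whose operand 1 is a multiply";
  case MC_MULADDW_OP2: return "32-bit add whose operand 2 is a multiply";
  case MC_MULSUBX_OP1: return "64-bit sub whose operand 1 is a multiply";
  default:             return "other or none";
  }
}

int main() {
  std::printf("%s\n", describe(llvm::MachineCombinerPattern::MC_MULADDW_OP1));
}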
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 7c257ba..536a8d0 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64MACHINEFUNCTIONINFO_H
-#define AArch64MACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -160,4 +160,4 @@ private:
};
} // End llvm namespace
-#endif // AArch64MACHINEFUNCTIONINFO_H
+#endif
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
new file mode 100644
index 0000000..f942c4e
--- /dev/null
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -0,0 +1,383 @@
+//===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file contains the AArch64 / Cortex-A57 specific register allocation
+// constraints for use by the PBQP register allocator.
+//
+// It is essentially a transcription of what is contained in
+// AArch64A57FPLoadBalancing, which tries to use a balanced
+// mix of odd and even D-registers when performing a critical sequence of
+// independent, non-quadword FP/ASIMD floating-point multiply-accumulates.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-pbqp"
+
+#include "AArch64.h"
+#include "AArch64PBQPRegAlloc.h"
+#include "AArch64RegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+#ifndef NDEBUG
+bool isFPReg(unsigned reg) {
+ return AArch64::FPR32RegClass.contains(reg) ||
+ AArch64::FPR64RegClass.contains(reg) ||
+ AArch64::FPR128RegClass.contains(reg);
+}
+#endif
+
+bool isOdd(unsigned reg) {
+ switch (reg) {
+ default:
+ llvm_unreachable("Register is not from the expected class!");
+ case AArch64::S1:
+ case AArch64::S3:
+ case AArch64::S5:
+ case AArch64::S7:
+ case AArch64::S9:
+ case AArch64::S11:
+ case AArch64::S13:
+ case AArch64::S15:
+ case AArch64::S17:
+ case AArch64::S19:
+ case AArch64::S21:
+ case AArch64::S23:
+ case AArch64::S25:
+ case AArch64::S27:
+ case AArch64::S29:
+ case AArch64::S31:
+ case AArch64::D1:
+ case AArch64::D3:
+ case AArch64::D5:
+ case AArch64::D7:
+ case AArch64::D9:
+ case AArch64::D11:
+ case AArch64::D13:
+ case AArch64::D15:
+ case AArch64::D17:
+ case AArch64::D19:
+ case AArch64::D21:
+ case AArch64::D23:
+ case AArch64::D25:
+ case AArch64::D27:
+ case AArch64::D29:
+ case AArch64::D31:
+ case AArch64::Q1:
+ case AArch64::Q3:
+ case AArch64::Q5:
+ case AArch64::Q7:
+ case AArch64::Q9:
+ case AArch64::Q11:
+ case AArch64::Q13:
+ case AArch64::Q15:
+ case AArch64::Q17:
+ case AArch64::Q19:
+ case AArch64::Q21:
+ case AArch64::Q23:
+ case AArch64::Q25:
+ case AArch64::Q27:
+ case AArch64::Q29:
+ case AArch64::Q31:
+ return true;
+ case AArch64::S0:
+ case AArch64::S2:
+ case AArch64::S4:
+ case AArch64::S6:
+ case AArch64::S8:
+ case AArch64::S10:
+ case AArch64::S12:
+ case AArch64::S14:
+ case AArch64::S16:
+ case AArch64::S18:
+ case AArch64::S20:
+ case AArch64::S22:
+ case AArch64::S24:
+ case AArch64::S26:
+ case AArch64::S28:
+ case AArch64::S30:
+ case AArch64::D0:
+ case AArch64::D2:
+ case AArch64::D4:
+ case AArch64::D6:
+ case AArch64::D8:
+ case AArch64::D10:
+ case AArch64::D12:
+ case AArch64::D14:
+ case AArch64::D16:
+ case AArch64::D18:
+ case AArch64::D20:
+ case AArch64::D22:
+ case AArch64::D24:
+ case AArch64::D26:
+ case AArch64::D28:
+ case AArch64::D30:
+ case AArch64::Q0:
+ case AArch64::Q2:
+ case AArch64::Q4:
+ case AArch64::Q6:
+ case AArch64::Q8:
+ case AArch64::Q10:
+ case AArch64::Q12:
+ case AArch64::Q14:
+ case AArch64::Q16:
+ case AArch64::Q18:
+ case AArch64::Q20:
+ case AArch64::Q22:
+ case AArch64::Q24:
+ case AArch64::Q26:
+ case AArch64::Q28:
+ case AArch64::Q30:
+ return false;
+
+ }
+}
+
+bool haveSameParity(unsigned reg1, unsigned reg2) {
+ assert(isFPReg(reg1) && "Expecting an FP register for reg1");
+ assert(isFPReg(reg2) && "Expecting an FP register for reg2");
+
+ return isOdd(reg1) == isOdd(reg2);
+}
+
+}
+
+bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
+ unsigned Ra) {
+ if (Rd == Ra)
+ return false;
+
+ LiveIntervals &LIs = G.getMetadata().LIS;
+
+ if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) {
+ DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd)
+ << '\n');
+ DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra)
+ << '\n');
+ return false;
+ }
+
+ PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
+ PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(Ra);
+
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
+ &G.getNodeMetadata(node1).getAllowedRegs();
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRaAllowed =
+ &G.getNodeMetadata(node2).getAllowedRegs();
+
+ PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
+
+ // The edge does not exist. Create one with the appropriate interference
+ // costs.
+ if (edge == G.invalidEdgeId()) {
+ const LiveInterval &ld = LIs.getInterval(Rd);
+ const LiveInterval &la = LIs.getInterval(Ra);
+ bool livesOverlap = ld.overlaps(la);
+
+ PBQPRAGraph::RawMatrix costs(vRdAllowed->size() + 1,
+ vRaAllowed->size() + 1, 0);
+ for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+ for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (livesOverlap && TRI->regsOverlap(pRd, pRa))
+ costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ else
+ costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
+ }
+ }
+ G.addEdge(node1, node2, std::move(costs));
+ return true;
+ }
+
+ if (G.getEdgeNode1Id(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(vRdAllowed, vRaAllowed);
+ }
+
+ // Enforce minCost(sameParity(RaClass)) > maxCost(otherParity(RdClass))
+ PBQPRAGraph::RawMatrix costs(G.getEdgeCosts(edge));
+ for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+
+ // Get the maximum cost (excluding unallocatable reg) for same parity
+ // registers
+ PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
+ for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (haveSameParity(pRd, pRa))
+ if (costs[i + 1][j + 1] !=
+ std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ costs[i + 1][j + 1] > sameParityMax)
+ sameParityMax = costs[i + 1][j + 1];
+ }
+
+ // Ensure all registers with a different parity have a higher cost
+ // than sameParityMax
+ for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRaAllowed)[j];
+ if (!haveSameParity(pRd, pRa))
+ if (sameParityMax > costs[i + 1][j + 1])
+ costs[i + 1][j + 1] = sameParityMax + 1.0;
+ }
+ }
+ G.setEdgeCosts(edge, std::move(costs));
+
+ return true;
+}
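As a worked illustration of the cost shaping performed by addIntraChainConstraint, the following standalone sketch (plain C++, no PBQP types, and register parity simplified to the low bit of a small index, which is an assumption made only for the example) builds the same kind of matrix for four candidate registers: overlapping assignments are forbidden when the live ranges overlap, same-parity pairs are free, and cross-parity pairs cost 1. The real code additionally offsets every index by one so that row/column 0 stays reserved for the spill option.

#include <cstdio>
#include <limits>
#include <vector>

int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  std::vector<unsigned> RdAllowed = {0, 1, 2, 3}; // candidate regs for Rd
  std::vector<unsigned> RaAllowed = {0, 1, 2, 3}; // candidate regs for Ra
  bool LivesOverlap = true;                       // Rd and Ra live at the same time
  for (unsigned I = 0; I < RdAllowed.size(); ++I) {
    for (unsigned J = 0; J < RaAllowed.size(); ++J) {
      double Cost;
      if (LivesOverlap && RdAllowed[I] == RaAllowed[J])
        Cost = Inf;                                              // interference
      else
        Cost = ((RdAllowed[I] & 1) == (RaAllowed[J] & 1)) ? 0.0 : 1.0;
      std::printf("%5g", Cost);
    }
    std::printf("\n");
  }
  return 0;
}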
+
+void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
+ unsigned Ra) {
+ LiveIntervals &LIs = G.getMetadata().LIS;
+
+ // Do some Chain management
+ if (Chains.count(Ra)) {
+ if (Rd != Ra) {
+ DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
+ << PrintReg(Rd, TRI) << '\n';);
+ Chains.remove(Ra);
+ Chains.insert(Rd);
+ }
+ } else {
+ DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
+ << '\n';);
+ Chains.insert(Rd);
+ }
+
+ PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
+
+ const LiveInterval &ld = LIs.getInterval(Rd);
+ for (auto r : Chains) {
+ // Skip self
+ if (r == Rd)
+ continue;
+
+ const LiveInterval &lr = LIs.getInterval(r);
+ if (ld.overlaps(lr)) {
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
+ &G.getNodeMetadata(node1).getAllowedRegs();
+
+ PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(r);
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRrAllowed =
+ &G.getNodeMetadata(node2).getAllowedRegs();
+
+ PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
+ assert(edge != G.invalidEdgeId() &&
+ "PBQP error ! The edge should exist !");
+
+ DEBUG(dbgs() << "Refining constraint !\n";);
+
+ if (G.getEdgeNode1Id(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(vRdAllowed, vRrAllowed);
+ }
+
+ // Enforce that cost is higher with all other Chains of the same parity
+ PBQP::Matrix costs(G.getEdgeCosts(edge));
+ for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
+ unsigned pRd = (*vRdAllowed)[i];
+
+ // Get the maximum cost (excluding unallocatable reg) for all other
+ // parity registers
+ PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
+ for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRrAllowed)[j];
+ if (!haveSameParity(pRd, pRa))
+ if (costs[i + 1][j + 1] !=
+ std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ costs[i + 1][j + 1] > sameParityMax)
+ sameParityMax = costs[i + 1][j + 1];
+ }
+
+ // Ensure all registers with same parity have a higher cost
+ // than sameParityMax
+ for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) {
+ unsigned pRa = (*vRrAllowed)[j];
+ if (haveSameParity(pRd, pRa))
+ if (sameParityMax > costs[i + 1][j + 1])
+ costs[i + 1][j + 1] = sameParityMax + 1.0;
+ }
+ }
+ G.setEdgeCosts(edge, std::move(costs));
+ }
+ }
+}
+
+static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
+ const MachineInstr &MI) {
+ LiveInterval LI = LIs.getInterval(reg);
+ SlotIndex SI = LIs.getInstructionIndex(&MI);
+ return LI.expiredAt(SI);
+}
+
+void A57ChainingConstraint::apply(PBQPRAGraph &G) {
+ const MachineFunction &MF = G.getMetadata().MF;
+ LiveIntervals &LIs = G.getMetadata().LIS;
+
+ TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+ DEBUG(MF.dump());
+
+ for (const auto &MBB: MF) {
+ Chains.clear(); // FIXME: is this really needed? Could this work at the MF level instead?
+
+ for (const auto &MI: MBB) {
+
+ // Forget Chains which have expired
+ for (auto r : Chains) {
+ SmallVector<unsigned, 8> toDel;
+ if(regJustKilledBefore(LIs, r, MI)) {
+ DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
+ MI.print(dbgs()););
+ toDel.push_back(r);
+ }
+
+ while (!toDel.empty()) {
+ Chains.remove(toDel.back());
+ toDel.pop_back();
+ }
+ }
+
+ switch (MI.getOpcode()) {
+ case AArch64::FMSUBSrrr:
+ case AArch64::FMADDSrrr:
+ case AArch64::FNMSUBSrrr:
+ case AArch64::FNMADDSrrr:
+ case AArch64::FMSUBDrrr:
+ case AArch64::FMADDDrrr:
+ case AArch64::FNMSUBDrrr:
+ case AArch64::FNMADDDrrr: {
+ unsigned Rd = MI.getOperand(0).getReg();
+ unsigned Ra = MI.getOperand(3).getReg();
+
+ if (addIntraChainConstraint(G, Rd, Ra))
+ addInterChainConstraint(G, Rd, Ra);
+ break;
+ }
+
+ case AArch64::FMLAv2f32:
+ case AArch64::FMLSv2f32: {
+ unsigned Rd = MI.getOperand(0).getReg();
+ addInterChainConstraint(G, Rd, Rd);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+ }
+}
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
new file mode 100644
index 0000000..4f656f9
--- /dev/null
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -0,0 +1,38 @@
+//===-- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/PBQPRAConstraint.h"
+
+namespace llvm {
+
+/// Add the accumulator chaining constraint to a PBQP graph
+class A57ChainingConstraint : public PBQPRAConstraint {
+public:
+ // Add A57 specific constraints to the PBQP graph.
+ void apply(PBQPRAGraph &G) override;
+
+private:
+ SmallSetVector<unsigned, 32> Chains;
+ const TargetRegisterInfo *TRI;
+
+ // Add the accumulator chaining constraint, inside the chain, i.e. so that
+ // parity(Rd) == parity(Ra).
+ // \return true if a constraint was added
+ bool addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
+
+ // Add constraints between existing chains
+ void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
+};
+}
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
diff --git a/lib/Target/AArch64/AArch64PerfectShuffle.h b/lib/Target/AArch64/AArch64PerfectShuffle.h
index b22fa24..9e9eec4 100644
--- a/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
+
// 31 entries have cost 0
// 242 entries have cost 1
// 1447 entries have cost 2
@@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = {
835584U, // <u,u,u,u>: Cost 0 copy LHS
0
};
+
+#endif
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 4723cc4..16c33b7 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -21,18 +21,18 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -569,7 +569,7 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) {
// global. Do not promote constant expressions either, as they may
// require some code expansion.
if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
- AlreadyChecked.insert(Cst))
+ AlreadyChecked.insert(Cst).second)
LocalChange |= promoteConstant(Cst);
}
}
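The .second appended in this hunk follows the change of SmallPtrSet/SmallSet::insert to the standard pair<iterator, bool> return contract. A minimal sketch of the idiom with std::set, whose insert has the same contract (illustrative only):

#include <cstdio>
#include <set>

int main() {
  std::set<int> AlreadyChecked;
  for (int Cst : {1, 2, 1}) {
    // .second is true only the first time a value is inserted, i.e. the
    // "have we already looked at this constant?" test the pass relies on.
    if (AlreadyChecked.insert(Cst).second)
      std::printf("first time seeing %d\n", Cst);
    else
      std::printf("already checked %d\n", Cst);
  }
  return 0;
}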
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 01b9587..d734d43 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -76,7 +76,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
BitVector
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -105,7 +105,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
unsigned Reg) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
switch (Reg) {
default:
@@ -169,7 +169,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
}
@@ -236,7 +236,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = MF.getFrameInfo();
// Estimate an offset from the frame pointer.
@@ -326,7 +326,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
- MF.getTarget().getFrameLowering());
+ MF.getSubtarget().getFrameLowering());
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned FrameReg;
@@ -364,7 +364,7 @@ namespace llvm {
unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
switch (RC->getID()) {
default:
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index 76af1ed..51a5034 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64REGISTERINFO_H
-#define LLVM_TARGET_AArch64REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64REGISTERINFO_H
#define GET_REGINFO_HEADER
#include "AArch64GenRegisterInfo.inc"
@@ -98,4 +98,4 @@ public:
} // end namespace llvm
-#endif // LLVM_TARGET_AArch64REGISTERINFO_H
+#endif
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index a30e4ad..d5ff3f1 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -390,13 +390,14 @@ def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
}
def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
- v1i64],
+ v1i64, v4f16],
64, (sequence "D%u", 0, 31)>;
// We don't (yet) have an f128 legal type, so don't use that here. We
// normalize 128-bit vectors to v2f64 for arg passing and such, so use
// that here.
def FPR128 : RegisterClass<"AArch64",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128],
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128,
+ v8f16],
128, (sequence "Q%u", 0, 31)>;
// The lower 16 vector registers. Some instructions can only take registers
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 8209f96..3ec4157 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -12,11 +12,24 @@
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// The Cortex-A57 is a traditional superscalar microprocessor with a
+// conservative 3-wide in-order stage for decode and dispatch. Combined with the
+// much wider out-of-order issue stage, this produced a need to carefully
+// schedule micro-ops so that all three decoded each cycle are successfully
+// issued as the reservation station(s) simply don't stay occupied for long.
+// Therefore, IssueWidth is set to the narrower of the two at three, while still
+// modeling the machine as out-of-order.
+
def CortexA57Model : SchedMachineModel {
- let IssueWidth = 8; // 3-way decode and 8-way issue
+ let IssueWidth = 3; // 3-way decode and dispatch
let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
let LoadLatency = 4; // Optimistic load latency
let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+
+ // Enable partial & runtime unrolling. The magic number is chosen based on
+ // experiments and benchmarking data.
+ let LoopMicroOpBufferSize = 16;
}
//===----------------------------------------------------------------------===//
@@ -24,18 +37,17 @@ def CortexA57Model : SchedMachineModel {
// Cortex-A57 has 8 pipelines, each with its own 8-entry queue, where
// micro-ops wait for their operands and then issue out-of-order.
-def A57UnitB : ProcResource<1> { let BufferSize = 8; } // Type B micro-ops
-def A57UnitI : ProcResource<2> { let BufferSize = 8; } // Type I micro-ops
-def A57UnitM : ProcResource<1> { let BufferSize = 8; } // Type M micro-ops
-def A57UnitL : ProcResource<1> { let BufferSize = 8; } // Type L micro-ops
-def A57UnitS : ProcResource<1> { let BufferSize = 8; } // Type S micro-ops
-def A57UnitX : ProcResource<1> { let BufferSize = 8; } // Type X micro-ops
-def A57UnitW : ProcResource<1> { let BufferSize = 8; } // Type W micro-ops
+def A57UnitB : ProcResource<1>; // Type B micro-ops
+def A57UnitI : ProcResource<2>; // Type I micro-ops
+def A57UnitM : ProcResource<1>; // Type M micro-ops
+def A57UnitL : ProcResource<1>; // Type L micro-ops
+def A57UnitS : ProcResource<1>; // Type S micro-ops
+def A57UnitX : ProcResource<1>; // Type X micro-ops
+def A57UnitW : ProcResource<1>; // Type W micro-ops
let SchedModel = CortexA57Model in {
def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
}
-
let SchedModel = CortexA57Model in {
//===----------------------------------------------------------------------===//
@@ -71,7 +83,7 @@ def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>;
def : SchedAlias<WriteF, A57Write_3cyc_1V>;
def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
-def : SchedAlias<WriteFCopy, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>;
@@ -85,13 +97,12 @@ def : WriteRes<WriteHint, []> { let Latency = 1; }
def : WriteRes<WriteLDHi, []> { let Latency = 4; }
-// Forwarding logic is not [yet] explicitly modeled beyond what is captured
-// in the latencies of the A57 Generic SchedWriteRes's.
+// Forwarding logic is only modeled for multiply and accumulate
def : ReadAdvance<ReadI, 0>;
def : ReadAdvance<ReadISReg, 0>;
def : ReadAdvance<ReadIEReg, 0>;
def : ReadAdvance<ReadIM, 0>;
-def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
@@ -134,7 +145,13 @@ def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
// Cryptography Extensions
// -----------------------------------------------------------------------------
-def : InstRW<[A57Write_3cyc_1W], (instregex "CRC32")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^CRC32")>;
// Vector Load
@@ -301,4 +318,330 @@ def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_PO
def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>;
def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+// Vector - Integer
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v8i8, v4i16, v2i32
+// Q form - v16i8, v8i16, v4i32
+// D form - v1i8, v1i16, v1i32, v1i64
+// Q form - v16i8, v8i16, v4i32, v2i64
+// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
+// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU]?ADDL?Vv16i8v$")>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+
+// ASIMD multiply, D-form
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+// ASIMD multiply, Q-form
+def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply accumulate, D-form
+def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply accumulate long
+// ASIMD multiply accumulate saturating long
+def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>;
+def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>;
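Taking the two defs above at face value: an integer multiply-accumulate result is available after 5 cycles, but a following multiply-accumulate that consumes it through the accumulator operand reads that operand 4 cycles late, so a chain of dependent accumulates costs

    5 - 4 = 1 cycle per link,

while any consumer reading the result through a non-accumulator operand still sees the full 5-cycle latency. (This is the standard reading of SchedReadAdvance; the numbers are exactly those written in the defs.)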
+
+// ASIMD multiply long
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD pairwise add and accumulate
+// ASIMD shift accumulate
+def A57WriteIVA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadIVA3 : SchedReadAdvance<3, [A57WriteIVA]>;
+def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^[SU]ADALP")>;
+def : InstRW<[A57WriteIVA, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[A57Write_4cyc_1X], (instregex "^SQSHLU")>;
+
+
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A57Write_4cyc_2X], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, complex, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+
+// Vector - Floating Point
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v2f32
+// Q form - v4f32, v2f64
+// D form - 32, 64
+// D form - v1i32, v1i64
+// D form - v2i32
+// Q form - v4i32, v2i64
+
+// ASIMD FP arith, normal, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FABD|FADD|FSUB)(v2f32|32|64|v2i32p)")>;
+// ASIMD FP arith, normal, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FABD|FADD|FSUB)(v4f32|v2f64|v2i64p)")>;
+
+// ASIMD FP arith, pairwise, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FADDP(v2f32|32|64|v2i32)")>;
+// ASIMD FP arith, pairwise, Q-form
+def : InstRW<[A57Write_9cyc_3V], (instregex "^FADDP(v4f32|v2f64|v2i64)")>;
+
+// ASIMD FP compare, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v2f32|32|64|v1i32|v2i32|v1i64)")>;
+// ASIMD FP compare, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FACGE|FACGT|FCMEQ|FCMGE|FCMGT|FCMLE|FCMLT)(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP convert, long and narrow
+def : InstRW<[A57Write_8cyc_3V], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[A57Write_18cyc_1X], (instregex "FDIVv2f32")>;
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[A57Write_36cyc_2X], (instregex "FDIVv4f32")>;
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[A57Write_64cyc_2X], (instregex "FDIVv2f64")>;
+
+// Note: These were simply duplicated from ASIMD FDIV because of missing documentation
+// ASIMD FP square root, D-form, F32
+def : InstRW<[A57Write_18cyc_1X], (instregex "FSQRTv2f32")>;
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[A57Write_36cyc_2X], (instregex "FSQRTv4f32")>;
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[A57Write_64cyc_2X], (instregex "FSQRTv2f64")>;
+
+// ASIMD FP max/min, normal, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>;
+// ASIMD FP max/min, normal, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^(FMAX|FMIN)(NM)?(v4f32|v2f64)")>;
+// ASIMD FP max/min, pairwise, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?P(v2f32|v2i32)")>;
+// ASIMD FP max/min, pairwise, Q-form
+def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i64)")>;
+// ASIMD FP max/min, reduce
+def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>;
+
+// ASIMD FP multiply, D-form, FZ
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+// ASIMD FP multiply, Q-form, FZ
+def : InstRW<[A57Write_5cyc_2V], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP multiply accumulate, D-form, FZ
+// ASIMD FP multiply accumulate, Q-form, FZ
+def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; }
+def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ]>;
+def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA5], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP round, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
+
+
+// Vector - Miscellaneous
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v8i8, v4i16, v2i32
+// Q form - v16i8, v8i16, v4i32
+// D form - v1i8, v1i16, v1i32, v1i64
+// Q form - v16i8, v8i16, v4i32, v2i64
+
+// ASIMD bitwise insert, Q-form
+def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL)v16i8")>;
+
+// ASIMD duplicate, gen reg, D-form and Q-form
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>;
+
+// ASIMD move, saturating
+def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]QXTU?N")>;
+
+// ASIMD reciprocal estimate, D-form
+def : InstRW<[A57Write_5cyc_1V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD reciprocal estimate, Q-form
+def : InstRW<[A57Write_5cyc_2V], (instregex "^[FU](RECP|RSQRT)(E|X)(v2f64|v4f32|v4i32)")>;
+
+// ASIMD reciprocal step, D-form, FZ
+def : InstRW<[A57Write_9cyc_1V], (instregex "^F(RECP|RSQRT)S(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+// ASIMD reciprocal step, Q-form, FZ
+def : InstRW<[A57Write_9cyc_2V], (instregex "^F(RECP|RSQRT)S(v2f64|v4f32|v4i32)")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[A57Write_3cyc_1V], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[A57Write_6cyc_2V], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[A57Write_9cyc_3V], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[A57Write_12cyc_4V], (instregex "^TB[LX]v8i8Four")>;
+// ASIMD table lookup, Q-form
+def : InstRW<[A57Write_6cyc_3V], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[A57Write_9cyc_5V], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[A57Write_12cyc_7V], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[A57Write_15cyc_9V], (instregex "^TB[LX]v16i8Four")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[A57Write_6cyc_1I_1L], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^INSv")>;
+
+// ASIMD unzip/zip, Q-form
+def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>;
+
+
+// Remainder
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>;
+
+def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA]>;
+def A57ReadFPM : SchedReadAdvance<0>;
+def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
+
+def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
+def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>;
+
+def : InstRW<[A57Write_32cyc_1X], (instrs FDIVDrr)>;
+def : InstRW<[A57Write_18cyc_1X], (instrs FDIVSrr)>;
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>;
+
+def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>;
+
+def : InstRW<[A57Write_32cyc_1X], (instrs FSQRTDr)>;
+def : InstRW<[A57Write_18cyc_1X], (instrs FSQRTSr)>;
+
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPDpre)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpost)>;
+def : InstRW<[A57Write_6cyc_2L, WriteLDHi, WriteAdr], (instrs LDPQpre)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
+def : InstRW<[A57Write_5cyc_1I_2L, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteLDHi, WriteAdr], (instrs LDPSpre)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRBpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRBui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRDl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRDpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRDui)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRHpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRHui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRQl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRQpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[A57Write_6cyc_1I_1L, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRQui)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[A57Write_5cyc_1I_1L, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRSl)>;
+def : InstRW<[A57Write_5cyc_1L, WriteI], (instrs LDRSpost)>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[A57Write_5cyc_1L, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDRSui)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURBi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURDi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURHi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURQi)>;
+def : InstRW<[A57Write_5cyc_1L], (instrs LDURSi)>;
+
+def : InstRW<[A57Write_2cyc_2S], (instrs STNPDi)>;
+def : InstRW<[A57Write_4cyc_1I_4S], (instrs STNPQi)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STNPXi)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STPDi)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPDpre)>;
+def : InstRW<[A57Write_4cyc_1I_4S], (instrs STPQi)>;
+def : InstRW<[WriteAdr, A57Write_4cyc_1I_4S], (instrs STPQpost)>;
+def : InstRW<[WriteAdr, A57Write_4cyc_2I_4S], (instrs STPQpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPSpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STPWpre)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STPXi)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STPXpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBBpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRBpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRBpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRBroX)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRDpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRDpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHHpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHHroX)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRHpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRHpre)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroW)>;
+def : InstRW<[A57Write_3cyc_1I_1S, ReadAdrBase], (instrs STRHroX)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQpost)>;
+def : InstRW<[WriteAdr, A57Write_2cyc_1I_2S], (instrs STRQpre)>;
+def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroW)>;
+def : InstRW<[A57Write_2cyc_1I_2S, ReadAdrBase], (instrs STRQroX)>;
+def : InstRW<[A57Write_2cyc_1I_2S], (instrs STRQui)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRSpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S], (instrs STRSpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRWpre)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpost)>;
+def : InstRW<[WriteAdr, A57Write_1cyc_1I_1S, ReadAdrBase], (instrs STRXpre)>;
+def : InstRW<[A57Write_2cyc_2S], (instrs STURQi)>;
+
} // SchedModel = CortexA57Model
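The A57WriteIVMA/A57ReadIVMA4, A57WriteIVA/A57ReadIVA3 and A57WriteFPVMA*/A57ReadFPVMA5 pairs above model accumulator forwarding: the read advance is subtracted from the producing write's latency, so chained multiply-accumulates stall for far fewer cycles than the raw latency suggests. A minimal sketch of that arithmetic follows, assuming the scheduler clamps the result at zero; the helper name is illustrative, not LLVM API.

// Sketch of the effective operand latency implied by a SchedReadAdvance.
#include <algorithm>
#include <cassert>

static int effectiveLatency(int writeLatency, int readAdvance) {
  // A dependent read scheduled with an advance of N sees the producer's
  // latency reduced by N, never below zero.
  return std::max(writeLatency - readAdvance, 0);
}

int main() {
  // A57WriteIVMA (latency 5) + A57ReadIVMA4 (advance 4): a chained MLAL
  // re-reads its accumulator after only one cycle.
  assert(effectiveLatency(5, 4) == 1);
  // A57WriteIVA (latency 4) + A57ReadIVA3 (advance 3): same effect for
  // pairwise-add-accumulate and shift-accumulate chains.
  assert(effectiveLatency(4, 3) == 1);
  return 0;
}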
diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
index a8f421b..6f30108 100644
--- a/lib/Target/AArch64/AArch64SchedA57WriteRes.td
+++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -28,14 +28,18 @@ def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
-def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; }
-def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
+ let ResourceCycles = [18]; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
+ let ResourceCycles = [19]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
-def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; }
-def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
+ let ResourceCycles = [35]; }
def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
@@ -53,6 +57,7 @@ def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 64;
let NumMicroOps = 2;
+ let ResourceCycles = [32, 32];
}
def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
A57UnitL]> {
@@ -137,6 +142,7 @@ def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 36;
let NumMicroOps = 2;
+ let ResourceCycles = [18, 18];
}
def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
A57UnitM]> {
@@ -153,6 +159,10 @@ def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
let Latency = 3;
let NumMicroOps = 2;
}
+def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI,
A57UnitL]> {
let Latency = 4;
@@ -295,6 +305,11 @@ def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL,
let Latency = 9;
let NumMicroOps = 4;
}
+def A57Write_12cyc_4V : SchedWriteRes<[A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
//===----------------------------------------------------------------------===//
@@ -334,6 +349,11 @@ def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL,
let Latency = 9;
let NumMicroOps = 5;
}
+def A57Write_9cyc_5V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
//===----------------------------------------------------------------------===//
@@ -399,7 +419,7 @@ def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI,
let Latency = 4;
let NumMicroOps = 7;
}
-def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI,
+def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI,
A57UnitS, A57UnitS, A57UnitS,
A57UnitS, A57UnitS, A57UnitS]> {
let Latency = 6;
@@ -412,6 +432,12 @@ def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI,
let Latency = 9;
let NumMicroOps = 7;
}
+def A57Write_12cyc_7V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 12;
+ let NumMicroOps = 7;
+}
//===----------------------------------------------------------------------===//
@@ -443,11 +469,11 @@ def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS,
//===----------------------------------------------------------------------===//
// Define Generic 9 micro-op types
-def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI,
- A57UnitS, A57UnitS,
- A57UnitS, A57UnitS,
- A57UnitS, A57UnitS,
- A57UnitS, A57UnitS]> {
+def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
let Latency = 8;
let NumMicroOps = 9;
}
@@ -459,6 +485,12 @@ def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI,
let Latency = 11;
let NumMicroOps = 9;
}
+def A57Write_15cyc_9V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 15;
+ let NumMicroOps = 9;
+}
//===----------------------------------------------------------------------===//
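The ResourceCycles values added above mark the FP divide and square-root writes as unpipelined: the X unit stays occupied for as many cycles as the operation's latency, so these entries constrain throughput as well as latency. The sketch below works through the throughput consequence for independent operations; the helper is illustrative and assumes perfect round-robin issue across units.

// Sketch: an unpipelined resource admits a new operation only after
// ResourceCycles of the previous one have elapsed on that unit.
#include <cassert>

static int cyclesForIndependentOps(int resourceCycles, int numOps, int numUnits) {
  int opsPerUnit = (numOps + numUnits - 1) / numUnits; // ceiling division
  return opsPerUnit * resourceCycles;
}

int main() {
  // FDIVDrr: latency 32 with ResourceCycles [32] on one X unit, so two
  // independent divides take about 64 cycles rather than overlapping.
  assert(cyclesForIndependentOps(32, 2, 1) == 64);
  // Four back-to-back FDIVSrr (18 busy cycles each) on a single unit.
  assert(cyclesForIndependentOps(18, 4, 1) == 72);
  return 0;
}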
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 1bf64fc..0cfd582 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -36,8 +36,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
const AArch64TargetLowering &TLI =
- *static_cast<const AArch64TargetLowering *>(
- DAG.getTarget().getTargetLowering());
+ *DAG.getTarget().getSubtarget<AArch64Subtarget>().getTargetLowering();
EVT IntPtr = TLI.getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 1180eea..11932d2 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64SELECTIONDAGINFO_H
-#define AArch64SELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 45f8ddb..0c36e8f 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -39,7 +39,7 @@ public:
static char ID;
AArch64StorePairSuppress() : MachineFunctionPass(ID) {}
- virtual const char *getPassName() const override {
+ const char *getPassName() const override {
return "AArch64 Store Pair Suppression";
}
@@ -50,7 +50,7 @@ private:
bool isNarrowFPStore(const MachineInstr &MI);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
@@ -85,8 +85,7 @@ bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB)
// If a subtarget does not define resources for STPQi, bail here.
if (SCDesc->isValid() && !SCDesc->isVariant()) {
- unsigned ResLenWithSTP = BBTrace.getResourceLength(
- ArrayRef<const MachineBasicBlock *>(), SCDesc);
+ unsigned ResLenWithSTP = BBTrace.getResourceLength(None, SCDesc);
if (ResLenWithSTP > ResLength) {
DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
<< " resources " << ResLength << " -> " << ResLenWithSTP
@@ -118,12 +117,13 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
- TII = static_cast<const AArch64InstrInfo *>(MF->getTarget().getInstrInfo());
- TRI = MF->getTarget().getRegisterInfo();
+ TII =
+ static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
+ TRI = MF->getSubtarget().getRegisterInfo();
MRI = &MF->getRegInfo();
const TargetSubtargetInfo &ST =
MF->getTarget().getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index bb0b72c..47b5d54 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64PBQPRegAlloc.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineScheduler.h"
@@ -43,8 +44,8 @@ AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const std::string &CPU,
- const std::string &FS, TargetMachine &TM,
- bool LittleEndian)
+ const std::string &FS,
+ const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
@@ -64,13 +65,7 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
unsigned char
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const {
-
- // Determine whether this is a reference to a definition or a declaration.
- // Materializable GVs (in JIT lazy compilation mode) do not require an extra
- // load from stub.
- bool isDecl = GV->hasAvailableExternallyLinkage();
- if (GV->isDeclaration() && !GV->isMaterializable())
- isDecl = true;
+ bool isDecl = GV->isDeclarationForLinker();
// MachO large model always goes via a GOT, simply to get a single 8-byte
// absolute relocation on all global addresses.
@@ -78,10 +73,15 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
return AArch64II::MO_GOT;
// The small code mode's direct accesses use ADRP, which cannot necessarily
- // produce the value 0 (if the code is above 4GB). Therefore they must use the
- // GOT.
- if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl)
- return AArch64II::MO_GOT;
+ // produce the value 0 (if the code is above 4GB).
+ if (TM.getCodeModel() == CodeModel::Small &&
+ GV->isWeakForLinker() && isDecl) {
+ // In PIC mode use the GOT, but in absolute mode use a constant pool load.
+ if (TM.getRelocationModel() == Reloc::Static)
+ return AArch64II::MO_CONSTPOOL;
+ else
+ return AArch64II::MO_GOT;
+ }
// If symbol visibility is hidden, the extra load is not needed if
// the symbol is definitely defined in the current translation unit.
@@ -128,3 +128,11 @@ void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
bool AArch64Subtarget::enableEarlyIfConversion() const {
return EnableEarlyIfConvert;
}
+
+std::unique_ptr<PBQPRAConstraint>
+AArch64Subtarget::getCustomPBQPConstraints() const {
+ if (!isCortexA57())
+ return nullptr;
+
+ return llvm::make_unique<A57ChainingConstraint>();
+}
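The ClassifyGlobalReference hunk earlier in this file keeps the observation that ADRP cannot materialize address 0 for weak, undefined symbols under the small code model, but sends the static relocation model to a constant-pool load instead of the GOT. A compact sketch of just that decision, with placeholder enum values standing in for the real AArch64II flags and a single boolean for the relocation model:

#include <cassert>

enum Flag { MO_NONE, MO_GOT, MO_CONSTPOOL };

// isWeak: the symbol may legally resolve to null; isDecl: it is only a
// declaration in this module, so its address is not known to be non-zero.
static Flag classifySmallCodeModel(bool isWeak, bool isDecl, bool staticReloc) {
  if (isWeak && isDecl)
    return staticReloc ? MO_CONSTPOOL : MO_GOT;
  return MO_NONE; // direct ADRP/ADD addressing is fine
}

int main() {
  assert(classifySmallCodeModel(true, true, /*staticReloc=*/true) == MO_CONSTPOOL);
  assert(classifySmallCodeModel(true, true, /*staticReloc=*/false) == MO_GOT);
  return 0;
}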
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 10c646d..e2740f1 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64SUBTARGET_H
-#define AArch64SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
-#include "AArch64InstrInfo.h"
#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
+#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -69,18 +69,27 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, TargetMachine &TM, bool LittleEndian);
+ const std::string &FS, const TargetMachine &TM,
+ bool LittleEndian);
- const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- const AArch64FrameLowering *getFrameLowering() const {
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const AArch64FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- const AArch64TargetLowering *getTargetLowering() const {
+ const AArch64TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- const AArch64InstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
+ const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const AArch64RegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
bool enableMachineScheduler() const override { return true; }
+ bool enablePostMachineScheduler() const override {
+ return isCortexA53() || isCortexA57();
+ }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -94,15 +103,20 @@ public:
bool isLittleEndian() const { return DL.isLittleEndian(); }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetIOS() const { return TargetTriple.isiOS(); }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
+ bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
-
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isCyclone() const { return CPUString == "cyclone"; }
bool isCortexA57() const { return CPUString == "cortex-a57"; }
bool isCortexA53() const { return CPUString == "cortex-a53"; }
+ bool useAA() const override { return isCortexA53(); }
+
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const { return 64; }
@@ -128,7 +142,9 @@ public:
unsigned NumRegionInstrs) const override;
bool enableEarlyIfConversion() const override;
+
+ std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
};
} // End llvm namespace
-#endif // AArch64SUBTARGET_H
+#endif
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 722e5e7..beed8e0 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,8 +12,11 @@
#include "AArch64.h"
#include "AArch64TargetMachine.h"
-#include "llvm/PassManager.h"
+#include "AArch64TargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/IR/Function.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
@@ -24,6 +27,10 @@ static cl::opt<bool>
EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableMCR("aarch64-mcr",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
static cl::opt<bool>
EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"),
cl::init(true), cl::Hidden);
@@ -64,19 +71,36 @@ EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
cl::desc("Run early if-conversion"),
cl::init(true));
+static cl::opt<bool>
+EnableCondOpt("aarch64-condopt",
+ cl::desc("Enable the condition optimizer pass"),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
cl::desc("Work around Cortex-A53 erratum 835769"),
cl::init(false));
+static cl::opt<bool>
+EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
+ cl::desc("Enable optimizations on complex GEPs"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget);
+ RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64Target);
+}
- RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64leTarget);
- RegisterTargetMachine<AArch64beTargetMachine> W(TheARM64beTarget);
+//===----------------------------------------------------------------------===//
+// AArch64 Lowering public interface.
+//===----------------------------------------------------------------------===//
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
+ return make_unique<AArch64_MachoTargetObjectFile>();
+
+ return make_unique<AArch64_ELFTargetObjectFile>();
}
/// TargetMachine ctor - Create an AArch64 architecture model.
@@ -88,10 +112,39 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool LittleEndian)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, *this, LittleEndian) {
+ TLOF(createTLOF(Triple(getTargetTriple()))),
+ Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) {
initAsmInfo();
}
+AArch64TargetMachine::~AArch64TargetMachine() {}
+
+const AArch64Subtarget *
+AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, isLittle);
+ }
+ return I.get();
+}
+
void AArch64leTargetMachine::anchor() { }
AArch64leTargetMachine::
@@ -115,7 +168,10 @@ namespace {
class AArch64PassConfig : public TargetPassConfig {
public:
AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
+ }
AArch64TargetMachine &getAArch64TargetMachine() const {
return getTM<AArch64TargetMachine>();
@@ -147,7 +203,7 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
void AArch64PassConfig::addIRPasses() {
// Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
// ourselves.
- addPass(createAtomicExpandLoadLinkedPass(TM));
+ addPass(createAtomicExpandPass(TM));
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
@@ -156,6 +212,19 @@ void AArch64PassConfig::addIRPasses() {
addPass(createCFGSimplificationPass());
TargetPassConfig::addIRPasses();
+
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
+ // Call SeparateConstOffsetFromGEP pass to extract constants within indices
+ // and lower a GEP with multiple indices to either arithmetic operations or
+ // multiple GEPs with single index.
+ addPass(createSeparateConstOffsetFromGEPPass(TM, true));
+ // Call EarlyCSE pass to find and remove subexpressions in the lowered
+ // result.
+ addPass(createEarlyCSEPass());
+ // Do loop invariant code motion in case part of the lowered result is
+ // invariant.
+ addPass(createLICMPass());
+ }
}
// Pass Pipeline Configuration
@@ -185,8 +254,12 @@ bool AArch64PassConfig::addInstSelector() {
}
bool AArch64PassConfig::addILPOpts() {
+ if (EnableCondOpt)
+ addPass(createAArch64ConditionOptimizerPass());
if (EnableCCMP)
addPass(createAArch64ConditionalCompares());
+ if (EnableMCR)
+ addPass(&MachineCombinerID);
if (EnableEarlyIfConversion)
addPass(&EarlyIfConverterID);
if (EnableStPairSuppress)
@@ -196,8 +269,12 @@ bool AArch64PassConfig::addILPOpts() {
bool AArch64PassConfig::addPreRegAlloc() {
// Use AdvSIMD scalar instructions whenever profitable.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar)
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
+ // The AdvSIMD pass may produce copies that can be rewritten to
+    // be register coalescer friendly.
+ addPass(&PeepholeOptimizerID);
+ }
return true;
}
@@ -206,7 +283,9 @@ bool AArch64PassConfig::addPostRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
if (TM->getOptLevel() != CodeGenOpt::None &&
- TM->getSubtarget<AArch64Subtarget>().isCortexA57())
+ (TM->getSubtarget<AArch64Subtarget>().isCortexA53() ||
+ TM->getSubtarget<AArch64Subtarget>().isCortexA57()) &&
+ usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
return true;
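The new getSubtargetImpl(const Function &) earlier in this file constructs one AArch64Subtarget per distinct "target-cpu" plus "target-features" string and caches it in SubtargetMap, so per-function attributes do not trigger a rebuild on every query. A self-contained sketch of that keyed lazy-construction pattern; the Stub* types are stand-ins, not the LLVM classes.

#include <map>
#include <memory>
#include <string>

struct StubSubtarget {
  std::string CPU, FS;
  StubSubtarget(std::string C, std::string F)
      : CPU(std::move(C)), FS(std::move(F)) {}
};

class StubTargetMachine {
  // mutable so a const query can still populate the cache, as in the patch.
  mutable std::map<std::string, std::unique_ptr<StubSubtarget>> SubtargetMap;

public:
  const StubSubtarget *getSubtargetFor(const std::string &CPU,
                                       const std::string &FS) const {
    auto &Entry = SubtargetMap[CPU + FS]; // keyed on CPU + feature string
    if (!Entry)
      Entry = std::make_unique<StubSubtarget>(CPU, FS);
    return Entry.get();
  }
};

int main() {
  StubTargetMachine TM;
  const StubSubtarget *A = TM.getSubtargetFor("cortex-a57", "+neon");
  const StubSubtarget *B = TM.getSubtargetFor("cortex-a57", "+neon");
  return A == B ? 0 : 1; // second lookup hits the cache
}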
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 852cb3f..75c65c5 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64TARGETMACHINE_H
-#define AArch64TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
@@ -23,7 +23,9 @@ namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
AArch64Subtarget Subtarget;
+ mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
public:
AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
@@ -31,33 +33,25 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool IsLittleEndian);
+ ~AArch64TargetMachine() override;
+
const AArch64Subtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const AArch64TargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const AArch64FrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const AArch64InstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const AArch64RegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
- const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
+ const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
/// \brief Register AArch64 analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile* getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+private:
+ bool isLittle;
};
// AArch64leTargetMachine - AArch64 little endian target machine.
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index de63cb4..2e595f9 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64_TARGETOBJECTFILE_H
-#define LLVM_TARGET_AArch64_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 1dac14b..b1a2914 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -51,7 +51,7 @@ public:
AArch64TTI(const AArch64TargetMachine *TM)
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
}
@@ -104,7 +104,7 @@ public:
return 64;
}
- unsigned getMaximumUnrollFactor() const override { return 2; }
+ unsigned getMaxInterleaveFactor() const override;
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
override;
@@ -112,10 +112,11 @@ public:
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
override;
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue) const
- override;
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+ OperandValueKind Opd2Info = OK_AnyValue,
+ OperandValueProperties Opd1PropInfo = OP_None,
+ OperandValueProperties Opd2PropInfo = OP_None) const override;
unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
@@ -124,6 +125,13 @@ public:
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override;
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;
+
+ void getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const override;
+
+
/// @}
};
@@ -400,18 +408,42 @@ unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
return 2;
}
-unsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Opd1Info,
- OperandValueKind Opd2Info) const {
+unsigned AArch64TTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (ISD == ISD::SDIV &&
+ Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+    // On AArch64, scalar signed division by a power-of-two constant is
+ // normally expanded to the sequence ADD + CMP + SELECT + SRA.
+    // The OperandValue properties may not match those of the previous
+ // operation; conservatively assume OP_None.
+ unsigned Cost =
+ getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ return Cost;
+ }
+
switch (ISD) {
default:
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
- Opd2Info);
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
@@ -498,3 +530,27 @@ unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
+
+unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
+ unsigned Cost = 0;
+ for (auto *I : Tys) {
+ if (!I->isVectorTy())
+ continue;
+ if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
+ Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
+ getMemoryOpCost(Instruction::Load, I, 128, 0);
+ }
+ return Cost;
+}
+
+unsigned AArch64TTI::getMaxInterleaveFactor() const {
+ if (ST->isCortexA57())
+ return 4;
+ return 2;
+}
+
+void AArch64TTI::getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const {
+ // Disable partial & runtime unrolling on -Os.
+ UP.PartialOptSizeThreshold = 0;
+}
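The SDIV special case earlier in this file prices a scalar signed divide by a power of two as the sum of the four operations it is lowered to (ADD, the SUB used for the compare, SELECT, arithmetic shift right). A trivial worked version of that summation, with each component cost assumed to be 1 rather than queried from the real cost tables:

#include <cassert>

int main() {
  // Assumed per-operation costs; the patch obtains them from recursive
  // getArithmeticInstrCost calls with OP_None operand properties.
  const unsigned AddCost = 1, SubCost = 1, SelectCost = 1, AShrCost = 1;
  unsigned SDivByPow2Cost = AddCost + SubCost + SelectCost + AShrCost;
  assert(SDivByPow2Cost == 4);
  return 0;
}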
diff --git a/lib/Target/AArch64/Android.mk b/lib/Target/AArch64/Android.mk
index d7b3317..f3acd3a 100644
--- a/lib/Target/AArch64/Android.mk
+++ b/lib/Target/AArch64/Android.mk
@@ -24,6 +24,7 @@ aarch64_codegen_SRC_FILES := \
AArch64CleanupLocalDynamicTLSPass.cpp \
AArch64CollectLOH.cpp \
AArch64ConditionalCompares.cpp \
+ AArch64ConditionOptimizer.cpp \
AArch64DeadRegisterDefinitionsPass.cpp \
AArch64ExpandPseudoInsts.cpp \
AArch64FastISel.cpp \
@@ -33,6 +34,7 @@ aarch64_codegen_SRC_FILES := \
AArch64ISelLowering.cpp \
AArch64LoadStoreOptimizer.cpp \
AArch64MCInstLower.cpp \
+ AArch64PBQPRegAlloc.cpp \
AArch64PromoteConstant.cpp \
AArch64RegisterInfo.cpp \
AArch64SelectionDAGInfo.cpp \
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c42d11e..98e0ea8 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -10,26 +10,28 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
#include <cstdio>
using namespace llvm;
@@ -41,7 +43,6 @@ class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
// Map of register aliases registers via the .req directive.
StringMap<std::pair<bool, unsigned> > RegisterReqs;
@@ -51,10 +52,7 @@ private:
return static_cast<AArch64TargetStreamer &>(TS);
}
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
- SMLoc getLoc() const { return Parser.getTok().getLoc(); }
+ SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
@@ -68,11 +66,13 @@ private:
bool parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode);
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
- bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
bool showMatchError(SMLoc Loc, unsigned ErrCode);
bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveInst(SMLoc L);
+
bool parseDirectiveTLSDescCall(SMLoc L);
bool parseDirectiveLOH(StringRef LOH, SMLoc L);
@@ -84,7 +84,7 @@ private:
bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
/// @name Auto-generated Match Functions
/// {
@@ -116,10 +116,11 @@ public:
AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ : MCTargetAsmParser(), STI(_STI) {
MCAsmParserExtension::Initialize(_Parser);
- if (Parser.getStreamer().getTargetStreamer() == nullptr)
- new AArch64TargetStreamer(Parser.getStreamer());
+ MCStreamer &S = getParser().getStreamer();
+ if (S.getTargetStreamer() == nullptr)
+ new AArch64TargetStreamer(S);
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -1253,134 +1254,116 @@ public:
void addSImm9Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addSImm7s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4));
}
void addSImm7s8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8));
}
void addSImm7s16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16));
}
void addImm0_7Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm1_8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm0_15Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm1_16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
assert(MCE && "Invalid constant immediate operand!");
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm0_31Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm1_31Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm1_32Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm0_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm1_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm1_64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm0_127Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm0_255Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addImm32_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
void addLogicalImm32Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid logical immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding =
AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32);
Inst.addOperand(MCOperand::CreateImm(encoding));
@@ -1388,8 +1371,7 @@ public:
void addLogicalImm64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid logical immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 64);
Inst.addOperand(MCOperand::CreateImm(encoding));
}
@@ -1412,8 +1394,7 @@ public:
void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid immediate operand!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
uint64_t encoding = AArch64_AM::encodeAdvSIMDModImmType10(MCE->getValue());
Inst.addOperand(MCOperand::CreateImm(encoding));
}
@@ -1894,6 +1875,7 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
/// Identifier when called, and if it is a register name the token is eaten and
/// the register is added to the operand list.
int AArch64AsmParser::tryParseRegister() {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
@@ -1918,6 +1900,7 @@ int AArch64AsmParser::tryParseRegister() {
/// tryMatchVectorRegister - Try to parse a vector register name with optional
/// kind specifier. If it is a register specifier, eat the token and return it.
int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
+ MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier)) {
TokError("vector register expected");
return -1;
@@ -1950,6 +1933,7 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
if (Parser.getTok().isNot(AsmToken::Identifier)) {
@@ -1979,6 +1963,7 @@ AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
/// tryParsePrefetch - Try to parse a prefetch operand.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const AsmToken &Tok = Parser.getTok();
// Either an identifier for named values or a 5-bit immediate.
@@ -2026,6 +2011,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
/// instruction.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const MCExpr *Expr;
@@ -2076,6 +2062,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
/// instruction.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const MCExpr *Expr;
@@ -2095,6 +2082,7 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
/// tryParseFPImm - A floating point immediate expression operand.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
bool Hash = false;
@@ -2157,6 +2145,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
/// tryParseAddSubImm - Parse ADD/SUB shifted immediate operand
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
if (Parser.getTok().is(AsmToken::Hash))
@@ -2248,6 +2237,7 @@ AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) {
/// parseCondCode - Parse a Condition Code operand.
bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
bool invertCondCode) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
@@ -2273,6 +2263,7 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
/// them if present.
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
std::string LowerID = Tok.getString().lower();
AArch64_AM::ShiftExtendType ShOp =
@@ -2318,10 +2309,11 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
if (Hash)
Parser.Lex(); // Eat the '#'.
- // Make sure we do actually have a number
- if (!Parser.getTok().is(AsmToken::Integer)) {
- Error(Parser.getTok().getLoc(),
- "expected integer shift amount");
+ // Make sure we do actually have a number or a parenthesized expression.
+ SMLoc E = Parser.getTok().getLoc();
+ if (!Parser.getTok().is(AsmToken::Integer) &&
+ !Parser.getTok().is(AsmToken::LParen)) {
+ Error(E, "expected integer shift amount");
return MatchOperand_ParseFail;
}
@@ -2331,11 +2323,11 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
if (!MCE) {
- TokError("expected #imm after shift specifier");
+ Error(E, "expected constant '#imm' after shift specifier");
return MatchOperand_ParseFail;
}
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
Operands.push_back(AArch64Operand::CreateShiftExtend(
ShOp, MCE->getValue(), true, S, E, getContext()));
return MatchOperand_Success;
@@ -2352,6 +2344,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
Operands.push_back(
AArch64Operand::CreateToken("sys", false, NameLoc, getContext()));
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
@@ -2590,6 +2583,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
// Can be either a #imm style literal or an option name
@@ -2643,6 +2637,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -2657,6 +2652,7 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
/// tryParseVectorRegister - Parse a vector register operand.
bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier))
return true;
@@ -2705,6 +2701,7 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
/// parseRegister - Parse a non-vector register operand.
bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
// Try for a vector register.
if (!tryParseVectorRegister(Operands))
@@ -2747,6 +2744,7 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
}
bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
+ MCAsmParser &Parser = getParser();
bool HasELFModifier = false;
AArch64MCExpr::VariantKind RefKind;
@@ -2825,6 +2823,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
/// parseVectorList - Parse a vector list operand for AdvSIMD instructions.
bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket");
SMLoc S = getLoc();
Parser.Lex(); // Eat left bracket token.
@@ -2923,6 +2922,7 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
AArch64AsmParser::OperandMatchResultTy
AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
@@ -2968,6 +2968,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
/// operand regardless of the mnemonic.
bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode) {
+ MCAsmParser &Parser = getParser();
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -3087,13 +3088,18 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
if (getParser().parseExpression(SubExprVal))
return true;
+ if (Operands.size() < 2 ||
+ !static_cast<AArch64Operand &>(*Operands[1]).isReg())
+ return true;
+
+ bool IsXReg =
+ AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Operands[1]->getReg());
+
MCContext& Ctx = getContext();
E = SMLoc::getFromPointer(Loc.getPointer() - 1);
// If the op is an imm and can be fit into a mov, then replace ldr with mov.
- if (isa<MCConstantExpr>(SubExprVal) && Operands.size() >= 2 &&
- static_cast<AArch64Operand &>(*Operands[1]).isReg()) {
- bool IsXReg = AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Operands[1]->getReg());
+ if (isa<MCConstantExpr>(SubExprVal)) {
uint64_t Imm = (cast<MCConstantExpr>(SubExprVal))->getValue();
uint32_t ShiftAmt = 0, MaxShiftAmt = IsXReg ? 48 : 16;
while(Imm > 0xFFFF && countTrailingZeros(Imm) >= 16) {
@@ -3109,9 +3115,14 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
ShiftAmt, true, S, E, Ctx));
return false;
}
+ APInt Simm = APInt(64, Imm << ShiftAmt);
+ // Check if the immediate is an unsigned or signed 32-bit int for W regs.
+ if (!IsXReg && !(Simm.isIntN(32) || Simm.isSignedIntN(32)))
+ return Error(Loc, "Immediate too large for register");
}
// If it is a label or an imm that cannot fit in a movz, put it into CP.
- const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal);
+ const MCExpr *CPLoc =
+ getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4);
Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx));
return false;
}
@@ -3123,6 +3134,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
Name = StringSwitch<StringRef>(Name.lower())
.Case("beq", "b.eq")
.Case("bne", "b.ne")
@@ -3571,12 +3583,12 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
}
}
-static const char *getSubtargetFeatureName(unsigned Val);
+static const char *getSubtargetFeatureName(uint64_t Val);
bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[0]);
@@ -3826,7 +3838,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Special case the error message for the very common case where only
// a single subtarget feature is missing (neon, e.g.).
std::string Msg = "instruction requires:";
- unsigned Mask = 1;
+ uint64_t Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
if (ErrorInfo & Mask) {
Msg += " ";
@@ -3840,7 +3852,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return showMatchError(IDLoc, MatchResult);
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -3920,6 +3932,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
/// ParseDirective parses the arm specific directives
bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
+ const MCObjectFileInfo::Environment Format =
+ getContext().getObjectFileInfo()->getObjectFileType();
+ bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+ bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
+
StringRef IDVal = DirectiveID.getIdentifier();
SMLoc Loc = DirectiveID.getLoc();
if (IDVal == ".hword")
@@ -3935,12 +3952,18 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".unreq")
return parseDirectiveUnreq(DirectiveID.getLoc());
+ if (!IsMachO && !IsCOFF) {
+ if (IDVal == ".inst")
+ return parseDirectiveInst(Loc);
+ }
+
return parseDirectiveLOH(IDVal, Loc);
}
/// parseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -3963,6 +3986,47 @@ bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// parseDirectiveInst
+/// ::= .inst opcode [, ...]
+bool AArch64AsmParser::parseDirectiveInst(SMLoc Loc) {
+ MCAsmParser &Parser = getParser();
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ Parser.eatToEndOfStatement();
+ Error(Loc, "expected expression following directive");
+ return false;
+ }
+
+ for (;;) {
+ const MCExpr *Expr;
+
+ if (getParser().parseExpression(Expr)) {
+ Error(Loc, "expected expression");
+ return false;
+ }
+
+ const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
+ if (!Value) {
+ Error(Loc, "expected constant expression");
+ return false;
+ }
+
+ getTargetStreamer().emitInst(Value->getValue());
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(Loc, "unexpected token in directive");
+ return false;
+ }
+
+ Parser.Lex(); // Eat comma.
+ }
+
+ Parser.Lex();
+ return false;
+}
+
// parseDirectiveTLSDescCall:
// ::= .tlsdesccall symbol
bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
@@ -3994,10 +4058,9 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
// We successfully get a numeric value for the identifier.
// Check if it is valid.
int64_t Id = getParser().getTok().getIntVal();
- Kind = (MCLOHType)Id;
- // Check that Id does not overflow MCLOHType.
- if (!isValidMCLOHType(Kind) || Id != Kind)
+ if (Id <= -1U && !isValidMCLOHType(Id))
return TokError("invalid numeric identifier in directive");
+ Kind = (MCLOHType)Id;
} else {
StringRef Name = getTok().getIdentifier();
// We successfully parse an identifier.
@@ -4045,6 +4108,7 @@ bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
/// parseDirectiveReq
/// ::= name .req registername
bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ MCAsmParser &Parser = getParser();
Parser.Lex(); // Eat the '.req' token.
SMLoc SRegLoc = getLoc();
unsigned RegNum = tryParseRegister();
@@ -4076,7 +4140,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
auto pair = std::make_pair(IsVector, RegNum);
- if (RegisterReqs.GetOrCreateValue(Name, pair).getValue() != pair)
+ if (!RegisterReqs.insert(std::make_pair(Name, pair)).second)
Warning(L, "ignoring redefinition of register alias '" + Name + "'");
return true;
@@ -4085,6 +4149,7 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// parseDirectiveUneq
/// ::= .unreq registername
bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier)) {
Error(Parser.getTok().getLoc(), "unexpected input in .unreq directive.");
Parser.eatToEndOfStatement();
@@ -4149,9 +4214,7 @@ AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
extern "C" void LLVMInitializeAArch64AsmParser() {
RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64leTarget);
RegisterMCAsmParser<AArch64AsmParser> Y(TheAArch64beTarget);
-
- RegisterMCAsmParser<AArch64AsmParser> Z(TheARM64leTarget);
- RegisterMCAsmParser<AArch64AsmParser> W(TheARM64beTarget);
+ RegisterMCAsmParser<AArch64AsmParser> Z(TheARM64Target);
}
#define GET_REGISTER_MATCHER
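A side note on the ldr-literal handling rewritten above: an "ldr Xd, =imm" whose constant fits a (possibly shifted) 16-bit MOVZ payload is rewritten to a mov, wide constants on W registers are rejected, and everything else goes to a constant pool entry sized 8 bytes for X registers and 4 for W registers. A minimal standalone sketch of that fitting rule (fitsInMovz is an illustrative helper, not part of the patch):

    #include <cstdint>

    // Sketch: can Imm be materialized by a single MOVZ with a left shift of
    // 0/16/32/48 (X registers) or 0/16 (W registers)?  Mirrors the
    // shift-peeling loop in parseOperand above.
    static bool fitsInMovz(uint64_t Imm, bool IsXReg,
                           uint16_t &Payload, unsigned &ShiftAmt) {
      ShiftAmt = 0;
      const unsigned MaxShiftAmt = IsXReg ? 48 : 16;
      while (Imm > 0xFFFF && (Imm & 0xFFFF) == 0) {
        Imm >>= 16;
        ShiftAmt += 16;
      }
      if (Imm > 0xFFFF || ShiftAmt > MaxShiftAmt)
        return false;
      Payload = static_cast<uint16_t>(Imm);
      return true;
    }

For example, 0x2A0000 fits with Payload 0x2A and ShiftAmt 16, while 0x12345 does not and would fall through to the constant pool.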
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
index c2f0488..f26327f 100644
--- a/lib/Target/AArch64/CMakeLists.txt
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -2,7 +2,7 @@ set(LLVM_TARGET_DEFINITIONS AArch64.td)
tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
@@ -28,12 +28,14 @@ add_llvm_target(AArch64CodeGen
AArch64FastISel.cpp
AArch64A53Fix835769.cpp
AArch64FrameLowering.cpp
+ AArch64ConditionOptimizer.cpp
AArch64ISelDAGToDAG.cpp
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
AArch64LoadStoreOptimizer.cpp
AArch64MCInstLower.cpp
AArch64PromoteConstant.cpp
+ AArch64PBQPRegAlloc.cpp
AArch64RegisterInfo.cpp
AArch64SelectionDAGInfo.cpp
AArch64StorePairSuppress.cpp
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 6de27d6..878e29c 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -15,12 +15,11 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -200,26 +199,24 @@ static MCDisassembler *createAArch64Disassembler(const Target &T,
}
DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
- CommentStream = &cs;
-
- uint8_t bytes[4];
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &OS,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
Size = 0;
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1)
+ if (Bytes.size() < 4)
return Fail;
Size = 4;
// Encoded as a small-endian 32-bit word in the stream.
- uint32_t insn =
- (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0);
+ uint32_t Insn =
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
// Calling the auto-generated decoder function.
- return decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
+ return decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
}
static MCSymbolizer *
@@ -243,13 +240,9 @@ extern "C" void LLVMInitializeAArch64Disassembler() {
TargetRegistry::RegisterMCSymbolizer(TheAArch64beTarget,
createAArch64ExternalSymbolizer);
- TargetRegistry::RegisterMCDisassembler(TheARM64leTarget,
- createAArch64Disassembler);
- TargetRegistry::RegisterMCDisassembler(TheARM64beTarget,
+ TargetRegistry::RegisterMCDisassembler(TheARM64Target,
createAArch64Disassembler);
- TargetRegistry::RegisterMCSymbolizer(TheARM64leTarget,
- createAArch64ExternalSymbolizer);
- TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget,
+ TargetRegistry::RegisterMCSymbolizer(TheARM64Target,
createAArch64ExternalSymbolizer);
}
@@ -592,7 +585,7 @@ static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
uint64_t Addr,
const void *Decoder) {
// scale{5} is asserted as 1 in tblgen.
- Imm |= 0x20;
+ Imm |= 0x20;
Inst.addOperand(MCOperand::CreateImm(64 - Imm));
return Success;
}
@@ -614,7 +607,7 @@ static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
if (ImmVal & (1 << (19 - 1)))
ImmVal |= ~((1LL << 19) - 1);
- if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr,
+ if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal * 4, Addr,
Inst.getOpcode() != AArch64::LDRXl, 0, 4))
Inst.addOperand(MCOperand::CreateImm(ImmVal));
return Success;
@@ -630,35 +623,19 @@ static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
- const MCSubtargetInfo &STI = Dis->getSubtargetInfo();
-
- Imm |= 0x8000;
Inst.addOperand(MCOperand::CreateImm(Imm));
- bool ValidNamed;
- (void)AArch64SysReg::MRSMapper(STI.getFeatureBits())
- .toString(Imm, ValidNamed);
-
- return ValidNamed ? Success : Fail;
+ // Every system register in the encoding space is valid with the syntax
+ // S<op0>_<op1>_<Cn>_<Cm>_<op2>, so decoding system registers always succeeds.
+ return Success;
}
static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- const AArch64Disassembler *Dis =
- static_cast<const AArch64Disassembler *>(Decoder);
- const MCSubtargetInfo &STI = Dis->getSubtargetInfo();
-
- Imm |= 0x8000;
Inst.addOperand(MCOperand::CreateImm(Imm));
- bool ValidNamed;
- (void)AArch64SysReg::MSRMapper(STI.getFeatureBits())
- .toString(Imm, ValidNamed);
-
- return ValidNamed ? Success : Fail;
+ return Success;
}
static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
@@ -1510,7 +1487,7 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
if (imm & (1 << (26 - 1)))
imm |= ~((1LL << 26) - 1);
- if (!Dis->tryAddingSymbolicOperand(Inst, imm << 2, Addr, true, 0, 4))
+ if (!Dis->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
Inst.addOperand(MCOperand::CreateImm(imm));
return Success;
@@ -1530,7 +1507,7 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
bool ValidNamed;
(void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed);
-
+
return ValidNamed ? Success : Fail;
}
@@ -1552,7 +1529,7 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
else
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
Inst.addOperand(MCOperand::CreateImm(bit));
- if (!Dis->tryAddingSymbolicOperand(Inst, dst << 2, Addr, true, 0, 4))
+ if (!Dis->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
Inst.addOperand(MCOperand::CreateImm(dst));
return Success;
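The "<< 2" to "* 4" changes in the PC-relative decoders above produce the same scaling while avoiding a left shift of a value that may already be negative after sign extension (left-shifting a negative integer is undefined behaviour in C++). A small sketch of the shared decode pattern (decodePCRelOffset is illustrative, not an LLVM API):

    #include <cstdint>

    // Sketch: sign-extend an N-bit branch/label field (19 bits for LDR/B.cond
    // literals, 26 for B/BL) and scale it from instruction words to bytes.
    static int64_t decodePCRelOffset(uint64_t Field, unsigned Bits) {
      int64_t Imm = static_cast<int64_t>(Field);
      if (Imm & (1LL << (Bits - 1)))     // top bit of the field set?
        Imm |= ~((1LL << Bits) - 1);     // extend the sign to 64 bits
      return Imm * 4;                    // multiply, not <<, once Imm may be negative
    }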
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 68d4867..7fb57ad 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64DISASSEMBLER_H
-#define AArch64DISASSEMBLER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
+#define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
#include "llvm/MC/MCDisassembler.h"
@@ -28,11 +28,10 @@ public:
~AArch64Disassembler() {}
- /// getInstruction - See MCDisassembler.
MCDisassembler::DecodeStatus
- getInstruction(MCInst &instr, uint64_t &size, const MemoryObject &region,
- uint64_t address, raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // namespace llvm
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
index 171d31c..12b8450 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64EXTERNALSYMBOLIZER_H
-#define AArch64EXTERNALSYMBOLIZER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64EXTERNALSYMBOLIZER_H
+#define LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64EXTERNALSYMBOLIZER_H
#include "llvm/MC/MCExternalSymbolizer.h"
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
index a4224f4..62827e8 100644
--- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = AArch64Disassembler
parent = AArch64
-required_libraries = AArch64Info AArch64Utils MC Support
+required_libraries = AArch64Info AArch64Utils MC MCDisassembler Support
add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 8a21f06..46a1d79 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -16,8 +16,8 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -1223,7 +1223,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- O << "#" << (Op.getImm() << 2);
+ O << "#" << (Op.getImm() * 4);
return;
}
@@ -1247,7 +1247,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- O << "#" << (Op.getImm() << 12);
+ O << "#" << (Op.getImm() * (1 << 12));
return;
}
@@ -1276,24 +1276,20 @@ void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- bool Valid;
auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val, Valid);
+ std::string Name = Mapper.toString(Val);
- if (Valid)
- O << StringRef(Name).upper();
+ O << StringRef(Name).upper();
}
void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- bool Valid;
auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val, Valid);
+ std::string Name = Mapper.toString(Val);
- if (Valid)
- O << StringRef(Name).upper();
+ O << StringRef(Name).upper();
}
void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index fe7666e..5f51621 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64INSTPRINTER_H
-#define AArch64INSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
+#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
@@ -127,8 +127,9 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O) override;
bool printAliasInstr(const MCInst *MI, raw_ostream &O) override;
- virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx,
+ raw_ostream &O) override;
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
}
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
index 642c183..573fa10 100644
--- a/lib/Target/AArch64/LLVMBuild.txt
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = AArch64CodeGen
parent = AArch64
-required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target
add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 8b1e44e..1dc506a 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H
-#define LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -210,67 +210,62 @@ static inline uint64_t ror(uint64_t elt, unsigned size) {
/// as the immediate operand of a logical instruction for the given register
/// size. If so, return true with "encoding" set to the encoded value in
/// the form N:immr:imms.
-static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize,
- uint64_t &encoding) {
- if (imm == 0ULL || imm == ~0ULL ||
- (regSize != 64 && (imm >> regSize != 0 || imm == ~0U)))
+static inline bool processLogicalImmediate(uint64_t Imm, unsigned RegSize,
+ uint64_t &Encoding) {
+ if (Imm == 0ULL || Imm == ~0ULL ||
+ (RegSize != 64 && (Imm >> RegSize != 0 || Imm == ~0U)))
return false;
- unsigned size = 2;
- uint64_t eltVal = imm;
-
// First, determine the element size.
- while (size < regSize) {
- unsigned numElts = regSize / size;
- unsigned mask = (1ULL << size) - 1;
- uint64_t lowestEltVal = imm & mask;
-
- bool allMatched = true;
- for (unsigned i = 1; i < numElts; ++i) {
- uint64_t currEltVal = (imm >> (i*size)) & mask;
- if (currEltVal != lowestEltVal) {
- allMatched = false;
- break;
- }
- }
+ unsigned Size = RegSize;
+
+ do {
+ Size /= 2;
+ uint64_t Mask = (1ULL << Size) - 1;
- if (allMatched) {
- eltVal = lowestEltVal;
+ if ((Imm & Mask) != ((Imm >> Size) & Mask)) {
+ Size *= 2;
break;
}
-
- size *= 2;
- }
+ } while (Size > 2);
// Second, determine the rotation to make the element be: 0^m 1^n.
- for (unsigned i = 0; i < size; ++i) {
- eltVal = ror(eltVal, size);
- uint32_t clz = countLeadingZeros(eltVal) - (64 - size);
- uint32_t cto = CountTrailingOnes_64(eltVal);
-
- if (clz + cto == size) {
- // Encode in immr the number of RORs it would take to get *from* this
- // element value to our target value, where i+1 is the number of RORs
- // to go the opposite direction.
- unsigned immr = size - (i + 1);
-
- // If size has a 1 in the n'th bit, create a value that has zeroes in
- // bits [0, n] and ones above that.
- uint64_t nimms = ~(size-1) << 1;
-
- // Or the CTO value into the low bits, which must be below the Nth bit
- // bit mentioned above.
- nimms |= (cto-1);
-
- // Extract the seventh bit and toggle it to create the N field.
- unsigned N = ((nimms >> 6) & 1) ^ 1;
-
- encoding = (N << 12) | (immr << 6) | (nimms & 0x3f);
- return true;
- }
+ uint32_t CTO, I;
+ uint64_t Mask = ((uint64_t)-1LL) >> (64 - Size);
+ Imm &= Mask;
+
+ if (isShiftedMask_64(Imm)) {
+ I = countTrailingZeros(Imm);
+ CTO = CountTrailingOnes_64(Imm >> I);
+ } else {
+ Imm |= ~Mask;
+ if (!isShiftedMask_64(~Imm))
+ return false;
+
+ unsigned CLO = CountLeadingOnes_64(Imm);
+ I = 64 - CLO;
+ CTO = CLO + CountTrailingOnes_64(Imm) - (64 - Size);
}
- return false;
+ // Encode in Immr the number of RORs it would take to get *from* 0^m 1^n
+ // to our target value, where i is the number of RORs to go the opposite
+ // direction.
+ assert(Size > I && "I should be smaller than element Size");
+ unsigned Immr = (Size - I) & (Size - 1);
+
+ // If size has a 1 in the n'th bit, create a value that has zeroes in
+ // bits [0, n] and ones above that.
+ uint64_t NImms = ~(Size-1) << 1;
+
+ // Or the CTO value into the low bits, which must be below the Nth bit
+ // mentioned above.
+ NImms |= (CTO-1);
+
+ // Extract the seventh bit and toggle it to create the N field.
+ unsigned N = ((NImms >> 6) & 1) ^ 1;
+
+ Encoding = (N << 12) | (Immr << 6) | (NImms & 0x3f);
+ return true;
}
/// isLogicalImmediate - Return true if the immediate is valid for a logical
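A worked example for the rewritten processLogicalImmediate above, as a sketch (it assumes the in-tree header is on the include path; the values follow the new element-size and rotation computation):

    #include <cstdint>
    #include <cstdio>
    #include "MCTargetDesc/AArch64AddressingModes.h"  // in-tree header

    int main() {
      // 0x0000FFFF0000FFFF replicates a 32-bit element 0x0000FFFF:
      //   a shifted mask with I = 0 trailing zeros and CTO = 16 ones,
      //   so Immr = (32 - 0) & 31 = 0 and NImms = (~31 << 1) | 15,
      //   giving N = 0, immr = 0b000000, imms = 0b001111 -> encoding 0x00F.
      uint64_t Encoding = 0;
      bool OK = AArch64_AM::processLogicalImmediate(0x0000FFFF0000FFFFULL, 64,
                                                    Encoding);
      std::printf("%d 0x%llx\n", OK, (unsigned long long)Encoding);  // 1 0xf
      // 0 and ~0 are still rejected up front, exactly as before the rewrite.
      OK = AArch64_AM::processLogicalImmediate(0, 64, Encoding);
      std::printf("%d\n", OK);  // 0
      return 0;
    }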
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index a917616..0bc2f77 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -14,9 +14,10 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -534,8 +535,8 @@ void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
// store fixups in .eh_frame section in big endian order
if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
- const MCSectionELF *SecELF = static_cast<const MCSectionELF *>(Sec);
- if (SecELF->getSectionName() == ".eh_frame")
+ const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec);
+ if (SecELF && SecELF->getSectionName() == ".eh_frame")
Value = ByteSwap_32(unsigned(Value));
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
@@ -551,7 +552,8 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
return new DarwinAArch64AsmBackend(T, MRI);
assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
- return new ELFAArch64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/true);
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true);
}
MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
@@ -561,6 +563,7 @@ MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
assert(TheTriple.isOSBinFormatELF() &&
"Big endian is only supported for ELF targets!");
- return new ELFAArch64AsmBackend(T, TheTriple.getOS(),
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFAArch64AsmBackend(T, OSABI,
/*IsLittleEndian=*/false);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index a79406d..60e9c19 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -15,8 +15,10 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -34,12 +36,42 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
+class AArch64ELFStreamer;
+
+class AArch64TargetAsmStreamer : public AArch64TargetStreamer {
+ formatted_raw_ostream &OS;
+
+ void emitInst(uint32_t Inst) override;
+
+public:
+ AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+};
+
+AArch64TargetAsmStreamer::AArch64TargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS)
+ : AArch64TargetStreamer(S), OS(OS) {}
+
+void AArch64TargetAsmStreamer::emitInst(uint32_t Inst) {
+ OS << "\t.inst\t0x" << utohexstr(Inst) << "\n";
+}
+
+class AArch64TargetELFStreamer : public AArch64TargetStreamer {
+private:
+ AArch64ELFStreamer &getStreamer();
+
+ void emitInst(uint32_t Inst) override;
+
+public:
+ AArch64TargetELFStreamer(MCStreamer &S) : AArch64TargetStreamer(S) {}
+};
+
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// AArch64 ELF ABI:
@@ -55,6 +87,8 @@ namespace {
/// by MachO. Beware!
class AArch64ELFStreamer : public MCELFStreamer {
public:
+ friend class AArch64TargetELFStreamer;
+
AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter)
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
@@ -82,6 +116,18 @@ public:
MCELFStreamer::EmitInstruction(Inst, STI);
}
+ void emitInst(uint32_t Inst) {
+ char Buffer[4];
+ const bool LittleEndian = getContext().getAsmInfo()->isLittleEndian();
+
+ EmitA64MappingSymbol();
+ for (unsigned II = 0; II != 4; ++II) {
+ const unsigned I = LittleEndian ? (4 - II - 1) : II;
+ Buffer[4 - II - 1] = uint8_t(Inst >> I * CHAR_BIT);
+ }
+ MCELFStreamer::EmitBytes(StringRef(Buffer, 4));
+ }
+
/// This is one of the functions used to emit data into an ELF section, so the
/// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
@@ -144,17 +190,35 @@ private:
/// @}
};
+} // end anonymous namespace
+
+AArch64ELFStreamer &AArch64TargetELFStreamer::getStreamer() {
+ return static_cast<AArch64ELFStreamer &>(Streamer);
+}
+
+void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
+ getStreamer().emitInst(Inst);
}
namespace llvm {
+MCStreamer *
+createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst) {
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
+ new AArch64TargetAsmStreamer(*S, OS);
+ return S;
+}
+
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack) {
+ bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ new AArch64TargetELFStreamer(*S);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
return S;
}
}
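The new emitInst hook above writes the 32-bit opcode as raw data after the code mapping symbol, least-significant byte first on little-endian targets and most-significant first on big-endian ones. A standalone sketch of just the byte packing (packInst is illustrative, not part of the patch):

    #include <cstdint>

    // Sketch: serialize one A64 instruction word in target byte order, as the
    // ELF streamer's emitInst does before handing the bytes to EmitBytes().
    static void packInst(uint32_t Inst, bool LittleEndian, uint8_t Out[4]) {
      for (unsigned Byte = 0; Byte != 4; ++Byte) {
        const unsigned Shift = LittleEndian ? Byte : (3 - Byte);
        Out[Byte] = uint8_t(Inst >> (Shift * 8));
      }
    }
    // e.g. packInst(0xD503201F /* NOP */, /*LittleEndian=*/true, Out)
    // yields { 0x1F, 0x20, 0x03, 0xD5 }.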
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index bc6973b..71b05cc 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64_ELF_STREAMER_H
-#define LLVM_AARCH64_ELF_STREAMER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ELFSTREAMER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ELFSTREAMER_H
#include "llvm/MC/MCELFStreamer.h"
@@ -20,7 +20,7 @@ namespace llvm {
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack);
+ bool RelaxAll);
}
-#endif // AArch64_ELF_STREAMER_H
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index bf405fb..0f5b765 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AArch64FIXUPKINDS_H
-#define LLVM_AArch64FIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64FIXUPKINDS_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64FIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 1763b40..70b9329 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -13,8 +13,8 @@
#include "AArch64MCAsmInfo.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -66,7 +66,7 @@ const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
Triple T(TT);
- if (T.getArch() == Triple::arm64_be || T.getArch() == Triple::aarch64_be)
+ if (T.getArch() == Triple::aarch64_be)
IsLittleEndian = false;
// We prefer NEON instructions to be printed in the short form.
@@ -89,7 +89,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
WeakRefDirective = "\t.weak\t";
- HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 42a031d..5d03c21 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64TARGETASMINFO_H
-#define AArch64TARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index f051357..c306b11 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -15,13 +15,13 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 42a6787..e396df8 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -90,8 +90,9 @@ const MCSection *AArch64MCExpr::FindAssociatedSection() const {
}
bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout))
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup))
return false;
Res =
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index 5422f9d..db48ac9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AArch64MCEXPR_H
-#define LLVM_AArch64MCEXPR_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCEXPR_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCEXPR_H
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
@@ -152,7 +152,8 @@ public:
const MCSection *FindAssociatedSection() const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index ae698c5..0f7a6b8 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -126,15 +126,14 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack) {
+ const MCSubtargetInfo &STI, bool RelaxAll) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
/*LabelSections*/ true);
- return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
// Force static initialization.
@@ -142,17 +141,14 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Z(TheARM64leTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn W(TheARM64beTarget, createAArch64MCAsmInfo);
+ RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo);
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
createAArch64MCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget,
+ TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
createAArch64MCCodeGenInfo);
// Register the MC instruction info.
@@ -160,9 +156,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createAArch64MCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget,
+ TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
createAArch64MCInstrInfo);
// Register the MC register info.
@@ -170,9 +164,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createAArch64MCRegisterInfo);
TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64leTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64beTarget,
+ TargetRegistry::RegisterMCRegInfo(TheARM64Target,
createAArch64MCRegisterInfo);
// Register the MC subtarget info.
@@ -180,9 +172,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createAArch64MCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget,
+ TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
createAArch64MCSubtargetInfo);
// Register the asm backend.
@@ -190,19 +180,15 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createAArch64leAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
createAArch64beAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget,
+ TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
createAArch64leAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget,
- createAArch64beAsmBackend);
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
createAArch64MCCodeEmitter);
TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget,
+ TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
createAArch64MCCodeEmitter);
// Register the object streamer.
@@ -210,16 +196,21 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget,
+ createAArch64MCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget,
+ createAArch64MCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheARM64Target,
+ createAArch64MCAsmStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
createAArch64MCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget,
+ TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
createAArch64MCInstPrinter);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index d886ea2..1553115 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -11,19 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64MCTARGETDESC_H
-#define AArch64MCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
#include <string>
namespace llvm {
+class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
+class MCInstPrinter;
class MCRegisterInfo;
class MCObjectWriter;
+class MCStreamer;
class MCSubtargetInfo;
class StringRef;
class Target;
@@ -31,8 +34,7 @@ class raw_ostream;
extern Target TheAArch64leTarget;
extern Target TheAArch64beTarget;
-extern Target TheARM64leTarget;
-extern Target TheARM64beTarget;
+extern Target TheARM64Target;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -51,6 +53,11 @@ MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
uint32_t CPUSubtype);
+MCStreamer *
+createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst);
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index ba95366..e12a24b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -9,15 +9,15 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/MC/MCAssembler.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -288,7 +288,8 @@ void AArch64MachObjectWriter::RecordRelocation(
// FIXME: Will the Target we already have ever have any data in it
// we need to preserve and merge with the new Target? How about
// the FixedValue?
- if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout))
+ if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout,
+ &Fixup))
Asm.getContext().FatalError(Fixup.getLoc(),
"unable to resolve variable '" +
Symbol->getName() + "'");
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index f9aeb35..e3112fa 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -28,8 +28,9 @@ AArch64TargetStreamer::~AArch64TargetStreamer() {}
// The constant pool handling is shared by all AArch64TargetStreamer
// implementations.
-const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
- return ConstantPools->addEntry(Streamer, Expr);
+const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr,
+ unsigned Size) {
+ return ConstantPools->addEntry(Streamer, Expr, Size);
}
void AArch64TargetStreamer::emitCurrentConstantPool() {
@@ -38,3 +39,5 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
// finish() - write out any non-empty assembler constant pools.
void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+
+void AArch64TargetStreamer::emitInst(uint32_t Inst) {}
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index 3a382c1..f42ecb1 100644
--- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -14,18 +14,19 @@ using namespace llvm;
namespace llvm {
Target TheAArch64leTarget;
Target TheAArch64beTarget;
-Target TheARM64leTarget;
-Target TheARM64beTarget;
+Target TheARM64Target;
} // end namespace llvm
extern "C" void LLVMInitializeAArch64TargetInfo() {
- RegisterTarget<Triple::arm64, /*HasJIT=*/true> X(TheARM64leTarget, "arm64",
- "AArch64 (little endian)");
- RegisterTarget<Triple::arm64_be, /*HasJIT=*/true> Y(TheARM64beTarget, "arm64_be",
- "AArch64 (big endian)");
+ // Now register the "arm64" name for use with "-march". We don't want it to
+ // take possession of the Triple::aarch64 tag though.
+ TargetRegistry::RegisterTarget(TheARM64Target, "arm64",
+ "ARM64 (little endian)",
+ [](Triple::ArchType) { return false; }, true);
RegisterTarget<Triple::aarch64, /*HasJIT=*/true> Z(
TheAArch64leTarget, "aarch64", "AArch64 (little endian)");
RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> W(
TheAArch64beTarget, "aarch64_be", "AArch64 (big endian)");
+
}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 3c24bb3..bc6c7a9 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -791,22 +791,22 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
}
- // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
- // are: 11 xxx 1x11 xxxx xxx
- Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+ // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name
+ Regex GenericRegPattern("^s([0-3])_([0-7])_c([0-9]|1[0-5])_c([0-9]|1[0-5])_([0-7])$");
- SmallVector<StringRef, 4> Ops;
+ SmallVector<StringRef, 5> Ops;
if (!GenericRegPattern.match(NameLower, &Ops)) {
Valid = false;
return -1;
}
- uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+ uint32_t Op0 = 0, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
uint32_t Bits;
- Ops[1].getAsInteger(10, Op1);
- Ops[2].getAsInteger(10, CRn);
- Ops[3].getAsInteger(10, CRm);
- Ops[4].getAsInteger(10, Op2);
+ Ops[1].getAsInteger(10, Op0);
+ Ops[2].getAsInteger(10, Op1);
+ Ops[3].getAsInteger(10, CRn);
+ Ops[4].getAsInteger(10, CRm);
+ Ops[5].getAsInteger(10, Op2);
Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
Valid = true;
@@ -814,11 +814,10 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
if (SysRegPairs[i].Value == Bits) {
- Valid = true;
return SysRegPairs[i].Name;
}
}
@@ -827,7 +826,6 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
if (FeatureBits & AArch64::ProcCyclone) {
for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
if (CycloneSysRegPairs[i].Value == Bits) {
- Valid = true;
return CycloneSysRegPairs[i].Name;
}
}
@@ -837,28 +835,18 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
// write-only).
for (unsigned i = 0; i < NumInstPairs; ++i) {
if (InstPairs[i].Value == Bits) {
- Valid = true;
return InstPairs[i].Name;
}
}
+ assert(Bits < 0x10000);
uint32_t Op0 = (Bits >> 14) & 0x3;
uint32_t Op1 = (Bits >> 11) & 0x7;
uint32_t CRn = (Bits >> 7) & 0xf;
uint32_t CRm = (Bits >> 3) & 0xf;
uint32_t Op2 = Bits & 0x7;
- // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
- // name.
- if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
- Valid = false;
- return "";
- }
-
- assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
-
- Valid = true;
- return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+ return "s" + utostr(Op0)+ "_" + utostr(Op1) + "_c" + utostr(CRn)
+ "_c" + utostr(CRm) + "_" + utostr(Op2);
}
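With the change above, every 16-bit op0:op1:CRn:CRm:op2 value has a printable generic name, so toString no longer needs a Valid out-parameter. A sketch of the naming scheme (genericSysRegName is illustrative; the real code builds the string with utostr):

    #include <cstdint>
    #include <string>

    // Sketch: format a system-register encoding as
    // "s<op0>_<op1>_c<CRn>_c<CRm>_<op2>", the spelling now accepted by the
    // parser and printed by the disassembler for any encoding.
    static std::string genericSysRegName(uint32_t Bits) {
      const unsigned Op0 = (Bits >> 14) & 0x3;
      const unsigned Op1 = (Bits >> 11) & 0x7;
      const unsigned CRn = (Bits >> 7) & 0xf;
      const unsigned CRm = (Bits >> 3) & 0xf;
      const unsigned Op2 = Bits & 0x7;
      return "s" + std::to_string(Op0) + "_" + std::to_string(Op1) +
             "_c" + std::to_string(CRn) + "_c" + std::to_string(CRm) +
             "_" + std::to_string(Op2);
    }
    // e.g. genericSysRegName(0xDA10) == "s3_3_c4_c2_0" (the NZCV encoding).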
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9d2ce21..c60b09a 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AArch64BASEINFO_H
-#define AArch64BASEINFO_H
+#ifndef LLVM_LIB_TARGET_AARCH64_UTILS_AARCH64BASEINFO_H
+#define LLVM_LIB_TARGET_AARCH64_UTILS_AARCH64BASEINFO_H
// FIXME: Is it easiest to fix this layering violation by moving the .inc
// #includes from AArch64MCTargetDesc.h to here?
@@ -1143,7 +1143,7 @@ namespace AArch64SysReg {
SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
uint32_t fromString(StringRef Name, bool &Valid) const;
- std::string toString(uint32_t Bits, bool &Valid) const;
+ std::string toString(uint32_t Bits) const;
};
struct MSRMapper : SysRegMapper {
@@ -1271,7 +1271,12 @@ namespace AArch64II {
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20
+ MO_TLS = 0x20,
+
+ /// MO_CONSTPOOL - This flag indicates that a symbol operand represents
+ /// the address of a constant pool entry for the symbol, rather than the
+ /// address of the symbol itself.
+ MO_CONSTPOOL = 0x40
};
} // end namespace AArch64II
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 92eaf9e..387f1f6 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -34,6 +34,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
#include <set>
using namespace llvm;
@@ -676,8 +678,8 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
}
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
- TRI = Fn.getTarget().getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 55df29c..02db53a 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_ARM_H
-#define TARGET_ARM_H
+#ifndef LLVM_LIB_TARGET_ARM_ARM_H
+#define LLVM_LIB_TARGET_ARM_ARM_H
#include "llvm/Support/CodeGen.h"
@@ -23,7 +23,6 @@ class ARMAsmPrinter;
class ARMBaseTargetMachine;
class FunctionPass;
class ImmutablePass;
-class JITCodeEmitter;
class MachineInstr;
class MCInst;
class TargetLowering;
@@ -31,10 +30,6 @@ class TargetMachine;
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
-
-FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
- JITCodeEmitter &JCE);
-
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 25385a6..80b976b 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -228,6 +228,15 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
FeatureAvoidPartialCPSR,
FeatureTrustZone, FeatureVirtualization]>;
+def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
+ "Cortex-A17 ARM processors",
+ [FeatureVMLxForwarding,
+ FeatureT2XtPk, FeatureVFP4,
+ FeatureHWDiv, FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization,
+ FeatureTrustZone]>;
+
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
"Cortex-A53 ARM processors",
[FeatureHWDiv, FeatureHWDivARM,
@@ -351,12 +360,8 @@ def : ProcessorModel<"cortex-a8", CortexA8Model,
FeatureAClass]>;
def : ProcessorModel<"cortex-a9", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
+ FeatureDSPThumb2, FeatureHasRAS, FeatureMP,
FeatureAClass]>;
-def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
- [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS, FeatureAClass]>;
// FIXME: A12 has currently the same Schedule model as A9
def : ProcessorModel<"cortex-a12", CortexA9Model,
@@ -370,6 +375,12 @@ def : ProcessorModel<"cortex-a15", CortexA9Model,
FeatureDSPThumb2, FeatureHasRAS,
FeatureAClass]>;
+// FIXME: A17 has currently the same Schedule model as A9
+def : ProcessorModel<"cortex-a17", CortexA9Model,
+ [ProcA17, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureMP,
+ FeatureHasRAS, FeatureAClass]>;
+
// FIXME: krait has currently the same Schedule model as A9
def : ProcessorModel<"krait", CortexA9Model,
[ProcKrait, HasV7Ops,
@@ -396,6 +407,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureD16,
FeatureMClass]>;
+def : ProcNoItin<"cortex-m7", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureDSPThumb2,
+ FeatureT2XtPk, FeatureFPARMv8,
+ FeatureD16, FeatureMClass]>;
+
// Swift uArch Processors.
def : ProcessorModel<"swift", SwiftModel,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 28d2610..695fd4d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -76,7 +76,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
}
void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+ uint64_t Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -136,7 +137,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
assert(!MO.getSubReg() && "Subregs should be eliminated!");
if(ARM::GPRPairRegClass.contains(Reg)) {
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
Reg = TRI->getSubReg(Reg, ARM::gsub_0);
}
O << ARMInstPrinter::getRegisterName(Reg);
@@ -182,7 +183,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
MCSymbol *ARMAsmPrinter::
GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid << '_' << uid2;
@@ -191,7 +192,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
@@ -229,7 +230,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
case 'y': // Print a VFP single precision register as indexed double.
if (MI->getOperand(OpNum).isReg()) {
unsigned Reg = MI->getOperand(OpNum).getReg();
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
// Find the 'd' register that has this 's' register as a sub-register,
// and determine the lane number.
for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
@@ -261,7 +262,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// inline asm statement.
O << "{";
if (ARM::GPRPairRegClass.contains(RegBegin)) {
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
O << ARMInstPrinter::getRegisterName(Reg0) << ", ";
RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1);
@@ -317,7 +318,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
if (!MO.isReg())
return true;
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ?
ARM::gsub_0 : ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
@@ -343,7 +344,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned Reg = MI->getOperand(OpNum).getReg();
if (!ARM::QPRRegClass.contains(Reg))
return true;
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
ARM::dsub_0 : ARM::dsub_1);
O << ARMInstPrinter::getRegisterName(SubReg);
@@ -358,7 +359,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!MO.isReg())
return true;
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned Reg = MO.getReg();
if(!ARM::GPRPairRegClass.contains(Reg))
return false;
@@ -478,6 +479,9 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
// Emit ARM Build Attributes
if (Subtarget->isTargetELF())
emitAttributes();
+
+ if (!M.getModuleInlineAsm().empty() && Subtarget->isThumb())
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
}
static void
@@ -558,7 +562,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
for (auto &stub: Stubs) {
OutStreamer.EmitLabel(stub.first);
@@ -663,7 +667,9 @@ void ARMAsmPrinter::emitAttributes() {
ARMBuildAttrs::AllowNeonARMv8);
} else {
if (Subtarget->hasFPARMv8())
- ATS.emitFPU(ARM::FP_ARMV8);
+ // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
+ // FPU, but there are two different names for it depending on the CPU.
+ ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8);
else if (Subtarget->hasVFP4())
ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
else if (Subtarget->hasVFP3())
@@ -700,6 +706,13 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::AllowIEE754);
+ if (Subtarget->allowsUnalignedMem())
+ ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Allowed);
+ else
+ ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
+ ARMBuildAttrs::Not_Allowed);
+
// FIXME: add more flags to ARMBuildAttributes.h
// 8-bytes alignment stuff.
ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1);
@@ -757,6 +770,17 @@ void ARMAsmPrinter::emitAttributes() {
}
}
+ // TODO: We currently only support either reserving the register, or treating
+ // it as another callee-saved register, but not as SB or a TLS pointer; it
+ // would instead be nicer to push this from the frontend as metadata, as we do
+ // for the wchar and enum size tags
+ if (Subtarget->isR9Reserved())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9Reserved);
+ else
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9IsGPR);
+
if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowTZVirtualization);
@@ -834,8 +858,9 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- const DataLayout *DL = TM.getDataLayout();
- int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
+ int Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -1013,7 +1038,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
MCTargetStreamer &TS = *OutStreamer.getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -1151,7 +1176,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
@@ -1567,6 +1592,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitJumpTable(MI);
return;
}
+ case ARM::SPACE:
+ OutStreamer.EmitZeros(MI->getOperand(1).getImm());
+ return;
case ARM::TRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 7c103c6..5ff20ce 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMASMPRINTER_H
-#define ARMASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
+#define LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H
#include "ARMSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0288db9..7a315c4 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -108,7 +108,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const {
if (usePreRAHazardRecognizer()) {
const InstrItineraryData *II =
- &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
+ static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
@@ -518,6 +518,42 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
return Found;
}
+static bool isCPSRDefined(const MachineInstr *MI) {
+ for (const auto &MO : MI->operands())
+ if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef())
+ return true;
+ return false;
+}
+
+static bool isEligibleForITBlock(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return true;
+ case ARM::tADC: // ADC (register) T1
+ case ARM::tADDi3: // ADD (immediate) T1
+ case ARM::tADDi8: // ADD (immediate) T2
+ case ARM::tADDrr: // ADD (register) T1
+ case ARM::tAND: // AND (register) T1
+ case ARM::tASRri: // ASR (immediate) T1
+ case ARM::tASRrr: // ASR (register) T1
+ case ARM::tBIC: // BIC (register) T1
+ case ARM::tEOR: // EOR (register) T1
+ case ARM::tLSLri: // LSL (immediate) T1
+ case ARM::tLSLrr: // LSL (register) T1
+ case ARM::tLSRri: // LSR (immediate) T1
+ case ARM::tLSRrr: // LSR (register) T1
+ case ARM::tMUL: // MUL T1
+ case ARM::tMVN: // MVN (register) T1
+ case ARM::tORR: // ORR (register) T1
+ case ARM::tROR: // ROR (register) T1
+ case ARM::tRSB: // RSB (immediate) T1
+ case ARM::tSBC: // SBC (register) T1
+ case ARM::tSUBi3: // SUB (immediate) T1
+ case ARM::tSUBi8: // SUB (immediate) T2
+ case ARM::tSUBrr: // SUB (register) T1
+ return !isCPSRDefined(MI);
+ }
+}
+
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
@@ -525,6 +561,9 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
if (!MI->isPredicable())
return false;
+ if (!isEligibleForITBlock(MI))
+ return false;
+
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
@@ -555,16 +594,6 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
}
}
-/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
-LLVM_ATTRIBUTE_NOINLINE
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
- unsigned JTI);
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
- unsigned JTI) {
- assert(JTI < JT.size());
- return JT[JTI].MBBs.size();
-}
-
/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
@@ -637,7 +666,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// bytes, we can use 16-bit entries instead. Then there won't be an
// alignment issue.
unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
- unsigned NumEntries = getNumJTEntries(JT, JTI);
+ unsigned NumEntries = JT[JTI].MBBs.size();
if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction
@@ -645,6 +674,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
++NumEntries;
return NumEntries * EntrySize + InstSize;
}
+ case ARM::SPACE:
+ return MI->getOperand(1).getImm();
}
}
@@ -659,6 +690,49 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
return Size;
}
+void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const {
+ unsigned Opc = Subtarget.isThumb()
+ ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
+ : ARM::MRS;
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
+
+ // There is only 1 A/R class MRS instruction, and it always refers to
+ // APSR. However, there are lots of other possibilities on M-class cores.
+ if (Subtarget.isMClass())
+ MIB.addImm(0x800);
+
+ AddDefaultPred(MIB);
+
+ MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
+}
+
+void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const {
+ unsigned Opc = Subtarget.isThumb()
+ ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
+ : ARM::MSR;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+
+ if (Subtarget.isMClass())
+ MIB.addImm(0x800);
+ else
+ MIB.addImm(8);
+
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+ AddDefaultPred(MIB);
+
+ MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
+}
+
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -682,7 +756,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
- else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
@@ -742,6 +816,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
+ } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+ Opc = ARM::VMOVS;
+ BeginIdx = ARM::ssub_0;
+ SubRegs = 2;
+ } else if (SrcReg == ARM::CPSR) {
+ copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
+ return;
+ } else if (DestReg == ARM::CPSR) {
+ copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
+ return;
}
assert(Opc && "Impossible reg-to-reg copy");
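One practical consequence of the new isFPOnlySP() handling above: on a single-precision-only VFP (for instance the Cortex-M4 configuration earlier in this patch, which carries FeatureVFPOnlySP), a d-register to d-register copy can no longer use VMOVD and is instead split into two VMOVS copies of the s-subregisters. A small sketch of the subregister pairing, valid only for d0-d15 where dN overlaps s(2N) and s(2N+1):

#include <cassert>
#include <string>
#include <utility>

// For d0-d15, dN overlaps the single-precision pair (s(2N), s(2N+1)); the
// copy above walks these with BeginIdx = ssub_0 and SubRegs = 2.
static std::pair<std::string, std::string> dRegAsSPair(unsigned D) {
  assert(D < 16 && "only d0-d15 alias the s-register file");
  return {"s" + std::to_string(2 * D), "s" + std::to_string(2 * D + 1)};
}

// Conceptually, copying d1 into d0 then becomes:
//   vmov.f32 s0, s2
//   vmov.f32 s1, s3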
@@ -1174,12 +1258,26 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
-bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
+bool
+ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ Reloc::Model RM = MF.getTarget().getRelocationModel();
+
+ if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
+ assert(getSubtarget().getTargetTriple().getObjectFormat() ==
+ Triple::MachO &&
+ "LOAD_STACK_GUARD currently supported only for MachO.");
+ expandLoadStackGuard(MI, RM);
+ MI->getParent()->erase(MI);
+ return true;
+ }
+
// This hook gets to expand COPY instructions before they become
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
+ Subtarget.isFPOnlySP())
return false;
// Look for a copy between even S-registers. That is where we keep floats
@@ -2832,7 +2930,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
// FIXME: The current MachineInstr design does not support relying on machine
// mem operands to determine the width of a memory access. Instead, we expect
// the target to provide this information based on the instruction opcode and
-// operands. However, using MachineMemOperand is a the best solution now for
+// operands. However, using MachineMemOperand is the best solution now for
// two reasons:
//
// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
@@ -3933,6 +4031,38 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
return true;
}
+// LoadStackGuard has so far only been implemented for MachO. A different code
+// sequence is needed for other targets.
+void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
+ unsigned LoadImmOpc,
+ unsigned LoadOpc,
+ Reloc::Model RM) const {
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Reg = MI->getOperand(0).getReg();
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+ MachineInstrBuilder MIB;
+
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+
+ if (Subtarget.GVIsIndirectSymbol(GV, RM)) {
+ MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->
+ getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MIB.addMemOperand(MMO);
+ AddDefaultPred(MIB);
+ }
+
+ MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ AddDefaultPred(MIB);
+}
+
bool
ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
unsigned &AddSubOpc,
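Stepping back to the LOAD_STACK_GUARD expansion added earlier in this hunk: the helper emits a literal-address load of the guard global (tagged MO_NONLAZY), an optional invariant GOT load when the subtarget reports the symbol as indirect, and a final load of the guard value itself. A rough, purely illustrative sketch of that shape (the register, the <LoadImmOpc>/<LoadOpc> placeholders, and the __stack_chk_guard name are assumptions for illustration, not taken from the patch):

#include <cstdio>

// Illustrative-only restatement of the sequence built by
// expandLoadStackGuardBase; <LoadImmOpc>/<LoadOpc> stand in for the opcodes
// the ARM and Thumb2 subclasses pass down.
static void printStackGuardSequence(bool IndirectSymbol) {
  std::printf("  <LoadImmOpc> r0, :nonlazy:__stack_chk_guard\n");
  if (IndirectSymbol) // non-local guard symbol: one extra invariant GOT load
    std::printf("  <LoadOpc>    r0, [r0, #0]\n");
  std::printf("  <LoadOpc>    r0, [r0, #0]   ; load the guard value\n");
}

int main() { printStackGuardSequence(true); }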
@@ -4361,29 +4491,6 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
MI->addRegisterKilled(DReg, TRI, true);
}
-void ARMBaseInstrInfo::getUnconditionalBranch(
- MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
- if (Subtarget.isThumb())
- Branch.setOpcode(ARM::tB);
- else if (Subtarget.isThumb2())
- Branch.setOpcode(ARM::t2B);
- else
- Branch.setOpcode(ARM::Bcc);
-
- Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
- Branch.addOperand(MCOperand::CreateImm(ARMCC::AL));
- Branch.addOperand(MCOperand::CreateReg(0));
-}
-
-void ARMBaseInstrInfo::getTrap(MCInst &MI) const {
- if (Subtarget.isThumb())
- MI.setOpcode(ARM::tTRAP);
- else if (Subtarget.useNaClTrap())
- MI.setOpcode(ARM::TRAPNaCl);
- else
- MI.setOpcode(ARM::TRAP);
-}
-
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
@@ -4401,3 +4508,72 @@ bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
return false;
}
+
+bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VMOVDRR:
+ // dX = VMOVDRR rY, rZ
+ // is the same as:
+ // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
+ // Populate the InputRegs accordingly.
+ // rY
+ const MachineOperand *MOReg = &MI.getOperand(1);
+ InputRegs.push_back(
+ RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
+ // rZ
+ MOReg = &MI.getOperand(2);
+ InputRegs.push_back(
+ RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
+
+bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VMOVRRD:
+ // rX, rY = VMOVRRD dZ
+ // is the same as:
+ // rX = EXTRACT_SUBREG dZ, ssub_0
+ // rY = EXTRACT_SUBREG dZ, ssub_1
+ const MachineOperand &MOReg = MI.getOperand(2);
+ InputReg.Reg = MOReg.getReg();
+ InputReg.SubReg = MOReg.getSubReg();
+ InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
+
+bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
+ RegSubRegPairAndIdx &InsertedReg) const {
+ assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
+ assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
+
+ switch (MI.getOpcode()) {
+ case ARM::VSETLNi32:
+ // dX = VSETLNi32 dY, rZ, imm
+ const MachineOperand &MOBaseReg = MI.getOperand(1);
+ const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ const MachineOperand &MOIndex = MI.getOperand(3);
+ BaseReg.Reg = MOBaseReg.getReg();
+ BaseReg.SubReg = MOBaseReg.getSubReg();
+
+ InsertedReg.Reg = MOInsertedReg.getReg();
+ InsertedReg.SubReg = MOInsertedReg.getSubReg();
+ InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1;
+ return true;
+ }
+ llvm_unreachable("Target dependent opcode missing");
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b8d6758..0ae291b 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMBASEINSTRUCTIONINFO_H
-#define ARMBASEINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
@@ -34,6 +35,57 @@ protected:
// Can be only subclassed.
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+ void expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
+ unsigned LoadImmOpc, unsigned LoadOpc,
+ Reloc::Model RM) const;
+
+ /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of
+ /// the list is modeled as <Reg:SubReg, SubIdx>.
+ /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce
+ /// two elements:
+ /// - vreg1:sub1, sub0
+ /// - vreg2<:0>, sub1
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isRegSequenceLike().
+ bool getRegSequenceLikeInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override;
+
+ /// Build the equivalent inputs of an EXTRACT_SUBREG for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] InputReg of the equivalent EXTRACT_SUBREG.
+ /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce:
+ /// - vreg1:sub1, sub0
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isExtractSubregLike().
+ bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const override;
+
+ /// Build the equivalent inputs of an INSERT_SUBREG for the given \p MI
+ /// and \p DefIdx.
+ /// \p [out] BaseReg and \p [out] InsertedReg contain
+ /// the equivalent inputs of INSERT_SUBREG.
+ /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce:
+ /// - BaseReg: vreg0:sub0
+ /// - InsertedReg: vreg1:sub1, sub3
+ ///
+ /// \returns true if it is possible to build such an input sequence
+ /// with the pair \p MI, \p DefIdx. False otherwise.
+ ///
+ /// \pre MI.isInsertSubregLike().
+ bool
+ getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPair &BaseReg,
+ RegSubRegPairAndIdx &InsertedReg) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -104,6 +156,13 @@ public:
unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const override;
+ void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const;
+ void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, bool KillSrc,
+ const ARMSubtarget &Subtarget) const;
+
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
@@ -230,12 +289,6 @@ public:
void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
const TargetRegisterInfo *TRI) const override;
- void
- getUnconditionalBranch(MCInst &Branch,
- const MCSymbolRefExpr *BranchTarget) const override;
-
- void getTrap(MCInst &MI) const override;
-
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr *MI) const;
@@ -286,6 +339,9 @@ private:
bool verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const override;
+ virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const = 0;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index cdd91c7..6dc0493 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -38,6 +38,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#define DEBUG_TYPE "arm-register-info"
+
#define GET_REGINFO_TARGET_DESC
#include "ARMGenRegisterInfo.inc"
@@ -121,7 +123,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -180,14 +182,14 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind
const TargetRegisterClass *
ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &ARM::CCRRegClass)
- return nullptr; // Can't copy CCR registers.
+ return &ARM::rGPRRegClass; // Can't copy CCR registers.
return RC;
}
unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
switch (RC->getID()) {
default:
@@ -309,7 +311,7 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
@@ -354,7 +356,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
- if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
+ if (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->hasReservedCallFrame(MF))
return true;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
@@ -365,7 +370,10 @@ bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -385,7 +393,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF))
return FramePtr;
@@ -402,7 +410,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred,
unsigned PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C =
ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
@@ -415,11 +423,6 @@ emitLoadConstPool(MachineBasicBlock &MBB,
.setMIFlags(MIFlags);
}
-bool ARMBaseRegisterInfo::mayOverrideLocalAssignment() const {
- // The native linux build hits a downstream codegen bug when this is enabled.
- return STI.isTargetDarwin();
-}
-
bool ARMBaseRegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
return true;
@@ -529,7 +532,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -582,7 +585,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
int64_t Offset) const {
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
- (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri);
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -591,15 +594,15 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
const MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
- MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
- .addFrameIndex(FrameIdx).addImm(Offset));
+ MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
if (!AFI->isThumb1OnlyFunction())
- AddDefaultCC(MIB);
+ AddDefaultCC(AddDefaultPred(MIB));
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
@@ -607,7 +610,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -706,9 +709,9 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
- const ARMFrameLowering *TFI =
- static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
@@ -775,3 +778,60 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true);
}
}
+
+bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const {
+ auto MBB = MI->getParent();
+ auto MF = MBB->getParent();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ // If not copying into a sub-register this should be ok because we shouldn't
+ // need to split the reg.
+ if (!DstSubReg)
+ return true;
+ // Small registers don't frequently cause a problem, so we can coalesce them.
+ if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32)
+ return true;
+
+ auto NewRCWeight =
+ MRI.getTargetRegisterInfo()->getRegClassWeight(NewRC);
+ auto SrcRCWeight =
+ MRI.getTargetRegisterInfo()->getRegClassWeight(SrcRC);
+ auto DstRCWeight =
+ MRI.getTargetRegisterInfo()->getRegClassWeight(DstRC);
+ // If the source register class is more expensive than the destination, the
+ // coalescing is probably profitable.
+ if (SrcRCWeight.RegWeight > NewRCWeight.RegWeight)
+ return true;
+ if (DstRCWeight.RegWeight > NewRCWeight.RegWeight)
+ return true;
+
+ // If the register allocator isn't constrained, we can always allow coalescing;
+ // unfortunately we don't know yet if we will be constrained.
+ // The goal of this heuristic is to restrict how many expensive registers
+ // we allow to coalesce in a given basic block.
+ auto AFI = MF->getInfo<ARMFunctionInfo>();
+ auto It = AFI->getCoalescedWeight(MBB);
+
+ DEBUG(dbgs() << "\tARM::shouldCoalesce - Coalesced Weight: "
+ << It->second << "\n");
+ DEBUG(dbgs() << "\tARM::shouldCoalesce - Reg Weight: "
+ << NewRCWeight.RegWeight << "\n");
+
+ // This number is the largest round number that meets the criteria:
+ // (1) addresses PR18825
+ // (2) generates better code in some test cases (like vldm-shed-a9.ll)
+ // (3) Doesn't regress any test cases (in-tree, test-suite, and SPEC)
+ // In practice the SizeMultiplier will only factor in for straight line code
+ // that uses a lot of NEON vectors, which isn't terribly common.
+ unsigned SizeMultiplier = MBB->size()/100;
+ SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1;
+ if (It->second < NewRCWeight.WeightLimit * SizeMultiplier) {
+ It->second += NewRCWeight.RegWeight;
+ return true;
+ }
+ return false;
+}
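The budget logic above is the heart of the heuristic: each basic block gets a coalescing allowance of WeightLimit scaled by block size, and every accepted coalesce of an expensive class charges its register weight against that allowance. A standalone sketch of just that check, with purely hypothetical numbers (WeightLimit and RegWeight stand in for whatever getRegClassWeight would report for the target):

#include <cstdio>

// Hypothetical restatement of the per-block coalescing budget used above.
// CoalescedWeight is the running total kept in ARMFunctionInfo; the other
// values are stand-ins for TargetRegisterClass weight data.
static bool allowCoalesce(unsigned &CoalescedWeight, unsigned RegWeight,
                          unsigned WeightLimit, unsigned BlockSize) {
  unsigned SizeMultiplier = BlockSize / 100;      // roughly 1 per 100 instructions
  SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1;
  if (CoalescedWeight < WeightLimit * SizeMultiplier) {
    CoalescedWeight += RegWeight;                 // charge this coalesce
    return true;
  }
  return false;                                   // budget exhausted
}

int main() {
  unsigned Budget = 0;
  // e.g. a 250-instruction block (SizeMultiplier = 2) with WeightLimit 8 and
  // RegWeight 4 admits four coalesces before the budget is exhausted.
  for (int i = 0; i < 6; ++i)
    std::printf("coalesce %d: %s\n", i,
                allowCoalesce(Budget, 4, 8, 250) ? "yes" : "no");
}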
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 91df565..e9bc412 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMBASEREGISTERINFO_H
-#define ARMBASEREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -174,8 +174,6 @@ public:
unsigned MIFlags = MachineInstr::NoFlags)const;
/// Code Generation virtual methods...
- bool mayOverrideLocalAssignment() const override;
-
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
@@ -187,6 +185,14 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
+
+ /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true
+ bool shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const override;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index dc41c1c..bd07236 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMCALLINGCONV_H
-#define ARMCALLINGCONV_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
+#define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
@@ -177,8 +177,9 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
+
// AAPCS HFAs must have 1-4 elements, all of the same type
- assert(PendingHAMembers.size() < 8);
+ assert(PendingHAMembers.size() < 4);
if (PendingHAMembers.size() > 0)
assert(PendingHAMembers[0].getLocVT() == LocVT);
@@ -188,7 +189,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
if (ArgFlags.isInConsecutiveRegsLast()) {
- assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
+ assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
"Homogeneous aggregates must have between 1 and 4 members");
// Try to allocate a contiguous block of registers, each of the correct
@@ -196,7 +197,6 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
const uint16_t *RegList;
unsigned NumRegs;
switch (LocVT.SimpleTy) {
- case MVT::i32:
case MVT::f32:
RegList = SRegList;
NumRegs = 16;
@@ -235,20 +235,11 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State.AllocateReg(SRegList[regNo]);
unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned Align = Size;
-
- if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
- // Vectors are always aligned to 8 bytes. If we've seen an i32 here
- // it's because it's been split from a larger type, also with align 8.
- Align = 8;
- }
+ unsigned Align = std::min(Size, 8U);
for (auto It : PendingHAMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
-
- // Only the first member needs to be aligned.
- Align = 1;
}
// All pending members have now been allocated
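For the spill path above, each homogeneous-aggregate member now gets a stack slot of its own size, aligned to at most 8 bytes, replacing the old special-casing of v2f64 and split i32 members. A small worked sketch of that arithmetic (the element sizes correspond to typical HFA member types such as f32, f64, and v2f64):

#include <algorithm>
#include <cstdio>

// Per-member stack size/alignment used for homogeneous aggregates above:
// each member takes its own size in bytes, capped to 8-byte alignment.
static void hfaStackSlot(const char *Ty, unsigned SizeInBits) {
  unsigned Size = SizeInBits / 8;
  unsigned Align = std::min(Size, 8u);
  std::printf("%-6s -> Size %u, Align %u\n", Ty, Size, Align);
}

int main() {
  hfaStackSlot("f32", 32);    // 4-byte members, 4-byte aligned
  hfaStackSlot("f64", 64);    // 8-byte members, 8-byte aligned
  hfaStackSlot("v2f64", 128); // 16-byte members, still only 8-byte aligned
}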
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
deleted file mode 100644
index 5fb6ebf..0000000
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ /dev/null
@@ -1,1909 +0,0 @@
-//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the ARM machine instructions into
-// relocatable machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMConstantPoolValue.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMRelocations.h"
-#include "ARMSubtarget.h"
-#include "ARMTargetMachine.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#ifndef NDEBUG
-#include <iomanip>
-#endif
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-STATISTIC(NumEmitted, "Number of machine instructions emitted");
-
-namespace {
-
- class ARMCodeEmitter : public MachineFunctionPass {
- ARMJITInfo *JTI;
- const ARMBaseInstrInfo *II;
- const DataLayout *TD;
- const ARMSubtarget *Subtarget;
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- MachineModuleInfo *MMI;
- const std::vector<MachineConstantPoolEntry> *MCPEs;
- const std::vector<MachineJumpTableEntry> *MJTEs;
- bool IsPIC;
- bool IsThumb;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
- public:
- ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(nullptr),
- II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
- TD(tm.getDataLayout()), TM(tm),
- MCE(mce), MCPEs(nullptr), MJTEs(nullptr),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "ARM Machine Code Emitter";
- }
-
- void emitInstruction(const MachineInstr &MI);
-
- private:
-
- void emitWordLE(unsigned Binary);
- void emitDWordLE(uint64_t Binary);
- void emitConstPoolInstruction(const MachineInstr &MI);
- void emitMOVi32immInstruction(const MachineInstr &MI);
- void emitMOVi2piecesInstruction(const MachineInstr &MI);
- void emitLEApcrelJTInstruction(const MachineInstr &MI);
- void emitPseudoMoveInstruction(const MachineInstr &MI);
- void addPCLabel(unsigned LabelID);
- void emitPseudoInstruction(const MachineInstr &MI);
- unsigned getMachineSoRegOpValue(const MachineInstr &MI,
- const MCInstrDesc &MCID,
- const MachineOperand &MO,
- unsigned OpIdx);
-
- unsigned getMachineSoImmOpValue(unsigned SoImm);
- unsigned getAddrModeSBit(const MachineInstr &MI,
- const MCInstrDesc &MCID) const;
-
- void emitDataProcessingInstruction(const MachineInstr &MI,
- unsigned ImplicitRd = 0,
- unsigned ImplicitRn = 0);
-
- void emitLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRd = 0,
- unsigned ImplicitRn = 0);
-
- void emitMiscLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRn = 0);
-
- void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
-
- void emitMulFrmInstruction(const MachineInstr &MI);
-
- void emitExtendInstruction(const MachineInstr &MI);
-
- void emitMiscArithInstruction(const MachineInstr &MI);
-
- void emitSaturateInstruction(const MachineInstr &MI);
-
- void emitBranchInstruction(const MachineInstr &MI);
-
- void emitInlineJumpTable(unsigned JTIndex);
-
- void emitMiscBranchInstruction(const MachineInstr &MI);
-
- void emitVFPArithInstruction(const MachineInstr &MI);
-
- void emitVFPConversionInstruction(const MachineInstr &MI);
-
- void emitVFPLoadStoreInstruction(const MachineInstr &MI);
-
- void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
-
- void emitNEONLaneInstruction(const MachineInstr &MI);
- void emitNEONDupInstruction(const MachineInstr &MI);
- void emitNEON1RegModImmInstruction(const MachineInstr &MI);
- void emitNEON2RegInstruction(const MachineInstr &MI);
- void emitNEON3RegInstruction(const MachineInstr &MI);
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
- unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
- return getMachineOpValue(MI, MI.getOperand(OpIdx));
- }
-
- // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the
- // TableGen'erated getBinaryCodeForInstr() function to encode any
- // operand values, instead querying getMachineOpValue() directly for
- // each operand it needs to encode. Thus, any of the new encoder
- // helper functions can simply return 0 as the values the return
- // are already handled elsewhere. They are placeholders to allow this
- // encoder to continue to function until the MC encoder is sufficiently
- // far along that this one can be eliminated entirely.
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
- const { return 0; }
- unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
- const { return 0; }
- unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
- const { return 0; }
- unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val)
- const { return 0; }
- unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
- const { return 0; }
- unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
- unsigned Op) const { return 0; }
- unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI,
- unsigned Op)
- const { return 0; }
- unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
- unsigned Op) const { return 0; }
- uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
- const { return 0; }
-
- unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
- const {
- // {17-13} = reg
- // {12} = (U)nsigned (add == '1', sub == '0')
- // {11-0} = imm12
- const MachineOperand &MO = MI.getOperand(Op);
- const MachineOperand &MO1 = MI.getOperand(Op + 1);
- if (!MO.isReg()) {
- emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
- return 0;
- }
- unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
- int32_t Imm12 = MO1.getImm();
- uint32_t Binary;
- Binary = Imm12 & 0xfff;
- if (Imm12 >= 0)
- Binary |= (1 << 12);
- Binary |= (Reg << 13);
- return Binary;
- }
-
- unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
- return 0;
- }
-
- uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
- const { return 0;}
- uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx)
- const { return 0;}
- uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
- const { return 0;}
- uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
- // {17-13} = reg
- // {12} = (U)nsigned (add == '1', sub == '0')
- // {11-0} = imm12
- const MachineOperand &MO = MI.getOperand(Op);
- const MachineOperand &MO1 = MI.getOperand(Op + 1);
- if (!MO.isReg()) {
- emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
- return 0;
- }
- unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
- int32_t Imm12 = MO1.getImm();
-
- // Special value for #-0
- if (Imm12 == INT32_MIN)
- Imm12 = 0;
-
- // Immediate is always encoded as positive. The 'U' bit controls add vs
- // sub.
- bool isAdd = true;
- if (Imm12 < 0) {
- Imm12 = -Imm12;
- isAdd = false;
- }
-
- uint32_t Binary = Imm12 & 0xfff;
- if (isAdd)
- Binary |= (1 << 12);
- Binary |= (Reg << 13);
- return Binary;
- }
- unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
-
- unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
- const { return 0; }
-
- unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
- const { return 0; }
- unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
- const { return 0; }
-
- /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
- /// machine operand requires relocation, record the relocation and return
- /// zero.
- unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
- unsigned Reloc);
-
- /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
- ///
- unsigned getShiftOp(unsigned Imm) const ;
-
- /// Routines that handle operands which add machine relocations which are
- /// fixed up by the relocation stage.
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub, bool Indirect,
- intptr_t ACPV = 0) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
- intptr_t JTBase = 0) const;
- unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
- unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
- };
-}
-
-char ARMCodeEmitter::ID = 0;
-
-/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
-/// code to the specified MCE object.
-FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new ARMCodeEmitter(TM, JCE);
-}
-
-bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget());
-
- assert((Target.getRelocationModel() != Reloc::Default ||
- Target.getRelocationModel() != Reloc::Static) &&
- "JIT relocation model must be set to static or default!");
-
- JTI = static_cast<ARMJITInfo*>(Target.getJITInfo());
- II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo());
- TD = Target.getDataLayout();
-
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- MCPEs = &MF.getConstantPool()->getConstants();
- MJTEs = nullptr;
- if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
- IsPIC = TM.getRelocationModel() == Reloc::PIC_;
- IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
- JTI->Initialize(MF, IsPIC);
- MMI = &getAnalysis<MachineModuleInfo>();
- MCE.setModuleInfo(MMI);
-
- do {
- DEBUG(errs() << "JITTing function '"
- << MF.getName() << "'\n");
- MCE.startFunction(MF);
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
- MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I)
- emitInstruction(*I);
- }
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
-///
-unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
- switch (ARM_AM::getAM2ShiftOpc(Imm)) {
- default: llvm_unreachable("Unknown shift opc!");
- case ARM_AM::asr: return 2;
- case ARM_AM::lsl: return 0;
- case ARM_AM::lsr: return 1;
- case ARM_AM::ror:
- case ARM_AM::rrx: return 3;
- }
-}
-
-/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
-/// machine operand requires relocation, record the relocation and return zero.
-unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
- const MachineOperand &MO,
- unsigned Reloc) {
- assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
- && "Relocation to this function should be for movt or movw");
-
- if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), Reloc);
- else {
-#ifndef NDEBUG
- errs() << MO;
-#endif
- llvm_unreachable("Unsupported operand type for movw/movt");
- }
- return 0;
-}
-
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
-unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
- if (MO.isReg())
- return II->getRegisterInfo().getEncodingValue(MO.getReg());
- else if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
- else if (MO.isCPI()) {
- const MCInstrDesc &MCID = MI.getDesc();
- // For VFP load, the immediate offset is multiplied by 4.
- unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
- ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
- emitConstPoolAddress(MO.getIndex(), Reloc);
- } else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
- else
- llvm_unreachable("Unable to encode MachineOperand!");
- return 0;
-}
-
-/// emitGlobalAddress - Emit the specified address to the code stream.
-///
-void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub, bool Indirect,
- intptr_t ACPV) const {
- MachineRelocation MR = Indirect
- ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV),
- ACPV, MayNeedFarStub)
- : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), ACPV,
- MayNeedFarStub);
- MCE.addRelocation(MR);
-}
-
-/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
-/// be emitted to the current location in the function, and allow it to be PC
-/// relative.
-void ARMCodeEmitter::
-emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES));
-}
-
-/// emitConstPoolAddress - Arrange for the address of a constant pool entry
-/// to be emitted to the current location in the function, and allow it to be
-/// PC relative.
-void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
- // Tell JIT emitter we'll resolve the address.
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, CPI, 0, true));
-}
-
-/// emitJumpTableAddress - Arrange for the address of a jump table to
-/// be emitted to the current location in the function, and allow it to be PC
-/// relative.
-void ARMCodeEmitter::
-emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- Reloc, JTIndex, 0, true));
-}
-
-/// emitMachineBasicBlock - Emit the address of the specified basic block.
-void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc,
- intptr_t JTBase) const {
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- Reloc, BB, JTBase));
-}
-
-void ARMCodeEmitter::emitWordLE(unsigned Binary) {
- DEBUG(errs() << " 0x";
- errs().write_hex(Binary) << "\n");
- MCE.emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitDWordLE(uint64_t Binary) {
- DEBUG(errs() << " 0x";
- errs().write_hex(Binary) << "\n");
- MCE.emitDWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
-
- MCE.processDebugLoc(MI.getDebugLoc(), true);
-
- ++NumEmitted; // Keep track of the # of mi's emitted
- switch (MI.getDesc().TSFlags & ARMII::FormMask) {
- default: {
- llvm_unreachable("Unhandled instruction encoding format!");
- }
- case ARMII::MiscFrm:
- if (MI.getOpcode() == ARM::LEApcrelJT) {
- // Materialize jumptable address.
- emitLEApcrelJTInstruction(MI);
- break;
- }
- llvm_unreachable("Unhandled instruction encoding!");
- case ARMII::Pseudo:
- emitPseudoInstruction(MI);
- break;
- case ARMII::DPFrm:
- case ARMII::DPSoRegFrm:
- emitDataProcessingInstruction(MI);
- break;
- case ARMII::LdFrm:
- case ARMII::StFrm:
- emitLoadStoreInstruction(MI);
- break;
- case ARMII::LdMiscFrm:
- case ARMII::StMiscFrm:
- emitMiscLoadStoreInstruction(MI);
- break;
- case ARMII::LdStMulFrm:
- emitLoadStoreMultipleInstruction(MI);
- break;
- case ARMII::MulFrm:
- emitMulFrmInstruction(MI);
- break;
- case ARMII::ExtFrm:
- emitExtendInstruction(MI);
- break;
- case ARMII::ArithMiscFrm:
- emitMiscArithInstruction(MI);
- break;
- case ARMII::SatFrm:
- emitSaturateInstruction(MI);
- break;
- case ARMII::BrFrm:
- emitBranchInstruction(MI);
- break;
- case ARMII::BrMiscFrm:
- emitMiscBranchInstruction(MI);
- break;
- // VFP instructions.
- case ARMII::VFPUnaryFrm:
- case ARMII::VFPBinaryFrm:
- emitVFPArithInstruction(MI);
- break;
- case ARMII::VFPConv1Frm:
- case ARMII::VFPConv2Frm:
- case ARMII::VFPConv3Frm:
- case ARMII::VFPConv4Frm:
- case ARMII::VFPConv5Frm:
- emitVFPConversionInstruction(MI);
- break;
- case ARMII::VFPLdStFrm:
- emitVFPLoadStoreInstruction(MI);
- break;
- case ARMII::VFPLdStMulFrm:
- emitVFPLoadStoreMultipleInstruction(MI);
- break;
-
- // NEON instructions.
- case ARMII::NGetLnFrm:
- case ARMII::NSetLnFrm:
- emitNEONLaneInstruction(MI);
- break;
- case ARMII::NDupFrm:
- emitNEONDupInstruction(MI);
- break;
- case ARMII::N1RegModImmFrm:
- emitNEON1RegModImmInstruction(MI);
- break;
- case ARMII::N2RegFrm:
- emitNEON2RegInstruction(MI);
- break;
- case ARMII::N3RegFrm:
- emitNEON3RegInstruction(MI);
- break;
- }
- MCE.processDebugLoc(MI.getDebugLoc(), false);
-}
-
-void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
- unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
- unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
- const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
-
- // Remember the CONSTPOOL_ENTRY address for later relocation.
- JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
-
- // Emit constpool island entry. In most cases, the actual values will be
- // resolved and relocated after code emission.
- if (MCPE.isMachineConstantPoolEntry()) {
- ARMConstantPoolValue *ACPV =
- static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
-
- DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ "
- << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
-
- assert(ACPV->isGlobalValue() && "unsupported constant pool value");
- const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV();
- if (GV) {
- Reloc::Model RelocM = TM.getRelocationModel();
- emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
- isa<Function>(GV),
- Subtarget->GVIsIndirectSymbol(GV, RelocM),
- (intptr_t)ACPV);
- } else {
- const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
- emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute);
- }
- emitWordLE(0);
- } else {
- const Constant *CV = MCPE.Val.ConstVal;
-
- DEBUG({
- errs() << " ** Constant pool #" << CPI << " @ "
- << (void*)MCE.getCurrentPCValue() << " ";
- if (const Function *F = dyn_cast<Function>(CV))
- errs() << F->getName();
- else
- errs() << *CV;
- errs() << '\n';
- });
-
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
- emitWordLE(0);
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- uint32_t Val = uint32_t(*CI->getValue().getRawData());
- emitWordLE(Val);
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- if (CFP->getType()->isFloatTy())
- emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
- else if (CFP->getType()->isDoubleTy())
- emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
- else {
- llvm_unreachable("Unable to handle this constantpool entry!");
- }
- } else {
- llvm_unreachable("Unable to handle this constantpool entry!");
- }
- }
-}
-
-void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
- const MachineOperand &MO0 = MI.getOperand(0);
- const MachineOperand &MO1 = MI.getOperand(1);
-
- // Emit the 'movw' instruction.
- unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
-
- unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
-
- // Set the conditional execution predicate.
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
-
- // Encode imm16 as imm4:imm12
- Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
- Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
- emitWordLE(Binary);
-
- unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
- // Emit the 'movt' instruction.
- Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
-
- // Set the conditional execution predicate.
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
-
-  // Encode imm16 as imm4:imm12, same as movw above.
- Binary |= Hi16 & 0xFFF;
- Binary |= ((Hi16 >> 12) & 0xF) << 16;
- emitWordLE(Binary);
-}
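The imm16 split performed by the movw/movt pair above can be reproduced in isolation. The following standalone sketch (not part of the emitter; the field positions mirror the shifts used above, and the sample constant is arbitrary) packs each 16-bit half as imm4:imm12:

#include <cstdint>
#include <cstdio>

// Pack a 16-bit immediate as imm4:imm12, the layout used by movw/movt:
// imm12 lands in bits 11-0, imm4 in bits 19-16.
static uint32_t packImm16(uint32_t Imm16) {
  return (Imm16 & 0xFFF) | (((Imm16 >> 12) & 0xF) << 16);
}

int main() {
  uint32_t Value = 0xDEADBEEF;      // arbitrary sample constant
  uint32_t Lo16 = Value & 0xFFFF;   // goes into the movw encoding
  uint32_t Hi16 = Value >> 16;      // goes into the movt encoding
  std::printf("movw imm fields: 0x%05x\n", packImm16(Lo16));
  std::printf("movt imm fields: 0x%05x\n", packImm16(Hi16));
  return 0;
}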
-
-void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
- const MachineOperand &MO0 = MI.getOperand(0);
- const MachineOperand &MO1 = MI.getOperand(1);
- assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
- "Not a valid so_imm value!");
- unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
- unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
-
- // Emit the 'mov' instruction.
- unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
-
- // Set the conditional execution predicate.
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
-
- // Encode so_imm.
- // Set bit I(25) to identify this is the immediate form of <shifter_op>
- Binary |= 1 << ARMII::I_BitShift;
- Binary |= getMachineSoImmOpValue(V1);
- emitWordLE(Binary);
-
- // Now the 'orr' instruction.
- Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
-
- // Set the conditional execution predicate.
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
-
- // Encode Rn.
- Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
-
- // Encode so_imm.
- // Set bit I(25) to identify this is the immediate form of <shifter_op>
- Binary |= 1 << ARMII::I_BitShift;
- Binary |= getMachineSoImmOpValue(V2);
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
- // It's basically add r, pc, (LJTI - $+8)
-
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Emit the 'add' instruction.
- unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, MCID);
-
- // Encode Rd.
- Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
-
- // Encode Rn which is PC.
- Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift;
-
- // Encode the displacement.
- Binary |= 1 << ARMII::I_BitShift;
- emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
- unsigned Opcode = MI.getDesc().Opcode;
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode S bit if MI modifies CPSR.
- if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
- Binary |= 1 << ARMII::S_BitShift;
-
- // Encode register def if there is one.
- Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
-
- // Encode the shift operation.
- switch (Opcode) {
- default: break;
- case ARM::RRX:
- // rrx
- Binary |= 0x6 << 4;
- break;
- case ARM::MOVsrl_flag:
- // lsr #1
- Binary |= (0x2 << 4) | (1 << 7);
- break;
- case ARM::MOVsra_flag:
- // asr #1
- Binary |= (0x4 << 4) | (1 << 7);
- break;
- }
-
- // Encode register Rm.
- Binary |= getMachineOpValue(MI, 1);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::addPCLabel(unsigned LabelID) {
- DEBUG(errs() << " ** LPC" << LabelID << " @ "
- << (void*)MCE.getCurrentPCValue() << '\n');
- JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
-}
-
-void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
- unsigned Opcode = MI.getDesc().Opcode;
- switch (Opcode) {
- default:
- llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
- case ARM::BX_CALL:
- case ARM::BMOVPCRX_CALL: {
- // First emit mov lr, pc
- unsigned Binary = 0x01a0e00f;
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
- emitWordLE(Binary);
-
- // and then emit the branch.
- emitMiscBranchInstruction(MI);
- break;
- }
- case TargetOpcode::INLINEASM: {
- // We allow inline assembler nodes with empty bodies - they can
- // implicitly define registers, which is ok for JIT.
- if (MI.getOperand(0).getSymbolName()[0]) {
- report_fatal_error("JIT does not support inline asm!");
- }
- break;
- }
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::EH_LABEL:
- MCE.emitLabel(MI.getOperand(0).getMCSymbol());
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- // Do nothing.
- break;
- case ARM::CONSTPOOL_ENTRY:
- emitConstPoolInstruction(MI);
- break;
- case ARM::PICADD: {
-    // Remember the address of the PC label for relocation later.
-    addPCLabel(MI.getOperand(2).getImm());
-    // PICADD is just an add instruction that implicitly reads pc.
- emitDataProcessingInstruction(MI, 0, ARM::PC);
- break;
- }
- case ARM::PICLDR:
- case ARM::PICLDRB:
- case ARM::PICSTR:
- case ARM::PICSTRB: {
-    // Remember the address of the PC label for relocation later.
- addPCLabel(MI.getOperand(2).getImm());
- // These are just load / store instructions that implicitly read pc.
- emitLoadStoreInstruction(MI, 0, ARM::PC);
- break;
- }
- case ARM::PICLDRH:
- case ARM::PICLDRSH:
- case ARM::PICLDRSB:
- case ARM::PICSTRH: {
-    // Remember the address of the PC label for relocation later.
- addPCLabel(MI.getOperand(2).getImm());
- // These are just load / store instructions that implicitly read pc.
- emitMiscLoadStoreInstruction(MI, ARM::PC);
- break;
- }
-
- case ARM::MOVi32imm:
- // Two instructions to materialize a constant.
- if (Subtarget->hasV6T2Ops())
- emitMOVi32immInstruction(MI);
- else
- emitMOVi2piecesInstruction(MI);
- break;
-
- case ARM::LEApcrelJT:
- // Materialize jumptable address.
- emitLEApcrelJTInstruction(MI);
- break;
- case ARM::RRX:
- case ARM::MOVsrl_flag:
- case ARM::MOVsra_flag:
- emitPseudoMoveInstruction(MI);
- break;
- }
-}
-
-unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
- const MCInstrDesc &MCID,
- const MachineOperand &MO,
- unsigned OpIdx) {
- unsigned Binary = getMachineOpValue(MI, MO);
-
- const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
- const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
- ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
-
- // Encode the shift opcode.
- unsigned SBits = 0;
- unsigned Rs = MO1.getReg();
- if (Rs) {
- // Set shift operand (bit[7:4]).
- // LSL - 0001
- // LSR - 0011
- // ASR - 0101
- // ROR - 0111
- // RRX - 0110 and bit[11:8] clear.
- switch (SOpc) {
- default: llvm_unreachable("Unknown shift opc!");
- case ARM_AM::lsl: SBits = 0x1; break;
- case ARM_AM::lsr: SBits = 0x3; break;
- case ARM_AM::asr: SBits = 0x5; break;
- case ARM_AM::ror: SBits = 0x7; break;
- case ARM_AM::rrx: SBits = 0x6; break;
- }
- } else {
- // Set shift operand (bit[6:4]).
- // LSL - 000
- // LSR - 010
- // ASR - 100
- // ROR - 110
- switch (SOpc) {
- default: llvm_unreachable("Unknown shift opc!");
- case ARM_AM::lsl: SBits = 0x0; break;
- case ARM_AM::lsr: SBits = 0x2; break;
- case ARM_AM::asr: SBits = 0x4; break;
- case ARM_AM::ror: SBits = 0x6; break;
- }
- }
- Binary |= SBits << 4;
- if (SOpc == ARM_AM::rrx)
- return Binary;
-
- // Encode the shift operation Rs or shift_imm (except rrx).
- if (Rs) {
- // Encode Rs bit[11:8].
- assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift);
- }
-
- // Encode shift_imm bit[11:7].
- return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
-}
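For the immediate-shift form handled at the end of getMachineSoRegOpValue, the shifter operand packs Rm in bits 3:0, the shift type in bits 6:4 and shift_imm in bits 11:7. A small standalone sketch (register and shift amount chosen for illustration only):

#include <cstdint>
#include <cstdio>

int main() {
  // "r3, lsl #5" in the immediate-shift form: LSL uses SBits 000 (see the
  // table above), the shift amount goes into bits 11:7 and Rm into bits 3:0.
  unsigned Rm = 3;        // encoding of r3 in the standard AArch32 numbering
  unsigned SBits = 0x0;   // lsl
  unsigned ShiftImm = 5;
  uint32_t ShifterOperand = Rm | (SBits << 4) | (ShiftImm << 7);
  std::printf("shifter_operand = 0x%03x\n", ShifterOperand);  // 0x283
  return 0;
}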
-
-unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
- int SoImmVal = ARM_AM::getSOImmVal(SoImm);
- assert(SoImmVal != -1 && "Not a valid so_imm value!");
-
- // Encode rotate_imm.
- unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
- << ARMII::SoRotImmShift;
-
- // Encode immed_8.
- Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
- return Binary;
-}
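The so_imm field built above is the classic ARM modified immediate: an 8-bit value rotated right by twice the 4-bit rotate field. The sketch below reproduces that search independently of the ARM_AM routines, as an illustration of what getSOImmVal computes (the function and variable names here are illustrative, not LLVM APIs):

#include <cstdint>
#include <cstdio>

static uint32_t rotl32(uint32_t V, unsigned Amt) {
  Amt &= 31;
  return Amt ? ((V << Amt) | (V >> (32 - Amt))) : V;
}

// Find an even rotation R such that rotating an 8-bit value right by R
// reproduces V; the so_imm field then stores R/2 in bits 11:8 and the
// 8-bit value in bits 7:0.
static bool encodeSoImm(uint32_t V, unsigned &RotateImm, unsigned &Imm8) {
  for (unsigned R = 0; R < 32; R += 2) {
    uint32_t Candidate = rotl32(V, R);  // the imm8 that "ror #R" restores to V
    if (Candidate <= 0xFF) {
      RotateImm = R / 2;
      Imm8 = Candidate;
      return true;
    }
  }
  return false;  // not representable as a single so_imm
}

int main() {
  unsigned Rot, Imm8;
  if (encodeSoImm(0x3FC00, Rot, Imm8))   // 0xFF rotated right by 22
    std::printf("rotate_imm=%u immed_8=0x%02x\n", Rot, Imm8);
  return 0;
}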
-
-unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
- const MCInstrDesc &MCID) const {
- for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){
- const MachineOperand &MO = MI.getOperand(i-1);
- if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
- return 1 << ARMII::S_BitShift;
- }
- return 0;
-}
-
-void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
- unsigned ImplicitRd,
- unsigned ImplicitRn) {
- const MCInstrDesc &MCID = MI.getDesc();
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, MCID);
-
- // Encode register def if there is one.
- unsigned NumDefs = MCID.getNumDefs();
- unsigned OpIdx = 0;
- if (NumDefs)
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
- else if (ImplicitRd)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
-
- if (MCID.Opcode == ARM::MOVi16) {
- // Get immediate from MI.
- unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
- ARM::reloc_arm_movw);
- // Encode imm which is the same as in emitMOVi32immInstruction().
- Binary |= Lo16 & 0xFFF;
- Binary |= ((Lo16 >> 12) & 0xF) << 16;
- emitWordLE(Binary);
- return;
- } else if(MCID.Opcode == ARM::MOVTi16) {
- unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
- ARM::reloc_arm_movt) >> 16);
- Binary |= Hi16 & 0xFFF;
- Binary |= ((Hi16 >> 12) & 0xF) << 16;
- emitWordLE(Binary);
- return;
- } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
- uint32_t v = ~MI.getOperand(2).getImm();
- int32_t lsb = countTrailingZeros(v);
- int32_t msb = (32 - countLeadingZeros(v)) - 1;
- // Instr{20-16} = msb, Instr{11-7} = lsb
- Binary |= (msb & 0x1F) << 16;
- Binary |= (lsb & 0x1F) << 7;
- emitWordLE(Binary);
- return;
- } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) {
- // Encode Rn in Instr{0-3}
- Binary |= getMachineOpValue(MI, OpIdx++);
-
- uint32_t lsb = MI.getOperand(OpIdx++).getImm();
- uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
-
- // Instr{20-16} = widthm1, Instr{11-7} = lsb
- Binary |= (widthm1 & 0x1F) << 16;
- Binary |= (lsb & 0x1F) << 7;
- emitWordLE(Binary);
- return;
- }
-
- // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
-
- // Encode first non-shifter register operand if there is one.
- bool isUnary = MCID.TSFlags & ARMII::UnaryDP;
- if (!isUnary) {
- if (ImplicitRn)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
- else {
- Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
- ++OpIdx;
- }
- }
-
- // Encode shifter operand.
- const MachineOperand &MO = MI.getOperand(OpIdx);
- if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
- // Encode SoReg.
- emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx));
- return;
- }
-
- if (MO.isReg()) {
- // Encode register Rm.
- emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg()));
- return;
- }
-
- // Encode so_imm.
- Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
-
- emitWordLE(Binary);
-}
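In the BFC/BFI case above, the msb and lsb fields are derived from the inverted mask operand with count-trailing-zeros and count-leading-zeros arithmetic. A self-contained sketch of that computation (the mask value is just an example, and the helpers stand in for llvm::countTrailingZeros / countLeadingZeros):

#include <cstdint>
#include <cstdio>

static unsigned ctz32(uint32_t V) {           // V must be non-zero
  unsigned N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}

static unsigned clz32(uint32_t V) {           // V must be non-zero
  unsigned N = 0;
  while (!(V & 0x80000000u)) { V <<= 1; ++N; }
  return N;
}

int main() {
  uint32_t KeepMask = 0xFFFF00FF;      // BFC operand: clear bits 8..15
  uint32_t V = ~KeepMask;              // 0x0000FF00, the bits being cleared
  unsigned Lsb = ctz32(V);             // 8
  unsigned Msb = (32 - clz32(V)) - 1;  // 15
  std::printf("lsb=%u msb=%u\n", Lsb, Msb);
  return 0;
}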
-
-void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRd,
- unsigned ImplicitRn) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Form = MCID.TSFlags & ARMII::FormMask;
- bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
-  // If this is an LDRi12, STRi12 or LDRcp, nothing more needs to be done.
- if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
- MI.getOpcode() == ARM::STRi12) {
- emitWordLE(Binary);
- return;
- }
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-
- // Operand 0 of a pre- and post-indexed store is the address base
- // writeback. Skip it.
- bool Skipped = false;
- if (IsPrePost && Form == ARMII::StFrm) {
- ++OpIdx;
- Skipped = true;
- }
-
- // Set first operand
- if (ImplicitRd)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift);
- else
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
-
- // Set second operand
- if (ImplicitRn)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
- else
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
-
- // If this is a two-address operand, skip it. e.g. LDR_PRE.
- if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
-
- const MachineOperand &MO2 = MI.getOperand(OpIdx);
- unsigned AM2Opc = (ImplicitRn == ARM::PC)
- ? 0 : MI.getOperand(OpIdx+1).getImm();
-
- // Set bit U(23) according to sign of immed value (positive or negative).
- Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
- ARMII::U_BitShift);
- if (!MO2.getReg()) { // is immediate
- if (ARM_AM::getAM2Offset(AM2Opc))
- // Set the value of offset_12 field
- Binary |= ARM_AM::getAM2Offset(AM2Opc);
- emitWordLE(Binary);
- return;
- }
-
- // Set bit I(25), because this is not in immediate encoding.
- Binary |= 1 << ARMII::I_BitShift;
- assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
- // Set bit[3:0] to the corresponding Rm register
- Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
-
-  // If this instr uses the scaled register offset/index form, set the
-  // shift_immed (bit[11:7]) and shift (bit[6:5]) fields.
- if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
- Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
- Binary |= ShImm << ARMII::ShiftShift; // shift_immed
- }
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
- unsigned ImplicitRn) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Form = MCID.TSFlags & ARMII::FormMask;
- bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-
- // Operand 0 of a pre- and post-indexed store is the address base
- // writeback. Skip it.
- bool Skipped = false;
- if (IsPrePost && Form == ARMII::StMiscFrm) {
- ++OpIdx;
- Skipped = true;
- }
-
- // Set first operand
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
-
- // Skip LDRD and STRD's second operand.
- if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD)
- ++OpIdx;
-
- // Set second operand
- if (ImplicitRn)
- // Special handling for implicit use (e.g. PC).
- Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift);
- else
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
-
- // If this is a two-address operand, skip it. e.g. LDRH_POST.
- if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
-
- const MachineOperand &MO2 = MI.getOperand(OpIdx);
- unsigned AM3Opc = (ImplicitRn == ARM::PC)
- ? 0 : MI.getOperand(OpIdx+1).getImm();
-
- // Set bit U(23) according to sign of immed value (positive or negative)
- Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
- ARMII::U_BitShift);
-
- // If this instr is in register offset/index encoding, set bit[3:0]
- // to the corresponding Rm register.
- if (MO2.getReg()) {
- Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg());
- emitWordLE(Binary);
- return;
- }
-
- // This instr is in immediate offset/index encoding, set bit 22 to 1.
- Binary |= 1 << ARMII::AM3_I_BitShift;
- if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
- // Set operands
- Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
- Binary |= (ImmOffs & 0xF); // immedL
- }
-
- emitWordLE(Binary);
-}
-
-static unsigned getAddrModeUPBits(unsigned Mode) {
- unsigned Binary = 0;
-
- // Set addressing mode by modifying bits U(23) and P(24)
- // IA - Increment after - bit U = 1 and bit P = 0
- // IB - Increment before - bit U = 1 and bit P = 1
- // DA - Decrement after - bit U = 0 and bit P = 0
- // DB - Decrement before - bit U = 0 and bit P = 1
- switch (Mode) {
- default: llvm_unreachable("Unknown addressing sub-mode!");
- case ARM_AM::da: break;
- case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
- case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
- case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
- }
-
- return Binary;
-}
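The ia/ib/da/db mapping above relies on P being the bit directly above U, which is why the ib case can write 0x3 at the U position. A quick standalone check (the shift values 23 and 24 are assumed here to stand in for ARMII::U_BitShift and ARMII::P_BitShift):

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned U_BitShift = 23;   // assumed value of ARMII::U_BitShift
  const unsigned P_BitShift = 24;   // assumed value of ARMII::P_BitShift

  uint32_t IA = 0x1u << U_BitShift;   // increment after:  U=1 P=0
  uint32_t IB = 0x3u << U_BitShift;   // increment before: U=1 P=1
  uint32_t DB = 0x1u << P_BitShift;   // decrement before: U=0 P=1

  std::printf("ia P=%u U=%u\n", unsigned((IA >> P_BitShift) & 1),
              unsigned((IA >> U_BitShift) & 1));
  std::printf("ib P=%u U=%u\n", unsigned((IB >> P_BitShift) & 1),
              unsigned((IB >> U_BitShift) & 1));
  std::printf("db P=%u U=%u\n", unsigned((DB >> P_BitShift) & 1),
              unsigned((DB >> U_BitShift) & 1));
  return 0;
}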
-
-void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Skip operand 0 of an instruction with base register update.
- unsigned OpIdx = 0;
- if (IsUpdating)
- ++OpIdx;
-
- // Set base address operand
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
-
- // Set addressing mode by modifying bits U(23) and P(24)
- ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
- Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
-
- // Set bit W(21)
- if (IsUpdating)
- Binary |= 0x1 << ARMII::W_BitShift;
-
- // Set registers
- for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- break;
- unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg());
- assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
- RegNum < 16);
- Binary |= 0x1 << RegNum;
- }
-
- emitWordLE(Binary);
-}
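The register-list loop above sets one bit per register, using the register's encoding as the bit index. A standalone sketch of the resulting register_list field for {r0, r2, lr}, using the standard AArch32 numbering r0-r12 = 0-12, sp = 13, lr = 14, pc = 15:

#include <cstdint>
#include <cstdio>

int main() {
  unsigned Regs[] = {0, 2, 14};   // {r0, r2, lr}
  uint32_t RegList = 0;
  for (unsigned R : Regs)
    RegList |= 0x1u << R;         // one bit per register, as in the loop above
  std::printf("register_list = 0x%04x\n", RegList);  // 0x4005
  return 0;
}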
-
-void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, MCID);
-
- // 32x32->64bit operations have two destination registers. The number
- // of register definitions will tell us if that's what we're dealing with.
- unsigned OpIdx = 0;
- if (MCID.getNumDefs() == 2)
- Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
-
- // Encode Rd
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
-
- // Encode Rm
- Binary |= getMachineOpValue(MI, OpIdx++);
-
- // Encode Rs
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
-
-  // Many multiply instructions (e.g. MLA) have three src operands. Encode
-  // it as Rn (for multiply, that's in the same offset as RdLo).
- if (MCID.getNumOperands() > OpIdx &&
- !MCID.OpInfo[OpIdx].isPredicate() &&
- !MCID.OpInfo[OpIdx].isOptionalDef())
- Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-
- // Encode Rd
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
-
- const MachineOperand &MO1 = MI.getOperand(OpIdx++);
- const MachineOperand &MO2 = MI.getOperand(OpIdx);
- if (MO2.isReg()) {
- // Two register operand form.
- // Encode Rn.
- Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
-
- // Encode Rm.
- Binary |= getMachineOpValue(MI, MO2);
- ++OpIdx;
- } else {
- Binary |= getMachineOpValue(MI, MO1);
- }
-
- // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
- if (MI.getOperand(OpIdx).isImm() &&
- !MCID.OpInfo[OpIdx].isPredicate() &&
- !MCID.OpInfo[OpIdx].isOptionalDef())
- Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // PKH instructions are finished at this point
- if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
- emitWordLE(Binary);
- return;
- }
-
- unsigned OpIdx = 0;
-
- // Encode Rd
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
-
- const MachineOperand &MO = MI.getOperand(OpIdx++);
- if (OpIdx == MCID.getNumOperands() ||
- MCID.OpInfo[OpIdx].isPredicate() ||
- MCID.OpInfo[OpIdx].isOptionalDef()) {
- // Encode Rm and it's done.
- Binary |= getMachineOpValue(MI, MO);
- emitWordLE(Binary);
- return;
- }
-
- // Encode Rn.
- Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
-
- // Encode Rm.
- Binary |= getMachineOpValue(MI, OpIdx++);
-
- // Encode shift_imm.
- unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
- if (MCID.Opcode == ARM::PKHTB) {
- assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
- if (ShiftAmt == 32)
- ShiftAmt = 0;
- }
- assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
- Binary |= ShiftAmt << ARMII::ShiftShift;
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Encode Rd
- Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
-
- // Encode saturate bit position.
- unsigned Pos = MI.getOperand(1).getImm();
- if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16)
- Pos -= 1;
- assert((Pos < 16 || (Pos < 32 &&
- MCID.Opcode != ARM::SSAT16 &&
- MCID.Opcode != ARM::USAT16)) &&
- "saturate bit position out of range");
- Binary |= Pos << 16;
-
- // Encode Rm
- Binary |= getMachineOpValue(MI, 2);
-
- // Encode shift_imm.
- if (MCID.getNumOperands() == 4) {
- unsigned ShiftOp = MI.getOperand(3).getImm();
- ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
- if (Opc == ARM_AM::asr)
- Binary |= (1 << 6);
- unsigned ShiftAmt = MI.getOperand(3).getImm();
- if (ShiftAmt == 32 && Opc == ARM_AM::asr)
- ShiftAmt = 0;
- assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
- Binary |= ShiftAmt << ARMII::ShiftShift;
- }
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- if (MCID.Opcode == ARM::TPsoft) {
- llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
- }
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Set signed_immed_24 field
- Binary |= getMachineOpValue(MI, 0);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
- // Remember the base address of the inline jump table.
- uintptr_t JTBase = MCE.getCurrentPCValue();
- JTI->addJumpTableBaseAddr(JTIndex, JTBase);
- DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
- << '\n');
-
- // Now emit the jump table entries.
- const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
- if (IsPIC)
- // DestBB address - JT base.
- emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
- else
- // Absolute DestBB address.
- emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
- emitWordLE(0);
- }
-}
-
-void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // Handle jump tables.
- if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) {
- // First emit a ldr pc, [] instruction.
- emitDataProcessingInstruction(MI, ARM::PC);
-
- // Then emit the inline jump table.
- unsigned JTIndex =
- (MCID.Opcode == ARM::BR_JTr)
- ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
- emitInlineJumpTable(JTIndex);
- return;
- } else if (MCID.Opcode == ARM::BR_JTm) {
- // First emit a ldr pc, [] instruction.
- emitLoadStoreInstruction(MI, ARM::PC);
-
- // Then emit the inline jump table.
- emitInlineJumpTable(MI.getOperand(3).getIndex());
- return;
- }
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
- // The return register is LR.
- Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR);
- else
- // otherwise, set the return register
- Binary |= getMachineOpValue(MI, 0);
-
- emitWordLE(Binary);
-}
-
-unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegD = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegClass.contains(RegD);
- RegD = II->getRegisterInfo().getEncodingValue(RegD);
- if (!isSPVFP)
- Binary |= RegD << ARMII::RegRdShift;
- else {
- Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
- Binary |= (RegD & 0x01) << ARMII::D_BitShift;
- }
- return Binary;
-}
-
-unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegN = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegClass.contains(RegN);
- RegN = II->getRegisterInfo().getEncodingValue(RegN);
- if (!isSPVFP)
- Binary |= RegN << ARMII::RegRnShift;
- else {
- Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
- Binary |= (RegN & 0x01) << ARMII::N_BitShift;
- }
- return Binary;
-}
-
-unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegM = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- bool isSPVFP = ARM::SPRRegClass.contains(RegM);
- RegM = II->getRegisterInfo().getEncodingValue(RegM);
- if (!isSPVFP)
- Binary |= RegM;
- else {
- Binary |= ((RegM & 0x1E) >> 1);
- Binary |= (RegM & 0x01) << ARMII::M_BitShift;
- }
- return Binary;
-}
-
-void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-  assert((Binary & (1 << ARMII::D_BitShift)) == 0 &&
-         (Binary & (1 << ARMII::N_BitShift)) == 0 &&
-         (Binary & (1 << ARMII::M_BitShift)) == 0 && "VFP encoding bug!");
-
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, OpIdx++);
-
- // If this is a two-address operand, skip it, e.g. FMACD.
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
-
- // Encode Dn / Sn.
- if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
- Binary |= encodeVFPRn(MI, OpIdx++);
-
- if (OpIdx == MCID.getNumOperands() ||
- MCID.OpInfo[OpIdx].isPredicate() ||
- MCID.OpInfo[OpIdx].isOptionalDef()) {
- // FCMPEZD etc. has only one operand.
- emitWordLE(Binary);
- return;
- }
-
- // Encode Dm / Sm.
- Binary |= encodeVFPRm(MI, OpIdx);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Form = MCID.TSFlags & ARMII::FormMask;
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- switch (Form) {
- default: break;
- case ARMII::VFPConv1Frm:
- case ARMII::VFPConv2Frm:
- case ARMII::VFPConv3Frm:
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, 0);
- break;
- case ARMII::VFPConv4Frm:
- // Encode Dn / Sn.
- Binary |= encodeVFPRn(MI, 0);
- break;
- case ARMII::VFPConv5Frm:
- // Encode Dm / Sm.
- Binary |= encodeVFPRm(MI, 0);
- break;
- }
-
- switch (Form) {
- default: break;
- case ARMII::VFPConv1Frm:
- // Encode Dm / Sm.
- Binary |= encodeVFPRm(MI, 1);
- break;
- case ARMII::VFPConv2Frm:
- case ARMII::VFPConv3Frm:
- // Encode Dn / Sn.
- Binary |= encodeVFPRn(MI, 1);
- break;
- case ARMII::VFPConv4Frm:
- case ARMII::VFPConv5Frm:
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, 1);
- break;
- }
-
- if (Form == ARMII::VFPConv5Frm)
- // Encode Dn / Sn.
- Binary |= encodeVFPRn(MI, 2);
- else if (Form == ARMII::VFPConv3Frm)
- // Encode Dm / Sm.
- Binary |= encodeVFPRm(MI, 2);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- unsigned OpIdx = 0;
-
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, OpIdx++);
-
- // Encode address base.
- const MachineOperand &Base = MI.getOperand(OpIdx++);
- Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
-
- // If there is a non-zero immediate offset, encode it.
- if (Base.isReg()) {
- const MachineOperand &Offset = MI.getOperand(OpIdx);
- if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
- if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
- Binary |= 1 << ARMII::U_BitShift;
- Binary |= ImmOffs;
- emitWordLE(Binary);
- return;
- }
- }
-
- // If immediate offset is omitted, default to +0.
- Binary |= 1 << ARMII::U_BitShift;
-
- emitWordLE(Binary);
-}
-
-void
-ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
-
-  // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- // Skip operand 0 of an instruction with base register update.
- unsigned OpIdx = 0;
- if (IsUpdating)
- ++OpIdx;
-
- // Set base address operand
- Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
-
- // Set addressing mode by modifying bits U(23) and P(24)
- ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
- Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
-
- // Set bit W(21)
- if (IsUpdating)
- Binary |= 0x1 << ARMII::W_BitShift;
-
- // First register is encoded in Dd.
- Binary |= encodeVFPRd(MI, OpIdx+2);
-
- // Count the number of registers.
- unsigned NumRegs = 1;
- for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- break;
- ++NumRegs;
- }
-  // Bit 8 is set if <list> consists of 64-bit registers (e.g., D0);
-  // otherwise it is 0 and <list> consists of 32-bit registers.
- if(Binary & 0x100)
- Binary |= NumRegs * 2;
- else
- Binary |= NumRegs;
-
- emitWordLE(Binary);
-}
-
-unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegD = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- RegD = II->getRegisterInfo().getEncodingValue(RegD);
- Binary |= (RegD & 0xf) << ARMII::RegRdShift;
- Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
- return Binary;
-}
-
-unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegN = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- RegN = II->getRegisterInfo().getEncodingValue(RegN);
- Binary |= (RegN & 0xf) << ARMII::RegRnShift;
- Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
- return Binary;
-}
-
-unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI,
- unsigned OpIdx) const {
- unsigned RegM = MI.getOperand(OpIdx).getReg();
- unsigned Binary = 0;
- RegM = II->getRegisterInfo().getEncodingValue(RegM);
- Binary |= (RegM & 0xf);
- Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
- return Binary;
-}
-
-/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
-/// data-processing instruction to the corresponding Thumb encoding.
-static unsigned convertNEONDataProcToThumb(unsigned Binary) {
- assert((Binary & 0xfe000000) == 0xf2000000 &&
- "not an ARM NEON data-processing instruction");
- unsigned UBit = (Binary >> 24) & 1;
- return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
-}
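Applying the same transformation outside the emitter shows what changes: the 0xf2/0xf3 ARM prefix becomes Thumb's 0xef/0xff, with the U bit moved from bit 24 to bit 28 (the sample words below are arbitrary and only meant to satisfy the 0xf2 prefix check):

#include <cstdint>
#include <cstdio>

// Same bit shuffle as convertNEONDataProcToThumb above.
static uint32_t toThumbNEON(uint32_t Binary) {
  unsigned UBit = (Binary >> 24) & 1;
  return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
}

int main() {
  std::printf("0x%08x\n", toThumbNEON(0xf2001234));  // prints 0xef001234
  std::printf("0x%08x\n", toThumbNEON(0xf3001234));  // prints 0xff001234
  return 0;
}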
-
-void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
- const MCInstrDesc &MCID = MI.getDesc();
- if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
- RegTOpIdx = 0;
- RegNOpIdx = 1;
- LnOpIdx = 2;
- } else { // ARMII::NSetLnFrm
- RegTOpIdx = 2;
- RegNOpIdx = 0;
- LnOpIdx = 3;
- }
-
- // Set the conditional execution predicate
- Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
-
- unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
- RegT = II->getRegisterInfo().getEncodingValue(RegT);
- Binary |= (RegT << ARMII::RegRdShift);
- Binary |= encodeNEONRn(MI, RegNOpIdx);
-
- unsigned LaneShift;
- if ((Binary & (1 << 22)) != 0)
- LaneShift = 0; // 8-bit elements
- else if ((Binary & (1 << 5)) != 0)
- LaneShift = 1; // 16-bit elements
- else
- LaneShift = 2; // 32-bit elements
-
- unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
- unsigned Opc1 = Lane >> 2;
- unsigned Opc2 = Lane & 3;
- assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
- Binary |= (Opc1 << 21);
- Binary |= (Opc2 << 5);
-
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
-
- unsigned RegT = MI.getOperand(1).getReg();
- RegT = II->getRegisterInfo().getEncodingValue(RegT);
- Binary |= (RegT << ARMII::RegRdShift);
- Binary |= encodeNEONRn(MI, 0);
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
- unsigned Binary = getBinaryCodeForInstr(MI);
- // Destination register is encoded in Dd.
- Binary |= encodeNEONRd(MI, 0);
- // Immediate fields: Op, Cmode, I, Imm3, Imm4
- unsigned Imm = MI.getOperand(1).getImm();
- unsigned Op = (Imm >> 12) & 1;
- unsigned Cmode = (Imm >> 8) & 0xf;
- unsigned I = (Imm >> 7) & 1;
- unsigned Imm3 = (Imm >> 4) & 0x7;
- unsigned Imm4 = Imm & 0xf;
- Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
- if (IsThumb)
- Binary = convertNEONDataProcToThumb(Binary);
- emitWordLE(Binary);
-}
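The packed operand decoded above is a 13-bit op:cmode:imm8 value; the emitter scatters it into the I, imm3, cmode, op and imm4 instruction fields. A standalone sketch of that scatter (the sample operand is arbitrary):

#include <cstdint>
#include <cstdio>

// Scatter a packed op:cmode:imm8 NEON modified-immediate operand into the
// instruction fields, mirroring the shifts used above.
static uint32_t scatterNEONModImm(unsigned Imm) {
  unsigned Op    = (Imm >> 12) & 1;
  unsigned Cmode = (Imm >> 8) & 0xf;
  unsigned I     = (Imm >> 7) & 1;
  unsigned Imm3  = (Imm >> 4) & 0x7;
  unsigned Imm4  = Imm & 0xf;
  return (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
}

int main() {
  std::printf("0x%08x\n", scatterNEONModImm(0x1e35));  // prints 0x00030e25
  return 0;
}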
-
-void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Binary = getBinaryCodeForInstr(MI);
- // Destination register is encoded in Dd; source register in Dm.
- unsigned OpIdx = 0;
- Binary |= encodeNEONRd(MI, OpIdx++);
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
- Binary |= encodeNEONRm(MI, OpIdx);
- if (IsThumb)
- Binary = convertNEONDataProcToThumb(Binary);
- // FIXME: This does not handle VDUPfdf or VDUPfqf.
- emitWordLE(Binary);
-}
-
-void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned Binary = getBinaryCodeForInstr(MI);
- // Destination register is encoded in Dd; source registers in Dn and Dm.
- unsigned OpIdx = 0;
- Binary |= encodeNEONRd(MI, OpIdx++);
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
- Binary |= encodeNEONRn(MI, OpIdx++);
- if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
- ++OpIdx;
- Binary |= encodeNEONRm(MI, OpIdx);
- if (IsThumb)
- Binary = convertNEONDataProcToThumb(Binary);
- // FIXME: This does not handle VMOVDneon or VMOVQ.
- emitWordLE(Binary);
-}
-
-#include "ARMGenCodeEmitter.inc"
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index ce264ee..29405eb 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -275,6 +275,7 @@ namespace {
private:
void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ bool BBHasFallthrough(MachineBasicBlock *MBB);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
unsigned getCPELogAlign(const MachineInstr *CPEMI);
void scanFunctionJumpTables();
@@ -382,7 +383,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
<< MCP->getConstants().size() << " CP entries, aligned to "
<< MCP->getConstantPoolAlignment() << " bytes *****\n");
- TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo();
+ TII = (const ARMBaseInstrInfo *)MF->getTarget()
+ .getSubtargetImpl()
+ ->getInstrInfo();
AFI = MF->getInfo<ARMFunctionInfo>();
STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
@@ -529,7 +532,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = *MF->getSubtarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -554,9 +557,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
InsPoint[a] = CPEMI;
// Add a new CPEntry, but no corresponding CPUser yet.
- std::vector<CPEntry> CPEs;
- CPEs.push_back(CPEntry(CPEMI, i));
- CPEntries.push_back(CPEs);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, i));
++NumCPEs;
DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
<< Size << ", align = " << Align <<'\n');
@@ -566,7 +567,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
-static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
// Can't fall off end of function.
@@ -574,12 +575,15 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) {
return false;
MachineBasicBlock *NextBB = std::next(MBBI);
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I == NextBB)
- return true;
+ if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
+ return false;
- return false;
+ // Try to analyze the end of the block. A potential fallthrough may already
+ // have an unconditional branch for whatever reason.
+ MachineBasicBlock *TBB, *FBB;
+ SmallVector<MachineOperand, 4> Cond;
+ bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
+ return TooDifficult || FBB == nullptr;
}
/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
@@ -1203,7 +1207,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
unsigned Growth;
if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) &&
+ Growth < BestGrowth) {
// This is the least amount of required padding seen so far.
BestGrowth = Growth;
WaterIter = IP;
@@ -1309,7 +1314,12 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// Back past any possible branches (allow for a conditional and a maximally
// long unconditional).
if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
- BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
+ // Ensure BaseInsertOffset is larger than the offset of the instruction
+ // following UserMI so that the loop which searches for the split point
+ // iterates at least once.
+ BaseInsertOffset =
+ std::max(UserBBI.postOffset() - UPad - 8,
+ UserOffset + TII->GetInstSizeInBytes(UserMI) + 1);
DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
}
unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
@@ -1352,6 +1362,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (CC != ARMCC::AL)
MI = LastIT;
}
+
+ // We really must not split an IT block.
+ DEBUG(unsigned PredReg;
+ assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL));
+
NewMBB = splitBlockBeforeInstr(MI);
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index c7a8415..13bef54 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
-#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
+#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Support/Casting.h"
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 51d3dbb..2d80518 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -867,7 +867,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (RI.hasBasePointer(MF)) {
int32_t NumBytes = AFI->getFramePtrSpillOffset();
unsigned FramePtr = RI.getFrameRegister(MF);
- assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
"base pointer without frame pointer?");
if (AFI->isThumb2Function()) {
@@ -1343,8 +1343,9 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
- TRI = TM.getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
STI = &TM.getSubtarget<ARMSubtarget>();
AFI = MF.getInfo<ARMFunctionInfo>();
diff --git a/lib/Target/ARM/ARMFPUName.def b/lib/Target/ARM/ARMFPUName.def
index 1fef3b3..34ce85d 100644
--- a/lib/Target/ARM/ARMFPUName.def
+++ b/lib/Target/ARM/ARMFPUName.def
@@ -23,6 +23,7 @@ ARM_FPU_NAME("vfpv3", VFPV3)
ARM_FPU_NAME("vfpv3-d16", VFPV3_D16)
ARM_FPU_NAME("vfpv4", VFPV4)
ARM_FPU_NAME("vfpv4-d16", VFPV4_D16)
+ARM_FPU_NAME("fpv5-d16", FPV5_D16)
ARM_FPU_NAME("fp-armv8", FP_ARMV8)
ARM_FPU_NAME("neon", NEON)
ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4)
diff --git a/lib/Target/ARM/ARMFPUName.h b/lib/Target/ARM/ARMFPUName.h
index 2a64cce..86acffb 100644
--- a/lib/Target/ARM/ARMFPUName.h
+++ b/lib/Target/ARM/ARMFPUName.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMFPUNAME_H
-#define ARMFPUNAME_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMFPUNAME_H
+#define LLVM_LIB_TARGET_ARM_ARMFPUNAME_H
namespace llvm {
namespace ARM {
@@ -23,4 +23,4 @@ enum FPUKind {
} // namespace ARM
} // namespace llvm
-#endif // ARMFPUNAME_H
+#endif
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index e2d90cd..a5f635e 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -92,11 +92,11 @@ class ARMFastISel final : public FastISel {
public:
explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
- : FastISel(funcInfo, libInfo),
- M(const_cast<Module&>(*funcInfo.Fn->getParent())),
- TM(funcInfo.MF->getTarget()),
- TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()) {
+ : FastISel(funcInfo, libInfo),
+ M(const_cast<Module &>(*funcInfo.Fn->getParent())),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getSubtargetImpl()->getInstrInfo()),
+ TLI(*TM.getSubtargetImpl()->getTargetLowering()) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
isThumb2 = AFI->isThumbFunction();
@@ -105,39 +105,39 @@ class ARMFastISel final : public FastISel {
// Code from FastISel.cpp.
private:
- unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
- unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
- unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rrr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
unsigned Op2, bool Op2IsKill);
- unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
- unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm);
- unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm);
// Backend specific FastISel code.
private:
- bool TargetSelectInstruction(const Instruction *I) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
- unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ bool fastSelectInstruction(const Instruction *I) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
- bool FastLowerArguments() override;
+ bool fastLowerArguments() override;
private:
#include "ARMGenFastISel.inc"
@@ -189,7 +189,9 @@ class ARMFastISel final : public FastISel {
unsigned ARMSelectCallOp(bool UseReg);
unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
- const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); }
+ const TargetLowering *getTargetLowering() {
+ return TM.getSubtargetImpl()->getTargetLowering();
+ }
// Call handling routines.
private:
@@ -283,7 +285,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
return MIB;
}
-unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
unsigned ResultReg = createResultReg(RC);
@@ -305,7 +307,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
@@ -333,7 +335,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
@@ -365,7 +367,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
@@ -391,7 +393,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
@@ -421,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
@@ -511,7 +513,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
- return false;
+ return 0;
// If we can do this in a single instruction without a constant pool entry
// do so now.
@@ -534,7 +536,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
(ARM_AM::getSOImmVal(Imm) != -1);
if (UseImm) {
unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
- unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+ &ARM::GPRRegClass;
+ unsigned ImmReg = createResultReg(RC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ImmReg)
.addImm(Imm));
@@ -542,11 +546,16 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
}
}
+ unsigned ResultReg = 0;
+ if (Subtarget->useMovt(*FuncInfo.MF))
+ ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+
+ if (ResultReg)
+ return ResultReg;
+
// Load from constant pool. For now 32-bit only.
if (VT != MVT::i32)
- return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ return 0;
// MachineConstantPool wants an explicit alignment.
unsigned Align = DL.getPrefTypeAlignment(C->getType());
@@ -555,21 +564,20 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
Align = DL.getTypeAllocSize(C->getType());
}
unsigned Idx = MCP.getConstantPoolIndex(C, Align);
-
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM::t2LDRpci), DestReg)
- .addConstantPoolIndex(Idx));
+ TII.get(ARM::t2LDRpci), ResultReg)
+ .addConstantPoolIndex(Idx));
else {
// The extra immediate is for addrmode2.
- DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
+ ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM::LDRcp), DestReg)
- .addConstantPoolIndex(Idx)
- .addImm(0));
+ TII.get(ARM::LDRcp), ResultReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
}
-
- return DestReg;
+ return ResultReg;
}
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
@@ -679,7 +687,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
return DestReg;
}
-unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
@@ -698,7 +706,7 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
-unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -901,7 +909,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
// Since the offset is too large for the load/store instruction
// get the reg+offset into a register.
if (needsLowering) {
- Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
/*Op0IsKill*/false, Addr.Offset, MVT::i32);
Addr.Offset = 0;
}
@@ -1074,7 +1082,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
unsigned ResultReg;
if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1276,7 +1284,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
@@ -1301,7 +1309,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
@@ -1309,7 +1317,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
- FastEmitBranch(Target, DbgLoc);
+ fastEmitBranch(Target, DbgLoc);
return true;
}
@@ -1339,7 +1347,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
@@ -1497,13 +1505,13 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
(const TargetRegisterClass*)&ARM::GPRRegClass;
unsigned DestReg = createResultReg(RC);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
- unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ unsigned ZeroReg = fastMaterializeConstant(Zero);
// ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
.addReg(ZeroReg).addImm(1)
.addImm(ARMPred).addReg(ARM::CPSR);
- UpdateValueMap(I, DestReg);
+ updateValueMap(I, DestReg);
return true;
}
@@ -1522,7 +1530,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VCVTDS), Result)
.addReg(Op));
- UpdateValueMap(I, Result);
+ updateValueMap(I, Result);
return true;
}
@@ -1541,7 +1549,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VCVTSD), Result)
.addReg(Op));
- UpdateValueMap(I, Result);
+ updateValueMap(I, Result);
return true;
}
@@ -1585,7 +1593,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg).addReg(FP));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1617,7 +1625,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
if (IntReg == 0) return false;
- UpdateValueMap(I, IntReg);
+ updateValueMap(I, IntReg);
return true;
}
@@ -1693,7 +1701,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
.addImm(ARMCC::EQ)
.addReg(ARM::CPSR);
}
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1783,7 +1791,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1825,7 +1833,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg)
.addReg(Op1).addReg(Op2));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1883,7 +1891,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
unsigned &NumBytes,
bool isVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
CCAssignFnForCall(CC, false, isVarArg));
@@ -1941,6 +1949,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
+ const Value *ArgVal = Args[VA.getValNo()];
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
@@ -1967,7 +1976,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
break;
}
case CCValAssign::BCvt: {
- unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
/*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
@@ -2001,6 +2010,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
} else {
assert(VA.isMemLoc());
// Need to store on the stack.
+
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
Address Addr;
Addr.BaseType = Address::RegBase;
Addr.Base.Reg = ARM::SP;
@@ -2026,7 +2040,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
// Copy all of the result registers out of their specified physreg.
@@ -2045,7 +2059,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
UsedRegs.push_back(RVLocs[1].getLocReg());
// Finally update the result.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
MVT CopyVT = RVLocs[0].getValVT();
@@ -2063,7 +2077,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
UsedRegs.push_back(RVLocs[0].getLocReg());
// Finally update the result.
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
}
}
@@ -2087,7 +2101,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
F.isVarArg()));
@@ -2192,7 +2206,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Can't handle non-double multi-reg retvals.
if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
if (RVLocs.size() >= 2 && RetVT != MVT::f64)
return false;
@@ -2303,7 +2317,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
RetVT != MVT::i16 && RetVT != MVT::i32) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
if (RVLocs.size() >= 2 && RetVT != MVT::f64)
return false;
@@ -2487,7 +2501,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
}
const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
+ static_cast<const ARMBaseRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = FramePtr;
@@ -2505,7 +2520,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
.addReg(SrcReg).addImm(0));
SrcReg = DestReg;
}
- UpdateValueMap(&I, SrcReg);
+ updateValueMap(&I, SrcReg);
return true;
}
case Intrinsic::memcpy:
@@ -2583,7 +2598,7 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
// Because the high bits are undefined, a truncate doesn't generate
// any code.
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, SrcReg);
return true;
}
@@ -2745,7 +2760,7 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
MVT DestVT = DestEVT.getSimpleVT();
unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
if (ResultReg == 0) return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -2800,12 +2815,12 @@ bool ARMFastISel::SelectShift(const Instruction *I,
}
AddOptionalDefs(MIB);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// TODO: SoftFP support.
-bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Load:
@@ -2983,7 +2998,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
return DestReg2;
}
-bool ARMFastISel::FastLowerArguments() {
+bool ARMFastISel::fastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
@@ -3050,7 +3065,7 @@ bool ARMFastISel::FastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(DstReg, getKillRegState(true));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
}
return true;
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index e191a3c..0c910ab 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_ARM_FEATURES_H
-#define TARGET_ARM_FEATURES_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMFEATURES_H
+#define LLVM_LIB_TARGET_ARM_ARMFEATURES_H
#include "MCTargetDesc/ARMMCTargetDesc.h"
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a67b360..80add7a 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -47,7 +47,7 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// iOS requires FP not to be clobbered for backtracing purpose.
if (STI.isTargetIOS())
@@ -137,12 +137,27 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
}
static int sizeOfSPAdjustment(const MachineInstr *MI) {
- assert(MI->getOpcode() == ARM::VSTMDDB_UPD);
+ int RegSize;
+ switch (MI->getOpcode()) {
+ case ARM::VSTMDDB_UPD:
+ RegSize = 8;
+ break;
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ RegSize = 4;
+ break;
+ case ARM::t2STR_PRE:
+ case ARM::STR_PRE_IMM:
+ return 4;
+ default:
+ llvm_unreachable("Unknown push or pop like instruction");
+ }
+
int count = 0;
// ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
// pred) so the list starts at 4.
for (int i = MI->getNumOperands() - 1; i >= 4; --i)
- count += 8;
+ count += RegSize;
return count;
}
@@ -154,6 +169,46 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF,
return StackSizeInBytes >= 4096;
}
+namespace {
+struct StackAdjustingInsts {
+ struct InstInfo {
+ MachineBasicBlock::iterator I;
+ unsigned SPAdjust;
+ bool BeforeFPSet;
+ };
+
+ SmallVector<InstInfo, 4> Insts;
+
+ void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
+ bool BeforeFPSet = false) {
+ InstInfo Info = {I, SPAdjust, BeforeFPSet};
+ Insts.push_back(Info);
+ }
+
+ void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
+ auto Info = std::find_if(Insts.begin(), Insts.end(),
+ [&](InstInfo &Info) { return Info.I == I; });
+ assert(Info != Insts.end() && "invalid sp adjusting instruction");
+ Info->SPAdjust += ExtraBytes;
+ }
+
+ void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
+ DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) {
+ unsigned CFAOffset = 0;
+ for (auto &Info : Insts) {
+ if (HasFP && !Info.BeforeFPSet)
+ return;
+
+ CFAOffset -= Info.SPAdjust;
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
+ BuildMI(MBB, std::next(Info.I), dl,
+ TII.get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ }
+ }
+};
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -163,20 +218,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MCContext &Context = MMI.getContext();
const TargetMachine &TM = MF.getTarget();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
- const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = TM.getFrameLowering()->getStackAlignment();
+ unsigned Align =
+ TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- int CFAOffset = 0;
// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
@@ -189,15 +244,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
+ StackAdjustingInsts DefCFAOffsetCandidates;
+
// Allocate the vararg register save area.
if (ArgRegsSaveSize) {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
- CFAOffset -= ArgRegsSaveSize;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true);
}
if (!AFI->hasStackFrame() &&
@@ -205,11 +258,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (NumBytes - ArgRegsSaveSize != 0) {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
MachineInstr::FrameSetup);
- CFAOffset -= NumBytes - ArgRegsSaveSize;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI),
+ NumBytes - ArgRegsSaveSize, true);
}
return;
}
@@ -252,21 +302,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
// Move past area 1.
- MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
- DPRCSPush;
- if (GPRCS1Size > 0)
+ MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
+ if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
+ }
// Determine starting offsets of spill areas.
bool HasFP = hasFP(MF);
- unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size
- + GPRCS2Size + DPRCSSize);
- unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
- unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
+ unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
+ unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
+ unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
+ unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
int FramePtrOffsetInPush = 0;
if (HasFP) {
- FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI)
- + GPRCS1Size + ArgRegsSaveSize;
+ FramePtrOffsetInPush =
+ MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -275,16 +327,32 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
// Move past area 2.
- if (GPRCS2Size > 0)
+ if (GPRCS2Size > 0) {
GPRCS2Push = LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+ }
+
+ // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
+ // .cfi_offset operations will reflect that.
+ if (DPRGapSize) {
+ assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize))
+ DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
+ else {
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
+ MachineInstr::FrameSetup);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
+ }
+ }
// Move past area 3.
if (DPRCSSize > 0) {
- DPRCSPush = MBBI;
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
- while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
+ DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI));
LastPush = MBBI++;
+ }
}
// Move past the aligned DPRCS2 area.
@@ -343,18 +411,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
NumBytes = 0;
}
- unsigned adjustedGPRCS1Size = GPRCS1Size;
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) {
- if (LastPush == GPRCS1Push) {
- FramePtrOffsetInPush += NumBytes;
- adjustedGPRCS1Size += NumBytes;
- NumBytes = 0;
- }
- } else
+ if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+ DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
+ else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
MachineInstr::FrameSetup);
+ DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
+ }
if (HasFP && isARM)
// Restore from fp only in ARM mode: e.g. sub sp, r7, #24
@@ -368,13 +433,40 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
- if (adjustedGPRCS1Size > 0) {
- CFAOffset -= adjustedGPRCS1Size;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- MachineBasicBlock::iterator Pos = ++GPRCS1Push;
- BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it
+ // is in area one and the adjustment needs to take place just after
+ // that push.
+ if (HasFP) {
+ MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
+ unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push);
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
+ dl, TII, FramePtr, ARM::SP,
+ PushSize + FramePtrOffsetInPush,
+ MachineInstr::FrameSetup);
+ if (FramePtrOffsetInPush + PushSize != 0) {
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true),
+ -(ArgRegsSaveSize - FramePtrOffsetInPush)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ } else {
+ unsigned CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, MRI->getDwarfRegNum(FramePtr, true)));
+ BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+
+ // Now that the prologue's actual instructions are finalised, we can insert
+ // the necessary DWARF cf instructions to describe the situation. Start by
+ // recording where each register ended up:
+ if (GPRCS1Size > 0) {
+ MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
+ int CFIIndex;
for (const auto &Entry : CSI) {
unsigned Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
@@ -405,41 +497,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
- // Set FP to point to the stack slot that contains the previous FP.
- // For iOS, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not iOS, all the callee-saved registers go
- // into spill area 1, including the FP in R11. In either case, it
- // is in area one and the adjustment needs to take place just after
- // that push.
- if (HasFP) {
- emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, GPRCS1Push, dl, TII,
- FramePtr, ARM::SP, FramePtrOffsetInPush,
- MachineInstr::FrameSetup);
- if (FramePtrOffsetInPush) {
- CFAOffset += FramePtrOffsetInPush;
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
- nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
- BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- } else {
- unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
- nullptr, MRI->getDwarfRegNum(FramePtr, true)));
- BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
- }
-
if (GPRCS2Size > 0) {
- MachineBasicBlock::iterator Pos = ++GPRCS2Push;
- if (!HasFP) {
- CFAOffset -= GPRCS2Size;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
+ MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
for (const auto &Entry : CSI) {
unsigned Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
@@ -465,17 +524,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (DPRCSSize > 0) {
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
- do {
- MachineBasicBlock::iterator Push = DPRCSPush++;
- if (!HasFP) {
- CFAOffset -= sizeOfSPAdjustment(Push);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
- } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD);
-
+ MachineBasicBlock::iterator Pos = std::next(LastPush);
for (const auto &Entry : CSI) {
unsigned Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
@@ -485,21 +534,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Offset = MFI->getObjectOffset(FI);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
- BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
}
}
- if (NumBytes) {
- if (!HasFP) {
- CFAOffset -= NumBytes;
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
- BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
- }
+ // Now we can emit descriptions of where the canonical frame address was
+ // throughout the process. If we have a frame pointer, it takes over the job
+ // half-way through, so only the first few .cfi_def_cfa_offset instructions
+ // actually get emitted.
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
@@ -507,6 +552,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedGapSize(DPRGapSize);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
// If we need dynamic stack realignment, do it here. Be paranoid and make
@@ -574,14 +620,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
assert(!AFI->isThumb1OnlyFunction() &&
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -609,6 +658,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
NumBytes -= (ArgRegsSaveSize +
AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedGapSize() +
AFI->getDPRCalleeSavedAreaSize());
// Reset SP based on frame pointer only if the stack frame extends beyond
@@ -657,6 +707,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
MBBI++;
}
+ if (AFI->getDPRCalleeSavedGapSize()) {
+ assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
+ "unexpected DPR alignment gap");
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize());
+ }
+
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
@@ -717,8 +773,8 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg,
int SPAdj) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
int FPOffset = Offset - AFI->getFramePtrSpillOffset();
@@ -803,7 +859,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -876,7 +932,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool(*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
@@ -966,7 +1022,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineFrameInfo &MFI = *MF.getFrameInfo();
// Mark the D-register spill slots as properly aligned. Since MFI computes
@@ -1125,7 +1181,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Find the frame index assigned to d8.
int D8SpillFI = 0;
@@ -1340,12 +1396,15 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Don't bother if the default stack alignment is sufficiently high.
- if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
+ if (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment() >= 8)
return;
// Aligned spills require stack realignment.
- const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
if (!RegInfo->canRealignStack(MF))
return;
@@ -1384,10 +1443,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
unsigned NumGPRSpills = 0;
SmallVector<unsigned, 4> UnspilledCS1GPRs;
SmallVector<unsigned, 4> UnspilledCS2GPRs;
- const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1550,7 +1609,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// of GPRs, spill one extra callee save GPR so we won't have to pad between
// the integer and double callee save areas.
unsigned TargetAlign = getStackAlignment();
- if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
unsigned Reg = UnspilledCS1GPRs[i];
@@ -1628,7 +1687,7 @@ void ARMFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -1746,7 +1805,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MCContext &Context = MMI.getContext();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 709afbc..a83b773 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARM_FRAMEINFO_H
-#define ARM_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
+#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 0885c4e..0e4f81c 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -46,8 +46,8 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
const MCInstrDesc &LastMCID = LastMI->getDesc();
const TargetMachine &TM =
MI->getParent()->getParent()->getTarget();
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
// Skip over one non-VFP / NEON instruction.
if (!LastMI->isBarrier() &&
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index a8198e2..ccf09db 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMHAZARDRECOGNIZER_H
-#define ARMHAZARDRECOGNIZER_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
@@ -46,4 +46,4 @@ public:
} // end namespace llvm
-#endif // ARMHAZARDRECOGNIZER_H
+#endif
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 38547cf..6941579 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -425,7 +425,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
return true;
if (Use->isMachineOpcode()) {
const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
- CurDAG->getTarget().getInstrInfo());
+ CurDAG->getSubtarget().getInstrInfo());
const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
if (MCID.mayStore())
@@ -526,8 +526,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -542,16 +541,15 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
}
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
+ int RHSC = (int)RHS->getSExtValue();
if (N.getOpcode() == ISD::SUB)
RHSC = -RHSC;
- if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -697,8 +695,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
@@ -718,8 +715,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -896,8 +892,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
@@ -911,8 +906,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -957,8 +951,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
@@ -975,8 +968,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
ARM_AM::AddrOpc AddSub = ARM_AM::add;
@@ -1199,8 +1191,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1217,8 +1208,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1266,8 +1256,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
if (N.getOpcode() == ISD::FrameIndex) {
// Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1296,8 +1285,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1326,8 +1314,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -1392,10 +1379,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
OffReg = OffReg.getOperand(0);
else {
ShAmt = 0;
- ShOpcVal = ARM_AM::no_shift;
}
- } else {
- ShOpcVal = ARM_AM::no_shift;
}
}
@@ -1425,7 +1409,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32);
@@ -2361,6 +2345,25 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
+ unsigned LSB = 0;
+ if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
+ !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
+ return nullptr;
+
+ if (LSB + Width > 32)
+ return nullptr;
+
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ CurDAG->getTargetConstant(Width - 1, MVT::i32),
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+ }
+
return nullptr;
}
@@ -2457,10 +2460,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
if (UseCP) {
- SDValue CPIdx =
- CurDAG->getTargetConstantPool(ConstantInt::get(
- Type::getInt32Ty(*CurDAG->getContext()), Val),
- getTargetLowering()->getPointerTy());
+ SDValue CPIdx = CurDAG->getTargetConstantPool(
+ ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI->getPointerTy());
SDNode *ResNode;
if (Subtarget->isThumb()) {
@@ -2490,12 +2492,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::FrameIndex: {
// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI,
- getTargetLowering()->getPointerTy());
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
if (Subtarget->isThumb1Only()) {
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops);
+ return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
@@ -2509,6 +2509,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
return I;
break;
+ case ISD::SIGN_EXTEND_INREG:
case ISD::SRA:
if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
return I;
@@ -3393,7 +3394,6 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
Ops.push_back(T1.getValue(1));
CurDAG->UpdateNodeOperands(GU, Ops);
- GU = T1.getNode();
}
else {
// For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4bfa5a8..0d0d81f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -70,9 +71,9 @@ namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &C, ParmContext PC)
- : CCState(CC, isVarArg, MF, TM, locs, C) {
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ ParmContext PC)
+ : CCState(CC, isVarArg, MF, locs, C) {
assert(((PC == Call) || (PC == Prologue)) &&
"ARMCCState users must specify whether their context is call"
"or prologue generation.");
@@ -155,19 +156,11 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- if (TT.isOSBinFormatMachO())
- return new TargetLoweringObjectFileMachO();
- if (TT.isOSWindows())
- return new TargetLoweringObjectFileCOFF();
- return new ARMElfTargetObjectFile();
-}
-
-ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
+ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
- RegInfo = TM.getRegisterInfo();
- Itins = TM.getInstrItineraryData();
+ RegInfo = TM.getSubtargetImpl()->getRegisterInfo();
+ Itins = TM.getSubtargetImpl()->getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -312,6 +305,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Conversions between floating types.
// RTABI chapter 4.1.2, Table 7
{ RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Integer to floating-point conversions.
@@ -387,6 +381,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
+ // The half <-> float conversion functions are always soft-float, but are
+ // needed for some targets which use a hard-float calling convention by
+ // default.
+ if (Subtarget->isAAPCS_ABI()) {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
+ } else {
+ setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
+ }
+
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
@@ -394,10 +401,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
- if (!Subtarget->isFPOnlySP())
- addRegisterClass(MVT::f64, &ARM::DPRRegClass);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -579,11 +583,50 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::ADDC);
+ if (Subtarget->isFPOnlySP()) {
+ // When targeting a floating-point unit with only single-precision
+ // operations, f64 is legal for the few double-precision instructions which
+ // are present. However, no double-precision operations other than moves,
+ // loads and stores are provided by the hardware.
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+ }
computeRegisterProperties();
- // ARM does not have f32 extending load.
+ // ARM does not have floating-point extending loads.
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+ // ... or truncating stores
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// ARM does not have i1 sign extending load.
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -716,8 +759,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
- // the default expansion.
- if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // the default expansion. If we are targeting a single threaded system,
+ // then set them all for expand so we can lower them later into their
+ // non-atomic form.
+ if (TM.Options.ThreadModel == ThreadModel::Single)
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
// ATOMIC_FENCE needs custom lowering; the others should have been expanded
// to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
@@ -725,7 +772,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasV8Ops()) {
- // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
}
} else {
@@ -825,10 +872,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
}
- // Special handling for half-precision FP.
+
+ // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
+ if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ }
+
+ // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
- setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
- setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
}
@@ -836,7 +890,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+ if (Subtarget->getTargetTriple().isiOS()) {
// For iOS, we don't want the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -844,6 +898,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
}
+ // FP-ARMv8 implements a lot of rounding-like FP operations.
+ if (Subtarget->hasFPARMv8()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ if (!Subtarget->isFPOnlySP()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ }
+ }
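// Illustrative sketch, not part of the patch: with FP-ARMv8 the operations
// marked Legal above select to the VRINT family instead of libm calls. The
// mapping assumed here (floorf->vrintm, ceilf->vrintp, truncf->vrintz,
// roundf->vrinta, rintf->vrintx, nearbyintf->vrintr) is the usual one and is
// stated as an assumption, not quoted from this change.
#include <math.h>
float round_all(float x) {
  return floorf(x) + ceilf(x) + truncf(x) + roundf(x) + rintf(x) +
         nearbyintf(x);
}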
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
@@ -1119,7 +1190,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Loads are scheduled for latency even if the instruction itinerary
// is not available.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
@@ -1256,8 +1328,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallResult(Ins,
CCAssignFnForNode(CallConv, /* Return*/ true,
isVarArg));
@@ -1417,8 +1489,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
@@ -1510,7 +1582,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// True if this byval aggregate will be split between registers
// and memory.
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
- unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
if (CurByValIdx < ByValArgsCount) {
@@ -1646,14 +1718,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
bool isStub = (isExt && Subtarget->isTargetMachO()) &&
getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || isStub;
+ isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
- DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
+ DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
+ 0, ARMII::MO_NONLAZY));
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(), false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -1679,7 +1754,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isDirect = true;
bool isStub = Subtarget->isTargetMachO() &&
getTargetMachine().getRelocationModel() != Reloc::Static;
- isARMFunc = !Subtarget->isThumb() || isStub;
+ isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
@@ -1740,7 +1815,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
if (!isTailCall) {
const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
@@ -1940,17 +2016,30 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (Subtarget->isThumb1Only())
return false;
+ // Externally-defined functions with weak linkage should not be
+ // tail-called on ARM when the OS does not support dynamic
+ // pre-emption of symbols, as the AAELF spec requires normal calls
+ // to undefined weak functions to be replaced with a NOP or jump to the
+ // next instruction. The behaviour of branch instructions in this
+ // situation (as used for tail calls) is implementation-defined, so we
+ // cannot rely on the linker replacing the tail call with a return.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ if (GV->hasExternalWeakLinkage())
+ return false;
+ }
+
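// Minimal sketch of the case the new check guards against (illustrative only;
// the callee name is hypothetical, GCC/Clang attribute syntax assumed).
// AAELF requires a normal BL/BLX to an undefined weak function to be replaced
// by the linker with a NOP or a branch to the next instruction; a tail-call
// branch carries no such guarantee, so this stays an ordinary call on ARM.
__attribute__((weak)) void maybe_absent(void);
void caller(void) {
  maybe_absent(); // in tail position, but no longer tail-called
}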
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
+ ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
+ *DAG.getContext(), Call);
CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
SmallVector<CCValAssign, 16> RVLocs2;
- ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
+ ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
+ *DAG.getContext(), Call);
CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
if (RVLocs1.size() != RVLocs2.size())
@@ -1984,8 +2073,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CalleeCC, false, isVarArg));
if (CCInfo.getNextStackOffset()) {
@@ -1995,7 +2084,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
@@ -2038,7 +2128,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
isVarArg));
}
@@ -2086,8 +2176,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext(), Call);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext(), Call);
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
@@ -2098,6 +2188,10 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
bool isLittleEndian = Subtarget->isLittle();
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ AFI->setReturnRegsCount(RVLocs.size());
+
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
@@ -2216,9 +2310,15 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
Copy = *UI;
- else
+ else {
+ // We are at the top of this chain.
+ // If the copy has a glue operand, we conservatively assume it
+ // isn't safe to perform a tail call.
+ if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
// First CopyToReg
TCChain = UseChain;
+ }
}
} else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
@@ -2227,6 +2327,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
TCChain = Copy->getOperand(0);
} else {
return false;
@@ -2567,9 +2671,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::arm_rbit: {
- assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ assert(Op.getOperand(1).getValueType() == MVT::i32 &&
"RBIT intrinsic must have i32 type!");
- return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
+ return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
}
case Intrinsic::arm_thread_pointer: {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -2626,7 +2730,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
- unsigned Domain = ARM_MB::ISH;
+ ARM_MB::MemBOpt Domain = ARM_MB::ISH;
if (Subtarget->isMClass()) {
// Only a full system barrier exists in the M-class architectures.
Domain = ARM_MB::SY;
@@ -2739,7 +2843,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
}
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
ArgRegsSize = NumGPRs * 4;
// If parameter is split between stack and GPRs...
@@ -2916,8 +3023,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
+ ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), Prologue);
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
@@ -2951,7 +3058,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (Flags.isByVal()) {
unsigned ExtraArgRegsSize;
unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(),
+ computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
Flags.getByValSize(),
ExtraArgRegsSize, ExtraArgRegsSaveSize);
@@ -2966,7 +3073,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
}
CCInfo.rewindByValRegsInfo();
lastInsIndex = -1;
- if (isVarArg) {
+ if (isVarArg && MFI->hasVAStart()) {
unsigned ExtraArgRegsSize;
unsigned ExtraArgRegsSaveSize;
computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
@@ -3075,7 +3182,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
int FrameIndex = StoreByValRegs(
@@ -3108,7 +3215,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
}
// varargs
- if (isVarArg)
+ if (isVarArg && MFI->hasVAStart())
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset(),
TotalArgRegsSaveSize);
@@ -3130,6 +3237,18 @@ static bool isFloatingPointZero(SDValue Op) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isPosZero();
}
+ } else if (Op->getOpcode() == ISD::BITCAST &&
+ Op->getValueType(0) == MVT::f64) {
+ // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
+ // created by LowerConstantFP().
+ SDValue BitcastOp = Op->getOperand(0);
+ if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
+ SDValue MoveOp = BitcastOp->getOperand(0);
+ if (MoveOp->getOpcode() == ISD::TargetConstant &&
+ cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
+ return true;
+ }
+ }
}
return false;
}
@@ -3198,6 +3317,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
SDLoc dl) const {
+ assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
@@ -3313,9 +3433,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
- return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
- ARMcc, CCR, OverflowCmp);
-
+ return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR,
+ OverflowCmp, DAG);
}
// Convert:
@@ -3349,7 +3468,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = Cond.getOperand(3);
SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
assert(True.getValueType() == VT);
- return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
}
}
}
@@ -3419,6 +3538,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
}
+SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
+ SDValue TrueVal, SDValue ARMcc, SDValue CCR,
+ SDValue Cmp, SelectionDAG &DAG) const {
+ if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
+ FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
+ TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
+
+ SDValue TrueLow = TrueVal.getValue(0);
+ SDValue TrueHigh = TrueVal.getValue(1);
+ SDValue FalseLow = FalseVal.getValue(0);
+ SDValue FalseHigh = FalseVal.getValue(1);
+
+ SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
+ ARMcc, CCR, Cmp);
+ SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
+ ARMcc, CCR, duplicateCmp(Cmp, DAG));
+
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
+ } else {
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
+ }
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3428,6 +3573,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue FalseVal = Op.getOperand(3);
SDLoc dl(Op);
+ if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
+ dl);
+
+ // If softenSetCCOperands only returned one value, we should compare it to
+ // zero.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
if (LHS.getValueType() == MVT::i32) {
// Try to generate VSEL on ARMv8.
// The VSEL instruction can't use all the usual ARM condition
@@ -3452,8 +3609,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
- Cmp);
+ return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -3468,12 +3624,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// select c, a, b
// We only do this in unsafe-fp-math, because signed zeros and NaNs are
// handled differently than the original code sequence.
- if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
- RHS == FalseVal) {
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
- return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ if (getTargetMachine().Options.UnsafeFPMath) {
+ if (LHS == TrueVal && RHS == FalseVal) {
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ } else if (LHS == FalseVal && RHS == TrueVal) {
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ }
}
bool swpCmpOps = false;
@@ -3492,14 +3654,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMcc, CCR, Cmp);
+ SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
- Result = DAG.getNode(ARMISD::CMOV, dl, VT,
- Result, TrueVal, ARMcc2, CCR, Cmp2);
+ Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
}
@@ -3632,6 +3792,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
+ if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
+ dl);
+
+ // If softenSetCCOperands only returned one value, we should compare it to
+ // zero.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
@@ -3724,11 +3896,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
-static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+ }
+
SDLoc dl(Op);
unsigned Opc;
@@ -3778,11 +3962,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(Opc, dl, VT, Op);
}
-static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::SINT_TO_FP)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+ }
+
SDLoc dl(Op);
unsigned Opc;
@@ -4291,7 +4487,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
- if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+ if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
@@ -4555,6 +4751,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ // Use the default (constant pool) lowering for double constants when we have
+ // an SP-only FPU
+ if (IsDouble && Subtarget->isFPOnlySP())
+ return SDValue();
+
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
@@ -5733,7 +5934,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
// operation legalization where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
- LD->getMemoryVT(), LD->isVolatile(),
+ LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(),
LD->isNonTemporal(), LD->getAlignment());
}
@@ -6088,7 +6289,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
- StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
// Create stack object for sret.
const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
@@ -6258,6 +6459,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
return LowerDYNAMIC_STACKALLOC(Op, DAG);
llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
}
}
@@ -6294,7 +6497,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
void ARMTargetLowering::
SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6377,9 +6581,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3, RegState::Kill));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
- .addFrameIndex(FI)
- .addImm(36)); // &jbuf[1] :: pc
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
+ .addFrameIndex(FI)
+ .addImm(36); // &jbuf[1] :: pc
AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg5, RegState::Kill)
@@ -6409,7 +6613,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MachineBasicBlock *ARMTargetLowering::
EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
@@ -6738,16 +6943,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
- if (SeenMBBs.insert(CurMBB))
+ if (SeenMBBs.insert(CurMBB).second)
DispContBB->addSuccessor(CurMBB);
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
- for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
- I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
- MachineBasicBlock *BB = *I;
+ for (MachineBasicBlock *BB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
@@ -6926,7 +7129,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
@@ -7160,7 +7364,7 @@ MachineBasicBlock *
ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
- const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetWindows() &&
@@ -7216,8 +7420,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
ARM::SP)
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::R4, RegState::Kill)));
+ .addReg(ARM::SP).addReg(ARM::R4)));
MI->eraseFromParent();
return MBB;
@@ -7226,7 +7429,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
@@ -7479,12 +7683,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- if (!MI->hasPostISelHook()) {
- assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
- "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
- return;
- }
-
const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -7496,8 +7694,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+ const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
@@ -8398,10 +8596,11 @@ static SDValue PerformBFICombine(SDNode *N,
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
- if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
// vmovrrd(load f64) -> (load i32), (load i32)
@@ -8432,8 +8631,6 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
std::swap (NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
- DCI.RemoveFromWorklist(LD);
- DAG.DeleteNode(LD);
return Result;
}
@@ -8596,7 +8793,7 @@ static SDValue PerformSTORECombine(SDNode *N,
return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment(),
- St->getTBAAInfo());
+ St->getAAInfo());
}
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
@@ -8616,7 +8813,8 @@ static bool hasNormalLoadOperand(SDNode *N) {
/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
/// ISD::BUILD_VECTOR.
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI){
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
// VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
// into a pair of GPRs, which is fine when the value is used as a scalar,
@@ -8922,7 +9120,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
Tys[n] = VecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2));
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
@@ -9001,7 +9199,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1));
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
@@ -9631,10 +9829,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
- case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
case ISD::STORE: return PerformSTORECombine(N, DCI);
- case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
@@ -9686,8 +9884,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned,
- bool *Fast) const {
+bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
// The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
@@ -9741,11 +9941,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) {
+ (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
return MVT::v2f64;
} else if (Size >= 8 &&
(memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) {
+ (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
+ Fast))) {
return MVT::f64;
}
}
@@ -10348,6 +10549,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
+ if (Subtarget->isThumb1Only())
+ return RCPair(0U, &ARM::tGPRRegClass);
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::Other)
@@ -10552,7 +10755,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
- "Invalid opcode for Div/Rem lowering");
+ "Invalid opcode for Div/Rem lowering");
bool isSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
@@ -10560,10 +10763,10 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
RTLIB::Libcall LC;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
- case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
- case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
- case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
}
SDValue InChain = DAG.getEntryNode();
@@ -10583,7 +10786,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy());
- Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
+ Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -10611,7 +10814,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
Flag = Chain.getValue(1);
- SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
@@ -10621,6 +10824,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(Ops, DL);
}
+SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
+ "Unexpected type for custom-lowering FP_EXTEND");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+}
+
+SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(0).getValueType() == MVT::f64 &&
+ Subtarget->isFPOnlySP() &&
+ "Unexpected type for custom-lowering FP_ROUND");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+}
+
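// Sketch of the effect, not part of the patch: on a single-precision-only
// VFP (isFPOnlySP), f64 conversions are now lowered to runtime calls rather
// than a vcvt between register banks. The __aeabi_* helper names below are
// the usual AEABI ones and are an assumption here.
float narrow(double d) { return (float)d; } // ISD::FP_ROUND  -> e.g. __aeabi_d2f
double widen(float f) { return (double)f; } // ISD::FP_EXTEND -> e.g. __aeabi_f2d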
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
@@ -10648,7 +10876,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return false;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
- if (VT == MVT::f64)
+ if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
@@ -10775,31 +11003,154 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
-bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
- // Loads and stores less than 64-bits are already atomic; ones above that
- // are doomed anyway, so defer to the default libcall and blame the OS when
- // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
- // anything for those.
- bool IsMClass = Subtarget->isMClass();
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- return Size == 64 && !IsMClass;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass;
+bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
+
+Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
+ ARM_MB::MemBOpt Domain) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+
+ // First, if the target has no DMB, see what fallback we can use.
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 CPUs can support data barriers with an MCR instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
+ Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
+ Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
+ Builder.getInt32(0), Builder.getInt32(7),
+ Builder.getInt32(10), Builder.getInt32(5)};
+ return Builder.CreateCall(MCR, args);
+ } else {
+ // Instead of using barriers, atomic accesses on these subtargets use
+ // libcalls.
+ llvm_unreachable("makeDMB on a target so old that it has no barriers");
+ }
+ } else {
+ Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
+ Constant *CDomain = Builder.getInt32(Domain);
+ return Builder.CreateCall(DMB, CDomain);
+ }
+}
+
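// Hedged note on the ARMv6 fallback above: the constants passed to the MCR
// intrinsic select coprocessor 15, c7, c10, opcode2 5, i.e. the pre-v7 CP15
// data memory barrier. Roughly equivalent user-level inline assembly (sketch,
// GCC/Clang syntax assumed):
static inline void armv6_dmb(void) {
  unsigned Zero = 0; // the transferred register should be zero
  __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" : : "r"(Zero) : "memory");
}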
+// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return nullptr;
+
+ switch (Ord) {
+ case NotAtomic:
+ case Unordered:
+ llvm_unreachable("Invalid fence: unordered/non-atomic");
+ case Monotonic:
+ case Acquire:
+ return nullptr; // Nothing to do
+ case SequentiallyConsistent:
+ if (!IsStore)
+ return nullptr; // Nothing to do
+ /*FALLTHROUGH*/
+ case Release:
+ case AcquireRelease:
+ if (Subtarget->isSwift())
+ return makeDMB(Builder, ARM_MB::ISHST);
+ // FIXME: add a comment with a link to documentation justifying this.
+ else
+ return makeDMB(Builder, ARM_MB::ISH);
+ }
+ llvm_unreachable("Unknown fence ordering in emitLeadingFence");
+}
+
+Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (!getInsertFencesForAtomic())
+ return nullptr;
+
+ switch (Ord) {
+ case NotAtomic:
+ case Unordered:
+ llvm_unreachable("Invalid fence: unordered/not-atomic");
+ case Monotonic:
+ case Release:
+ return nullptr; // Nothing to do
+ case Acquire:
+ case AcquireRelease:
+ case SequentiallyConsistent:
+ return makeDMB(Builder, ARM_MB::ISH);
}
+ llvm_unreachable("Unknown fence ordering in emitTrailingFence");
+}
+
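// Sketch of the resulting barrier placement, not part of the patch. It
// assumes a pre-v8 target where getInsertFencesForAtomic() is set and
// ignores the Swift ISHST special case:
//   seq_cst store : dmb ish; str; dmb ish
//   release store : dmb ish; str
//   acquire load  : ldr; dmb ish
#include <atomic>
std::atomic<int> Flag;
void publish() { Flag.store(1, std::memory_order_seq_cst); }
int consume() { return Flag.load(std::memory_order_acquire); }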
+// Loads and stores less than 64-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+// anything for those.
+bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+ return (Size == 64) && !Subtarget->isMClass();
+}
+
+// Loads and stores less than 64-bits are already atomic; ones above that
+// are doomed anyway, so defer to the default libcall and blame the OS when
+// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
+// anything for those.
+// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
+// guarantee, see DDI0406C ARM architecture reference manual,
+// sections A8.8.72-74 LDRD)
+bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ unsigned Size = LI->getType()->getPrimitiveSizeInBits();
+ return (Size == 64) && !Subtarget->isMClass();
+}
+
+// For the real atomic operations, we have ldrex/strex up to 32 bits,
+// and up to 64 bits on the non-M profiles
+bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+ return Size <= (Subtarget->isMClass() ? 32U : 64U);
+}
+
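// Illustration only (assumption, not quoted from the patch): a 32-bit RMW
// such as this is expanded by AtomicExpandPass into an ldrex/strex retry
// loop; 64-bit RMWs use ldrexd/strexd on A/R profiles and fall back to a
// libcall on M-profile, matching the size check above.
#include <atomic>
int bump(std::atomic<int> &Counter) { return Counter.fetch_add(1); }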
+// This has so far only been implemented for MachO.
+bool ARMTargetLowering::useLoadStackGuardNode() const {
+ return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO;
+}
+
+bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const {
+ // If we do not have NEON, vector types are not natively supported.
+ if (!Subtarget->hasNEON())
+ return false;
- // For the real atomic operations, we have ldrex/strex up to 32 bits,
- // and up to 64 bits on the non-M profiles
- unsigned AtomicLimit = IsMClass ? 32 : 64;
- return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit;
+ // Floating point values and vector values map to the same register file.
+ // Therefore, although we could do a store extract of a vector type, it is
+ // better to leave it as a float, as we have more freedom in the addressing
+ // mode for those.
+ if (VectorTy->isFPOrFPVectorTy())
+ return false;
+
+ // If the index is unknown at compile time, this is very expensive to lower
+ // and it is not possible to combine the store with the extract.
+ if (!isa<ConstantInt>(Idx))
+ return false;
+
+ assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ // We can do a store + vector extract on any vector that fits perfectly in a D
+ // or Q register.
+ if (BitWidth == 64 || BitWidth == 128) {
+ Cost = 0;
+ return true;
+ }
+ return false;
}
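// Hedged example of the store + extract pattern the hook above accepts
// (integer vector, constant lane, 128-bit wide); with NEON this can stay a
// single lane store such as vst1.32. Illustrative only.
#include <arm_neon.h>
void store_lane(int32x4_t V, int32_t *P) { *P = vgetq_lane_s32(V, 1); }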
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
- bool IsAcquire =
- Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+ bool IsAcquire = isAtLeastAcquire(Ord);
// Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i32, i32} and we have to recombine them into a
@@ -10835,8 +11186,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- bool IsRelease =
- Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+ bool IsRelease = isAtLeastRelease(Ord);
// Since the intrinsics must have legal type, the i64 intrinsics take two
// parameters: "i32, i32". We must marshal Val into the appropriate form
@@ -10934,6 +11284,6 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
HABaseType Base = HA_UNKNOWN;
uint64_t Members = 0;
bool result = isHomogeneousAggregate(Ty, Base, Members);
- DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
+ DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
return result;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 1ace0f3..89b0c31 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMISELLOWERING_H
-#define ARMISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
+#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -232,7 +232,7 @@ namespace llvm {
class ARMTargetLowering : public TargetLowering {
public:
- explicit ARMTargetLowering(TargetMachine &TM);
+ explicit ARMTargetLowering(const TargetMachine &TM);
unsigned getJumpTableEncoding() const override;
@@ -266,11 +266,12 @@ namespace llvm {
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
- /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// allowsMisalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type. Returns whether it
/// is "fast" by reference in the second argument.
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- bool *Fast) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+ unsigned Align,
+ bool *Fast) const override;
EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
@@ -391,12 +392,26 @@ namespace llvm {
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
+ bool hasLoadLinkedStoreConditional() const override;
+ Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
- bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+ Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
+ bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+ bool useLoadStackGuardNode() const override;
+
+ bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+ unsigned &Cost) const override;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
@@ -473,6 +488,10 @@ namespace llvm {
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -562,6 +581,9 @@ namespace llvm {
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
+ SDValue ARMcc, SDValue CCR, SDValue Cmp,
+ SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 59e9260..7d27cf3 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -203,6 +203,16 @@ def msr_mask : Operand<i32> {
let ParserMatchClass = MSRMaskOperand;
}
+def BankedRegOperand : AsmOperandClass {
+ let Name = "BankedReg";
+ let ParserMethod = "parseBankedRegOperand";
+}
+def banked_reg : Operand<i32> {
+ let PrintMethod = "printBankedRegOperand";
+ let DecoderMethod = "DecodeBankedReg";
+ let ParserMatchClass = BankedRegOperand;
+}
+
// Shift Right Immediate - A shift right immediate is encoded differently from
// other shift immediates. The imm6 field is encoded like so:
//
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index f235ac2..17d1ffa 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -90,6 +90,49 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
+void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>();
+
+ if (!Subtarget.useMovt(MF)) {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12, RM);
+ return;
+ }
+
+ if (RM != Reloc::PIC_) {
+ expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12, RM);
+ return;
+ }
+
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+ if (!Subtarget.GVIsIndirectSymbol(GV, RM)) {
+ expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12, RM);
+ return;
+ }
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Reg = MI->getOperand(0).getReg();
+ MachineInstrBuilder MIB;
+
+ MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg)
+ .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MIB.addMemOperand(MMO);
+ MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ AddDefaultPred(MIB);
+}
+
namespace {
/// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for ARM ELF.
@@ -113,8 +156,9 @@ namespace {
ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
*Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
- unsigned Align = TM->getDataLayout()
- ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+ unsigned Align =
+ TM->getSubtargetImpl()->getDataLayout()->getPrefTypeAlignment(
+ Type::getInt32PtrTy(*Context));
unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
MachineBasicBlock &FirstMBB = MF.front();
@@ -124,7 +168,7 @@ namespace {
MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
ARM::t2LDRpci : ARM::LDRcp;
- const TargetInstrInfo &TII = *TM->getInstrInfo();
+ const TargetInstrInfo &TII = *TM->getSubtargetImpl()->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
TII.get(Opc), TempReg)
.addConstantPoolIndex(Idx);
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index b09958a..90f34ea 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMINSTRUCTIONINFO_H
-#define ARMINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
@@ -37,6 +37,10 @@ public:
/// always be able to get register info as well (through this method).
///
const ARMRegisterInfo &getRegisterInfo() const override { return RI; }
+
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 2bb8976..3177114 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -241,6 +241,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
AssemblerPredicate<"FeatureMP",
"mp-extensions">;
+def HasVirtualization: Predicate<"false">,
+ AssemblerPredicate<"FeatureVirtualization",
+ "virtualization-extensions">;
def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
AssemblerPredicate<"FeatureTrustZone",
"TrustZone">;
@@ -633,6 +636,8 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
let ParserMatchClass = Imm32AsmOperand;
}
+def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>;
+
/// imm1_7 predicate - Immediate in the range [1,7].
def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
@@ -1961,7 +1966,7 @@ def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
}
def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
- []>, Requires<[IsARM, HasV7]> {
+ [(int_arm_dbg imm0_15:$opt)]>, Requires<[IsARM, HasV7]> {
bits<4> opt;
let Inst{27-4} = 0b001100100000111100001111;
let Inst{3-0} = opt;
@@ -2708,7 +2713,8 @@ multiclass AI2_stridx<bit isByte, string opc,
def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre,
StFrm, iii,
- opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ opc, "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
let Inst{23} = addr{12}; // U (add = ('U' == 1))
@@ -2720,7 +2726,8 @@ multiclass AI2_stridx<bit isByte, string opc,
def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, ldst_so_reg:$addr),
IndexModePre, StFrm, iir,
- opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ opc, "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 1;
let Inst{23} = addr{12}; // U (add = ('U' == 1))
@@ -2733,7 +2740,7 @@ multiclass AI2_stridx<bit isByte, string opc,
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
IndexModePost, StFrm, iir,
opc, "\t$Rt, $addr, $offset",
- "$addr.base = $Rn_wb", []> {
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
// {12} isAdd
// {11-0} imm12/Rm
bits<14> offset;
@@ -2751,7 +2758,7 @@ multiclass AI2_stridx<bit isByte, string opc,
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
IndexModePost, StFrm, iii,
opc, "\t$Rt, $addr, $offset",
- "$addr.base = $Rn_wb", []> {
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
// {12} isAdd
// {11-0} imm12/Rm
bits<14> offset;
@@ -2828,7 +2835,8 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
StMiscFrm, IIC_iStore_bh_ru,
- "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ "strh", "\t$Rt, $addr!",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
bits<14> addr;
let Inst{23} = addr{8}; // U bit
let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
@@ -2841,7 +2849,8 @@ def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset),
IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
- "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb",
+ "strh", "\t$Rt, $addr, $offset",
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
addr_offset_none:$addr,
am3offset:$offset))]> {
@@ -3417,7 +3426,8 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR),
(SBCri GPR:$src, so_imm_not:$imm)>;
def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR),
- (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>;
+ (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>,
+ Requires<[IsARM, HasV6T2]>;
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -3932,14 +3942,12 @@ multiclass AI_smul<string opc, PatFrag opnode> {
def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (sra (opnode GPR:$Rn,
- (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ []>,
Requires<[IsARM, HasV5TE]>;
def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (sra (opnode GPR:$Rn,
- (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ []>,
Requires<[IsARM, HasV5TE]>;
}
@@ -3981,17 +3989,13 @@ multiclass AI_smla<string opc, PatFrag opnode> {
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
- (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
+ []>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set GPRnopc:$Rd,
- (add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
- (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
+ []>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
}
}
@@ -4111,7 +4115,7 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
// Misc. Arithmetic Instructions.
//
-def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "clz", "\t$Rd, $Rm",
[(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>,
Sched<[WriteALU]>;
@@ -4629,7 +4633,7 @@ def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Acquire || Ordering == SequentiallyConsistent;
+ return isAtLeastAcquire(Ordering);
}]>;
def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
@@ -4639,7 +4643,7 @@ def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
class releasing_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Release || Ordering == SequentiallyConsistent;
+ return isAtLeastRelease(Ordering);
}]>;
def atomic_store_release_8 : releasing_store<atomic_store_8>;
@@ -5060,12 +5064,31 @@ def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary,
let Unpredictable{11-0} = 0b110100001111;
}
+// However, the MRS (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5).
+def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked),
+ NoItinerary, "mrs", "\t$Rd, $banked", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rd;
+
+ let Inst{23} = 0;
+ let Inst{22} = banked{5}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = Rd;
+ let Inst{11-9} = 0b001;
+ let Inst{8} = banked{4};
+ let Inst{7-0} = 0b00000000;
+}
+
// Move from ARM core register to Special Register
//
-// No need to have both system and application versions, the encodings are the
-// same and the assembly parser has no way to distinguish between them. The mask
-// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains
-// the mask with the fields to be accessed in the special register.
+// No need to have both system and application versions of MSR (immediate) or
+// MSR (register), the encodings are the same and the assembly parser has no way
+// to distinguish between them. The mask operand contains the special register
+// (R Bit) in bit 4 and bits 3-0 contains the mask with the fields to be
+// accessed in the special register.
def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
"msr", "\t$mask, $Rn", []> {
bits<5> mask;
@@ -5093,6 +5116,25 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
let Inst{11-0} = a;
}
+// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5).
+def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn),
+ NoItinerary, "msr", "\t$banked, $Rn", []>,
+ Requires<[IsARM, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rn;
+
+ let Inst{23} = 0;
+ let Inst{22} = banked{5}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-9} = 0b001;
+ let Inst{8} = banked{4};
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = Rn;
+}
+
// Dynamic stack allocation yields a _chkstk for Windows targets. These calls
// are needed to probe the stack when allocating more than
// 4k bytes in one go. Touching the stack at 4K increments is necessary to
@@ -5278,11 +5320,6 @@ def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
(SMULTB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
(SMULTB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (i32 16)),
- (SMULWB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
- (SMULWB GPR:$a, GPR:$b)>;
def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
@@ -5305,13 +5342,6 @@ def : ARMV5MOPat<(add GPR:$acc,
def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
- (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (i32 16))),
- (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
- (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
- (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
// Pre-v7 uses MCR for synchronization barriers.
@@ -5591,3 +5621,8 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
// is discarded.
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
ComplexDeprecationPredicate<"IT">;
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
+ NoItinerary,
+ [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index c02bb3b..a0c627c 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -34,6 +34,14 @@ def nImmSplatI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmSplatI32AsmOperand;
}
+def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
+def nImmSplatNotI16 : Operand<i32> {
+ let ParserMatchClass = nImmSplatNotI16AsmOperand;
+}
+def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
+def nImmSplatNotI32 : Operand<i32> {
+ let ParserMatchClass = nImmSplatNotI32AsmOperand;
+}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
@@ -4376,7 +4384,7 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlsl", "s", null_frag>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
@@ -5429,7 +5437,7 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
imm:$lane))]>,
- Requires<[HasNEON, HasFastVGETLNi32]> {
+ Requires<[HasVFP2, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
@@ -5497,8 +5505,12 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
(ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
[(set DPR:$V, (insertelt (v2i32 DPR:$src1),
- GPR:$R, imm:$lane))]> {
+ GPR:$R, imm:$lane))]>,
+ Requires<[HasVFP2]> {
let Inst{21} = lane{0};
+  // This instruction is equivalent to
+ // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
+ let isInsertSubreg = 1;
}
}
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
@@ -6635,6 +6647,16 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
(VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+// ... immediates
+def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
+ (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
+ (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
+ (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
+def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
+ (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
+
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index e17f73a..a867844 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -311,7 +311,7 @@ def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val",
}
def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
- []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> {
+ []>, T1Encoding<0b101101>, Requires<[IsNotMClass]>, Deprecated<HasV8Ops> {
bits<1> end;
// A8.6.156
let Inst{9-5} = 0b10010;
@@ -360,6 +360,14 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
let DecoderMethod = "DecodeThumbAddSpecialReg";
}
+// Thumb1 frame lowering is rather fragile; we hope to be able to use
+// tADDrSPi, but we may need to insert a sequence that clobbers CPSR.
+def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset),
+ NoItinerary, []>,
+ Requires<[IsThumb, IsThumb1Only]> {
+ let Defs = [CPSR];
+}
+
// ADD sp, sp, #<imm7>
def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
IIC_iALUi, "add", "\t$Rdn, $imm", []>,
@@ -466,7 +474,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_blxtarget:$func), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> {
+ Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> {
bits<24> func;
let Inst{26} = func{23};
let Inst{25-16} = func{20-11};
@@ -1355,7 +1363,7 @@ def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
Requires<[IsThumb]>;
def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
- Requires<[IsThumb, HasV5T]>;
+ Requires<[IsThumb, HasV5T, IsNotMClass]>;
// Indirect calls to ARM routines
def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 85e9351..807c252 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1262,15 +1262,15 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR,
// Loads with zero extension
defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPR, UnOpFrag<(zextloadi16 node:$Src)>>;
+ GPRnopc, UnOpFrag<(zextloadi16 node:$Src)>>;
defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPR, UnOpFrag<(zextloadi8 node:$Src)>>;
+ GPRnopc, UnOpFrag<(zextloadi8 node:$Src)>>;
// Loads with sign extension
defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPR, UnOpFrag<(sextloadi16 node:$Src)>>;
+ GPRnopc, UnOpFrag<(sextloadi16 node:$Src)>>;
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
- GPR, UnOpFrag<(sextloadi8 node:$Src)>>;
+ GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
@@ -1973,6 +1973,16 @@ def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
+// A simple right-shift can also be used in most cases (the exception is the
+// SXTH operations with a rotate of 24: there the non-contiguous bits are
+// relevant).
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+
// Zero extenders
let AddedComplexity = 16 in {
@@ -1999,8 +2009,16 @@ def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+
+def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
}
+
//===----------------------------------------------------------------------===//
// Arithmetic Instructions.
//
@@ -2708,8 +2726,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
- (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
+ []>,
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -2721,8 +2738,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
- (sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
+ []>,
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -2791,8 +2807,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WB : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
- (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
+ []>,
Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -2804,8 +2819,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WT : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
- (sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
+ []>,
Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -3291,7 +3305,8 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
(ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
- [], {?, ?, ?, ?}> {
+ [], {?, ?, ?, ?}>,
+ Requires<[IsThumb2, IsNotMClass]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
@@ -3367,7 +3382,8 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
- {?, ?, ?, ?}> {
+ {?, ?, ?, ?}>,
+ Requires<[IsThumb2, IsNotMClass]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
@@ -3614,7 +3630,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>,
- Sched<[WriteBr]> {
+ Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> {
bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -3656,7 +3672,8 @@ let isBranch = 1, isTerminator = 1 in {
// operands, create 3 versions of the same instruction. Once there's a clean
// framework to represent optional operands, change this behavior.
class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
- !strconcat("cps", asm_op), []> {
+ !strconcat("cps", asm_op), []>,
+ Requires<[IsThumb2, IsNotMClass]> {
bits<2> imod;
bits<3> iflags;
bits<5> mode;
@@ -3702,7 +3719,8 @@ def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> {
let Predicates = [IsThumb2, HasV8];
}
-def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt",
+ [(int_arm_dbg imm0_15:$opt)]> {
bits<4> opt;
let Inst{31-20} = 0b111100111010;
let Inst{19-16} = 0b1111;
@@ -3739,7 +3757,8 @@ def t2DCPS3 : T2DCPS<0b11, "dcps3">;
class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+ : T2I<oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsThumb2,IsNotMClass]> {
bits<5> mode;
let Inst{31-25} = 0b1110100;
let Inst{24-23} = Op;
@@ -3770,7 +3789,8 @@ def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
// Return From Exception is a system instruction.
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+ : T2I<oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsThumb2,IsNotMClass]> {
let Inst{31-20} = op31_20{11-0};
bits<4> Rn;
@@ -3797,7 +3817,7 @@ let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
"subs", "\tpc, lr, $imm",
[(ARMintretflag imm0_255:$imm)]>,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2,IsNotMClass]> {
let Inst{31-8} = 0b111100111101111010001111;
bits<8> imm;
@@ -3941,10 +3961,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
-defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>;
-defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>;
-defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>;
-defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8,IsThumb2]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8,IsThumb2]>;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8,IsThumb2]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8,IsThumb2]>;
//===----------------------------------------------------------------------===//
@@ -3960,7 +3980,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
bits<4> Rd;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
- let Inst{7-0} = 0b0000;
+ let Inst{7-0} = 0b00000000;
}
def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
@@ -3970,22 +3990,41 @@ def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
bits<4> Rd;
let Inst{31-12} = 0b11110011111111111000;
let Inst{11-8} = Rd;
- let Inst{7-0} = 0b0000;
+ let Inst{7-0} = 0b00000000;
+}
+
+def t2MRSbanked : T2I<(outs rGPR:$Rd), (ins banked_reg:$banked),
+ NoItinerary, "mrs", "\t$Rd, $banked", []>,
+ Requires<[IsThumb, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rd;
+
+ let Inst{31-21} = 0b11110011111;
+ let Inst{20} = banked{5}; // R bit
+ let Inst{19-16} = banked{3-0};
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = Rd;
+ let Inst{7-5} = 0b001;
+ let Inst{4} = banked{4};
+ let Inst{3-0} = 0b0000;
}
+
// M class MRS.
//
// This MRS has a mask field in bits 7-0 and can take more values than
// the A/R class (a full msr_mask).
-def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
- "mrs", "\t$Rd, $mask", []>,
+def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$SYSm), NoItinerary,
+ "mrs", "\t$Rd, $SYSm", []>,
Requires<[IsThumb,IsMClass]> {
bits<4> Rd;
- bits<8> mask;
+ bits<8> SYSm;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
- let Inst{19-16} = 0b1111;
- let Inst{7-0} = mask;
+ let Inst{7-0} = SYSm;
+
+ let Unpredictable{20-16} = 0b11111;
+ let Unpredictable{13} = 0b1;
}
@@ -4010,6 +4049,25 @@ def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
let Inst{7-0} = 0;
}
+// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a
+// separate encoding (distinguished by bit 5).
+def t2MSRbanked : T2I<(outs), (ins banked_reg:$banked, rGPR:$Rn),
+ NoItinerary, "msr", "\t$banked, $Rn", []>,
+ Requires<[IsThumb, HasVirtualization]> {
+ bits<6> banked;
+ bits<4> Rn;
+
+ let Inst{31-21} = 0b11110011100;
+ let Inst{20} = banked{5}; // R bit
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-8} = banked{3-0};
+ let Inst{7-5} = 0b001;
+ let Inst{4} = banked{4};
+ let Inst{3-0} = 0b0000;
+}
+
+
// M class MSR.
//
// Move from ARM core register to Special Register
@@ -4022,7 +4080,13 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn),
let Inst{20} = 0b0;
let Inst{19-16} = Rn;
let Inst{15-12} = 0b1000;
- let Inst{11-0} = SYSm;
+ let Inst{11-10} = SYSm{11-10};
+ let Inst{9-8} = 0b00;
+ let Inst{7-0} = SYSm{7-0};
+
+ let Unpredictable{20} = 0b1;
+ let Unpredictable{13} = 0b1;
+ let Unpredictable{9-8} = 0b11;
}
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 1d7802a..d78f2ac 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -515,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
let Inst{5} = Sm{0};
let Inst{15-12} = Dd{3-0};
let Inst{22} = Dd{4};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Special case encoding: bits 11-8 is 0b1011.
@@ -551,12 +553,6 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def : Pat<(f32_to_f16 SPR:$a),
- (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-
-def : Pat<(f16_to_f32 GPR:$a),
- (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
@@ -619,26 +615,42 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
let Inst{5} = Dm{4};
}
-multiclass vcvt_inst<string opc, bits<2> rm> {
+def : Pat<(fp_to_f16 SPR:$a),
+ (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def : Pat<(fp_to_f16 (f64 DPR:$a)),
+ (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
+
+def : Pat<(f16_to_fp GPR:$a),
+ (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : Pat<(f64 (f16_to_fp GPR:$a)),
+ (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+multiclass vcvt_inst<string opc, bits<2> rm,
+ SDPatternOperator node = null_frag> {
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
- []>, Requires<[HasFPARMv8]> {
+ [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
- []>, Requires<[HasFPARMv8]> {
+ [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
let Inst{17-16} = rm;
@@ -652,7 +664,8 @@ multiclass vcvt_inst<string opc, bits<2> rm> {
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
let Inst{17-16} = rm;
@@ -665,10 +678,10 @@ multiclass vcvt_inst<string opc, bits<2> rm> {
}
}
-defm VCVTA : vcvt_inst<"a", 0b00>;
+defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
defm VCVTN : vcvt_inst<"n", 0b01>;
-defm VCVTP : vcvt_inst<"p", 0b10>;
-defm VCVTM : vcvt_inst<"m", 0b11>;
+defm VCVTP : vcvt_inst<"p", 0b10, fceil>;
+defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
@@ -684,18 +697,20 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
let D = VFPNeonA8Domain;
}
-multiclass vrint_inst_zrx<string opc, bit op, bit op2> {
+multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
- []>, Requires<[HasFPARMv8]> {
+ [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
let Inst{7} = op2;
let Inst{16} = op;
}
def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{7} = op2;
let Inst{16} = op;
}
@@ -708,22 +723,25 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2> {
Requires<[HasFPARMv8,HasDPVFP]>;
}
-defm VRINTZ : vrint_inst_zrx<"z", 0, 1>;
-defm VRINTR : vrint_inst_zrx<"r", 0, 0>;
-defm VRINTX : vrint_inst_zrx<"x", 1, 0>;
+defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>;
+defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>;
+defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>;
-multiclass vrint_inst_anpm<string opc, bits<2> rm> {
+multiclass vrint_inst_anpm<string opc, bits<2> rm,
+ SDPatternOperator node = null_frag> {
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
- []>, Requires<[HasFPARMv8]> {
+ [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
+ Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"),
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
+ Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{17-16} = rm;
}
}
@@ -736,10 +754,10 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm> {
Requires<[HasFPARMv8,HasDPVFP]>;
}
-defm VRINTA : vrint_inst_anpm<"a", 0b00>;
+defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>;
defm VRINTN : vrint_inst_anpm<"n", 0b01>;
-defm VRINTP : vrint_inst_anpm<"p", 0b10>;
-defm VRINTM : vrint_inst_anpm<"m", 0b11>;
+defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
+defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
@@ -830,6 +848,11 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
+
+ // This instruction is equivalent to
+ // $Rt = EXTRACT_SUBREG $Dm, ssub_0
+ // $Rt2 = EXTRACT_SUBREG $Dm, ssub_1
+ let isExtractSubreg = 1;
}
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
@@ -878,6 +901,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
+
+ // This instruction is equivalent to
+ // $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1
+ let isRegSequence = 1;
}
let neverHasSideEffects = 1 in
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
deleted file mode 100644
index 6d1114d..0000000
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the ARM target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARMJITInfo.h"
-#include "ARMConstantPoolValue.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMRelocations.h"
-#include "MCTargetDesc/ARMBaseInfo.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
-}
-
-/// JITCompilerFunction - This contains the address of the JIT function used to
-/// compile a function lazily.
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-// Get the ASMPREFIX for the current host. This is often '_'.
-#ifndef __USER_LABEL_PREFIX__
-#define __USER_LABEL_PREFIX__
-#endif
-#define GETASMPREFIX2(X) #X
-#define GETASMPREFIX(X) GETASMPREFIX2(X)
-#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
-
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because the prolog/epilog inserted by GCC won't work for us. (We need
-// to preserve more context and manipulate the stack directly). Instead,
-// write our own wrapper, which does things our way, so we have complete
-// control over register saving and restoring.
-extern "C" {
-#if defined(__arm__)
- void ARMCompilationCallback();
- asm(
- ".text\n"
- ".align 2\n"
- ".globl " ASMPREFIX "ARMCompilationCallback\n"
- ASMPREFIX "ARMCompilationCallback:\n"
- // Save caller saved registers since they may contain stuff
- // for the real target function right now. We have to act as if this
- // whole compilation callback doesn't exist as far as the caller is
- // concerned, so we can't just preserve the callee saved regs.
- "stmdb sp!, {r0, r1, r2, r3, lr}\n"
-#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
-#endif
- // The LR contains the address of the stub function on entry.
- // pass it as the argument to the C part of the callback
- "mov r0, lr\n"
- "sub sp, sp, #4\n"
- // Call the C portion of the callback
- "bl " ASMPREFIX "ARMCompilationCallbackC\n"
- "add sp, sp, #4\n"
- // Restoring the LR to the return address of the function that invoked
- // the stub and de-allocating the stack space for it requires us to
- // swap the two saved LR values on the stack, as they're backwards
- // for what we need since the pop instruction has a pre-determined
- // order for the registers.
- // +--------+
- // 0 | LR | Original return address
- // +--------+
- // 1 | LR | Stub address (start of stub)
- // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
- // 6-20 | D0..D7 | Saved VFP registers
- // +--------+
- //
-#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- // Restore VFP caller-saved registers.
- "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
-#endif
- //
- // We need to exchange the values in slots 0 and 1 so we can
- // return to the address in slot 1 with the address in slot 0
- // restored to the LR.
- "ldr r0, [sp,#20]\n"
- "ldr r1, [sp,#16]\n"
- "str r1, [sp,#20]\n"
- "str r0, [sp,#16]\n"
- // Return to the (newly modified) stub to invoke the real function.
- // The above twiddling of the saved return addresses allows us to
- // deallocate everything, including the LR the stub saved, with two
- // updating load instructions.
- "ldmia sp!, {r0, r1, r2, r3, lr}\n"
- "ldr pc, [sp], #4\n"
- );
-#else // Not an ARM host
- void ARMCompilationCallback() {
- llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
- }
-#endif
-}
-
-/// ARMCompilationCallbackC - This is the target-specific function invoked
-/// by the function stub when we did not know the real target of a call.
-/// This function must locate the start of the stub or call site and pass
-/// it into the JIT compiler function.
-extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
- // Get the address of the compiled code for this function.
- intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
-
- // Rewrite the call target... so that we don't end up here every time we
- // execute the call. We're replacing the first two instructions of the
- // stub with:
- // ldr pc, [pc,#-4]
- // <addr>
- if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
- llvm_unreachable("ERROR: Unable to mark stub writable");
- }
- *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
- *(intptr_t *)(StubAddr+4) = NewVal;
- if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
- llvm_unreachable("ERROR: Unable to mark stub executable");
- }
-}
-
-TargetJITInfo::LazyResolverFn
-ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
- JITCompilerFunction = F;
- return ARMCompilationCallback;
-}
-
-void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
- JITCodeEmitter &JCE) {
- uint8_t Buffer[4];
- uint8_t *Cur = Buffer;
- MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
- void *PtrAddr = JCE.allocIndirectGV(
- GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
- addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
- return PtrAddr;
-}
-
-TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
- // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
- // 4-byte address. See emitFunctionStub for details.
- StubLayout Result = {16, 4};
- return Result;
-}
-
-void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) {
- void *Addr;
- // If this is just a call to an external function, emit a branch instead of a
- // call. The code is the same except for one bit of the last instruction.
- if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
- // Branch to the corresponding function addr.
- if (IsPIC) {
- // The stub is 16-byte size and 4-aligned.
- intptr_t LazyPtr = getIndirectSymAddr(Fn);
- if (!LazyPtr) {
- // In PIC mode, the function stub is loading a lazy-ptr.
- LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE);
- DEBUG(if (F)
- errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
- << "] for GV '" << F->getName() << "'\n";
- else
- errs() << "JIT: Stub emitted at [" << LazyPtr
- << "] for external function at '" << Fn << "'\n");
- }
- JCE.emitAlignment(4);
- Addr = (void*)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable(Addr, 16)) {
- llvm_unreachable("ERROR: Unable to mark stub writable");
- }
- JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
- JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
- JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
- JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
- sys::Memory::InvalidateInstructionCache(Addr, 16);
- if (!sys::Memory::setRangeExecutable(Addr, 16)) {
- llvm_unreachable("ERROR: Unable to mark stub executable");
- }
- } else {
- // The stub is 8-byte size and 4-aligned.
- JCE.emitAlignment(4);
- Addr = (void*)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable(Addr, 8)) {
- llvm_unreachable("ERROR: Unable to mark stub writable");
- }
- JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
- JCE.emitWordLE((intptr_t)Fn); // addr of function
- sys::Memory::InvalidateInstructionCache(Addr, 8);
- if (!sys::Memory::setRangeExecutable(Addr, 8)) {
- llvm_unreachable("ERROR: Unable to mark stub executable");
- }
- }
- } else {
- // The compilation callback will overwrite the first two words of this
- // stub with indirect branch instructions targeting the compiled code.
- // This stub sets the return address to restart the stub, so that
- // the new branch will be invoked when we come back.
- //
- // Branch and link to the compilation callback.
- // The stub is 16-byte size and 4-byte aligned.
- JCE.emitAlignment(4);
- Addr = (void*)JCE.getCurrentPCValue();
- if (!sys::Memory::setRangeWritable(Addr, 16)) {
- llvm_unreachable("ERROR: Unable to mark stub writable");
- }
- // Save LR so the callback can determine which stub called it.
- // The compilation callback is responsible for popping this prior
- // to returning.
- JCE.emitWordLE(0xe92d4000); // push {lr}
- // Set the return address to go back to the start of this stub.
- JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
- // Invoke the compilation callback.
- JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
- // The address of the compilation callback.
- JCE.emitWordLE((intptr_t)ARMCompilationCallback);
- sys::Memory::InvalidateInstructionCache(Addr, 16);
- if (!sys::Memory::setRangeExecutable(Addr, 16)) {
- llvm_unreachable("ERROR: Unable to mark stub executable");
- }
- }
-
- return Addr;
-}
-
-intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
- ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
- switch (RT) {
- default:
- return (intptr_t)(MR->getResultPointer());
- case ARM::reloc_arm_pic_jt:
- // Destination address - jump table base.
- return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
- case ARM::reloc_arm_jt_base:
- // Jump table base address.
- return getJumpTableBaseAddr(MR->getJumpTableIndex());
- case ARM::reloc_arm_cp_entry:
- case ARM::reloc_arm_vfp_cp_entry:
- // Constant pool entry address.
- return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
- case ARM::reloc_arm_machine_cp_entry: {
- ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
- assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
- "Can't handle this machine constant pool entry yet!");
- intptr_t Addr = (intptr_t)(MR->getResultPointer());
- Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
- return Addr;
- }
- }
-}
-
-/// relocate - Before the JIT can run a block of code that has been emitted,
-/// it must rewrite the code to contain the actual addresses of any
-/// referenced global symbols.
-void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
- intptr_t ResultPtr = resolveRelocDestAddr(MR);
- switch ((ARM::RelocationType)MR->getRelocationType()) {
- case ARM::reloc_arm_cp_entry:
- case ARM::reloc_arm_vfp_cp_entry:
- case ARM::reloc_arm_relative: {
- // It is necessary to calculate the correct PC relative value. We
- // subtract the base addr from the target addr to form a byte offset.
- ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
- // If the result is positive, set bit U(23) to 1.
- if (ResultPtr >= 0)
- *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
- else {
- // Otherwise, obtain the absolute value and set bit U(23) to 0.
- *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
- ResultPtr = - ResultPtr;
- }
- // Set the immed value calculated.
- // VFP immediate offset is multiplied by 4.
- if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
- ResultPtr = ResultPtr >> 2;
- *((intptr_t*)RelocPos) |= ResultPtr;
- // Set register Rn to PC (which is register 15 on all architectures).
- // FIXME: This avoids the need for register info in the JIT class.
- *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift;
- break;
- }
- case ARM::reloc_arm_pic_jt:
- case ARM::reloc_arm_machine_cp_entry:
- case ARM::reloc_arm_absolute: {
- // These addresses have already been resolved.
- *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
- break;
- }
- case ARM::reloc_arm_branch: {
- // It is necessary to calculate the correct value of signed_immed_24
- // field. We subtract the base addr from the target addr to form a
- // byte offset, which must be inside the range -33554432 and +33554428.
- // Then, we set the signed_immed_24 field of the instruction to bits
- // [25:2] of the byte offset. More details ARM-ARM p. A4-11.
- ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
- ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
- assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
- *((intptr_t*)RelocPos) |= ResultPtr;
- break;
- }
- case ARM::reloc_arm_jt_base: {
- // JT base - (instruction addr + 8)
- ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
- *((intptr_t*)RelocPos) |= ResultPtr;
- break;
- }
- case ARM::reloc_arm_movw: {
- ResultPtr = ResultPtr & 0xFFFF;
- *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
- *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
- break;
- }
- case ARM::reloc_arm_movt: {
- ResultPtr = (ResultPtr >> 16) & 0xFFFF;
- *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
- *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
- break;
- }
- }
- }
-}
-
-void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) {
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
- JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
- IsPIC = isPIC;
-}
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
deleted file mode 100644
index 27e2a20..0000000
--- a/lib/Target/ARM/ARMJITInfo.h
+++ /dev/null
@@ -1,177 +0,0 @@
-//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the ARMJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMJITINFO_H
-#define ARMJITINFO_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
- class ARMTargetMachine;
-
- class ARMJITInfo : public TargetJITInfo {
- // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
- // CONSTPOOL_ENTRY addresses.
- SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
-
- // JumpTableId2AddrMap - A map from inline jumptable ids to the
- // corresponding inline jump table bases.
- SmallVector<intptr_t, 16> JumpTableId2AddrMap;
-
- // PCLabelMap - A map from PC labels to addresses.
- DenseMap<unsigned, intptr_t> PCLabelMap;
-
- // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
- // addresses to their indirect symbol addresses.
- DenseMap<void*, intptr_t> Sym2IndirectSymMap;
-
- // IsPIC - True if the relocation model is PIC. This is used to determine
- // how to codegen function stubs.
- bool IsPIC;
-
- public:
- explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-
- /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
- /// to emit an indirect symbol which contains the address of the specified
- /// ptr.
- void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
- JITCodeEmitter &JCE) override;
-
- // getStubLayout - Returns the size and alignment of the largest call stub
- // on ARM.
- StubLayout getStubLayout() override;
-
- /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
- /// small native function that simply calls the function at the specified
- /// address.
- void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) override;
-
- /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-
- /// relocate - Before the JIT can run a block of code that has been emitted,
- /// it must rewrite the code to contain the actual addresses of any
- /// referenced global symbols.
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) override;
-
- /// hasCustomConstantPool - Allows a target to specify that constant
- /// pool address resolution is handled by the target.
- bool hasCustomConstantPool() const override { return true; }
-
- /// hasCustomJumpTables - Allows a target to specify that jumptables
- /// are emitted by the target.
- bool hasCustomJumpTables() const override { return true; }
-
- /// allocateSeparateGVMemory - If true, globals should be placed in
- /// separately allocated heap memory rather than in the same
- /// code memory allocated by JITCodeEmitter.
- bool allocateSeparateGVMemory() const override {
-#ifdef __APPLE__
- return true;
-#else
- return false;
-#endif
- }
-
- /// Initialize - Initialize internal stage for the function being JITted.
- /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
- /// jump table ids to jump table bases map; remember if codegen relocation
- /// model is PIC.
- void Initialize(const MachineFunction &MF, bool isPIC);
-
- /// getConstantPoolEntryAddr - The ARM target puts all constant
- /// pool entries into constant islands. This returns the address of the
- /// constant pool entry of the specified index.
- intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
- assert(CPI < ConstPoolId2AddrMap.size());
- return ConstPoolId2AddrMap[CPI];
- }
-
- /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
- /// where its associated value is stored. When relocations are processed,
- /// this value will be used to resolve references to the constant.
- void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
- assert(CPI < ConstPoolId2AddrMap.size());
- ConstPoolId2AddrMap[CPI] = Addr;
- }
-
- /// getJumpTableBaseAddr - The ARM target inline all jump tables within
- /// text section of the function. This returns the address of the base of
- /// the jump table of the specified index.
- intptr_t getJumpTableBaseAddr(unsigned JTI) const {
- assert(JTI < JumpTableId2AddrMap.size());
- return JumpTableId2AddrMap[JTI];
- }
-
- /// addJumpTableBaseAddr - Map a jump table index to the address where
- /// the corresponding inline jump table is emitted. When relocations are
- /// processed, this value will be used to resolve references to the
- /// jump table.
- void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
- assert(JTI < JumpTableId2AddrMap.size());
- JumpTableId2AddrMap[JTI] = Addr;
- }
-
- /// getPCLabelAddr - Retrieve the address of the PC label of the
- /// specified id.
- intptr_t getPCLabelAddr(unsigned Id) const {
- DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
- assert(I != PCLabelMap.end());
- return I->second;
- }
-
- /// addPCLabelAddr - Remember the address of the specified PC label.
- void addPCLabelAddr(unsigned Id, intptr_t Addr) {
- PCLabelMap.insert(std::make_pair(Id, Addr));
- }
-
- /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the
- /// specified symbol located at address. Returns 0 if the indirect symbol
- /// has not been emitted.
- intptr_t getIndirectSymAddr(void *Addr) const {
- DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
- if (I != Sym2IndirectSymMap.end())
- return I->second;
- return 0;
- }
-
- /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
- /// its indirect symbol address.
- void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
- Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
- }
-
- private:
- /// resolveRelocDestAddr - Resolve the resulting address of the relocation
- /// if it's not already solved. Constantpool entries must be resolved by
- /// ARM target.
- intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
- };
-}
-
-#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a03bcdb..c429ac1 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -144,6 +144,46 @@ namespace {
char ARMLoadStoreOpt::ID = 0;
}
+static bool definesCPSR(const MachineInstr *MI) {
+ for (const auto &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If the instruction has live CPSR def, then it's not safe to fold it
+ // into load / store.
+ return true;
+ }
+
+ return false;
+}
+
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+
+ if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
+ Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
+ return OffField;
+
+ // Thumb1 immediate offsets are scaled by 4
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
+ return OffField * 4;
+
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
+ : ARM_AM::getAM5Op(OffField);
+
+ if (Op == ARM_AM::sub)
+ return -Offset;
+
+ return Offset;
+}
+
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
@@ -335,40 +375,50 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg) {
assert(isThumb1 && "Can only update base register uses for Thumb1!");
-
- // Start updating any instructions with immediate offsets. Insert a sub before
+ // Start updating any instructions with immediate offsets. Insert a SUB before
// the first non-updateable instruction (if any).
for (; MBBI != MBB.end(); ++MBBI) {
+ bool InsertSub = false;
+ unsigned Opc = MBBI->getOpcode();
+
if (MBBI->readsRegister(Base)) {
- unsigned Opc = MBBI->getOpcode();
int Offset;
- bool InsertSub = false;
+ bool IsLoad =
+ Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
+ bool IsStore =
+ Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
- if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
- Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
- Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
+ if (IsLoad || IsStore) {
// Loads and stores with immediate offsets can be updated, but only if
// the new offset isn't negative.
// The MachineOperand containing the offset immediate is the last one
// before predicates.
MachineOperand &MO =
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
- // The offsets are scaled by 1, 2 or 4 depending on the Opcode
+ // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
- if (Offset >= 0)
+
+ // If storing the base register, it needs to be reset first.
+ unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
+
+ if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
MO.setImm(Offset);
else
InsertSub = true;
- } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
- // SUB/ADD using this register. Merge it with the update.
- // If the merged offset is too large, insert a new sub instead.
+ } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
+ !definesCPSR(MBBI)) {
+ // SUBS/ADDS using this register, with a dead def of the CPSR.
+ // Merge it with the update; if the merged offset is too large,
+ // insert a new sub instead.
MachineOperand &MO =
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
Offset = (Opc == ARM::tSUBi8) ?
MO.getImm() + WordOffset * 4 :
MO.getImm() - WordOffset * 4 ;
- if (TL->isLegalAddImmediate(Offset)) {
+ if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
+ // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
+ // Offset == 0.
MO.setImm(Offset);
// The base register has now been reset, so exit early.
return;
@@ -381,13 +431,19 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
InsertSub = true;
}
- if (InsertSub) {
- // An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
- .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
- .addImm(Pred).addReg(PredReg);
- return;
- }
+ } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
+ // Since SUBS sets the condition flags, we can't place the base reset
+ // after an instruction that has a live CPSR def.
+ // The base register might also contain an argument for a function call.
+ InsertSub = true;
+ }
+
+ if (InsertSub) {
+ // An instruction above couldn't be updated, so insert a sub.
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
+ .addImm(Pred).addReg(PredReg);
+ return;
}
if (MBBI->killsRegister(Base))
@@ -395,15 +451,18 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
return;
}
- // The end of the block was reached. This means register liveness escapes the
- // block, and it's necessary to insert a sub before the last instruction.
- if (MBB.succ_size() > 0)
- // But only insert the SUB if there is actually a successor block.
- // FIXME: Check more carefully if register is live at this point, e.g. by
- // also examining the successor block's register liveness information.
- AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
- .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
+ // End of block was reached.
+ if (MBB.succ_size() > 0) {
+ // FIXME: Because of a bug, live registers are sometimes missing from
+ // the successor blocks' live-in sets. This means we can't trust that
+ // information and *always* have to reset at the end of a block.
+ // See PR21029.
+ if (MBBI != MBB.end()) --MBBI;
+ AddDefaultT1CC(
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
.addImm(Pred).addReg(PredReg);
+ }
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
@@ -422,6 +481,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NumRegs <= 1)
return false;
+ // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
+ // Compute liveness information for that register to make the decision.
+ bool SafeToClobberCPSR = !isThumb1 ||
+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
+ MachineBasicBlock::LQR_Dead);
+
+ bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
+
+ // Exception: If the base register is in the input reglist, Thumb1 LDM is
+ // non-writeback.
+ // It's also not possible to merge an STR of the base register in Thumb1.
+ if (isThumb1)
+ for (unsigned I = 0; I < NumRegs; ++I)
+ if (Base == Regs[I].first) {
+ if (Opcode == ARM::tLDRi) {
+ Writeback = false;
+ break;
+ } else if (Opcode == ARM::tSTRi) {
+ return false;
+ }
+ }
+
ARM_AM::AMSubMode Mode = ARM_AM::ia;
// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
@@ -445,6 +526,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NumRegs <= 2)
return false;
+ // On Thumb1, it's not worth materializing a new base register without
+ // clobbering the CPSR (i.e. not using ADDS/SUBS).
+ if (!SafeToClobberCPSR)
+ return false;
+
unsigned NewBase;
if (isi32Load(Opcode)) {
// If it is a load, then just use one of the destination register to
@@ -459,13 +545,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
int BaseOpc =
isThumb2 ? ARM::t2ADDri :
+ (isThumb1 && Offset < 8) ? ARM::tADDi3 :
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
if (Offset < 0) {
+ Offset = - Offset;
BaseOpc =
isThumb2 ? ARM::t2SUBri :
+ (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
- Offset = - Offset;
}
if (!TL->isLegalAddImmediate(Offset))
@@ -473,22 +561,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false; // Probably not worth it then.
if (isThumb1) {
- if (Base != NewBase) {
+ // Thumb1: depending on immediate size, use either
+ // ADDS NewBase, Base, #imm3
+ // or
+ // MOV NewBase, Base
+ // ADDS NewBase, #imm8.
+ if (Base != NewBase && Offset >= 8) {
// Need to insert a MOV to the new base first.
- // FIXME: If the immediate fits in 3 bits, use ADD instead.
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
+ // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
+ Base = NewBase;
+ BaseKill = false;
}
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
- .addReg(NewBase, getKillRegState(true)).addImm(Offset)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
.addImm(Pred).addReg(PredReg);
} else {
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0);
}
-
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
}
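The choice above between tADDi3/tSUBi3 and tMOVr plus tADDi8/tSUBi8 comes down to whether the absolute offset fits in a 3-bit immediate. A rough standalone sketch (plain C++, not the LLVM API; register names are invented, and the separate isLegalAddImmediate check is omitted):

#include <cstdio>
#include <string>

// Returns the Thumb1 instruction sequence that materializes Base +/- Offset
// into NewBase. The imm3 form can name a distinct destination register; the
// imm8 form is destructive, so it needs a MOV first when NewBase != Base.
static std::string materializeBase(const std::string &NewBase,
                                   const std::string &Base, int Offset) {
  const char *Op = Offset < 0 ? "subs" : "adds";
  unsigned Abs = Offset < 0 ? unsigned(-Offset) : unsigned(Offset);
  if (Abs < 8) // imm3 form: "adds NewBase, Base, #imm3"
    return std::string(Op) + " " + NewBase + ", " + Base + ", #" +
           std::to_string(Abs);
  // imm8 form is "NewBase += imm8", so copy Base into NewBase first if needed.
  std::string Seq;
  if (NewBase != Base)
    Seq = "mov " + NewBase + ", " + Base + "\n";
  return Seq + Op + " " + NewBase + ", #" + std::to_string(Abs);
}

int main() {
  std::printf("%s\n---\n%s\n", materializeBase("r4", "r0", 4).c_str(),
              materializeBase("r4", "r0", 16).c_str());
  return 0;
}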
@@ -501,16 +595,16 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
if (!Opcode) return false;
- bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
-
- // Exception: If the base register is in the input reglist, Thumb1 LDM is
- // non-writeback. Check for this.
- if (Opcode == ARM::tLDMIA && isThumb1)
- for (unsigned I = 0; I < NumRegs; ++I)
- if (Base == Regs[I].first) {
- Writeback = false;
- break;
- }
+ // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
+ // - There is no writeback (LDM of base register),
+ // - the base register is killed by the merged instruction,
+ // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
+ // to reset the base register.
+ // Otherwise, don't merge.
+ // It's safe to return here since the code to materialize a new base register
+ // above is also conditional on SafeToClobberCPSR.
+ if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
+ return false;
MachineInstrBuilder MIB;
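The comment above enumerates when a Thumb1 LDM/STM merge must be abandoned. A minimal standalone restatement of that predicate (plain C++, not the LLVM API):

// A Thumb1 merge is rejected only when all of the following hold: the merged
// instruction has writeback, the base register stays live afterwards, and the
// condition flags cannot be clobbered (so no compensating SUBS can be added).
constexpr bool thumb1MergeIsSafe(bool IsThumb1, bool Writeback, bool BaseKill,
                                 bool SafeToClobberCPSR) {
  return !(IsThumb1 && Writeback && !BaseKill && !SafeToClobberCPSR);
}

static_assert(!thumb1MergeIsSafe(true, true, false, false),
              "writeback with a live base and untouchable flags is rejected");
static_assert(thumb1MergeIsSafe(true, true, true, false),
              "a base that dies in the merge needs no compensating SUBS");
static_assert(thumb1MergeIsSafe(true, true, false, true),
              "a free CPSR makes the compensating SUBS legal");

int main() { return 0; }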
@@ -525,11 +619,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MIB.addReg(Base, getDefRegState(true))
.addReg(Base, getKillRegState(BaseKill));
- // The base isn't dead after a merged instruction with writeback. Update
- // future uses of the base with the added offset (if possible), or reset
- // the base register as necessary.
+ // The base isn't dead after a merged instruction with writeback.
+ // Insert a sub instruction after the newly formed instruction to reset.
if (!BaseKill)
UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
+
} else {
// No writeback, simply build the MachineInstr.
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
@@ -700,6 +794,11 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
memOps[i].MBBI = Merges.back();
memOps[i].Position = insertPos;
}
+
+ // Update memOps offsets, since they may have been modified by MergeOps.
+ for (auto &MemOp : memOps) {
+ MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
+ }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
@@ -721,7 +820,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Count = 1;
unsigned Limit = ~0U;
-
+ bool BaseKill = false;
// vldm / vstm limit are 32 for S variants, 16 for D variants.
switch (Opcode) {
@@ -760,36 +859,28 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
++Count;
} else {
// Can't merge this in. Try merge the earlier ones first.
- MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
- Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ // We need to compute BaseKill here because the MemOps may have been
+ // reordered.
+ BaseKill = Loc->killsRegister(Base);
+
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
+ BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
MemOps, Merges);
return;
}
- if (MemOps[i].Position > MemOps[insertAfter].Position)
+ if (MemOps[i].Position > MemOps[insertAfter].Position) {
insertAfter = i;
+ Loc = MemOps[i].MBBI;
+ }
}
- bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ BaseKill = Loc->killsRegister(Base);
MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
-static bool definesCPSR(MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
- // If the instruction has live CPSR def, then it's not safe to fold it
- // into load / store.
- return true;
- }
-
- return false;
-}
-
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
unsigned Bytes, unsigned Limit,
ARMCC::CondCodes Pred, unsigned PredReg) {
@@ -1327,34 +1418,6 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
RS->forward(std::prev(Loc));
}
-static int getMemoryOpOffset(const MachineInstr *MI) {
- int Opcode = MI->getOpcode();
- bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
- unsigned NumOperands = MI->getDesc().getNumOperands();
- unsigned OffField = MI->getOperand(NumOperands-3).getImm();
-
- if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
- Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
- Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
- Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
- return OffField;
-
- // Thumb1 immediate offsets are scaled by 4
- if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
- return OffField * 4;
-
- int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
- : ARM_AM::getAM5Offset(OffField) * 4;
- if (isAM3) {
- if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- } else {
- if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- }
- return Offset;
-}
-
static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
int Offset, bool isDef,
@@ -1725,21 +1788,15 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
- TL = TM.getTargetLowering();
+ TL = TM.getSubtargetImpl()->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = TM.getInstrInfo();
- TRI = TM.getRegisterInfo();
+ TII = TM.getSubtargetImpl()->getInstrInfo();
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
STI = &TM.getSubtarget<ARMSubtarget>();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
- // FIXME: Temporarily disabling for Thumb-1 due to miscompiles
- if (isThumb1) {
- delete RS;
- return false;
- }
-
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
@@ -1793,10 +1850,10 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getDataLayout();
- TII = Fn.getTarget().getInstrInfo();
- TRI = Fn.getTarget().getRegisterInfo();
- STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ TD = Fn.getSubtarget().getDataLayout();
+ TII = Fn.getSubtarget().getInstrInfo();
+ TRI = Fn.getSubtarget().getRegisterInfo();
+ STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
MRI = &Fn.getRegInfo();
MF = &Fn;
@@ -1811,7 +1868,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E,
- SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallPtrSetImpl<MachineInstr*> &MemOps,
SmallSet<unsigned, 4> &MemRegs,
const TargetRegisterInfo *TRI) {
// Are there stores / loads / calls between them?
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 44a9e34..4e67fa1 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMMACHINEFUNCTIONINFO_H
-#define ARMMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
#include "ARMSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
namespace llvm {
@@ -47,6 +48,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
///
unsigned ArgRegsSaveSize;
+ /// ReturnRegsCount - Number of registers used up in the return.
+ unsigned ReturnRegsCount;
+
/// HasStackFrame - True if this function has a stack frame. Set by
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
@@ -82,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// areas.
unsigned GPRCS1Size;
unsigned GPRCS2Size;
+ unsigned DPRCSAlignGapSize;
unsigned DPRCSSize;
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
@@ -118,14 +123,19 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// being passed on the stack
unsigned ArgumentStackSize;
+ /// CoalescedWeights - mapping of basic blocks to the rolling counter of
+ /// coalesced weights.
+ DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights;
+
public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false),
+ RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -146,6 +156,9 @@ public:
}
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
+ unsigned getReturnRegsCount() const { return ReturnRegsCount; }
+ void setReturnRegsCount(unsigned s) { ReturnRegsCount = s; }
+
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
@@ -171,10 +184,12 @@ public:
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
@@ -221,7 +236,16 @@ public:
else
return -1U;
}
+
+ DenseMap<const MachineBasicBlock*, unsigned>::iterator getCoalescedWeight(
+ MachineBasicBlock* MBB) {
+ auto It = CoalescedWeights.find(MBB);
+ if (It == CoalescedWeights.end()) {
+ It = CoalescedWeights.insert(std::make_pair(MBB, 0)).first;
+ }
+ return It;
+ }
};
} // End llvm namespace
-#endif // ARMMACHINEFUNCTIONINFO_H
+#endif
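getCoalescedWeight() above is a plain find-or-insert on a DenseMap, defaulting a block's weight to 0 on first use. A standalone sketch of the same idiom (plain C++, with an int key standing in for the MachineBasicBlock pointer):

#include <cassert>
#include <unordered_map>

using WeightMap = std::unordered_map<int, unsigned>;

// Look up the rolling counter for a block, creating it at 0 on first use, and
// hand the caller an iterator so the entry can be bumped in place.
static WeightMap::iterator getCoalescedWeight(WeightMap &Weights, int BlockID) {
  auto It = Weights.find(BlockID);
  if (It == Weights.end())
    It = Weights.emplace(BlockID, 0u).first;
  return It;
}

int main() {
  WeightMap Weights;
  getCoalescedWeight(Weights, 7)->second += 2; // accumulate weight for block 7
  getCoalescedWeight(Weights, 7)->second += 3;
  assert(Weights[7] == 5);
  return 0;
}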
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
index efa22fb..3ff0bee 100644
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H
+
// 31 entries have cost 0
// 242 entries have cost 1
// 1447 entries have cost 2
@@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = {
835584U, // <u,u,u,u>: Cost 0 copy LHS
0
};
+
+#endif
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 3e6af3f..b623173 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMREGISTERINFO_H
-#define ARMREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H
#include "ARMBaseRegisterInfo.h"
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
deleted file mode 100644
index 21877fd..0000000
--- a/lib/Target/ARM/ARMRelocations.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the ARM target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMRELOCATIONS_H
-#define ARMRELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace ARM {
- enum RelocationType {
- // reloc_arm_absolute - Absolute relocation, just add the relocated value
- // to the value already in memory.
- reloc_arm_absolute,
-
- // reloc_arm_relative - PC relative relocation, add the relocated value to
- // the value already in memory, after we adjust it for where the PC is.
- reloc_arm_relative,
-
- // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
- // addresses are kept locally in a map.
- reloc_arm_cp_entry,
-
- // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
- // should be divided by 4.
- reloc_arm_vfp_cp_entry,
-
- // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool
- // entry.
- reloc_arm_machine_cp_entry,
-
- // reloc_arm_jt_base - PC relative relocation for jump tables whose
- // addresses are kept locally in a map.
- reloc_arm_jt_base,
-
- // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
- reloc_arm_pic_jt,
-
- // reloc_arm_branch - Branch address relocation.
- reloc_arm_branch,
-
- // reloc_arm_movt - MOVT immediate relocation.
- reloc_arm_movt,
-
- // reloc_arm_movw - MOVW immediate relocation.
- reloc_arm_movw
- };
- }
-}
-
-#endif
-
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 3dcc0df..fa30ac3 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -157,7 +157,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
return SDValue();
const ARMTargetLowering &TLI =
- *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering());
+ *DAG.getTarget().getSubtarget<ARMSubtarget>().getTargetLowering();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 13769dc..94b98e6 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMSELECTIONDAGINFO_H
-#define ARMSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0eb24ef..600f39d 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -15,9 +15,9 @@
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMJITInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
+#include "ARMMachineFunctionInfo.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
@@ -27,6 +27,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
@@ -47,11 +49,13 @@ static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
cl::init(true), cl::Hidden);
+namespace {
enum AlignMode {
DefaultAlign,
StrictAlign,
NoStrictAlign
};
+}
static cl::opt<AlignMode>
Align(cl::desc("Load/store alignment support"),
@@ -98,11 +102,6 @@ static std::string computeDataLayout(ARMSubtarget &ST) {
// Pointers are 32 bits and aligned to 32 bits.
Ret += "-p:32:32";
- // On thumb, i16,i18 and i1 have natural aligment requirements, but we try to
- // align to 32.
- if (ST.isThumb())
- Ret += "-i1:8:32-i8:8:32-i16:16:32";
-
// ABIs other than APCS have 64 bit integers with natural alignment.
if (!ST.isAPCS_ABI())
Ret += "-i64:64";
@@ -119,10 +118,9 @@ static std::string computeDataLayout(ARMSubtarget &ST) {
else
Ret += "-v128:64:128";
- // On thumb and APCS, only try to align aggregates to 32 bits (the default is
- // 64 bits).
- if (ST.isThumb() || ST.isAPCS_ABI())
- Ret += "-a:0:32";
+ // Try to align aggregates to 32 bits (the default is 64 bits, which has no
+ // particular hardware support on 32-bit ARM).
+ Ret += "-a:0:32";
// Integer registers are 32 bits.
Ret += "-n32";
@@ -144,18 +142,18 @@ static std::string computeDataLayout(ARMSubtarget &ST) {
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
+ initSubtargetFeatures(CPU, FS);
return *this;
}
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, TargetMachine &TM,
- bool IsLittle, const TargetOptions &Options)
+ const std::string &FS, const TargetMachine &TM,
+ bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN),
+ TargetTriple(TT), Options(TM.Options), TargetABI(ARM_ABI_UNKNOWN),
DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
- TSInfo(DL), JITInfo(),
+ TSInfo(DL),
InstrInfo(isThumb1Only()
? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
: !isThumb()
@@ -188,7 +186,6 @@ void ARMSubtarget::initializeEnvironment() {
InThumbMode = false;
HasThumb2 = false;
NoARM = false;
- PostRAScheduler = false;
IsR9Reserved = ReserveR9;
UseMovt = false;
SupportsTailCall = false;
@@ -217,23 +214,7 @@ void ARMSubtarget::initializeEnvironment() {
UseLong64 = false;
}
-void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-cpu");
- Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-features");
- std::string CPU =
- !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
- std::string FS =
- !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty()) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
- }
-}
-
-void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty()) {
if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s"))
// Default to the Swift CPU when targeting armv7s/thumbv7s.
@@ -275,9 +256,8 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
TargetABI = ARM_ABI_AAPCS;
break;
default:
- if ((isTargetIOS() && isMClass()) ||
- (TargetTriple.isOSBinFormatMachO() &&
- TargetTriple.getOS() == Triple::UnknownOS))
+ if (TargetTriple.isOSBinFormatMachO() &&
+ TargetTriple.getOS() == Triple::UnknownOS)
TargetABI = ARM_ABI_AAPCS;
else
TargetABI = ARM_ABI_APCS;
@@ -299,49 +279,39 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
UseMovt = hasV6T2Ops() && ArmUseMOVT;
if (isTargetMachO()) {
- IsR9Reserved = ReserveR9 | !HasV6Ops;
+ IsR9Reserved = ReserveR9 || !HasV6Ops;
SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0);
} else {
IsR9Reserved = ReserveR9;
SupportsTailCall = !isThumb1Only();
}
- if (!isThumb() || hasThumb2())
- PostRAScheduler = true;
-
- switch (Align) {
- case DefaultAlign:
- // Assume pre-ARMv6 doesn't support unaligned accesses.
- //
- // ARMv6 may or may not support unaligned accesses depending on the
- // SCTLR.U bit, which is architecture-specific. We assume ARMv6
- // Darwin and NetBSD targets support unaligned accesses, and others don't.
- //
- // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
- // which raises an alignment fault on unaligned accesses. Linux
- // defaults this bit to 0 and handles it as a system-wide (not
- // per-process) setting. It is therefore safe to assume that ARMv7+
- // Linux targets support unaligned accesses. The same goes for NaCl.
- //
- // The above behavior is consistent with GCC.
- AllowsUnalignedMem =
- (hasV7Ops() && (isTargetLinux() || isTargetNaCl() ||
- isTargetNetBSD())) ||
- (hasV6Ops() && (isTargetMachO() || isTargetNetBSD()));
- // The one exception is cortex-m0, which despite being v6, does not
- // support unaligned accesses. Rather than make the above boolean
- // expression even more obtuse, just override the value here.
- if (isThumb1Only() && isMClass())
- AllowsUnalignedMem = false;
- break;
- case StrictAlign:
- AllowsUnalignedMem = false;
- break;
- case NoStrictAlign:
- AllowsUnalignedMem = true;
- break;
+ if (Align == DefaultAlign) {
+ // Assume pre-ARMv6 doesn't support unaligned accesses.
+ //
+ // ARMv6 may or may not support unaligned accesses depending on the
+ // SCTLR.U bit, which is architecture-specific. We assume ARMv6
+ // Darwin and NetBSD targets support unaligned accesses, and others don't.
+ //
+ // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
+ // which raises an alignment fault on unaligned accesses. Linux
+ // defaults this bit to 0 and handles it as a system-wide (not
+ // per-process) setting. It is therefore safe to assume that ARMv7+
+ // Linux targets support unaligned accesses. The same goes for NaCl.
+ //
+ // The above behavior is consistent with GCC.
+ AllowsUnalignedMem =
+ (hasV7Ops() && (isTargetLinux() || isTargetNaCl() ||
+ isTargetNetBSD())) ||
+ (hasV6Ops() && (isTargetMachO() || isTargetNetBSD()));
+ } else {
+ AllowsUnalignedMem = !(Align == StrictAlign);
}
+ // No v6M core supports unaligned memory access (v6M ARM ARM A3.2)
+ if (isV6M())
+ AllowsUnalignedMem = false;
+
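The rewritten block above keeps the unaligned-access policy from the old switch and then applies the v6-M override after both branches. A standalone sketch of the resulting decision (plain C++, not the subtarget API; the flag names are invented):

#include <cassert>

enum class AlignOpt { Default, Strict, NoStrict };

struct TargetBits {
  bool HasV6, HasV7, IsV6M;
  bool IsLinux, IsNaCl, IsNetBSD, IsMachO;
};

// Default policy: v7 Linux/NaCl/NetBSD and v6 Darwin/NetBSD are assumed to
// allow unaligned accesses; an explicit option overrides that; v6-M never
// supports unaligned access regardless of the option.
static bool allowsUnalignedMem(const TargetBits &T, AlignOpt Opt) {
  bool Allowed;
  if (Opt == AlignOpt::Default)
    Allowed = (T.HasV7 && (T.IsLinux || T.IsNaCl || T.IsNetBSD)) ||
              (T.HasV6 && (T.IsMachO || T.IsNetBSD));
  else
    Allowed = (Opt == AlignOpt::NoStrict);
  if (T.IsV6M)
    Allowed = false;
  return Allowed;
}

int main() {
  TargetBits V7Linux = {true, true, false, true, false, false, false};
  TargetBits V6M     = {true, false, true, false, false, false, false};
  assert(allowsUnalignedMem(V7Linux, AlignOpt::Default));
  assert(!allowsUnalignedMem(V6M, AlignOpt::NoStrict)); // override never wins on v6-M
  return 0;
}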
switch (IT) {
case DefaultIT:
RestrictIT = hasV8Ops() ? true : false;
@@ -368,11 +338,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
if (RelocM == Reloc::Static)
return false;
- // Materializable GVs (in JIT lazy compilation mode) do not require an extra
- // load from stub.
- bool isDecl = GV->hasAvailableExternallyLinkage();
- if (GV->isDeclaration() && !GV->isMaterializable())
- isDecl = true;
+ bool isDecl = GV->isDeclarationForLinker();
if (!isTargetMachO()) {
// Extra load is needed for all externally visible.
@@ -415,33 +381,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
}
unsigned ARMSubtarget::getMispredictionPenalty() const {
- return SchedModel->MispredictPenalty;
+ return SchedModel.MispredictPenalty;
}
bool ARMSubtarget::hasSinCos() const {
- return getTargetTriple().getOS() == Triple::IOS &&
- !getTargetTriple().isOSVersionLT(7, 0);
+ return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0);
}
-// Enable the PostMachineScheduler if the target selects it instead of
-// PostRAScheduler. Currently only available on the command line via
-// -misched-postra.
+// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostMachineScheduler() const {
- return PostRAScheduler;
+ return (!isThumb() || hasThumb2());
}
-bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
+bool ARMSubtarget::enableAtomicExpand() const {
return hasAnyDataBarrier() && !isThumb1Only();
}
-bool ARMSubtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_NONE;
- return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
-}
-
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
// immediates as it is inherently position independent, and may be out of
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8f6c165..d5ee009 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -11,20 +11,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMSUBTARGET_H
-#define ARMSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
+#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMJITInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "ARMJITInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
@@ -44,7 +42,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexR5, Swift, CortexA53, CortexA57, Krait
+ CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait,
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
@@ -105,9 +103,6 @@ protected:
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
- /// PostRAScheduler - True if using post-register-allocation scheduler.
- bool PostRAScheduler;
-
/// IsR9Reserved - True if R9 is not available as a general purpose register.
bool IsR9Reserved;
@@ -191,7 +186,7 @@ protected:
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
- /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+ /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
bool AllowsUnalignedMem;
/// RestrictIT - If true, the subtarget disallows generation of deprecated IT
@@ -225,7 +220,7 @@ protected:
Triple TargetTriple;
/// SchedModel - Processor specific instruction costs.
- const MCSchedModel *SchedModel;
+ MCSchedModel SchedModel;
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
@@ -244,8 +239,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, TargetMachine &TM, bool IsLittle,
- const TargetOptions &Options);
+ const std::string &FS, const TargetMachine &TM, bool IsLittle);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -256,27 +250,30 @@ protected:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// \brief Reset the features for the ARM target.
- void resetSubtargetFeatures(const MachineFunction *MF) override;
-
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- const DataLayout *getDataLayout() const { return &DL; }
- const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- ARMJITInfo *getJITInfo() { return &JITInfo; }
- const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
- const ARMTargetLowering *getTargetLowering() const { return &TLInfo; }
- const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); }
- const ARMBaseRegisterInfo *getRegisterInfo() const {
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const ARMBaseInstrInfo *getInstrInfo() const override {
+ return InstrInfo.get();
+ }
+ const ARMTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const ARMFrameLowering *getFrameLowering() const override {
+ return FrameLowering.get();
+ }
+ const ARMBaseRegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
private:
const DataLayout DL;
ARMSelectionDAGInfo TSInfo;
- ARMJITInfo JITInfo;
// Either Thumb1InstrInfo or Thumb2InstrInfo.
std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
ARMTargetLowering TLInfo;
@@ -284,7 +281,7 @@ private:
std::unique_ptr<ARMFrameLowering> FrameLowering;
void initializeEnvironment();
- void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
public:
void computeIssueWidth();
@@ -411,6 +408,10 @@ public:
bool isRClass() const { return ARMProcClass == RClass; }
bool isAClass() const { return ARMProcClass == AClass; }
+ bool isV6M() const {
+ return isThumb1Only() && isMClass();
+ }
+
bool isR9Reserved() const { return IsR9Reserved; }
bool useMovt(const MachineFunction &MF) const;
@@ -432,19 +433,16 @@ public:
bool hasSinCos() const;
/// True for some subtargets at > -O0.
- bool enablePostMachineScheduler() const;
-
- /// enablePostRAScheduler - True at 'More' optimization.
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const override;
+ bool enablePostMachineScheduler() const override;
- // enableAtomicExpandLoadLinked - True if we need to expand our atomics.
- bool enableAtomicExpandLoadLinked() const override;
+ // enableAtomicExpand - True if we need to expand our atomics.
+ bool enableAtomicExpand() const override;
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
@@ -454,6 +452,7 @@ public:
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
/// symbol.
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index d85194b..88d6c5e 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -13,7 +13,9 @@
#include "ARM.h"
#include "ARMTargetMachine.h"
#include "ARMFrameLowering.h"
+#include "ARMTargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
@@ -42,6 +44,13 @@ extern "C" void LLVMInitializeARMTarget() {
RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget);
}
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
+ return make_unique<TargetLoweringObjectFileMachO>();
+ if (TT.isOSWindows())
+ return make_unique<TargetLoweringObjectFileCOFF>();
+ return make_unique<ARMElfTargetObjectFile>();
+}
/// TargetMachine ctor - Create an ARM architecture model.
///
@@ -51,7 +60,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, *this, isLittle, Options) {
+ TLOF(createTLOF(Triple(getTargetTriple()))),
+ Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
// Default to triple-appropriate float ABI
if (Options.FloatABIType == FloatABI::Default)
@@ -59,6 +69,46 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
}
+ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
+
+const ARMSubtarget *
+ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ Attribute SFAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
+ bool SoftFloat = !SFAttr.hasAttribute(Attribute::None)
+ ? SFAttr.getValueAsString() == "true"
+ : Options.UseSoftFloat;
+
+ auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true"
+ : "use-soft-float=false")];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+ }
+ return I.get();
+}
+
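The new getSubtargetImpl(const Function &) caches one subtarget per distinct CPU/feature-string/soft-float combination, using the concatenated strings as the map key. A standalone sketch of that caching scheme (plain C++, simplified types, no resetTargetOptions step):

#include <cassert>
#include <map>
#include <memory>
#include <string>

struct Subtarget {
  std::string CPU, FS;
  bool SoftFloat;
};

class SubtargetCache {
  std::map<std::string, std::unique_ptr<Subtarget>> Map;

public:
  // The key concatenates everything that may legitimately differ between two
  // functions; identical keys share one lazily created subtarget object.
  const Subtarget *get(const std::string &CPU, const std::string &FS,
                       bool SoftFloat) {
    std::string Key =
        CPU + FS + (SoftFloat ? "use-soft-float=true" : "use-soft-float=false");
    auto &Slot = Map[Key];
    if (!Slot) // first request with this key: build the subtarget
      Slot.reset(new Subtarget{CPU, FS, SoftFloat});
    return Slot.get();
  }
};

int main() {
  SubtargetCache Cache;
  const Subtarget *A = Cache.get("cortex-a9", "+neon", false);
  const Subtarget *B = Cache.get("cortex-a9", "+neon", false);
  const Subtarget *C = Cache.get("cortex-a9", "+neon", true);
  assert(A == B && A != C); // same key shares, different key rebuilds
  return 0;
}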
void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// Add first the target-independent BasicTTI pass, then our ARM pass. This
// allows the ARM pass to delegate to the target independent layer when
@@ -158,7 +208,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void ARMPassConfig::addIRPasses() {
- addPass(createAtomicExpandLoadLinkedPass(TM));
+ if (TM->Options.ThreadModel == ThreadModel::Single)
+ addPass(createLowerAtomicPass());
+ else
+ addPass(createAtomicExpandPass(TM));
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
@@ -244,10 +297,3 @@ bool ARMPassConfig::addPreEmitPass() {
return true;
}
-
-bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Machine code emitter pass for ARM.
- PM.add(createARMJITCodeEmitterPass(*this, JCE));
- return false;
-}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index b72b1df..fba0ec2 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMTARGETMACHINE_H
-#define ARMTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
#include "ARMInstrInfo.h"
#include "ARMSubtarget.h"
@@ -23,7 +23,11 @@ namespace llvm {
class ARMBaseTargetMachine : public LLVMTargetMachine {
protected:
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
ARMSubtarget Subtarget;
+ bool isLittle;
+ mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
+
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -31,30 +35,10 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool isLittle);
+ ~ARMBaseTargetMachine() override;
const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const ARMBaseRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const ARMTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const ARMBaseInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const ARMFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const InstrItineraryData *getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
+ const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
/// \brief Register ARM analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
@@ -62,7 +46,9 @@ public:
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
/// ARMTargetMachine - ARM target machine.
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c926421..98e8763 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H
-#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index a2ace62..ec834e8 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -49,7 +49,7 @@ public:
ARMTTI(const ARMBaseTargetMachine *TM)
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeARMTTIPass(*PassRegistry::getPassRegistry());
}
@@ -104,7 +104,7 @@ public:
return 32;
}
- unsigned getMaximumUnrollFactor() const override {
+ unsigned getMaxInterleaveFactor() const override {
// These are out of order CPUs:
if (ST->isCortexA15() || ST->isSwift())
return 2;
@@ -126,10 +126,11 @@ public:
unsigned getAddressComputationCost(Type *Val,
bool IsComplex) const override;
- unsigned
- getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info = OK_AnyValue,
- OperandValueKind Op2Info = OK_AnyValue) const override;
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue,
+ OperandValueKind Op2Info = OK_AnyValue,
+ OperandValueProperties Opd1PropInfo = OP_None,
+ OperandValueProperties Opd2PropInfo = OP_None) const override;
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override;
@@ -389,6 +390,13 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->getScalarSizeInBits() <= 32)
return 3;
+ // Cross-class copies are expensive on many microarchitectures,
+ // so assume they are expensive by default.
+ if ((Opcode == Instruction::InsertElement ||
+ Opcode == Instruction::ExtractElement) &&
+ ValTy->getVectorElementType()->isIntegerTy())
+ return 3;
+
return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
}
@@ -497,9 +505,10 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
+unsigned ARMTTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -555,8 +564,8 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
if (Idx != -1)
return LT.first * CostTbl[Idx].Cost;
- unsigned Cost =
- TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+ unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk
index 095955b..55a5775 100644
--- a/lib/Target/ARM/Android.mk
+++ b/lib/Target/ARM/Android.mk
@@ -19,7 +19,6 @@ arm_codegen_SRC_FILES := \
ARMAsmPrinter.cpp \
ARMBaseInstrInfo.cpp \
ARMBaseRegisterInfo.cpp \
- ARMCodeEmitter.cpp \
ARMConstantIslandPass.cpp \
ARMConstantPoolValue.cpp \
ARMExpandPseudoInsts.cpp \
@@ -29,7 +28,6 @@ arm_codegen_SRC_FILES := \
ARMISelDAGToDAG.cpp \
ARMISelLowering.cpp \
ARMInstrInfo.cpp \
- ARMJITInfo.cpp \
ARMLoadStoreOptimizer.cpp \
ARMMCInstLower.cpp \
ARMMachineFunctionInfo.cpp \
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index b62706c..9cc89bd 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -129,12 +129,13 @@ public:
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
UnwindContext UC;
ARMTargetStreamer &getTargetStreamer() {
+ assert(getParser().getStreamer().getTargetStreamer() &&
+ "do not have a target streamer");
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<ARMTargetStreamer &>(TS);
}
@@ -173,20 +174,16 @@ class ARMAsmParser : public MCTargetAsmParser {
ITState.CurPosition = ~0U; // Done with the IT block after this.
}
-
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) {
- return Parser.Note(L, Msg, Ranges);
+ return getParser().Note(L, Msg, Ranges);
}
bool Warning(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None) {
- return Parser.Warning(L, Msg, Ranges);
+ return getParser().Warning(L, Msg, Ranges);
}
bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None) {
- return Parser.Error(L, Msg, Ranges);
+ return getParser().Error(L, Msg, Ranges);
}
int tryParseRegister();
@@ -265,9 +262,15 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasARM() const {
return !(STI.getFeatureBits() & ARM::FeatureNoARM);
}
+ bool hasThumb2DSP() const {
+ return STI.getFeatureBits() & ARM::FeatureDSPThumb2;
+ }
+ bool hasD16() const {
+ return STI.getFeatureBits() & ARM::FeatureD16;
+ }
void SwitchMode() {
- unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
bool isMClass() const {
@@ -290,6 +293,7 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &);
OperandMatchResultTy parseProcIFlagsOperand(OperandVector &);
OperandMatchResultTy parseMSRMaskOperand(OperandVector &);
+ OperandMatchResultTy parseBankedRegOperand(OperandVector &);
OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low,
int High);
OperandMatchResultTy parsePKHLSLImm(OperandVector &O) {
@@ -329,10 +333,9 @@ public:
};
- ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) {
+ ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
// Cache the MCRegisterInfo.
@@ -359,7 +362,7 @@ public:
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
void onLabelParsed(MCSymbol *Symbol) override;
};
@@ -383,6 +386,7 @@ class ARMOperand : public MCParsedAsmOperand {
k_Memory,
k_PostIndexRegister,
k_MSRMask,
+ k_BankedReg,
k_ProcIFlags,
k_VectorIndex,
k_Register,
@@ -435,6 +439,10 @@ class ARMOperand : public MCParsedAsmOperand {
unsigned Val;
};
+ struct BankedRegOp {
+ unsigned Val;
+ };
+
struct TokOp {
const char *Data;
unsigned Length;
@@ -517,6 +525,7 @@ class ARMOperand : public MCParsedAsmOperand {
struct ITMaskOp ITMask;
struct IFlagsOp IFlags;
struct MMaskOp MMask;
+ struct BankedRegOp BankedReg;
struct TokOp Tok;
struct RegOp Reg;
struct VectorListOp VectorList;
@@ -585,6 +594,9 @@ public:
case k_MSRMask:
MMask = o.MMask;
break;
+ case k_BankedReg:
+ BankedReg = o.BankedReg;
+ break;
case k_ProcIFlags:
IFlags = o.IFlags;
break;
@@ -679,6 +691,11 @@ public:
return MMask.Val;
}
+ unsigned getBankedReg() const {
+ assert(Kind == k_BankedReg && "Invalid access!");
+ return BankedReg.Val;
+ }
+
bool isCoprocNum() const { return Kind == k_CoprocNum; }
bool isCoprocReg() const { return Kind == k_CoprocReg; }
bool isCoprocOption() const { return Kind == k_CoprocOption; }
@@ -1384,6 +1401,7 @@ public:
}
bool isMSRMask() const { return Kind == k_MSRMask; }
+ bool isBankedReg() const { return Kind == k_BankedReg; }
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
// NEON operands.
@@ -1601,9 +1619,18 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
if (!CE) return false;
- int64_t Value = CE->getValue();
- // i16 value in the range [0,255] or [0x0100, 0xff00]
- return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00);
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi16splat(Value);
+ }
+
+ bool isNEONi16splatNot() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi16splat(~Value & 0xffff);
}
bool isNEONi32splat() const {
@@ -1614,12 +1641,18 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
if (!CE) return false;
- int64_t Value = CE->getValue();
- // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
- return (Value >= 0 && Value < 256) ||
- (Value >= 0x0100 && Value <= 0xff00) ||
- (Value >= 0x010000 && Value <= 0xff0000) ||
- (Value >= 0x01000000 && Value <= 0xff000000);
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi32splat(Value);
+ }
+
+ bool isNEONi32splatNot() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ unsigned Value = CE->getValue();
+ return ARM_AM::isNEONi32splat(~Value);
}
bool isNEONByteReplicate(unsigned NumBytes) const {
@@ -1655,6 +1688,7 @@ public:
int64_t Value = CE->getValue();
// i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
// for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ // FIXME: This is probably wrong and a copy and paste from the previous example
return (Value >= 0 && Value < 256) ||
(Value >= 0x0100 && Value <= 0xff00) ||
(Value >= 0x010000 && Value <= 0xff0000) ||
@@ -1670,6 +1704,7 @@ public:
int64_t Value = ~CE->getValue();
// i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
// for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ // FIXME: This is probably wrong and a copy and paste from the previous example
return (Value >= 0 && Value < 256) ||
(Value >= 0x0100 && Value <= 0xff00) ||
(Value >= 0x010000 && Value <= 0xff0000) ||
@@ -2334,6 +2369,11 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
}
+ void addBankedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getBankedReg())));
+ }
+
void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
@@ -2378,10 +2418,16 @@ public:
// The immediate encodes the type of constant as well as the value.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
- if (Value >= 256)
- Value = (Value >> 8) | 0xa00;
- else
- Value |= 0x800;
+ Value = ARM_AM::encodeNEONi16splat(Value);
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff);
Inst.addOperand(MCOperand::CreateImm(Value));
}
@@ -2390,12 +2436,16 @@ public:
// The immediate encodes the type of constant as well as the value.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
unsigned Value = CE->getValue();
- if (Value >= 256 && Value <= 0xff00)
- Value = (Value >> 8) | 0x200;
- else if (Value > 0xffff && Value <= 0xff0000)
- Value = (Value >> 16) | 0x400;
- else if (Value > 0xffffff)
- Value = (Value >> 24) | 0x600;
+ Value = ARM_AM::encodeNEONi32splat(Value);
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ Value = ARM_AM::encodeNEONi32splat(~Value);
Inst.addOperand(MCOperand::CreateImm(Value));
}
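The removed inline code above shows the encoding the ARM_AM::encodeNEON*splat helpers now centralize: shift the surviving byte down and set marker bits recording which byte of the element it occupied. A standalone transcription of that removed logic (plain C++, for reference only; the complemented variants feed the complement through the same mapping):

#include <cassert>
#include <cstdint>

static uint32_t encodeI16Splat(uint32_t V) {
  return V >= 0x100 ? ((V >> 8) | 0xa00)  // byte sits in bits 15:8
                    : (V | 0x800);        // byte sits in bits 7:0
}

static uint32_t encodeI32Splat(uint32_t V) {
  if (V >= 0x100 && V <= 0xff00)
    return (V >> 8) | 0x200;              // bits 15:8
  if (V > 0xffff && V <= 0xff0000)
    return (V >> 16) | 0x400;             // bits 23:16
  if (V > 0xffffff)
    return (V >> 24) | 0x600;             // bits 31:24
  return V;                               // bits 7:0, no marker bits
}

int main() {
  assert(encodeI16Splat(0x00ab) == 0x8ab);
  assert(encodeI16Splat(0xab00) == 0xaab);
  assert(encodeI32Splat(0x00ab0000) == 0x4ab);
  return 0;
}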
@@ -2736,6 +2786,14 @@ public:
Op->EndLoc = S;
return Op;
}
+
+ static std::unique_ptr<ARMOperand> CreateBankedReg(unsigned Reg, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_BankedReg);
+ Op->BankedReg.Val = Reg;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
};
} // end anonymous namespace.
@@ -2769,6 +2827,9 @@ void ARMOperand::print(raw_ostream &OS) const {
case k_MSRMask:
OS << "<mask: " << getMSRMask() << ">";
break;
+ case k_BankedReg:
+ OS << "<banked reg: " << getBankedReg() << ">";
+ break;
case k_Immediate:
getImm()->print(OS);
break;
@@ -2871,8 +2932,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
- StartLoc = Parser.getTok().getLoc();
- EndLoc = Parser.getTok().getEndLoc();
+ const AsmToken &Tok = getParser().getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
RegNo = tryParseRegister();
return (RegNo == (unsigned)-1);
@@ -2883,6 +2945,7 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo,
/// returned. Otherwise return -1.
///
int ARMAsmParser::tryParseRegister() {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
@@ -2924,6 +2987,10 @@ int ARMAsmParser::tryParseRegister() {
return Entry->getValue();
}
+ // Some FPUs only have 16 D registers, so D16-D31 are invalid
+ if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
+ return -1;
+
Parser.Lex(); // Eat identifier token.
return RegNum;
@@ -2935,6 +3002,7 @@ int ARMAsmParser::tryParseRegister() {
// consumed in the process of trying to parse the shifter (i.e., when it is
// indeed a shifter operand, but malformed).
int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3037,6 +3105,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
/// TODO this is likely to change to allow different register types and or to
/// parse for a specific register type.
bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &RegTok = Parser.getTok();
int RegNo = tryParseRegister();
if (RegNo == -1)
@@ -3118,9 +3187,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
return -1;
switch (Name[1]) {
default: return -1;
- // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON)
- case '0': return CoprocOp == 'p'? -1: 10;
- case '1': return CoprocOp == 'p'? -1: 11;
+ // CP10 and CP11 are VFP/NEON and so vector instructions should be used.
+ // However, old cores (v5/v6) did use them in that way.
+ case '0': return 10;
+ case '1': return 11;
case '2': return 12;
case '3': return 13;
case '4': return 14;
@@ -3132,6 +3202,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
/// parseITCondCode - Try to parse a condition code for an IT instruction.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseITCondCode(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3169,6 +3240,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) {
/// number, the token is eaten and the operand is added to the operand list.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3177,6 +3249,9 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
if (Num == -1)
return MatchOperand_NoMatch;
+ // ARMv7 and v8 don't allow cp10/cp11 due to VFP/NEON specific instructions
+ if ((hasV7Ops() || hasV8Ops()) && (Num == 10 || Num == 11))
+ return MatchOperand_NoMatch;
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
@@ -3188,6 +3263,7 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
/// number, the token is eaten and the operand is added to the operand list.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3206,6 +3282,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
/// coproc_option : '{' imm0_255 '}'
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
// If this isn't a '{', this isn't a coprocessor immediate operand.
@@ -3283,6 +3360,7 @@ static unsigned getDRegFromQReg(unsigned QReg) {
/// Parse a register list.
bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
assert(Parser.getTok().is(AsmToken::LCurly) &&
"Token is not a Left Curly Brace");
SMLoc S = Parser.getTok().getLoc();
@@ -3414,6 +3492,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
// Helper function to parse the lane index for vector lists.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
Index = 0; // Always return a defined index value.
if (Parser.getTok().is(AsmToken::LBrac)) {
Parser.Lex(); // Eat the '['.
@@ -3465,6 +3544,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
// parse a vector register list
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseVectorList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
VectorLaneTy LaneKind;
unsigned LaneIndex;
SMLoc S = Parser.getTok().getLoc();
@@ -3716,6 +3796,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
unsigned Opt;
@@ -3787,6 +3868,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
unsigned Opt;
@@ -3838,6 +3920,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3872,6 +3955,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3892,9 +3976,6 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
// should really only be allowed when writing a special register. Note
// they get dropped in the MRS instruction reading a special register as
// the SYSm field is only 8 bits.
- //
- // FIXME: the _g and _nzcvqg versions are only allowed if the processor
- // includes the DSP extension but that is not checked.
.Case("apsr", 0x800)
.Case("apsr_nzcvq", 0x800)
.Case("apsr_g", 0x400)
@@ -3926,6 +4007,11 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
+ if (!hasThumb2DSP() && (FlagsVal & 0x400))
+ // The _g and _nzcvqg versions are only valid if the DSP extension is
+ // available.
+ return MatchOperand_NoMatch;
+
if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813)
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
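
For reference, a small standalone check of the flag encoding being tested above, assuming the bit assignment visible in the switch: 0x800 carries the NZCVQ ("_nzcvq") bits and 0x400 the GE ("_g") bits, so any mask with 0x400 set needs the DSP extension.

    #include <cassert>

    int main() {
      const unsigned ApsrG = 0x400, ApsrNzcvq = 0x800, ApsrNzcvqg = 0xc00;
      assert((ApsrNzcvqg & 0x400) && (ApsrG & 0x400)); // both need DSP
      assert((ApsrNzcvq & 0x400) == 0);                // fine without DSP
      return 0;
    }
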
@@ -3998,9 +4084,67 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
+/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for
+/// use in the MRS/MSR instructions added to support virtualization.
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ StringRef RegName = Tok.getString();
+
+ // The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM
+ // and bit 5 is R.
+ unsigned Encoding = StringSwitch<unsigned>(RegName.lower())
+ .Case("r8_usr", 0x00)
+ .Case("r9_usr", 0x01)
+ .Case("r10_usr", 0x02)
+ .Case("r11_usr", 0x03)
+ .Case("r12_usr", 0x04)
+ .Case("sp_usr", 0x05)
+ .Case("lr_usr", 0x06)
+ .Case("r8_fiq", 0x08)
+ .Case("r9_fiq", 0x09)
+ .Case("r10_fiq", 0x0a)
+ .Case("r11_fiq", 0x0b)
+ .Case("r12_fiq", 0x0c)
+ .Case("sp_fiq", 0x0d)
+ .Case("lr_fiq", 0x0e)
+ .Case("lr_irq", 0x10)
+ .Case("sp_irq", 0x11)
+ .Case("lr_svc", 0x12)
+ .Case("sp_svc", 0x13)
+ .Case("lr_abt", 0x14)
+ .Case("sp_abt", 0x15)
+ .Case("lr_und", 0x16)
+ .Case("sp_und", 0x17)
+ .Case("lr_mon", 0x1c)
+ .Case("sp_mon", 0x1d)
+ .Case("elr_hyp", 0x1e)
+ .Case("sp_hyp", 0x1f)
+ .Case("spsr_fiq", 0x2e)
+ .Case("spsr_irq", 0x30)
+ .Case("spsr_svc", 0x32)
+ .Case("spsr_abt", 0x34)
+ .Case("spsr_und", 0x36)
+ .Case("spsr_mon", 0x3c)
+ .Case("spsr_hyp", 0x3e)
+ .Default(~0U);
+
+ if (Encoding == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateBankedReg(Encoding, S));
+ return MatchOperand_Success;
+}
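
A standalone sketch of the encoding used by the table above (bit 5 is R, bits 4-0 are SysM, per the B9.2.3 reference in the comment); the helper is illustrative and not part of the parser.

    #include <cassert>

    static unsigned encodeBankedReg(bool R, unsigned SysM) {
      return (static_cast<unsigned>(R) << 5) | (SysM & 0x1f);
    }

    int main() {
      assert(encodeBankedReg(false, 0x10) == 0x10); // lr_irq
      assert(encodeBankedReg(true, 0x0e) == 0x2e);  // spsr_fiq
      return 0;
    }
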
+
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
int High) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) {
Error(Parser.getTok().getLoc(), Op + " operand expected.");
@@ -4048,6 +4192,7 @@ ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -4077,6 +4222,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
/// n == 32 encoded as n == 0.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseShifterImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -4147,6 +4293,7 @@ ARMAsmParser::parseShifterImm(OperandVector &Operands) {
/// ror #n 'n' in {0, 8, 16, 24}
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseRotImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier))
@@ -4193,6 +4340,7 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) {
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseBitfield(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
if (Parser.getTok().isNot(AsmToken::Hash) &&
@@ -4269,6 +4417,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
// This method must return MatchOperand_NoMatch without consuming any tokens
// in the case where there is no match, as other alternatives take other
// parse methods.
+ MCAsmParser &Parser = getParser();
AsmToken Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
bool haveEaten = false;
@@ -4321,6 +4470,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
// This method must return MatchOperand_NoMatch without consuming any tokens
// in the case where there is no match, as other alternatives take other
// parse methods.
+ MCAsmParser &Parser = getParser();
AsmToken Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
@@ -4458,6 +4608,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
/// Parse an ARM memory expression, return false if successful else return true
/// or an error. The first token must be a '[' when called.
bool ARMAsmParser::parseMemory(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S, E;
assert(Parser.getTok().is(AsmToken::LBrac) &&
"Token is not a Left Bracket");
@@ -4649,6 +4800,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) {
/// return true if it parses a shift otherwise it returns false.
bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
unsigned &Amount) {
+ MCAsmParser &Parser = getParser();
SMLoc Loc = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -4709,6 +4861,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
/// parseFPImm - A floating point immediate expression operand.
ARMAsmParser::OperandMatchResultTy
ARMAsmParser::parseFPImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
// Anything that can accept a floating point constant as an operand
// needs to go through here, as the regular parseExpression is
// integer only.
@@ -4789,6 +4942,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
 /// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ MCAsmParser &Parser = getParser();
SMLoc S, E;
// Check if the current operand has a custom associated parser, if so, try to
@@ -4921,6 +5075,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
// :lower16: and :upper16:.
bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ MCAsmParser &Parser = getParser();
RefKind = ARMMCExpr::VK_ARM_None;
// consume an optional '#' (GNU compatibility)
@@ -5271,7 +5426,7 @@ static bool isDataTypeToken(StringRef Tok) {
static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
unsigned VariantID);
static bool RequiresVFPRegListValidation(StringRef Inst,
@@ -5296,6 +5451,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst,
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
// FIXME: Can this be done via tablegen in some fashion?
bool RequireVFPRegisterListCheck;
bool AcceptSinglePrecisionOnly;
@@ -5309,7 +5465,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// The generic tblgen'erated code does this later, at the start of
// MatchInstructionImpl(), but that's too late for aliases that include
// any sort of suffix.
- unsigned AvailableFeatures = getAvailableFeatures();
+ uint64_t AvailableFeatures = getAvailableFeatures();
unsigned AssemblerDialect = getParser().getAssemblerDialect();
applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
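
The unsigned-to-uint64_t widening in this and the surrounding hunks tracks the subtarget feature mask outgrowing 32 bits; with a 32-bit mask a feature index of 32 or more would be lost. A minimal demonstration (the feature index is hypothetical):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned FeatureIndex = 35;            // hypothetical feature bit
      uint64_t Wide = uint64_t(1) << FeatureIndex; // well-defined for 64 bits
      assert(Wide != 0);
      // The same shift into a 32-bit mask would be undefined/truncated,
      // which is why the feature plumbing moves to uint64_t.
      return 0;
    }
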
@@ -5415,6 +5571,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Operands.push_back(ARMOperand::CreateImm(
MCConstantExpr::Create(ProcessorIMod, getContext()),
NameLoc, NameLoc));
+ } else if (Mnemonic == "cps" && isMClass()) {
+ return Error(NameLoc, "instruction 'cps' requires effect for M-class");
}
// Add the remaining tokens in the mnemonic.
@@ -5546,6 +5704,48 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
}
+ // If first 2 operands of a 3 operand instruction are the same
+ // then transform to 2 operand version of the same instruction
+ // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1'
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (isThumbOne() && Operands.size() == 6 &&
+ (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" ||
+ Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" ||
+ Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) {
+ ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]);
+ ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]);
+
+ // If both registers are the same then remove one of them from
+ // the operand list.
+ if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) {
+      // If the third operand (Op5) is a register and the instruction is
+      // adds/sub, do not transform, as the backend already handles this
+      // instruction correctly.

+ if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) {
+ Operands.erase(Operands.begin() + 3);
+ if (Mnemonic == "add" && !CarrySetting) {
+ // Special case for 'add' (not 'adds') instruction must
+ // remove the CCOut operand as well.
+ Operands.erase(Operands.begin() + 1);
+ }
+ }
+ }
+ }
+
+ // If instruction is 'add' and first two register operands
+ // use SP register, then remove one of the SP registers from
+ // the instruction.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) {
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
+ ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) {
+ Operands.erase(Operands.begin() + 2);
+ }
+ }
+
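
A rough sketch of the first rewrite above, using plain strings rather than the ARMOperand class: when the first two register operands of the three-operand form match, one copy is dropped, so "adds r0, r0, #1" is matched as the two-operand "adds r0, #1".

    #include <string>
    #include <vector>

    static std::vector<std::string>
    toTwoOperandForm(std::vector<std::string> Ops) {
      // e.g. {"r0", "r0", "#1"} for "adds r0, r0, #1"
      if (Ops.size() == 3 && Ops[0] == Ops[1])
        Ops.erase(Ops.begin() + 1); // -> {"r0", "#1"}
      return Ops;
    }
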
// GNU Assembler extension (compatibility)
if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
@@ -5728,6 +5928,48 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"source operands must be sequential");
return false;
}
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_PRE_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_POST_IMM:
+ case ARM::STRB_POST_REG: {
+ // Rt must be different from Rn.
+ const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg());
+
+ if (Rt == Rn)
+ return Error(Operands[3]->getStartLoc(),
+ "source register and base register can't be identical");
+ return false;
+ }
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST: {
+ // Rt must be different from Rn.
+ const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg());
+
+ if (Rt == Rn)
+ return Error(Operands[3]->getStartLoc(),
+ "destination register and base register can't be identical");
+ return false;
+ }
case ARM::SBFX:
case ARM::UBFX: {
// Width must be in range [1, 32-lsb].
@@ -5764,7 +6006,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Operands[3]->getStartLoc(),
"writeback operator '!' not allowed when base register "
"in register list");
-
+ if (listContainsReg(Inst, 3 + HasWritebackToken, ARM::SP))
+ return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
+ "SP not allowed in register list");
break;
}
case ARM::LDMIA_UPD:
@@ -5775,7 +6019,19 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
// UNPREDICTABLE on v7 upwards. Goodness knows what they did before.
if (!hasV7Ops())
break;
- // Fallthrough
+ if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
+ return Error(Operands.back()->getStartLoc(),
+ "writeback register not allowed in register list");
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB: {
+ if (listContainsReg(Inst, 3, ARM::SP))
+ return Error(Operands.back()->getStartLoc(),
+ "SP not allowed in register list");
+ break;
+ }
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
case ARM::t2STMIA_UPD:
@@ -5783,6 +6039,10 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
return Error(Operands.back()->getStartLoc(),
"writeback register not allowed in register list");
+
+ if (listContainsReg(Inst, 4, ARM::SP))
+ return Error(Operands.back()->getStartLoc(),
+ "SP not allowed in register list");
break;
}
case ARM::sysLDMIA_UPD:
@@ -5851,6 +6111,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Operands[4]->getStartLoc(),
"writeback operator '!' not allowed when base register "
"in register list");
+ if (listContainsReg(Inst, 4, ARM::SP) && !inITBlock())
+ return Error(Operands.back()->getStartLoc(),
+ "SP not allowed in register list");
break;
}
case ARM::tADDrSP: {
@@ -8010,7 +8273,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
// Some high-register supporting Thumb1 encodings only allow both registers
// to be from r0-r7 when in Thumb2.
- else if (Opc == ARM::tADDhirr && isThumbOne() &&
+ else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
isARMLowRegister(Inst.getOperand(2).getReg()))
return Match_RequiresThumb2;
@@ -8028,10 +8291,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
}
}
-static const char *getSubtargetFeatureName(unsigned Val);
+static const char *getSubtargetFeatureName(uint64_t Val);
bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
@@ -8085,7 +8348,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Special case the error message for the very common case where only
// a single subtarget feature is missing (Thumb vs. ARM, e.g.).
std::string Msg = "instruction requires:";
- unsigned Mask = 1;
+ uint64_t Mask = 1;
for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
if (ErrorInfo & Mask) {
Msg += " ";
@@ -8097,7 +8360,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -8174,6 +8437,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
const MCObjectFileInfo::Environment Format =
getContext().getObjectFileInfo()->getObjectFileType();
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+ bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
@@ -8225,7 +8489,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
else if (IDVal == ".thumb_set")
return parseDirectiveThumbSet(DirectiveID.getLoc());
- if (!IsMachO) {
+ if (!IsMachO && !IsCOFF) {
if (IDVal == ".arch")
return parseDirectiveArch(DirectiveID.getLoc());
else if (IDVal == ".cpu")
@@ -8256,6 +8520,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
/// ::= .short expression [, expression]*
/// ::= .word expression [, expression]*
bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -8285,6 +8550,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
/// parseDirectiveThumb
/// ::= .thumb
bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
Error(L, "unexpected token in directive");
return false;
@@ -8306,6 +8572,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
/// parseDirectiveARM
/// ::= .arm
bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
Error(L, "unexpected token in directive");
return false;
@@ -8334,12 +8601,13 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
/// parseDirectiveThumbFunc
/// ::= .thumbfunc symbol_name
bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
+ MCAsmParser &Parser = getParser();
+ const auto Format = getContext().getObjectFileInfo()->getObjectFileType();
+ bool IsMachO = Format == MCObjectFileInfo::IsMachO;
  // Darwin asm has (optionally) function name after .thumb_func directive
// ELF doesn't
- if (isMachO) {
+ if (IsMachO) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::EndOfStatement)) {
if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) {
@@ -8356,7 +8624,8 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
+ Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Parser.eatToEndOfStatement();
return false;
}
@@ -8367,6 +8636,7 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
/// parseDirectiveSyntax
/// ::= .syntax unified | divided
bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) {
Error(L, "unexpected token in .syntax directive");
@@ -8398,6 +8668,7 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
/// parseDirectiveCode
/// ::= .code 16 | 32
bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Integer)) {
Error(L, "unexpected token in .code directive");
@@ -8442,6 +8713,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
/// parseDirectiveReq
/// ::= name .req registername
bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ MCAsmParser &Parser = getParser();
Parser.Lex(); // Eat the '.req' token.
unsigned Reg;
SMLoc SRegLoc, ERegLoc;
@@ -8460,7 +8732,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Consume the EndOfStatement
- if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) {
+ if (!RegisterReqs.insert(std::make_pair(Name, Reg)).second) {
Error(SRegLoc, "redefinition of '" + Name + "' does not match original.");
return false;
}
@@ -8471,6 +8743,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// parseDirectiveUneq
/// ::= .unreq registername
bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier)) {
Parser.eatToEndOfStatement();
Error(L, "unexpected input in .unreq directive.");
@@ -8507,6 +8780,7 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
/// ::= .eabi_attribute int, int [, "str"]
/// ::= .eabi_attribute Tag_name, int [, "str"]
bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
+ MCAsmParser &Parser = getParser();
int64_t Tag;
SMLoc TagLoc;
TagLoc = Parser.getTok().getLoc();
@@ -8617,6 +8891,32 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
return false;
}
+// FIXME: This is duplicated in getARMFPUFeatures() in
+// tools/clang/lib/Driver/Tools.cpp
+static const struct {
+ const unsigned Fpu;
+ const uint64_t Enabled;
+ const uint64_t Disabled;
+} Fpus[] = {
+ {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
+ {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
+ {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON},
+ {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON},
+ {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON},
+ {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON},
+ {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16,
+ ARM::FeatureNEON | ARM::FeatureCrypto},
+ {ARM::FP_ARMV8, ARM::FeatureFPARMv8,
+ ARM::FeatureNEON | ARM::FeatureCrypto},
+ {ARM::NEON, ARM::FeatureNEON, 0},
+ {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0},
+ {ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON,
+ ARM::FeatureCrypto},
+ {ARM::CRYPTO_NEON_FP_ARMV8,
+ ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0},
+ {ARM::SOFTVFP, 0, 0},
+};
+
/// parseDirectiveFPU
/// ::= .fpu str
bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
@@ -8632,6 +8932,18 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
return false;
}
+ for (const auto &Fpu : Fpus) {
+ if (Fpu.Fpu != ID)
+ continue;
+
+  // Need to toggle features that should be on but are off and that
+  // should be off but are on.
+ uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) |
+ (Fpu.Disabled & STI.getFeatureBits());
+ setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
+ break;
+ }
+
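
The toggle mask above can be read as plain bit algebra: the wanted-but-clear bits plus the unwanted-but-set bits, so flipping exactly those bits lands in the desired state. A standalone sketch with made-up bit values, loosely following the VFPV4 row of the table (ToggleFeature is modeled as a plain XOR of the mask here):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t VFP4 = 1u << 0, NEON = 1u << 1; // illustrative bit values
      uint64_t Bits = NEON;                          // NEON on, VFP4 off
      uint64_t Enabled = VFP4, Disabled = NEON;      // like the VFPV4 row
      uint64_t Toggle = (Enabled & ~Bits) | (Disabled & Bits);
      Bits ^= Toggle;                                // flip exactly those bits
      assert(Bits == VFP4);                          // NEON off, VFP4 on
      return 0;
    }
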
getTargetStreamer().emitFPU(ID);
return false;
}
@@ -8698,6 +9010,7 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
/// parseDirectivePersonality
/// ::= .personality name
bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
+ MCAsmParser &Parser = getParser();
bool HasExistingPersonality = UC.hasPersonality();
UC.recordPersonality(L);
@@ -8761,6 +9074,7 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
/// parseDirectiveSetFP
/// ::= .setfp fpreg, spreg [, offset]
bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
+ MCAsmParser &Parser = getParser();
// Check the ordering of unwind directives
if (!UC.hasFnStart()) {
Error(L, ".fnstart must precede .setfp directive");
@@ -8838,6 +9152,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
/// parseDirective
/// ::= .pad offset
bool ARMAsmParser::parseDirectivePad(SMLoc L) {
+ MCAsmParser &Parser = getParser();
// Check the ordering of unwind directives
if (!UC.hasFnStart()) {
Error(L, ".fnstart must precede .pad directive");
@@ -8912,6 +9227,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
/// ::= .inst.n opcode [, ...]
/// ::= .inst.w opcode [, ...]
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
+ MCAsmParser &Parser = getParser();
int Width;
if (isThumb()) {
@@ -9008,7 +9324,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
}
if (!Section) {
- getStreamer().InitSections();
+ getStreamer().InitSections(false);
Section = getStreamer().getCurrentSection().first;
}
@@ -9024,6 +9340,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
/// parseDirectivePersonalityIndex
/// ::= .personalityindex index
bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
+ MCAsmParser &Parser = getParser();
bool HasExistingPersonality = UC.hasPersonality();
UC.recordPersonalityIndex(L);
@@ -9079,6 +9396,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
/// parseDirectiveUnwindRaw
/// ::= .unwind_raw offset, opcode [, opcode...]
bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (!UC.hasFnStart()) {
Parser.eatToEndOfStatement();
Error(L, ".fnstart must precede .unwind_raw directives");
@@ -9160,6 +9478,8 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
/// parseDirectiveTLSDescSeq
/// ::= .tlsdescseq tls-variable
bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
if (getLexer().isNot(AsmToken::Identifier)) {
TokError("expected variable after '.tlsdescseq' directive");
Parser.eatToEndOfStatement();
@@ -9184,6 +9504,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
/// parseDirectiveMovSP
/// ::= .movsp reg [, #offset]
bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (!UC.hasFnStart()) {
Parser.eatToEndOfStatement();
Error(L, ".fnstart must precede .movsp directives");
@@ -9247,6 +9568,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
/// parseDirectiveObjectArch
/// ::= .object_arch name
bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier)) {
Error(getLexer().getLoc(), "unexpected token");
Parser.eatToEndOfStatement();
@@ -9303,6 +9625,8 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
/// parseDirectiveThumbSet
/// ::= .thumb_set name, value
bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
StringRef Name;
if (Parser.parseIdentifier(Name)) {
TokError("expected identifier after '.thumb_set'");
@@ -9349,8 +9673,8 @@ extern "C" void LLVMInitializeARMAsmParser() {
#define GET_MATCHER_IMPLEMENTATION
#include "ARMGenAsmMatcher.inc"
-static const struct ExtMapEntry {
- const char *Extension;
+static const struct {
+ const char *Name;
const unsigned ArchCheck;
const uint64_t Features;
} Extensions[] = {
@@ -9381,46 +9705,47 @@ static const struct ExtMapEntry {
/// parseDirectiveArchExtension
/// ::= .arch_extension [no]feature
bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
if (getLexer().isNot(AsmToken::Identifier)) {
Error(getLexer().getLoc(), "unexpected token");
Parser.eatToEndOfStatement();
return false;
}
- StringRef Extension = Parser.getTok().getString();
+ StringRef Name = Parser.getTok().getString();
SMLoc ExtLoc = Parser.getTok().getLoc();
getLexer().Lex();
bool EnableFeature = true;
- if (Extension.startswith_lower("no")) {
+ if (Name.startswith_lower("no")) {
EnableFeature = false;
- Extension = Extension.substr(2);
+ Name = Name.substr(2);
}
- for (unsigned EI = 0, EE = array_lengthof(Extensions); EI != EE; ++EI) {
- if (Extensions[EI].Extension != Extension)
+ for (const auto &Extension : Extensions) {
+ if (Extension.Name != Name)
continue;
- unsigned FB = getAvailableFeatures();
- if ((FB & Extensions[EI].ArchCheck) != Extensions[EI].ArchCheck) {
- Error(ExtLoc, "architectural extension '" + Extension + "' is not "
+ if (!Extension.Features)
+ report_fatal_error("unsupported architectural extension: " + Name);
+
+ if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) {
+ Error(ExtLoc, "architectural extension '" + Name + "' is not "
"allowed for the current base architecture");
return false;
}
- if (!Extensions[EI].Features)
- report_fatal_error("unsupported architectural extension: " + Extension);
-
- if (EnableFeature)
- FB |= ComputeAvailableFeatures(Extensions[EI].Features);
- else
- FB &= ~ComputeAvailableFeatures(Extensions[EI].Features);
-
- setAvailableFeatures(FB);
+ uint64_t ToggleFeatures = EnableFeature
+ ? (~STI.getFeatureBits() & Extension.Features)
+ : ( STI.getFeatureBits() & Extension.Features);
+ uint64_t Features =
+ ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+ setAvailableFeatures(Features);
return false;
}
- Error(ExtLoc, "unknown architectural extension: " + Extension);
+ Error(ExtLoc, "unknown architectural extension: " + Name);
Parser.eatToEndOfStatement();
return false;
}
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9b5fa75..2530640 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -2,8 +2,7 @@ set(LLVM_TARGET_DEFINITIONS ARM.td)
tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter)
-tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher)
@@ -19,7 +18,6 @@ add_llvm_target(ARMCodeGen
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
- ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
ARMExpandPseudoInsts.cpp
@@ -29,7 +27,6 @@ add_llvm_target(ARMCodeGen
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
- ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp
ARMMachineFunctionInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4d4038d..ef65418 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -20,7 +20,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -85,42 +84,34 @@ namespace {
}
namespace {
-/// ARMDisassembler - ARM disassembler for all ARM platforms.
+/// ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
- /// Constructor - Initializes the disassembler.
- ///
ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {
}
- ~ARMDisassembler() {
- }
+ ~ARMDisassembler() {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region, uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
-/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
+/// Thumb disassembler for all Thumb platforms.
class ThumbDisassembler : public MCDisassembler {
public:
- /// Constructor - Initializes the disassembler.
- ///
ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {
}
- ~ThumbDisassembler() {
- }
+ ~ThumbDisassembler() {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region, uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
private:
mutable ITStatus ITBlock;
@@ -281,6 +272,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
@@ -413,103 +406,99 @@ static MCDisassembler *createThumbDisassembler(const Target &T,
}
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
- CommentStream = &cs;
-
- uint8_t bytes[4];
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
assert(!(STI.getFeatureBits() & ARM::ModeThumb) &&
- "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!");
+ "Asked to disassemble an ARM instruction but Subtarget is in Thumb "
+ "mode!");
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, bytes) == -1) {
+ if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
   // Encoded as a little-endian 32-bit word in the stream.
- uint32_t insn = (bytes[3] << 24) |
- (bytes[2] << 16) |
- (bytes[1] << 8) |
- (bytes[0] << 0);
+ uint32_t Insn =
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
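
A worked example of the word assembly above, assuming the ARM-mode encoding of nop (0xE320F000), which sits in the byte stream little-endian as 00 F0 20 E3:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t Bytes[4] = {0x00, 0xF0, 0x20, 0xE3}; // "nop", ARM mode
      uint32_t Insn = (uint32_t(Bytes[3]) << 24) | (uint32_t(Bytes[2]) << 16) |
                      (uint32_t(Bytes[1]) << 8) | (uint32_t(Bytes[0]) << 0);
      assert(Insn == 0xE320F000u);
      return 0;
    }
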
// Calling the auto-generated decoder function.
- DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn,
- Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ DecodeStatus Result =
+ decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
// VFP and NEON instructions, similarly, are shared between ARM
// and Thumb modes.
MI.clear();
- result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
// definitions with Thumb2 where these instructions are predicable.
if (!DecodePredicateOperand(MI, 0xE, Address, this))
return MCDisassembler::Fail;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address,
+ Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
// definitions with Thumb2 where these instructions are predicable.
if (!DecodePredicateOperand(MI, 0xE, Address, this))
return MCDisassembler::Fail;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
// Add a fake predicate operand, because we share these instruction
// definitions with Thumb2 where these instructions are predicable.
if (!DecodePredicateOperand(MI, 0xE, Address, this))
return MCDisassembler::Fail;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
@@ -681,55 +670,53 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
}
DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
+ ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
- CommentStream = &cs;
-
- uint8_t bytes[4];
+ raw_ostream &OS,
+ raw_ostream &CS) const {
+ CommentStream = &CS;
assert((STI.getFeatureBits() & ARM::ModeThumb) &&
"Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
// We want to read exactly 2 bytes of data.
- if (Region.readBytes(Address, 2, bytes) == -1) {
+ if (Bytes.size() < 2) {
Size = 0;
return MCDisassembler::Fail;
}
- uint16_t insn16 = (bytes[1] << 8) | bytes[0];
- DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16,
- Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0];
+ DecodeStatus Result =
+ decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 2;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16,
- Address, this, STI);
- if (result) {
+ Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this,
+ STI);
+ if (Result) {
Size = 2;
bool InITBlock = ITBlock.instrInITBlock();
- Check(result, AddThumbPredicate(MI));
+ Check(Result, AddThumbPredicate(MI));
AddThumb1SBit(MI, InITBlock);
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableThumb216, MI, insn16,
- Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 2;
// Nested IT blocks are UNPREDICTABLE. Must be checked before we add
// the Thumb predicate.
if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock())
- result = MCDisassembler::SoftFail;
+ Result = MCDisassembler::SoftFail;
- Check(result, AddThumbPredicate(MI));
+ Check(Result, AddThumbPredicate(MI));
// If we find an IT instruction, we need to parse its condition
// code and mask operands so that we can apply them correctly
@@ -741,115 +728,115 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ITBlock.setITState(Firstcond, Mask);
}
- return result;
+ return Result;
}
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, bytes) == -1) {
+ if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
- uint32_t insn32 = (bytes[3] << 8) |
- (bytes[2] << 0) |
- (bytes[1] << 24) |
- (bytes[0] << 16);
+ uint32_t Insn32 =
+ (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
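
Unlike the plain little-endian word used in the ARM-mode path, a 32-bit Thumb2 instruction is two consecutive little-endian halfwords, which is why the shifts above swap the halves. A worked example assuming "mov.w r0, #0" (0xF04F0000, stored as 4F F0 00 00):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t Bytes[4] = {0x4F, 0xF0, 0x00, 0x00}; // "mov.w r0, #0"
      uint32_t Insn32 = (uint32_t(Bytes[3]) << 8) | (uint32_t(Bytes[2]) << 0) |
                        (uint32_t(Bytes[1]) << 24) | (uint32_t(Bytes[0]) << 16);
      assert(Insn32 == 0xF04F0000u);
      return 0;
    }
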
MI.clear();
- result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
bool InITBlock = ITBlock.instrInITBlock();
- Check(result, AddThumbPredicate(MI));
+ Check(Result, AddThumbPredicate(MI));
AddThumb1SBit(MI, InITBlock);
- return result;
+ return Result;
}
MI.clear();
- result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
- if (fieldFromInstruction(insn32, 28, 4) == 0xE) {
+ if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
MI.clear();
- result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
UpdateThumbVFPPredicate(MI);
- return result;
+ return Result;
}
}
MI.clear();
- result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ Result =
+ decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
- if (fieldFromInstruction(insn32, 28, 4) == 0xE) {
+ if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
MI.clear();
- result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
+ Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this,
+ STI);
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
}
- if (fieldFromInstruction(insn32, 24, 8) == 0xF9) {
+ if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) {
MI.clear();
- uint32_t NEONLdStInsn = insn32;
+ uint32_t NEONLdStInsn = Insn32;
NEONLdStInsn &= 0xF0FFFFFF;
NEONLdStInsn |= 0x04000000;
- result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
+ Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
}
- if (fieldFromInstruction(insn32, 24, 4) == 0xF) {
+ if (fieldFromInstruction(Insn32, 24, 4) == 0xF) {
MI.clear();
- uint32_t NEONDataInsn = insn32;
+ uint32_t NEONDataInsn = Insn32;
NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
NEONDataInsn |= 0x12000000; // Set bits 28 and 25
- result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
+ Result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- Check(result, AddThumbPredicate(MI));
- return result;
+ Check(Result, AddThumbPredicate(MI));
+ return Result;
}
MI.clear();
- uint32_t NEONCryptoInsn = insn32;
+ uint32_t NEONCryptoInsn = Insn32;
NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25
- result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
+ Result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
Address, this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
MI.clear();
- uint32_t NEONv8Insn = insn32;
+ uint32_t NEONv8Insn = Insn32;
NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
- result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
+ Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
this, STI);
- if (result != MCDisassembler::Fail) {
+ if (Result != MCDisassembler::Fail) {
Size = 4;
- return result;
+ return Result;
}
}
@@ -1015,7 +1002,11 @@ static const uint16_t DPRDecoderTable[] = {
static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo > 31)
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasD16 = featureBits & ARM::FeatureD16;
+
+ if (RegNo > 31 || (hasD16 && RegNo > 15))
return MCDisassembler::Fail;
unsigned Register = DPRDecoderTable[RegNo];
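
The guard added above, restated as a one-line standalone predicate: cores with the 16-register VFP bank (FeatureD16) expose only D0-D15, so register numbers 16-31 must fail to decode. The helper name is illustrative.

    static bool isValidDPR(unsigned RegNo, bool HasD16) {
      return RegNo <= 31 && !(HasD16 && RegNo > 15);
    }
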
@@ -2973,11 +2964,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
if (size == 0x3) {
if (align == 0)
return MCDisassembler::Fail;
- size = 4;
align = 16;
} else {
if (size == 2) {
- size = 1 << size;
align *= 8;
} else {
size = 1 << size;
@@ -3267,6 +3256,11 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
unsigned Rt = fieldFromInstruction(Insn, 12, 4);
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasMP = featureBits & ARM::FeatureMP;
+ bool hasV7Ops = featureBits & ARM::HasV7Ops;
+
if (Rn == 15) {
switch (Inst.getOpcode()) {
case ARM::t2LDRBs:
@@ -3302,11 +3296,10 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
case ARM::t2LDRSHs:
return MCDisassembler::Fail;
case ARM::t2LDRHs:
- // FIXME: this instruction is only available with MP extensions,
- // this should be checked first but we don't have access to the
- // feature bits here.
Inst.setOpcode(ARM::t2PLDWs);
break;
+ case ARM::t2LDRSBs:
+ Inst.setOpcode(ARM::t2PLIs);
default:
break;
}
@@ -3314,8 +3307,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
case ARM::t2PLDs:
- case ARM::t2PLDWs:
+ break;
case ARM::t2PLIs:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
+ break;
+ case ARM::t2PLDWs:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
@@ -3341,6 +3340,12 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
unsigned imm = fieldFromInstruction(Insn, 0, 8);
imm |= (U << 8);
imm |= (Rn << 9);
+ unsigned add = fieldFromInstruction(Insn, 9, 1);
+
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasMP = featureBits & ARM::FeatureMP;
+ bool hasV7Ops = featureBits & ARM::HasV7Ops;
if (Rn == 15) {
switch (Inst.getOpcode()) {
@@ -3375,6 +3380,13 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
case ARM::t2LDRSHi8:
return MCDisassembler::Fail;
+ case ARM::t2LDRHi8:
+ if (!add)
+ Inst.setOpcode(ARM::t2PLDWi8);
+ break;
+ case ARM::t2LDRSBi8:
+ Inst.setOpcode(ARM::t2PLIi8);
+ break;
default:
break;
}
@@ -3382,9 +3394,15 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
case ARM::t2PLDi8:
+ break;
case ARM::t2PLIi8:
- case ARM::t2PLDWi8:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
break;
+ case ARM::t2PLDWi8:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
+ break;
default:
if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3404,6 +3422,11 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
unsigned imm = fieldFromInstruction(Insn, 0, 12);
imm |= (Rn << 13);
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasMP = (featureBits & ARM::FeatureMP);
+ bool hasV7Ops = (featureBits & ARM::HasV7Ops);
+
if (Rn == 15) {
switch (Inst.getOpcode()) {
case ARM::t2LDRi12:
@@ -3438,7 +3461,10 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
case ARM::t2LDRSHi12:
return MCDisassembler::Fail;
case ARM::t2LDRHi12:
- Inst.setOpcode(ARM::t2PLDi12);
+ Inst.setOpcode(ARM::t2PLDWi12);
+ break;
+ case ARM::t2LDRSBi12:
+ Inst.setOpcode(ARM::t2PLIi12);
break;
default:
break;
@@ -3447,9 +3473,15 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn,
switch (Inst.getOpcode()) {
case ARM::t2PLDi12:
- case ARM::t2PLDWi12:
+ break;
case ARM::t2PLIi12:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
break;
+ case ARM::t2PLDWi12:
+ if (!hasV7Ops || !hasMP)
+ return MCDisassembler::Fail;
+ break;
default:
if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
@@ -3507,6 +3539,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
unsigned U = fieldFromInstruction(Insn, 23, 1);
int imm = fieldFromInstruction(Insn, 0, 12);
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ bool hasV7Ops = (featureBits & ARM::HasV7Ops);
+
if (Rt == 15) {
switch (Inst.getOpcode()) {
case ARM::t2LDRBpci:
@@ -3525,7 +3561,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
switch(Inst.getOpcode()) {
case ARM::t2PLDpci:
+ break;
case ARM::t2PLIpci:
+ if (!hasV7Ops)
+ return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
@@ -3974,7 +4013,85 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!Val) return MCDisassembler::Fail;
+ DecodeStatus S = MCDisassembler::Success;
+ uint64_t FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ if (FeatureBits & ARM::FeatureMClass) {
+ unsigned ValLow = Val & 0xff;
+
+ // Validate the SYSm value first.
+ switch (ValLow) {
+ case 0: // apsr
+ case 1: // iapsr
+ case 2: // eapsr
+ case 3: // xpsr
+ case 5: // ipsr
+ case 6: // epsr
+ case 7: // iepsr
+ case 8: // msp
+ case 9: // psp
+ case 16: // primask
+ case 20: // control
+ break;
+ case 17: // basepri
+ case 18: // basepri_max
+ case 19: // faultmask
+ if (!(FeatureBits & ARM::HasV7Ops))
+ // Values basepri, basepri_max and faultmask are only valid for v7m.
+ return MCDisassembler::Fail;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+
+ if (Inst.getOpcode() == ARM::t2MSR_M) {
+ unsigned Mask = fieldFromInstruction(Val, 10, 2);
+ if (!(FeatureBits & ARM::HasV7Ops)) {
+ // The ARMv6-M MSR bits {11-10} can be only 0b10, other values are
+ // unpredictable.
+ if (Mask != 2)
+ S = MCDisassembler::SoftFail;
+ }
+ else {
+ // The ARMv7-M architecture stores an additional 2-bit mask value in
+ // MSR bits {11-10}. The mask is used only with apsr, iapsr, eapsr and
+ // xpsr, it has to be 0b10 in other cases. Bit mask{1} indicates if
+ // the NZCVQ bits should be moved by the instruction. Bit mask{0}
+ // indicates the move for the GE{3:0} bits, the mask{0} bit can be set
+ // only if the processor includes the DSP extension.
+ if (Mask == 0 || (Mask != 2 && ValLow > 3) ||
+ (!(FeatureBits & ARM::FeatureDSPThumb2) && (Mask & 1)))
+ S = MCDisassembler::SoftFail;
+ }
+ }
+ } else {
+ // A/R class
+ if (Val == 0)
+ return MCDisassembler::Fail;
+ }
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return S;
+}
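
A standalone sketch of the field split relied on above: the low 8 bits of the operand carry SYSm and bits {11-10} the t2MSR_M write mask, so the apsr_nzcvq encoding 0x800 has SYSm == 0 and mask == 0b10 (the only mask value accepted on v6-M).

    #include <cassert>

    static unsigned field(unsigned Val, unsigned Lo, unsigned Len) {
      return (Val >> Lo) & ((1u << Len) - 1);
    }

    int main() {
      const unsigned Val = 0x800;     // apsr_nzcvq
      assert(field(Val, 0, 8) == 0);  // SYSm
      assert(field(Val, 10, 2) == 2); // mask == 0b10
      return 0;
    }
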
+
+static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+
+ unsigned R = fieldFromInstruction(Val, 5, 1);
+ unsigned SysM = fieldFromInstruction(Val, 0, 5);
+
+ // The table of encodings for these banked registers comes from B9.2.3 of the
+ // ARM ARM. There are patterns, but nothing regular enough to make this logic
+ // neater. So by fiat, these values are UNPREDICTABLE:
+ if (!R) {
+ if (SysM == 0x7 || SysM == 0xf || SysM == 0x18 || SysM == 0x19 ||
+ SysM == 0x1a || SysM == 0x1b)
+ return MCDisassembler::SoftFail;
+ } else {
+ if (SysM != 0xe && SysM != 0x10 && SysM != 0x12 && SysM != 0x14 &&
+ SysM != 0x16 && SysM != 0x1c && SysM != 0x1e)
+ return MCDisassembler::SoftFail;
+ }
+
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
}
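
The reserved-value filter in DecodeBankedReg, restated as a standalone predicate (values taken straight from the checks above; the function name is illustrative):

    #include <cassert>

    static bool isDefinedBankedReg(bool R, unsigned SysM) {
      if (!R)
        return !(SysM == 0x7 || SysM == 0xf ||
                 (SysM >= 0x18 && SysM <= 0x1b));
      return SysM == 0xe || SysM == 0x10 || SysM == 0x12 || SysM == 0x14 ||
             SysM == 0x16 || SysM == 0x1c || SysM == 0x1e;
    }

    int main() {
      assert(isDefinedBankedReg(false, 0x10)); // lr_irq
      assert(!isDefinedBankedReg(true, 0x00)); // reserved SPSR encoding
      return 0;
    }
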
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
index 52d8338..a64a8a9 100644
--- a/lib/Target/ARM/Disassembler/LLVMBuild.txt
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = ARMDisassembler
parent = ARM
-required_libraries = ARMDesc ARMInfo MC Support
+required_libraries = ARMDesc ARMInfo MCDisassembler Support
add_to_library_groups = ARM
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 228fb57..0570084 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -503,30 +503,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
// Addressing Mode #3
//===--------------------------------------------------------------------===//
-void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
-
- O << markup("<mem:") << "[";
- printRegName(O, MO1.getReg());
- O << "], " << markup(">");
-
- if (MO2.getReg()) {
- O << (char)ARM_AM::getAM3Op(MO3.getImm());
- printRegName(O, MO2.getReg());
- return;
- }
-
- unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
- O << markup("<imm:")
- << '#'
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
- << ImmOffs
- << markup(">");
-}
-
void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
raw_ostream &O,
bool AlwaysPrintImm0) {
@@ -568,13 +544,9 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
return;
}
- const MCOperand &MO3 = MI->getOperand(Op+2);
- unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
-
- if (IdxMode == ARMII::IndexModePost) {
- printAM3PostIndexOp(MI, Op, O);
- return;
- }
+ assert(ARM_AM::getAM3IdxMode(MI->getOperand(Op + 2).getImm()) !=
+ ARMII::IndexModePost &&
+ "unexpected idxmode");
printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
}
@@ -807,52 +779,56 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &Op = MI->getOperand(OpNum);
unsigned SpecRegRBit = Op.getImm() >> 4;
unsigned Mask = Op.getImm() & 0xf;
+ uint64_t FeatureBits = getAvailableFeatures();
- if (getAvailableFeatures() & ARM::FeatureMClass) {
+ if (FeatureBits & ARM::FeatureMClass) {
unsigned SYSm = Op.getImm();
unsigned Opcode = MI->getOpcode();
- // For reads of the special registers ignore the "mask encoding" bits
- // which are only for writes.
- if (Opcode == ARM::t2MRS_M)
- SYSm &= 0xff;
+
+ // For writes, handle extended mask bits if the DSP extension is present.
+ if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) {
+ switch (SYSm) {
+ case 0x400: O << "apsr_g"; return;
+ case 0xc00: O << "apsr_nzcvqg"; return;
+ case 0x401: O << "iapsr_g"; return;
+ case 0xc01: O << "iapsr_nzcvqg"; return;
+ case 0x402: O << "eapsr_g"; return;
+ case 0xc02: O << "eapsr_nzcvqg"; return;
+ case 0x403: O << "xpsr_g"; return;
+ case 0xc03: O << "xpsr_nzcvqg"; return;
+ }
+ }
+
+ // Handle the basic 8-bit mask.
+ SYSm &= 0xff;
+
+ if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::HasV7Ops)) {
+ // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an
+ // alias for MSR APSR_nzcvq.
+ switch (SYSm) {
+ case 0: O << "apsr_nzcvq"; return;
+ case 1: O << "iapsr_nzcvq"; return;
+ case 2: O << "eapsr_nzcvq"; return;
+ case 3: O << "xpsr_nzcvq"; return;
+ }
+ }
+
switch (SYSm) {
default: llvm_unreachable("Unexpected mask value!");
- case 0:
- case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for aspr
- case 0x400: O << "apsr_g"; return;
- case 0xc00: O << "apsr_nzcvqg"; return;
- case 1:
- case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr
- case 0x401: O << "iapsr_g"; return;
- case 0xc01: O << "iapsr_nzcvqg"; return;
- case 2:
- case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr
- case 0x402: O << "eapsr_g"; return;
- case 0xc02: O << "eapsr_nzcvqg"; return;
- case 3:
- case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr
- case 0x403: O << "xpsr_g"; return;
- case 0xc03: O << "xpsr_nzcvqg"; return;
- case 5:
- case 0x805: O << "ipsr"; return;
- case 6:
- case 0x806: O << "epsr"; return;
- case 7:
- case 0x807: O << "iepsr"; return;
- case 8:
- case 0x808: O << "msp"; return;
- case 9:
- case 0x809: O << "psp"; return;
- case 0x10:
- case 0x810: O << "primask"; return;
- case 0x11:
- case 0x811: O << "basepri"; return;
- case 0x12:
- case 0x812: O << "basepri_max"; return;
- case 0x13:
- case 0x813: O << "faultmask"; return;
- case 0x14:
- case 0x814: O << "control"; return;
+ case 0: O << "apsr"; return;
+ case 1: O << "iapsr"; return;
+ case 2: O << "eapsr"; return;
+ case 3: O << "xpsr"; return;
+ case 5: O << "ipsr"; return;
+ case 6: O << "epsr"; return;
+ case 7: O << "iepsr"; return;
+ case 8: O << "msp"; return;
+ case 9: O << "psp"; return;
+ case 16: O << "primask"; return;
+ case 17: O << "basepri"; return;
+ case 18: O << "basepri_max"; return;
+ case 19: O << "faultmask"; return;
+ case 20: O << "control"; return;
}
}
@@ -882,6 +858,42 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
}
}
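
The reworked printMSRMaskOperand above decodes the M-class SYSm field in three steps: the DSP-only extended masks are matched first, the value is then reduced to its low byte, and on v7-M an MSR of the plain mask is printed with the _nzcvq suffix. A minimal sketch of that lookup order, with IsMSR/HasDSP/HasV7 standing in for the opcode and feature checks (these helper names are illustrative, not part of the patch):

#include <string>

// Illustrative only: mirrors the lookup order of printMSRMaskOperand for
// M-class targets. IsMSR/HasDSP/HasV7 stand in for the opcode and feature
// checks done by the real printer.
static std::string mClassSpecialRegName(unsigned SYSm, bool IsMSR, bool HasDSP,
                                        bool HasV7) {
  if (IsMSR && HasDSP) {
    switch (SYSm) {
    case 0x400: return "apsr_g";
    case 0xc00: return "apsr_nzcvqg";
    }
  }
  SYSm &= 0xff; // Only the low byte selects the register itself.
  if (IsMSR && HasV7 && SYSm == 0)
    return "apsr_nzcvq"; // Preferred spelling of a plain APSR write on v7-M.
  switch (SYSm) {
  case 0:  return "apsr";
  case 8:  return "msp";
  case 9:  return "psp";
  case 16: return "primask";
  case 20: return "control";
  default: return "<unknown>";
  }
}
// e.g. mClassSpecialRegName(0x800, /*IsMSR=*/false, false, true) == "apsr",
//      mClassSpecialRegName(0,     /*IsMSR=*/true,  false, true) == "apsr_nzcvq".
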
+void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ uint32_t Banked = MI->getOperand(OpNum).getImm();
+ uint32_t R = (Banked & 0x20) >> 5;
+ uint32_t SysM = Banked & 0x1f;
+
+ // Nothing much we can do about this, the encodings are specified in B9.2.3 of
+ // the ARM ARM v7C, and are all over the shop.
+ if (R) {
+ O << "SPSR_";
+
+ switch(SysM) {
+ case 0x0e: O << "fiq"; return;
+ case 0x10: O << "irq"; return;
+ case 0x12: O << "svc"; return;
+ case 0x14: O << "abt"; return;
+ case 0x16: O << "und"; return;
+ case 0x1c: O << "mon"; return;
+ case 0x1e: O << "hyp"; return;
+ default: llvm_unreachable("Invalid banked SPSR register");
+ }
+ }
+
+ assert(!R && "should have dealt with SPSR regs");
+ const char *RegNames[] = {
+ "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "",
+ "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "",
+ "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und",
+ "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"
+ };
+ const char *Name = RegNames[SysM];
+ assert(Name[0] && "invalid banked register operand");
+
+ O << Name;
+}
+
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
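
printBankedRegOperand above is driven purely by the operand encoding from the ARM ARM (section B9.2.3): bit 5 selects SPSR banking and bits 4:0 carry the SysM selector. A self-contained sketch of the same decode, using a hypothetical decodeBankedReg helper that is not part of this patch:

#include <cassert>
#include <string>

// Hypothetical helper mirroring printBankedRegOperand: bit 5 selects SPSR
// banking, bits 4:0 are the SysM field from the ARM ARM (B9.2.3).
static std::string decodeBankedReg(unsigned Banked) {
  unsigned R = (Banked >> 5) & 1;
  unsigned SysM = Banked & 0x1f;
  if (R) {
    switch (SysM) {
    case 0x0e: return "SPSR_fiq";
    case 0x10: return "SPSR_irq";
    case 0x12: return "SPSR_svc";
    case 0x14: return "SPSR_abt";
    case 0x16: return "SPSR_und";
    case 0x1c: return "SPSR_mon";
    case 0x1e: return "SPSR_hyp";
    default:   assert(false && "invalid banked SPSR selector"); return "";
    }
  }
  static const char *const Names[32] = {
      "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "",
      "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "",
      "lr_irq", "sp_irq", "lr_svc",  "sp_svc",  "lr_abt",  "sp_abt", "lr_und", "sp_und",
      "",       "",       "",        "",        "lr_mon",  "sp_mon", "elr_hyp", "sp_hyp"};
  return Names[SysM];
}
// e.g. decodeBankedReg(0x2e) == "SPSR_fiq" and decodeBankedReg(0x00) == "r8_usr".
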
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index f671fe4..09fd536 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMINSTPRINTER_H
-#define ARMINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
+#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -51,7 +51,6 @@ public:
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
- void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
bool AlwaysPrintImm0);
void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
@@ -117,6 +116,7 @@ public:
void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index b6c85c2..f0eed9b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
-#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -575,6 +575,53 @@ namespace ARM_AM {
return Val;
}
+ // Generic validation for single-byte immediate (0X00, 00X0, etc).
+ static inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
+ assert(Size >= 1 && Size <= 4 && "Invalid size");
+ unsigned count = 0;
+ for (unsigned i = 0; i < Size; ++i) {
+ if (Value & 0xff) count++;
+ Value >>= 8;
+ }
+ return count == 1;
+ }
+
+ /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
+ static inline bool isNEONi16splat(unsigned Value) {
+ if (Value > 0xffff)
+ return false;
+ // i16 value with set bits only in one byte X0 or 0X.
+ return Value == 0 || isNEONBytesplat(Value, 2);
+ }
+
+ // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR
+ static inline unsigned encodeNEONi16splat(unsigned Value) {
+ assert(isNEONi16splat(Value) && "Invalid NEON splat value");
+ if (Value >= 0x100)
+ Value = (Value >> 8) | 0xa00;
+ else
+ Value |= 0x800;
+ return Value;
+ }
+
+ /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
+ static inline bool isNEONi32splat(unsigned Value) {
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
+ return Value == 0 || isNEONBytesplat(Value, 4);
+ }
+
+ /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR.
+ static inline unsigned encodeNEONi32splat(unsigned Value) {
+ assert(isNEONi32splat(Value) && "Invalid NEON splat value");
+ if (Value >= 0x100 && Value <= 0xff00)
+ Value = (Value >> 8) | 0x200;
+ else if (Value > 0xffff && Value <= 0xff0000)
+ Value = (Value >> 16) | 0x400;
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ return Value;
+ }
+
AMSubMode getLoadStoreMultipleSubMode(int Opcode);
//===--------------------------------------------------------------------===//
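
The splat helpers added above accept only immediates with exactly one non-zero byte and fold the byte position into the control bits of the returned value. A small worked example of the i16 case, assuming the same bit layout as the patch (the helper names here are illustrative):

#include <cassert>

// Illustrative mirror of isNEONBytesplat/encodeNEONi16splat: only values with
// exactly one non-zero byte are accepted, and the byte position picks the
// control bits OR'd into the returned immediate.
static bool hasSingleNonZeroByte(unsigned Value, unsigned Size) {
  unsigned Count = 0;
  for (unsigned i = 0; i < Size; ++i) {
    if (Value & 0xff) ++Count;
    Value >>= 8;
  }
  return Count == 1;
}

static unsigned encodeI16Splat(unsigned Value) {
  assert(Value <= 0xffff && (Value == 0 || hasSingleNonZeroByte(Value, 2)));
  // Low byte set:  0x00XY -> imm8 = 0xXY, control bits 0x800.
  // High byte set: 0xXY00 -> imm8 = 0xXY, control bits 0xa00.
  return Value >= 0x100 ? ((Value >> 8) | 0xa00) : (Value | 0x800);
}
// e.g. encodeI16Splat(0x00ff) == 0x8ff and encodeI16Splat(0x2000) == 0xa20.
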
diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.h b/lib/Target/ARM/MCTargetDesc/ARMArchName.h
index 34b9fc1..bc05673 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMArchName.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMARCHNAME_H
-#define ARMARCHNAME_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H
namespace llvm {
namespace ARM {
@@ -24,4 +24,4 @@ enum ArchKind {
} // namespace ARM
} // namespace llvm
-#endif // ARMARCHNAME_H
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 7acd9cc..0b2e3b0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -9,6 +9,10 @@
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMAsmBackend.h"
+#include "MCTargetDesc/ARMAsmBackendDarwin.h"
+#include "MCTargetDesc/ARMAsmBackendELF.h"
+#include "MCTargetDesc/ARMAsmBackendWinCOFF.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
@@ -35,164 +39,136 @@ namespace {
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
public:
ARMELFObjectWriter(uint8_t OSABI)
- : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
- /*HasRelocationAddend*/ false) {}
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
};
-class ARMAsmBackend : public MCAsmBackend {
- const MCSubtargetInfo* STI;
- bool isThumbMode; // Currently emitting Thumb code.
- bool IsLittleEndian; // Big or little endian.
-public:
- ARMAsmBackend(const Target &T, const StringRef TT, bool IsLittle)
- : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
- isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {}
-
- ~ARMAsmBackend() {
- delete STI;
- }
-
- unsigned getNumFixupKinds() const override {
- return ARM::NumTargetFixupKinds;
- }
-
- bool hasNOP() const {
- return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0;
- }
+const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // ARMFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_ldst_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_pcrel_10", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_thumb_adr_pcrel_10", 0, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_adr_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cp", 0, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel},
+ // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16
+ // - 19.
+ {"fixup_arm_movt_hi16", 0, 20, 0},
+ {"fixup_arm_movw_lo16", 0, 20, 0},
+ {"fixup_t2_movt_hi16", 0, 20, 0},
+ {"fixup_t2_movw_lo16", 0, 20, 0},
+ };
+ const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // ARMFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_ldst_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_pcrel_10", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_thumb_adr_pcrel_10", 8, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_adr_pcrel_12", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_arm_thumb_cp", 8, 8,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+ {"fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel},
+ // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16
+ // - 19.
+ {"fixup_arm_movt_hi16", 12, 20, 0},
+ {"fixup_arm_movw_lo16", 12, 20, 0},
+ {"fixup_t2_movt_hi16", 12, 20, 0},
+ {"fixup_t2_movw_lo16", 12, 20, 0},
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
+}
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
- const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
-// This table *must* be in the order that the fixup_* kinds are defined in
-// ARMFixupKinds.h.
-//
-// Name Offset (bits) Size (bits) Flags
-{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
-// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
-{ "fixup_arm_movt_hi16", 0, 20, 0 },
-{ "fixup_arm_movw_lo16", 0, 20, 0 },
-{ "fixup_t2_movt_hi16", 0, 20, 0 },
-{ "fixup_t2_movw_lo16", 0, 20, 0 },
- };
- const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
-// This table *must* be in the order that the fixup_* kinds are defined in
-// ARMFixupKinds.h.
-//
-// Name Offset (bits) Size (bits) Flags
-{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_thumb_adr_pcrel_10",8, 8, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_arm_thumb_cp", 8, 8, MCFixupKindInfo::FKF_IsPCRel |
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel },
-// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
-{ "fixup_arm_movt_hi16", 12, 20, 0 },
-{ "fixup_arm_movw_lo16", 12, 20, 0 },
-{ "fixup_t2_movt_hi16", 12, 20, 0 },
-{ "fixup_t2_movw_lo16", 12, 20, 0 },
- };
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
- }
-
- /// processFixupValue - Target hook to process the literal value of a fixup
- /// if necessary.
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
-
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
-
- bool mayNeedRelaxation(const MCInst &Inst) const override;
-
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override;
-
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const override;
-
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
-
- void handleAssemblerFlag(MCAssemblerFlag Flag) override {
- switch (Flag) {
- default: break;
- case MCAF_Code16:
- setIsThumb(true);
- break;
- case MCAF_Code32:
- setIsThumb(false);
- break;
- }
+void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default:
+ break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
}
-
- unsigned getPointerSize() const { return 4; }
- bool isThumb() const { return isThumbMode; }
- void setIsThumb(bool it) { isThumbMode = it; }
- bool isLittle() const { return IsLittleEndian; }
-};
+}
} // end anonymous namespace
static unsigned getRelaxedOpcode(unsigned Op) {
switch (Op) {
- default: return Op;
- case ARM::tBcc: return ARM::t2Bcc;
- case ARM::tLDRpci: return ARM::t2LDRpci;
- case ARM::tADR: return ARM::t2ADR;
- case ARM::tB: return ARM::t2B;
- case ARM::tCBZ: return ARM::tHINT;
- case ARM::tCBNZ: return ARM::tHINT;
+ default:
+ return Op;
+ case ARM::tBcc:
+ return ARM::t2Bcc;
+ case ARM::tLDRpci:
+ return ARM::t2LDRpci;
+ case ARM::tADR:
+ return ARM::t2ADR;
+ case ARM::tB:
+ return ARM::t2B;
+ case ARM::tCBZ:
+ return ARM::tHINT;
+ case ARM::tCBNZ:
+ return ARM::tHINT;
}
}
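
getRelaxedOpcode only names the wider Thumb2 replacement; whether relaxation is needed at all is decided later from the fixup value in fixupNeedsRelaxation. As a rough sketch of the range check behind the tBcc case (the constants come from the Thumb1 B<cond> encoding, not from this patch):

#include <cstdint>

// Hypothetical helper, not part of the patch: the Thumb1 B<cond> instruction
// (fixup_arm_thumb_bcc) carries a signed 8-bit offset scaled by 2, giving a
// displacement range of [-256, 254] bytes; anything outside that range must
// be relaxed to the 32-bit t2Bcc form named by getRelaxedOpcode.
static bool tBccDisplacementFits(int64_t Disp) {
  return Disp >= -256 && Disp <= 254 && (Disp % 2) == 0;
}
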
@@ -202,8 +178,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
return false;
}
-bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value,
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
switch ((unsigned)Fixup.getKind()) {
@@ -265,7 +240,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res.addOperand(MCOperand::CreateImm(14));
Res.addOperand(MCOperand::CreateReg(0));
return;
- }
+ }
// The rest of instructions we're relaxing have the same operands.
// We just need to update to the proper opcode.
@@ -276,11 +251,11 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
- const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
+ const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
- const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
- : Thumb1_16bitNopEncoding;
+ const uint16_t nopEncoding =
+ hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
uint64_t NumNops = Count / 2;
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write16(nopEncoding);
@@ -289,18 +264,26 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
// ARM mode
- const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding
- : ARMv4_NopEncoding;
+ const uint32_t nopEncoding =
+ hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write32(nopEncoding);
// FIXME: should this function return false when unable to write exactly
// 'Count' bytes with NOP encodings?
switch (Count % 4) {
- default: break; // No leftover bytes to write
- case 1: OW->Write8(0); break;
- case 2: OW->Write16(0); break;
- case 3: OW->Write16(0); OW->Write8(0xa0); break;
+ default:
+ break; // No leftover bytes to write
+ case 1:
+ OW->Write8(0);
+ break;
+ case 2:
+ OW->Write16(0);
+ break;
+ case 3:
+ OW->Write16(0);
+ OW->Write8(0xa0);
+ break;
}
return true;
@@ -313,8 +296,7 @@ static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) {
uint32_t Swapped = (Value & 0xFFFF0000) >> 16;
Swapped |= (Value & 0x0000FFFF) << 16;
return Swapped;
- }
- else
+ } else
return Value;
}
@@ -351,7 +333,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_movt_hi16:
if (!IsPCRel)
Value >>= 16;
- // Fallthrough
+ // Fallthrough
case ARM::fixup_arm_movw_lo16: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned Lo12 = Value & 0x0FFF;
@@ -363,7 +345,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_t2_movt_hi16:
if (!IsPCRel)
Value >>= 16;
- // Fallthrough
+ // Fallthrough
case ARM::fixup_t2_movw_lo16: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned i = (Value & 0x800) >> 11;
@@ -379,7 +361,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_ldst_pcrel_12:
// ARM PC-relative values are offset by 8.
Value -= 4;
- // FALLTHROUGH
+ // FALLTHROUGH
case ARM::fixup_t2_ldst_pcrel_12: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value -= 4;
@@ -438,7 +420,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_blx:
// These values don't encode the low two bits since they're always zero.
// Offset by 8 just as above.
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
+ if (const MCSymbolRefExpr *SRE =
+ dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL)
return 0;
return 0xffffff & ((Value - 8) >> 2);
@@ -447,17 +430,17 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value >>= 1; // Low bit is not encoded.
uint32_t out = 0;
- bool I = Value & 0x800000;
+ bool I = Value & 0x800000;
bool J1 = Value & 0x400000;
bool J2 = Value & 0x200000;
J1 ^= I;
J2 ^= I;
- out |= I << 26; // S bit
- out |= !J1 << 13; // J1 bit
- out |= !J2 << 11; // J2 bit
- out |= (Value & 0x1FF800) << 5; // imm6 field
- out |= (Value & 0x0007FF); // imm11 field
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm6 field
+ out |= (Value & 0x0007FF); // imm11 field
return swapHalfWords(out, IsLittleEndian);
}
@@ -498,7 +481,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
- (uint16_t)imm11Bits);
+ (uint16_t)imm11Bits);
return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
case ARM::fixup_arm_thumb_blx: {
@@ -515,7 +498,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Note that the halfwords are stored high first, low second; so we need
// to transpose the fixup value here to map properly.
uint32_t offset = (Value - 2) >> 2;
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
+ if (const MCSymbolRefExpr *SRE =
+ dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL)
offset = 0;
uint32_t signBit = (offset & 0x400000) >> 22;
@@ -528,7 +512,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
- ((uint16_t)imm10LBits) << 1);
+ ((uint16_t)imm10LBits) << 1);
return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
case ARM::fixup_arm_thumb_cp:
@@ -564,7 +548,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_pcrel_10:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
- // Fall through.
+ // Fall through.
case ARM::fixup_t2_pcrel_10: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value = Value - 4;
@@ -735,7 +719,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian);
- if (!Value) return; // Doesn't change encoding.
+ if (!Value)
+ return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
@@ -757,80 +742,36 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
-namespace {
-// FIXME: This should be in a separate file.
-class ARMWinCOFFAsmBackend : public ARMAsmBackend {
-public:
- ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple)
- : ARMAsmBackend(T, Triple, true) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
- return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
- }
-};
-
-// FIXME: This should be in a separate file.
-// ELF is an ELF of course...
-class ELFARMAsmBackend : public ARMAsmBackend {
-public:
- uint8_t OSABI;
- ELFARMAsmBackend(const Target &T, const StringRef TT,
- uint8_t OSABI, bool IsLittle)
- : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) { }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
- return createARMELFObjectWriter(OS, OSABI, isLittle());
- }
-};
-
-// FIXME: This should be in a separate file.
-class DarwinARMAsmBackend : public ARMAsmBackend {
-public:
- const MachO::CPUSubTypeARM Subtype;
- DarwinARMAsmBackend(const Target &T, const StringRef TT,
- MachO::CPUSubTypeARM st)
- : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) {
- HasDataInCodeSupport = true;
- }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
- return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- MachO::CPU_TYPE_ARM,
- Subtype);
- }
-};
-
-} // end anonymous namespace
-
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU,
- bool isLittle) {
+ const MCRegisterInfo &MRI, StringRef TT,
+ StringRef CPU, bool isLittle) {
Triple TheTriple(TT);
switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
+ default:
+ llvm_unreachable("unsupported object format");
case Triple::MachO: {
MachO::CPUSubTypeARM CS =
- StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
- .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
- .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
- .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
- .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
- .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
- .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
- .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
- .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
- .Default(MachO::CPU_SUBTYPE_ARM_V7);
-
- return new DarwinARMAsmBackend(T, TT, CS);
+ StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
+ .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
+ .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
+ .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
+ .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
+ .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
+ .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
+ .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
+ .Default(MachO::CPU_SUBTYPE_ARM_V7);
+
+ return new ARMAsmBackendDarwin(T, TT, CS);
}
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return new ARMWinCOFFAsmBackend(T, TT);
+ return new ARMAsmBackendWinCOFF(T, TT);
case Triple::ELF:
assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ELFARMAsmBackend(T, TT, OSABI, isLittle);
+ return new ARMAsmBackendELF(T, TT, OSABI, isLittle);
}
}
@@ -847,14 +788,13 @@ MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
}
MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
return createARMAsmBackend(T, MRI, TT, CPU, true);
}
MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
return createARMAsmBackend(T, MRI, TT, CPU, false);
}
-
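
The fixup tables and adjustFixupValue in this file repeatedly rely on the MOVW/MOVT split, where a 16-bit immediate is scattered into a 4-bit chunk at instruction bits 19:16 and a 12-bit chunk at bits 11:0. A minimal worked example of that split (the function name is illustrative, not from the patch):

#include <cstdint>

// Scatter a 16-bit immediate the way fixup_arm_movw_lo16/fixup_arm_movt_hi16
// expect: imm4 lands at instruction bits 19:16, imm12 at bits 11:0. applyFixup
// then OR's the result into the instruction bytes.
static uint32_t scatterMovImm16(uint32_t Imm16) {
  uint32_t Hi4 = (Imm16 & 0xF000) >> 12;
  uint32_t Lo12 = Imm16 & 0x0FFF;
  return (Hi4 << 16) | Lo12;
}
// e.g. scatterMovImm16(0xABCD) == 0x000A0BCD.
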
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
new file mode 100644
index 0000000..f4f1082
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -0,0 +1,69 @@
+//===-- ARMAsmBackend.h - ARM Assembler Backend -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class ARMAsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo *STI;
+ bool isThumbMode; // Currently emitting Thumb code.
+ bool IsLittleEndian; // Big or little endian.
+public:
+ ARMAsmBackend(const Target &T, StringRef TT, bool IsLittle)
+ : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
+ isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {}
+
+ ~ARMAsmBackend() override { delete STI; }
+
+ unsigned getNumFixupKinds() const override {
+ return ARM::NumTargetFixupKinds;
+ }
+
+ bool hasNOP() const { return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
+ /// processFixupValue - Target hook to process the literal value of a fixup
+ /// if necessary.
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override;
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override;
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override;
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+
+ void handleAssemblerFlag(MCAssemblerFlag Flag) override;
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+ bool isLittle() const { return IsLittleEndian; }
+};
+} // end anonymous namespace
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
new file mode 100644
index 0000000..3bd7ab7
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -0,0 +1,33 @@
+//===-- ARMAsmBackendDarwin.h ARM Asm Backend Darwin ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
+
+#include "llvm/Support/MachO.h"
+
+using namespace llvm;
+
+namespace {
+class ARMAsmBackendDarwin : public ARMAsmBackend {
+public:
+ const MachO::CPUSubTypeARM Subtype;
+ ARMAsmBackendDarwin(const Target &T, StringRef TT, MachO::CPUSubTypeARM st)
+ : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
+ Subtype);
+ }
+};
+}
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
new file mode 100644
index 0000000..4efd325
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -0,0 +1,27 @@
+//===-- ARMAsmBackendELF.h ARM Asm Backend ELF -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H
+#define LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H
+
+using namespace llvm;
+namespace {
+class ARMAsmBackendELF : public ARMAsmBackend {
+public:
+ uint8_t OSABI;
+ ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle)
+ : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createARMELFObjectWriter(OS, OSABI, isLittle());
+ }
+};
+}
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
new file mode 100644
index 0000000..33be347
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -0,0 +1,26 @@
+//===-- ARMAsmBackendWinCOFF.h - ARM Asm Backend WinCOFF --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H
+#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H
+
+using namespace llvm;
+
+namespace {
+class ARMAsmBackendWinCOFF : public ARMAsmBackend {
+public:
+ ARMAsmBackendWinCOFF(const Target &T, StringRef Triple)
+ : ARMAsmBackend(T, Triple, true) {}
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
+ }
+};
+}
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 1686d76..4289a73 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMBASEINFO_H
-#define ARMBASEINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H
#include "ARMMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 1c84263..f24b419 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -37,7 +37,8 @@ namespace {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
- bool needsRelocateWithSymbol(unsigned Type) const override;
+ bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const override;
};
}
@@ -48,7 +49,8 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
ARMELFObjectWriter::~ARMELFObjectWriter() {}
-bool ARMELFObjectWriter::needsRelocateWithSymbol(unsigned Type) const {
+bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const {
  // FIXME: This is extremely conservative. This really needs to use a
  // whitelist with a clear explanation for why each relocation needs to
// point to the symbol, not to the section.
@@ -100,7 +102,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_uncondbl:
switch (Modifier) {
case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_ARM_PLT32;
+ Type = ELF::R_ARM_CALL;
break;
case MCSymbolRefExpr::VK_ARM_TLSCALL:
Type = ELF::R_ARM_TLS_CALL;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 7b5d8b0..24ee537 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -848,6 +848,14 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
/* OverwriteExisting= */ false);
break;
+ // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so
+ // uses the FP_ARMV8_D16 build attribute.
+ case ARM::FPV5_D16:
+ setAttributeItem(ARMBuildAttrs::FP_arch,
+ ARMBuildAttrs::AllowFPARMv8B,
+ /* OverwriteExisting= */ false);
+ break;
+
case ARM::NEON:
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPv3A,
@@ -1339,10 +1347,9 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) {
return S;
}
- MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack,
- bool IsThumb) {
+MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool IsThumb) {
ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
new ARMTargetELFStreamer(*S);
// FIXME: This should eventually end up somewhere else where more
@@ -1352,8 +1359,6 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) {
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
return S;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index bfd9e33..46ba571 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ARM_ARMFIXUPKINDS_H
-#define LLVM_ARM_ARMFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 7a19208..1d82099 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -55,7 +55,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
- HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
@@ -103,7 +102,6 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
Code32Directive = ".code\t32";
PrivateGlobalPrefix = ".L";
- HasLEB128 = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::None;
UseParensForSymbolVariant = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 51cfa0a..f1fef41 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ARMTARGETASMINFO_H
-#define LLVM_ARMTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H
#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index e545e3c..68d32b2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -35,12 +35,6 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
OS << ')';
}
-bool
-ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return false;
-}
-
void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index c5c0b10..06bf6c9 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMMCEXPR_H
-#define ARMMCEXPR_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H
#include "llvm/MC/MCExpr.h"
@@ -58,8 +58,11 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
- void visitUsedExpr(MCStreamer &Streamer) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override {
+ return false;
+ }
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 2b3855d..98190ba 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -84,93 +84,89 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
Triple triple(TT);
- // Set the boolean corresponding to the current target triple, or the default
- // if one cannot be determined, to true.
- unsigned Len = TT.size();
- unsigned Idx = 0;
-
- // FIXME: Enhance Triple helper class to extract ARM version.
bool isThumb = triple.getArch() == Triple::thumb ||
triple.getArch() == Triple::thumbeb;
- if (Len >= 5 && TT.substr(0, 4) == "armv")
- Idx = 4;
- else if (Len >= 7 && TT.substr(0, 6) == "armebv")
- Idx = 6;
- else if (Len >= 7 && TT.substr(0, 6) == "thumbv")
- Idx = 6;
- else if (Len >= 9 && TT.substr(0, 8) == "thumbebv")
- Idx = 8;
bool NoCPU = CPU == "generic" || CPU.empty();
std::string ARMArchFeature;
- if (Idx) {
- unsigned SubVer = TT[Idx];
- if (SubVer == '8') {
- if (NoCPU)
- // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
- // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
- // FeatureT2XtPk, FeatureCrypto, FeatureCRC
- ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
- "+trustzone,+t2xtpk,+crypto,+crc";
- else
- // Use CPU to figure out the exact features
- ARMArchFeature = "+v8";
- } else if (SubVer == '7') {
- if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- isThumb = true;
- if (NoCPU)
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
- if (NoCPU)
- // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- } else if (Len >= Idx+2 && TT[Idx+1] == 's') {
- if (NoCPU)
- // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS
- // Swift
- ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- } else {
- // v7 CPUs have lots of different feature sets. If no CPU is specified,
- // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
- // the "minimum" feature set and use CPU string to figure out the exact
- // features.
- if (NoCPU)
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
- ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- }
- } else if (SubVer == '6') {
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
- ARMArchFeature = "+v6t2";
- else if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- isThumb = true;
- if (NoCPU)
- // v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6m,+noarm,+mclass";
- else
- ARMArchFeature = "+v6";
- } else
- ARMArchFeature = "+v6";
- } else if (SubVer == '5') {
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
- ARMArchFeature = "+v5te";
- else
- ARMArchFeature = "+v5t";
- } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't')
- ARMArchFeature = "+v4t";
+ switch (triple.getSubArch()) {
+ default:
+ llvm_unreachable("invalid sub-architecture for ARM");
+ case Triple::ARMSubArch_v8:
+ if (NoCPU)
+ // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
+ // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
+ // FeatureT2XtPk, FeatureCrypto, FeatureCRC
+ ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
+ "+trustzone,+t2xtpk,+crypto,+crc";
+ else
+ // Use CPU to figure out the exact features
+ ARMArchFeature = "+v8";
+ break;
+ case Triple::ARMSubArch_v7m:
+ isThumb = true;
+ if (NoCPU)
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ break;
+ case Triple::ARMSubArch_v7em:
+ if (NoCPU)
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk, FeatureMClass
+      ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ break;
+ case Triple::ARMSubArch_v7s:
+ if (NoCPU)
+ // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS
+ // Swift
+ ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ break;
+ case Triple::ARMSubArch_v7:
+ // v7 CPUs have lots of different feature sets. If no CPU is specified,
+ // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
+ // the "minimum" feature set and use CPU string to figure out the exact
+ // features.
+ if (NoCPU)
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ break;
+ case Triple::ARMSubArch_v6t2:
+ ARMArchFeature = "+v6t2";
+ break;
+ case Triple::ARMSubArch_v6m:
+ isThumb = true;
+ if (NoCPU)
+ // v6m: FeatureNoARM, FeatureMClass
+ ARMArchFeature = "+v6m,+noarm,+mclass";
+ else
+ ARMArchFeature = "+v6";
+ break;
+ case Triple::ARMSubArch_v6:
+ ARMArchFeature = "+v6";
+ break;
+ case Triple::ARMSubArch_v5te:
+ ARMArchFeature = "+v5te";
+ break;
+ case Triple::ARMSubArch_v5:
+ ARMArchFeature = "+v5t";
+ break;
+ case Triple::ARMSubArch_v4t:
+ ARMArchFeature = "+v4t";
+ break;
+ case Triple::NoSubArch:
+ break;
}
if (isThumb) {
@@ -221,31 +217,14 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
Triple TheTriple(TT);
MCAsmInfo *MAI;
- switch (TheTriple.getOS()) {
- case llvm::Triple::Darwin:
- case llvm::Triple::IOS:
- case llvm::Triple::MacOSX:
+ if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())
MAI = new ARMMCAsmInfoDarwin(TT);
- break;
- case llvm::Triple::Win32:
- switch (TheTriple.getEnvironment()) {
- case llvm::Triple::Itanium:
- MAI = new ARMCOFFMCAsmInfoGNU();
- break;
- case llvm::Triple::MSVC:
- MAI = new ARMCOFFMCAsmInfoMicrosoft();
- break;
- default:
- llvm_unreachable("invalid environment");
- }
- break;
- default:
- if (TheTriple.isOSBinFormatMachO())
- MAI = new ARMMCAsmInfoDarwin(TT);
- else
- MAI = new ARMELFMCAsmInfo(TT);
- break;
- }
+ else if (TheTriple.isWindowsItaniumEnvironment())
+ MAI = new ARMCOFFMCAsmInfoGNU();
+ else if (TheTriple.isWindowsMSVCEnvironment())
+ MAI = new ARMCOFFMCAsmInfoMicrosoft();
+ else
+ MAI = new ARMELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true);
MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0));
@@ -269,11 +248,8 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
Triple TheTriple(TT);
switch (TheTriple.getObjectFormat()) {
@@ -287,7 +263,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS);
case Triple::ELF:
- return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
TheTriple.getArch() == Triple::thumb);
}
}
@@ -362,8 +338,10 @@ extern "C" void LLVMInitializeARMTargetMC() {
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, createARMMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget,
+ createARMMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget,
+ createARMMCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo);
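
The ParseARMTriple rewrite above replaces manual substring parsing of the triple with Triple::getSubArch(). A condensed sketch of the same idea, shown for a few sub-arches only and assuming the Triple API of this revision:

#include "llvm/ADT/Triple.h"
#include <string>

using namespace llvm;

// Hypothetical, reduced version of the mapping in ParseARMTriple: sub-arch to
// the default feature string used when no concrete CPU is given.
static std::string defaultArchFeatures(StringRef TT) {
  switch (Triple(TT).getSubArch()) {
  case Triple::ARMSubArch_v8:   return "+v8";
  case Triple::ARMSubArch_v7m:  return "+v7,+noarm,+db,+hwdiv,+mclass";
  case Triple::ARMSubArch_v6t2: return "+v6t2";
  case Triple::ARMSubArch_v4t:  return "+v4t";
  default:                      return "";
  }
}
// e.g. defaultArchFeatures("thumbv7m-none-eabi") selects the M-class set.
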
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 5326e56..a6c20d5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARMMCTARGETDESC_H
-#define ARMMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
#include <string>
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 186776a..7da5003 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -428,7 +428,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
+ if (!SD->getSymbol().isUndefined())
FixedValue -= Layout.getSymbolOffset(SD);
} else {
// The index is the section ordinal (1-based).
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index ad3f1ca..8acd7af 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -28,7 +28,7 @@ ARMTargetStreamer::~ARMTargetStreamer() {}
// The constant pool handling is shared by all ARMTargetStreamer
// implementations.
const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
- return ConstantPools->addEntry(Streamer, Expr);
+ return ConstantPools->addEntry(Streamer, Expr, 4);
}
void ARMTargetStreamer::emitCurrentConstantPool() {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index cd58759..e0c113e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARM_UNWIND_OP_ASM_H
-#define ARM_UNWIND_OP_ASM_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ARMEHABI.h"
@@ -90,4 +90,4 @@ private:
} // namespace llvm
-#endif // ARM_UNWIND_OP_ASM_H
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
index 2a7fe61..db8fc92 100644
--- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = ARMDesc
parent = ARM
-required_libraries = ARMAsmPrinter ARMInfo MC Support
+required_libraries = ARMAsmPrinter ARMInfo MC MCDisassembler Support
add_to_library_groups = ARM
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f6d24e9..35fe9b3 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -378,8 +378,8 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
- TRI = Fn.getTarget().getRegisterInfo();
+ TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
isLikeA9 = STI->isLikeA9() || STI->isSwift();
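
This hunk, like the Thumb1FrameLowering changes further down, is part of a mechanical migration from TargetMachine accessors to per-function subtarget accessors. A schematic of the new query pattern (not a real pass, just the accessor shape):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

// Schematic only: lookups that previously went through MF.getTarget() now go
// through the function's subtarget, mirroring the MLxExpansion and
// Thumb1FrameLowering hunks in this patch.
static void querySubtargetSketch(const MachineFunction &MF) {
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TII;
  (void)TRI;
}
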
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index f069535..c1601a3 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -15,7 +15,7 @@ TARGET = ARM
BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
- ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
+ ARMGenCallingConv.inc \
ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index baa97a7..6deab4f 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -52,9 +52,9 @@ void Thumb1FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -89,12 +89,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
@@ -321,12 +324,15 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const Thumb1InstrInfo &TII =
- *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo());
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
@@ -382,28 +388,65 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- if (ArgRegsSaveSize) {
- // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
- // to LR, and we can't pop the value directly to the PC since
- // we need to update the SP after popping the value. Therefore, we
- // pop the old LR into R3 as a temporary.
-
+ bool IsV4PopReturn = false;
+ for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo())
+ if (CSI.getReg() == ARM::LR)
+ IsV4PopReturn = true;
+ IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps();
+
+ // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ // to LR, and we can't pop the value directly to the PC since
+ // we need to update the SP after popping the value. So instead
+ // we have to emit:
+ // POP {r3}
+ // ADD sp, #offset
+ // BX r3
+ // If this would clobber a return value, then generate this sequence instead:
+ // MOV ip, r3
+ // POP {r3}
+ // ADD sp, #offset
+ // MOV lr, r3
+ // MOV r3, ip
+ // BX lr
+ if (ArgRegsSaveSize || IsV4PopReturn) {
// Get the last instruction, tBX_RET
MBBI = MBB.getLastNonDebugInstr();
assert (MBBI->getOpcode() == ARM::tBX_RET);
- // Epilogue for vararg functions: pop LR to R3 and branch off it.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
-
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
-
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
- .addReg(ARM::R3, RegState::Kill);
- AddDefaultPred(MIB);
- MIB.copyImplicitOps(&*MBBI);
- // erase the old tBX_RET instruction
- MBB.erase(MBBI);
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ if (AFI->getReturnRegsCount() <= 3) {
+ // Epilogue: pop saved LR to R3 and branch off it.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
+ .addReg(ARM::R3, RegState::Kill);
+ AddDefaultPred(MIB);
+ MIB.copyImplicitOps(&*MBBI);
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ } else {
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(ARM::R3, RegState::Kill));
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(ARM::R3, RegState::Kill));
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::R3, RegState::Define)
+ .addReg(ARM::R12, RegState::Kill));
+ // Keep the tBX_RET instruction
+ }
}
}
@@ -417,7 +460,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -456,7 +499,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI->getDebugLoc();
@@ -470,6 +513,9 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// Special epilogue for vararg functions. See emitEpilogue
if (isVarArg)
continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (STI.hasV4TOps() && !STI.hasV5TOps())
+ continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
MIB.copyImplicitOps(&*MI);
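The epilogue rewrite above applies not only to vararg functions but also to ARMv4T returns: v4T has BX, but its POP cannot write PC, so a function that saved LR must pop it into a scratch register and branch through it. A minimal sketch of the triggering condition, assuming it is placed inside lib/Target/ARM so the ARM register enum is visible; the helper name is hypothetical:

#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"

using namespace llvm;

// Restates the IsV4PopReturn computation from the '+' lines above.
static bool needsPopToBxReturn(const ARMSubtarget &STI,
                               const MachineFrameInfo &MFI) {
  bool SavedLR = false;
  for (const CalleeSavedInfo &CSI : MFI.getCalleeSavedInfo())
    if (CSI.getReg() == ARM::LR)
      SavedLR = true;
  // Only v4T (BX available, but POP cannot restore PC) needs the BX return.
  return SavedLR && STI.hasV4TOps() && !STI.hasV5TOps();
}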
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index a227f8e..b785b28 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H
-#define LLVM_ARM_THUMB1FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H
+#define LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H
#include "ARMFrameLowering.h"
#include "Thumb1InstrInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 68cbb5c..8ea912e 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -41,10 +42,30 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)));
+  // The copy sequence depends on the architecture: pre-v6 cores cannot use
+  // tMOVr for a low-to-low register copy.
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>();
+
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
"Thumb1 can only copy GPR registers");
+
+ if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
+ || !ARM::tGPRRegClass.contains(DestReg))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
+ else {
+ // FIXME: The performance consequences of this are going to be atrocious.
+ // Some things to try that should be better:
+ // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
+ // * 'movs $dst, $src' if cpsr isn't live
+ // See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html
+
+ // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP)))
+ .addReg(DestReg, getDefRegState(true));
+ }
}
void Thumb1InstrInfo::
@@ -101,3 +122,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
+
+void
+Thumb1InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi, RM);
+}
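The copyPhysReg change above encodes an architectural restriction: before v6, a Thumb1 MOV between two low registers is unpredictable, so such copies are emitted as a push/pop pair. A minimal sketch of the selection predicate, assuming it lives inside lib/Target/ARM where the register classes are declared; the helper name is hypothetical:

#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"

using namespace llvm;

// True when the plain tMOVr form is safe, matching the condition added above.
static bool canUseTMOVrCopy(const ARMSubtarget &ST, unsigned DestReg,
                            unsigned SrcReg) {
  // v6 and later allow low-to-low tMOVr; otherwise tMOVr is only used when at
  // least one operand is a high register. Remaining low-to-low copies go
  // through the stack as tPUSH {src} / tPOP {dst}.
  return ST.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) ||
         !ARM::tGPRRegClass.contains(DestReg);
}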
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index c5845b7..9fba760 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THUMB1INSTRUCTIONINFO_H
-#define THUMB1INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
#include "ARMBaseInstrInfo.h"
#include "Thumb1RegisterInfo.h"
@@ -54,7 +54,10 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
};
}
-#endif // THUMB1INSTRUCTIONINFO_H
+#endif
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index f907b14..c10c809 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -66,8 +66,12 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
int Val,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) const {
+ assert((isARMLowRegister(DestReg) ||
+ isVirtualRegister(DestReg)) &&
+ "Thumb1 does not have ldr to high register");
+
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
@@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
NumBytes = -NumBytes;
}
unsigned LdReg = DestReg;
- if (DestReg == ARM::SP) {
+ if (DestReg == ARM::SP)
assert(BaseReg == ARM::SP && "Unexpected!");
+ if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg))
LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
- }
- if (NumBytes <= 255 && NumBytes >= 0)
+ if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
.addImm(NumBytes).setMIFlags(MIFlags);
- else if (NumBytes < 0 && NumBytes >= -255) {
+ } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
.addImm(NumBytes).setMIFlags(MIFlags);
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
@@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
ARMCC::AL, 0, MIFlags);
// Emit add / sub.
- int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr
+ : ARM::tADDrr);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
if (Opc != ARM::tADDhirr)
@@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
AddDefaultPred(MIB);
}
-/// calcNumMI - Returns the number of instructions required to materialize
-/// the specific add / sub r, c instruction.
-static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
- unsigned NumBits, unsigned Scale) {
- unsigned NumMIs = 0;
- unsigned Chunk = ((1 << NumBits) - 1) * Scale;
-
- if (Opc == ARM::tADDrSPi) {
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- NumMIs++;
- NumBits = 8;
- Scale = 1; // Followed by a number of tADDi8.
- Chunk = ((1 << NumBits) - 1) * Scale;
- }
-
- NumMIs += Bytes / Chunk;
- if ((Bytes % Chunk) != 0)
- NumMIs++;
- if (ExtraOpc)
- NumMIs++;
- return NumMIs;
-}
-
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in Thumb code.
+/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or
+/// SUBs first, and uses a constant pool value if the instruction sequence would
+/// be too long. This is allowed to modify the condition flags.
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
DebugLoc dl,
@@ -172,151 +155,146 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
bool isSub = NumBytes < 0;
unsigned Bytes = (unsigned)NumBytes;
if (isSub) Bytes = -NumBytes;
- bool isMul4 = (Bytes & 3) == 0;
- bool isTwoAddr = false;
- bool DstNotEqBase = false;
- unsigned NumBits = 1;
- unsigned Scale = 1;
- int Opc = 0;
+
+ int CopyOpc = 0;
+ unsigned CopyBits = 0;
+ unsigned CopyScale = 1;
+ bool CopyNeedsCC = false;
int ExtraOpc = 0;
- bool NeedCC = false;
-
- if (DestReg == BaseReg && BaseReg == ARM::SP) {
- assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
- NumBits = 7;
- Scale = 4;
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- isTwoAddr = true;
- } else if (!isSub && BaseReg == ARM::SP) {
- // r1 = add sp, 403
- // =>
- // r1 = add sp, 100 * 4
- // r1 = add r1, 3
- if (!isMul4) {
- Bytes &= ~3;
- ExtraOpc = ARM::tADDi3;
+ unsigned ExtraBits = 0;
+ unsigned ExtraScale = 1;
+ bool ExtraNeedsCC = false;
+
+ // Strategy:
+ // We need to select two types of instruction, maximizing the available
+ // immediate range of each. The instructions we use will depend on whether
+ // DestReg and BaseReg are low, high or the stack pointer.
+ // * CopyOpc - DestReg = BaseReg + imm
+ // This will be emitted once if DestReg != BaseReg, and never if
+ // DestReg == BaseReg.
+ // * ExtraOpc - DestReg = DestReg + imm
+ // This will be emitted as many times as necessary to add the
+ // full immediate.
+ // If the immediate ranges of these instructions are not large enough to cover
+ // NumBytes with a reasonable number of instructions, we fall back to using a
+ // value loaded from a constant pool.
+ if (DestReg == ARM::SP) {
+ if (BaseReg == ARM::SP) {
+ // sp -> sp
+ // Already in right reg, no copy needed
+ } else {
+ // low -> sp or high -> sp
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
}
- NumBits = 8;
- Scale = 4;
- Opc = ARM::tADDrSPi;
- } else {
- // sp = sub sp, c
- // r1 = sub sp, c
- // r8 = sub sp, c
- if (DestReg != BaseReg)
- DstNotEqBase = true;
- NumBits = 8;
- if (DestReg == ARM::SP) {
- Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
- NumBits = 7;
- Scale = 4;
+ ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ ExtraBits = 7;
+ ExtraScale = 4;
+ } else if (isARMLowRegister(DestReg)) {
+ if (BaseReg == ARM::SP) {
+ // sp -> low
+ assert(!isSub && "Thumb1 does not have tSUBrSPi");
+ CopyOpc = ARM::tADDrSPi;
+ CopyBits = 8;
+ CopyScale = 4;
+ } else if (DestReg == BaseReg) {
+ // low -> same low
+ // Already in right reg, no copy needed
+ } else if (isARMLowRegister(BaseReg)) {
+ // low -> different low
+ CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3;
+ CopyBits = 3;
+ CopyNeedsCC = true;
} else {
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- NumBits = 8;
- NeedCC = true;
+ // high -> low
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
}
- isTwoAddr = true;
+ ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ ExtraBits = 8;
+ ExtraNeedsCC = true;
+ } else /* DestReg is high */ {
+ if (DestReg == BaseReg) {
+ // high -> same high
+ // Already in right reg, no copy needed
+ } else {
+ // {low,high,sp} -> high
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
+ }
+ ExtraOpc = 0;
}
- unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ // We could handle an unaligned immediate with an unaligned copy instruction
+ // and an aligned extra instruction, but this case is not currently needed.
+ assert(((Bytes & 3) == 0 || ExtraScale == 1) &&
+ "Unaligned offset, but all instructions require alignment");
+
+ unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale;
+ // If we would emit the copy with an immediate of 0, just use tMOVr.
+ if (CopyOpc && Bytes < CopyScale) {
+ CopyOpc = ARM::tMOVr;
+ CopyBits = 0;
+ CopyScale = 1;
+ CopyNeedsCC = false;
+ CopyRange = 0;
+ }
+ unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction
+ unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0;
+ unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange);
+
+ // We could handle this case when the copy instruction does not require an
+ // aligned immediate, but we do not currently do this.
+ assert(RangeAfterCopy % ExtraScale == 0 &&
+ "Extra instruction requires immediate to be aligned");
+
+ unsigned RequiredExtraInstrs;
+ if (ExtraRange)
+ RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange;
+ else if (RangeAfterCopy > 0)
+ // We need an extra instruction but none is available
+ RequiredExtraInstrs = 1000000;
+ else
+ RequiredExtraInstrs = 0;
+ unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs;
unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
- if (NumMIs > Threshold) {
- // This will expand into too many instructions. Load the immediate from a
- // constpool entry.
+
+ // Use a constant pool, if the sequence of ADDs/SUBs is too expensive.
+ if (RequiredInstrs > Threshold) {
emitThumbRegPlusImmInReg(MBB, MBBI, dl,
DestReg, BaseReg, NumBytes, true,
TII, MRI, MIFlags);
return;
}
- if (DstNotEqBase) {
- if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
- // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
- unsigned Chunk = (1 << 3) - 1;
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
- const MachineInstrBuilder MIB =
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
- .setMIFlags(MIFlags));
- AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
- } else {
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
- .addReg(BaseReg, RegState::Kill))
- .setMIFlags(MIFlags);
+ // Emit zero or one copy instructions
+ if (CopyOpc) {
+ unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale;
+ Bytes -= CopyImm * CopyScale;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
+ if (CopyNeedsCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg, RegState::Kill);
+ if (CopyOpc != ARM::tMOVr) {
+ MIB.addImm(CopyImm);
}
+ AddDefaultPred(MIB.setMIFlags(MIFlags));
+
BaseReg = DestReg;
}
- unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ // Emit zero or more in-place add/sub instructions
while (Bytes) {
- unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
- Bytes -= ThisVal;
- ThisVal /= Scale;
- // Build the new tADD / tSUB.
- if (isTwoAddr) {
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
- if (NeedCC)
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(DestReg).addImm(ThisVal);
- MIB = AddDefaultPred(MIB);
- MIB.setMIFlags(MIFlags);
- } else {
- bool isKill = BaseReg != ARM::SP;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
- if (NeedCC)
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
- MIB = AddDefaultPred(MIB);
- MIB.setMIFlags(MIFlags);
-
- BaseReg = DestReg;
- if (Opc == ARM::tADDrSPi) {
- // r4 = add sp, imm
- // r4 = add r4, imm
- // ...
- NumBits = 8;
- Scale = 1;
- Chunk = ((1 << NumBits) - 1) * Scale;
- Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
- NeedCC = isTwoAddr = true;
- }
- }
- }
-
- if (ExtraOpc) {
- const MCInstrDesc &MCID = TII.get(ExtraOpc);
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
- .addReg(DestReg, RegState::Kill)
- .addImm(((unsigned)NumBytes) & 3)
- .setMIFlags(MIFlags));
- }
-}
+ unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale;
+ Bytes -= ExtraImm * ExtraScale;
-/// emitThumbConstant - Emit a series of instructions to materialize a
-/// constant.
-static void emitThumbConstant(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, int Imm,
- const TargetInstrInfo &TII,
- const Thumb1RegisterInfo& MRI,
- DebugLoc dl) {
- bool isSub = Imm < 0;
- if (isSub) Imm = -Imm;
-
- int Chunk = (1 << 8) - 1;
- int ThisVal = (Imm > Chunk) ? Chunk : Imm;
- Imm -= ThisVal;
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
- DestReg))
- .addImm(ThisVal));
- if (Imm > 0)
- emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
- if (isSub) {
- const MCInstrDesc &MCID = TII.get(ARM::tRSB);
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
- .addReg(DestReg, RegState::Kill));
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
+ if (ExtraNeedsCC)
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(BaseReg).addImm(ExtraImm);
+ MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
}
}
@@ -352,86 +330,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- if (Opcode == ARM::tADDrSPi) {
+ if (Opcode == ARM::tADDframe) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
-
- // Can't use tADDrSPi if it's based off the frame pointer.
- unsigned NumBits = 0;
- unsigned Scale = 1;
- if (FrameReg != ARM::SP) {
- Opcode = ARM::tADDi3;
- NumBits = 3;
- } else {
- NumBits = 8;
- Scale = 4;
- assert((Offset & 3) == 0 &&
- "Thumb add/sub sp, #imm immediate must be multiple of 4!");
- }
-
- unsigned PredReg;
- if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
- // Turn it into a move.
- MI.setDesc(TII.get(ARM::tMOVr));
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- // Remove offset
- MI.RemoveOperand(FrameRegIdx+1);
- return true;
- }
-
- // Common case: small offset, fits into instruction.
- unsigned Mask = (1 << NumBits) - 1;
- if (((Offset / Scale) & ~Mask) == 0) {
- // Replace the FrameIndex with sp / fp
- if (Opcode == ARM::tADDi3) {
- MI.setDesc(TII.get(Opcode));
- removeOperands(MI, FrameRegIdx);
- AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
- .addImm(Offset / Scale));
- } else {
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
- }
- return true;
- }
-
unsigned DestReg = MI.getOperand(0).getReg();
- unsigned Bytes = (Offset > 0) ? Offset : -Offset;
- unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
- // MI would expand into a large number of instructions. Don't try to
- // simplify the immediate.
- if (NumMIs > 2) {
- emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
- *this);
- MBB.erase(II);
- return true;
- }
- if (Offset > 0) {
- // Translate r0 = add sp, imm to
- // r0 = add sp, 255*4
- // r0 = add r0, (imm - 255*4)
- if (Opcode == ARM::tADDi3) {
- MI.setDesc(TII.get(Opcode));
- removeOperands(MI, FrameRegIdx);
- AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
- } else {
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
- }
- Offset = (Offset - Mask * Scale);
- MachineBasicBlock::iterator NII = std::next(II);
- emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
- *this);
- } else {
- // Translate r0 = add sp, -imm to
- // r0 = -imm (this is then translated into a series of instructions)
- // r0 = add r0, sp
- emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
-
- MI.setDesc(TII.get(ARM::tADDhirr));
- MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
- }
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
+ MBB.erase(II);
return true;
} else {
if (AddrMode != ARMII::AddrModeT1_s)
@@ -485,8 +390,11 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(
- MI.getParent()->getParent()->getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MI.getParent()
+ ->getParent()
+ ->getTarget()
+ .getSubtargetImpl()
+ ->getInstrInfo());
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -512,7 +420,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// off the frame pointer (if, for example, there are alloca() calls in
// the function, the offset will be negative. Use R12 instead since that's
// a call clobbered register that we know won't be used in Thumb1 mode.
- const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
DebugLoc DL;
AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
.addReg(ARM::R12, RegState::Define)
@@ -559,7 +467,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
@@ -570,7 +478,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MF.getFrameInfo()->getStackSize() + SPAdj;
if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ assert(SPAdj == 0 && MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
"Unexpected");
// There are alloca()'s in this function, must reference off the frame
// pointer or base pointer instead.
@@ -587,7 +495,10 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// when !hasReservedCallFrame().
#ifndef NDEBUG
if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
- assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
+ assert(MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
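The rewritten emitThumbRegPlusImmediate picks at most one "copy" instruction (DestReg = BaseReg + imm) plus as many in-place add/sub instructions as needed, and falls back to a constant-pool load once the estimated count exceeds a small threshold. A standalone model of that estimate, with illustrative ranges (the real code derives CopyRange and ExtraRange from the chosen opcodes):

#include <cstdio>

// Mirrors RequiredCopyInstrs/RequiredExtraInstrs from the '+' lines above.
static unsigned estimateInstrs(unsigned Bytes, bool HasCopy, unsigned CopyRange,
                               unsigned ExtraRange) {
  unsigned Copy = HasCopy ? 1 : 0;
  unsigned AfterCopy = (CopyRange > Bytes) ? 0 : Bytes - CopyRange;
  unsigned Extra;
  if (ExtraRange)
    Extra = (AfterCopy + ExtraRange - 1) / ExtraRange; // round up
  else
    Extra = AfterCopy ? 1000000 : 0; // no suitable in-place instruction exists
  return Copy + Extra;
}

int main() {
  // Example: low reg = SP + 1200. tADDrSPi covers 255*4 = 1020 bytes and
  // tADDi8 covers 255 bytes per instruction, so 1 + 1 = 2 instructions,
  // which is within the non-SP threshold of 2; no constant pool is needed.
  std::printf("%u\n", estimateInstrs(1200, true, 1020, 255));
  return 0;
}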
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 0c0abbe..5feaf52 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THUMB1REGISTERINFO_H
-#define THUMB1REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H
#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -60,4 +60,4 @@ public:
};
}
-#endif // THUMB1REGISTERINFO_H
+#endif
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index edb9ff3..fdcb522 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -188,7 +188,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
true/*isImp*/, false/*isKill*/));
MachineInstr *LastITMI = MI;
- MachineBasicBlock::iterator InsertPos = MIB;
+ MachineBasicBlock::iterator InsertPos = MIB.getInstr();
++MBBI;
// Form IT block.
@@ -255,8 +255,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
AFI = Fn.getInfo<ARMFunctionInfo>();
- TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
- TRI = TM.getRegisterInfo();
+ TII = static_cast<const Thumb2InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ TRI = TM.getSubtargetImpl()->getRegisterInfo();
restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
if (!AFI->isThumbFunction())
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index a9df006..91973e1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -209,6 +209,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
+void
+Thumb2InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const {
+ if (RM == Reloc::PIC_)
+ expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12, RM);
+ else
+ expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12, RM);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
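Both new expandLoadStackGuard overrides (Thumb1 earlier in this patch and Thumb2 here) follow the same shape: pick an address-materialization opcode based on the relocation model, then load the stack-guard value through it. A minimal sketch of that dispatch, assuming the ARM opcode enum is visible via the MCTargetDesc header; the helper is hypothetical and the real code calls expandLoadStackGuardBase directly:

#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/CodeGen.h"
#include <utility>

using namespace llvm;

// Returns {address-materialization opcode, load opcode}, matching the pairs
// passed to expandLoadStackGuardBase in the '+' lines.
static std::pair<unsigned, unsigned> pickStackGuardOpcodes(Reloc::Model RM,
                                                           bool IsThumb2) {
  if (IsThumb2)
    return std::make_pair(RM == Reloc::PIC_ ? unsigned(ARM::t2MOV_ga_pcrel)
                                            : unsigned(ARM::t2MOVi32imm),
                          unsigned(ARM::t2LDRi12));
  return std::make_pair(RM == Reloc::PIC_ ? unsigned(ARM::tLDRLIT_ga_pcrel)
                                          : unsigned(ARM::tLDRLIT_ga_abs),
                        unsigned(ARM::tLDRi));
}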
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 34d45d3..46a1f6d 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THUMB2INSTRUCTIONINFO_H
-#define THUMB2INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
#include "ARMBaseInstrInfo.h"
#include "Thumb2RegisterInfo.h"
@@ -61,6 +61,10 @@ public:
/// always be able to get register info as well (through this method).
///
const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
+
+private:
+ void expandLoadStackGuard(MachineBasicBlock::iterator MI,
+ Reloc::Model RM) const override;
};
/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
@@ -71,4 +75,4 @@ ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
}
-#endif // THUMB2INSTRUCTIONINFO_H
+#endif
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 782d81f..0d5d85a 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -40,7 +40,7 @@ Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 8a33e6c..1dd94cc 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef THUMB2REGISTERINFO_H
-#define THUMB2REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
+#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
#include "ARMBaseRegisterInfo.h"
@@ -35,4 +35,4 @@ public:
};
}
-#endif // THUMB2REGISTERINFO_H
+#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 09debe7..c51eb8b 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -335,7 +335,7 @@ static bool VerifyLowRegs(MachineInstr *MI) {
bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
Opc == ARM::t2LDMDB_UPD);
- bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
+ bool isLROk = (Opc == ARM::t2STMDB_UPD);
bool isSPOk = isPCOk || isLROk;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -384,7 +384,6 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
if (MI->getOperand(1).getReg() == ARM::SP) {
Opc = Entry.NarrowOpc2;
ImmLimit = Entry.Imm2Limit;
- HasOffReg = false;
}
Scale = 4;
@@ -1003,7 +1002,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TII = static_cast<const Thumb2InstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
STI = &TM.getSubtarget<ARMSubtarget>();
// Optimizing / minimizing size?
diff --git a/lib/Target/Android.mk b/lib/Target/Android.mk
index 1b43ce4..4494eb0 100644
--- a/lib/Target/Android.mk
+++ b/lib/Target/Android.mk
@@ -3,7 +3,6 @@ LOCAL_PATH:= $(call my-dir)
target_SRC_FILES := \
Target.cpp \
TargetIntrinsicInfo.cpp \
- TargetJITInfo.cpp \
TargetLibraryInfo.cpp \
TargetLoweringObjectFile.cpp \
TargetMachineC.cpp \
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 06a74d7..c61805b 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,7 +1,6 @@
add_llvm_library(LLVMTarget
Target.cpp
TargetIntrinsicInfo.cpp
- TargetJITInfo.cpp
TargetLibraryInfo.cpp
TargetLoweringObjectFile.cpp
TargetMachine.cpp
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 673ade7..4bae7f8 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -11,29 +11,35 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CPPTARGETMACHINE_H
-#define CPPTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_CPPBACKEND_CPPTARGETMACHINE_H
+#define LLVM_LIB_TARGET_CPPBACKEND_CPPTARGETMACHINE_H
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
namespace llvm {
class formatted_raw_ostream;
+class CPPSubtarget : public TargetSubtargetInfo {
+};
+
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS, Options) {}
+ : TargetMachine(T, TT, CPU, FS, Options), Subtarget() {}
+private:
+ CPPSubtarget Subtarget;
+public:
+ const CPPSubtarget *getSubtargetImpl() const override { return &Subtarget; }
bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
CodeGenFileType FileType, bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) override;
-
- const DataLayout *getDataLayout() const override { return nullptr; }
};
extern Target TheCppBackendTarget;
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 81b0e56..af7914f 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -1,28 +1,32 @@
set(LLVM_TARGET_DEFINITIONS Hexagon.td)
-tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
-tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
+tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter)
+tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(HexagonCommonTableGen)
add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
HexagonCallingConvLower.cpp
HexagonCFGOptimizer.cpp
+ HexagonCopyToCombine.cpp
HexagonExpandPredSpillCode.cpp
+ HexagonFixupHwLoops.cpp
HexagonFrameLowering.cpp
HexagonHardwareLoops.cpp
- HexagonFixupHwLoops.cpp
- HexagonMachineFunctionInfo.cpp
- HexagonMachineScheduler.cpp
- HexagonMCInstLower.cpp
HexagonInstrInfo.cpp
HexagonISelDAGToDAG.cpp
HexagonISelLowering.cpp
+ HexagonMachineFunctionInfo.cpp
+ HexagonMachineScheduler.cpp
+ HexagonMCInstLower.cpp
+ HexagonNewValueJump.cpp
HexagonPeephole.cpp
HexagonRegisterInfo.cpp
HexagonRemoveSZExtArgs.cpp
@@ -33,11 +37,8 @@ add_llvm_target(HexagonCodeGen
HexagonTargetMachine.cpp
HexagonTargetObjectFile.cpp
HexagonVLIWPacketizer.cpp
- HexagonNewValueJump.cpp
- HexagonCopyToCombine.cpp
)
add_subdirectory(TargetInfo)
-add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
-
+add_subdirectory(Disassembler)
diff --git a/lib/Target/Hexagon/Disassembler/CMakeLists.txt b/lib/Target/Hexagon/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..755a45e
--- /dev/null
+++ b/lib/Target/Hexagon/Disassembler/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMHexagonDisassembler
+ HexagonDisassembler.cpp
+ )
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
new file mode 100644
index 0000000..bc64be1
--- /dev/null
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -0,0 +1,114 @@
+//===-- HexagonDisassembler.cpp - Disassembler for Hexagon ISA ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Endian.h"
+
+#include <vector>
+#include <array>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-disassembler"
+
+// Pull DecodeStatus and its enum values into the global namespace.
+typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// \brief Hexagon disassembler for all Hexagon platforms.
+class HexagonDisassembler : public MCDisassembler {
+public:
+ HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+}
+
+static const uint16_t IntRegDecoderTable[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
+ Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
+ Hexagon::R30, Hexagon::R31 };
+
+static const uint16_t PredRegDecoderTable[] = { Hexagon::P0, Hexagon::P1,
+Hexagon::P2, Hexagon::P3 };
+
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ void const *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Register = IntRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ void const *Decoder) {
+ if (RegNo > 3)
+ return MCDisassembler::Fail;
+
+ unsigned Register = PredRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+#include "HexagonGenDisassemblerTables.inc"
+
+static MCDisassembler *createHexagonDisassembler(Target const &T,
+ MCSubtargetInfo const &STI,
+ MCContext &Ctx) {
+ return new HexagonDisassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializeHexagonDisassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheHexagonTarget,
+ createHexagonDisassembler);
+}
+
+DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ Size = 4;
+ if (Bytes.size() < 4)
+ return MCDisassembler::Fail;
+
+ uint32_t insn =
+ llvm::support::endian::read<uint32_t, llvm::support::little,
+ llvm::support::unaligned>(Bytes.data());
+
+ // Remove parse bits.
+ insn &= ~static_cast<uint32_t>(HexagonII::InstParseBits::INST_PARSE_MASK);
+ return decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
+}
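getInstruction above consumes exactly one 32-bit word: it requires at least four bytes, reads them little-endian, clears the packet parse bits, and hands the result to the TableGen-generated decodeInstruction. A standalone sketch of the word handling, without LLVM's endian helpers or the generated tables; the 0x0000c000 mask is an assumption standing in for HexagonII::InstParseBits::INST_PARSE_MASK:

#include <cstdint>

// Little-endian read of one instruction word, then strip the parse bits
// (the packet-boundary marker) before table-driven decoding.
static uint32_t extractHexagonWord(const uint8_t Bytes[4]) {
  uint32_t Insn = uint32_t(Bytes[0]) | uint32_t(Bytes[1]) << 8 |
                  uint32_t(Bytes[2]) << 16 | uint32_t(Bytes[3]) << 24;
  const uint32_t ParseMask = 0x0000c000; // assumed value of INST_PARSE_MASK
  return Insn & ~ParseMask;
}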
diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt
index 59849aa..17ad11b 100644
--- a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/Hexagon/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;===-- ./lib/Target/Hexagon/Disassembler/LLVMBuild.txt ---------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -17,7 +17,7 @@
[component_0]
type = Library
-name = HexagonAsmPrinter
+name = HexagonDisassembler
parent = Hexagon
-required_libraries = HexagonDesc MC Support
+required_libraries = HexagonInfo MCDisassembler Support
add_to_library_groups = Hexagon
diff --git a/lib/MC/MCAnalysis/Makefile b/lib/Target/Hexagon/Disassembler/Makefile
index add2dbd..e55fd58 100644
--- a/lib/MC/MCAnalysis/Makefile
+++ b/lib/Target/Hexagon/Disassembler/Makefile
@@ -1,14 +1,16 @@
-##===- lib/MC/MCAnalysys/Makefile --------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMMCAnalysis
-BUILD_ARCHIVE := 1
-
-include $(LEVEL)/Makefile.common
+##===-- lib/Target/Hexagon/Disassembler/Makefile -----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonDisassembler
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index 5467ee3..64ae69c 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_Hexagon_H
-#define TARGET_Hexagon_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGON_H
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 2e011bd..9240282 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -18,7 +18,7 @@
#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "InstPrinter/HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index 7fe8c57..5f4c162 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONASMPRINTER_H
-#define HEXAGONASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index de340e0..8a4e02c 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -72,7 +72,7 @@ static bool IsUnconditionalJump(int Opc) {
void
HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
MachineBasicBlock* NewTarget) {
- const HexagonInstrInfo *QII = QTM.getInstrInfo();
+ const HexagonInstrInfo *QII = QTM.getSubtargetImpl()->getInstrInfo();
int NewOpcode = 0;
switch(MI->getOpcode()) {
case Hexagon::JMP_t:
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index f5f958c..8d78409 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
@@ -31,7 +32,8 @@ Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
// No stack is used.
StackOffset = 0;
- UsedRegs.resize((TM.getRegisterInfo()->getNumRegs()+31)/32);
+ UsedRegs.resize(
+ (TM.getSubtargetImpl()->getRegisterInfo()->getNumRegs() + 31) / 32);
}
// HandleByVal - Allocate a stack slot large enough to pass an argument by
@@ -55,7 +57,7 @@ void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void Hexagon_CCState::MarkAllocated(unsigned Reg) {
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
UsedRegs[*AI/32] |= 1 << (*AI&31);
}
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
index 70b8b64..738ed1a 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.h
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
-#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONCALLINGCONVLOWER_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index aeff680..4e76698 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -417,8 +417,8 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
bool HasChanged = false;
// Get target info.
- TRI = MF.getTarget().getRegisterInfo();
- TII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Combine aggressively (for code size)
ShouldCombineAggressively =
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 3dafe80..8ef4c3a 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -72,7 +72,7 @@ char HexagonExpandPredSpillCode::ID = 0;
bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
- const HexagonInstrInfo *TII = QTM.getInstrInfo();
+ const HexagonInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -86,8 +86,10 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
if (Opc == Hexagon::STriw_pred) {
// STriw_pred [R30], ofst, SrcReg;
unsigned FP = MI->getOperand(0).getReg();
- assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
- "Not a Frame Pointer, Nor a Spill Slot");
+ assert(
+ FP ==
+ QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
assert(MI->getOperand(1).isImm() && "Not an offset");
int Offset = MI->getOperand(1).getImm();
int SrcReg = MI->getOperand(2).getReg();
@@ -98,7 +100,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(Offset);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add),
HEXAGON_RESERVED_REG_1)
.addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
@@ -133,8 +135,10 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
"Not a predicate register");
unsigned FP = MI->getOperand(1).getReg();
- assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
- "Not a Frame Pointer, Nor a Spill Slot");
+ assert(
+ FP ==
+ QTM.getSubtargetImpl()->getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
assert(MI->getOperand(2).isImm() && "Not an offset");
int Offset = MI->getOperand(2).getImm();
if (!TII->isValidOffset(Hexagon::LDriw, Offset)) {
@@ -142,7 +146,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::CONST32_Int_Real),
HEXAGON_RESERVED_REG_1).addImm(Offset);
- BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_add),
HEXAGON_RESERVED_REG_1)
.addReg(FP)
.addReg(HEXAGON_RESERVED_REG_1);
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index d41939a..5f9b927 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -160,7 +160,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
MachineBasicBlock::iterator &MII,
RegScavenger &RS) {
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineBasicBlock *MBB = MII->getParent();
DebugLoc DL = MII->getDebugLoc();
unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 21df12f..356f279 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -50,7 +50,10 @@ void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
unsigned FrameSize = MFI->getStackSize();
// Get the alignments provided by the target.
- unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
@@ -77,8 +80,8 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
- const HexagonRegisterInfo *QRI =
- static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
determineFrameLayout(MF);
@@ -115,7 +118,7 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
// Check for overflow.
// Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
const int ALLOCFRAME_MAX = 16384;
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
if (NumBytes >= ALLOCFRAME_MAX) {
// Emit allocframe(#0).
@@ -154,12 +157,12 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = std::prev(MBB.end());
MachineBasicBlock::iterator MBBI_end = MBB.end();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Handle EH_RETURN.
if (MBBI->getOpcode() == Hexagon::EH_RETURN_JMPR) {
assert(MBBI->getOperand(0).isReg() && "Offset should be in register!");
BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME));
- BuildMI(MBB, MBBI, dl, TII.get(Hexagon::ADD_rr),
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::A2_add),
Hexagon::R29).addReg(Hexagon::R29).addReg(Hexagon::R28);
return;
}
@@ -225,7 +228,7 @@ HexagonFrameLowering::spillCalleeSavedRegisters(
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
if (CSI.empty()) {
return false;
@@ -280,7 +283,7 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
if (CSI.empty()) {
return false;
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 2d4b0b9..2d6b457 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGON_FRAMEINFO_H
-#define HEXAGON_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
#include "Hexagon.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 7f76421..e2062a3 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -220,7 +220,7 @@ namespace {
int HexagonHardwareLoops::Counter = 0;
#endif
- /// \brief Abstraction for a trip count of a loop. A smaller vesrsion
+ /// \brief Abstraction for a trip count of a loop. A smaller version
/// of the MachineOperand class without the concerns of changing the
/// operand representation.
class CountValue {
@@ -266,7 +266,8 @@ namespace {
}
void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const {
- const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr;
+ const TargetRegisterInfo *TRI =
+ TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
if (isImm()) { OS << Contents.ImmVal; }
}
@@ -302,8 +303,10 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
- TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
- TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
+ TII = static_cast<const HexagonInstrInfo *>(
+ TM->getSubtargetImpl()->getInstrInfo());
+ TRI = static_cast<const HexagonRegisterInfo *>(
+ TM->getSubtargetImpl()->getRegisterInfo());
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
I != E; ++I) {
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index dabe650..dc58c42 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -446,8 +446,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
if (TII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
@@ -513,8 +513,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
if (TII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
@@ -591,8 +591,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
// Figure out the opcode.
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
if (LoadedVT == MVT::i64) {
if (TII->isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::POST_LDrid;
@@ -701,8 +701,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
// Offset value must be within representable range
// and must have correct alignment properties.
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
if (TII->isValidAutoIncImm(StoredVT, Val)) {
SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
Chain};
@@ -1218,10 +1218,10 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
// as at least one of the operands.
if (IntrinsicWithPred) {
SmallVector<SDValue, 8> Ops;
- const HexagonInstrInfo *TII =
- static_cast<const HexagonInstrInfo*>(TM.getInstrInfo());
+ const HexagonInstrInfo *TII = static_cast<const HexagonInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
// Iterate over all the operands of the intrinsics.
// For PredRegs, do the transfer.
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index a460ea4..7646088 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -51,9 +51,9 @@ class HexagonCCState : public CCState {
public:
HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
- LLVMContext &C, int NumNamedVarArgParams)
- : CCState(CC, isVarArg, MF, TM, locs, C),
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ int NumNamedVarArgParams)
+ : CCState(CC, isVarArg, MF, locs, C),
NumNamedVarArgParams(NumNamedVarArgParams) {}
int getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
@@ -322,8 +322,8 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values of ISD::RET
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
@@ -372,8 +372,8 @@ HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
@@ -427,9 +427,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext(),
- NumNamedVarArgParams);
+ HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext(), NumNamedVarArgParams);
if (NumNamedVarArgParams > 0)
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
@@ -464,7 +463,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains;
const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getTarget().getRegisterInfo());
+ DAG.getSubtarget().getRegisterInfo());
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy());
@@ -723,7 +722,7 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
// Check it to be lr
const HexagonRegisterInfo *QRI =
static_cast<const HexagonRegisterInfo *>(
- DAG.getTarget().getRegisterInfo());
+ DAG.getSubtarget().getRegisterInfo());
if (Reg == QRI->getRARegister()) {
FuncInfo->setHasClobberLR(true);
break;
@@ -817,7 +816,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// The Sub result contains the new stack start address, so it
// must be placed in the stack pointer register.
const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
- DAG.getTarget().getRegisterInfo());
+ DAG.getSubtarget().getRegisterInfo());
SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub);
SDValue Ops[2] = { ArgAdjust, CopyChain };
@@ -843,8 +842,8 @@ const {
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
@@ -964,7 +963,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
@@ -990,8 +989,8 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- const HexagonRegisterInfo *TRI =
- static_cast<const HexagonRegisterInfo *>(DAG.getTarget().getRegisterInfo());
+ const HexagonRegisterInfo *TRI = static_cast<const HexagonRegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
@@ -1044,7 +1043,7 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
//===----------------------------------------------------------------------===//
HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
- : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ : TargetLowering(targetmachine),
TM(targetmachine) {
const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>();
@@ -1453,8 +1452,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
setMinFunctionAlignment(2);
// Needed for DYNAMIC_STACKALLOC expansion.
- const HexagonRegisterInfo *QRI =
- static_cast<const HexagonRegisterInfo *>(TM.getRegisterInfo());
+ const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
setStackPointerRegisterToSaveRestore(QRI->getStackRegister());
setSchedulingPreference(Sched::VLIW);
}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index ec16cc8..63e4392 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef Hexagon_ISELLOWERING_H
-#define Hexagon_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
#include "Hexagon.h"
#include "llvm/CodeGen/CallingConvLower.h"
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 1057343..cc27c4c 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -92,12 +92,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let AsmString = asmstr;
let Pattern = pattern;
let Constraints = cstr;
- let Itinerary = itin;
- let Size = 4;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ // Instruction type according to the ISA.
IType Type = type;
let TSFlags{4-0} = Type.Value;
@@ -186,6 +192,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
"");
let PNewValue = !if(isPredicatedNew, "new", "");
let NValueST = !if(isNVStore, "true", "false");
+ let isCodeGenOnly = 1;
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 1c95e06..1688c4a 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1295,16 +1295,24 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
switch (MI->getOpcode())
{
default: return false;
+ case Hexagon::A2_paddf:
+ case Hexagon::A2_paddfnew:
+ case Hexagon::A2_paddt:
+ case Hexagon::A2_paddtnew:
+ case Hexagon::A2_pandf:
+ case Hexagon::A2_pandfnew:
+ case Hexagon::A2_pandt:
+ case Hexagon::A2_pandtnew:
+ case Hexagon::A2_porf:
+ case Hexagon::A2_porfnew:
+ case Hexagon::A2_port:
+ case Hexagon::A2_portnew:
+ case Hexagon::A2_pxorf:
+ case Hexagon::A2_pxorfnew:
+ case Hexagon::A2_pxort:
+ case Hexagon::A2_pxortnew:
case Hexagon::ADD_ri_cPt:
case Hexagon::ADD_ri_cNotPt:
- case Hexagon::ADD_rr_cPt:
- case Hexagon::ADD_rr_cNotPt:
- case Hexagon::XOR_rr_cPt:
- case Hexagon::XOR_rr_cNotPt:
- case Hexagon::AND_rr_cPt:
- case Hexagon::AND_rr_cNotPt:
- case Hexagon::OR_rr_cPt:
- case Hexagon::OR_rr_cNotPt:
case Hexagon::SUB_rr_cPt:
case Hexagon::SUB_rr_cNotPt:
case Hexagon::COMBINE_rr_cPt:
@@ -1636,11 +1644,10 @@ void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
}
-DFAPacketizer *HexagonInstrInfo::
-CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- const InstrItineraryData *II = TM->getInstrItineraryData();
- return TM->getSubtarget<HexagonGenSubtargetInfo>().createDFAPacketizer(II);
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+ const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II);
}
bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
@@ -1765,7 +1772,7 @@ int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return -1 << (bits - 1);
+ return -1U << (bits - 1);
else
return 0;
}
@@ -1779,9 +1786,9 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return ~(-1 << (bits - 1));
+ return ~(-1U << (bits - 1));
else
- return ~(-1 << bits);
+ return ~(-1U << bits);
}
// Returns true if an instruction can be converted into a non-extended
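The getMinValue/getMaxValue change above is a correctness fix, not a cleanup: left-shifting the signed value -1 is undefined behavior in C++, so the mask is now built from -1U and shifted as an unsigned value. A small self-contained example of the pattern (standalone, not Hexagon code):

#include <cstdio>

// Build the extreme values of an immediate field of 'bits' bits by shifting an
// unsigned all-ones pattern; shifting a negative signed value left would be UB.
// The final unsigned-to-int conversion mirrors what the patched code returns.
static int minSigned(unsigned bits) { return -1U << (bits - 1); }    // bits=8 -> -128
static int maxSigned(unsigned bits) { return ~(-1U << (bits - 1)); } // bits=8 ->  127
static int maxUnsigned(unsigned bits) { return ~(-1U << bits); }     // bits=8 ->  255

int main() {
  std::printf("%d %d %d\n", minSigned(8), maxSigned(8), maxUnsigned(8));
  return 0;
}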
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 6b032c9..6acfbec 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonINSTRUCTIONINFO_H
-#define HexagonINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
@@ -148,9 +148,8 @@ public:
bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
const BranchProbability &Probability) const override;
- DFAPacketizer*
- CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const override;
+ DFAPacketizer *
+ CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override;
bool isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 4dcf101..4090681 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -92,6 +92,92 @@ def HexagonWrapperCombineII :
def HexagonWrapperCombineRR :
SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
+class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
+ bit IsComm>
+ : ALU32_rr<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = "#mnemonic#"($Rs, $Rt)",
+ [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredRel {
+ let isCommutable = IsComm;
+ let BaseOpcode = mnemonic#_rr;
+ let CextOpcode = mnemonic;
+
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+
+ let IClass = 0b1111;
+ let Inst{27} = 0b0;
+ let Inst{26-24} = MajOp;
+ let Inst{23-21} = MinOp;
+ let Inst{20-16} = !if(OpsRev,Rt,Rs);
+ let Inst{12-8} = !if(OpsRev,Rs,Rt);
+ let Inst{4-0} = Rd;
+}
+
+let hasSideEffects = 0, hasNewValue = 1 in
+class T_ALU32_3op_pred<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev, bit PredNot, bit PredNew>
+ : ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
+ "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") "#
+ "$Rd = "#mnemonic#"($Rs, $Rt)",
+ [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
+ let isPredicated = 1;
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = PredNew;
+ let BaseOpcode = mnemonic#_rr;
+ let CextOpcode = mnemonic;
+
+ bits<2> Pu;
+ bits<5> Rs;
+ bits<5> Rt;
+ bits<5> Rd;
+
+ let IClass = 0b1111;
+ let Inst{27} = 0b1;
+ let Inst{26-24} = MajOp;
+ let Inst{23-21} = MinOp;
+ let Inst{20-16} = !if(OpsRev,Rt,Rs);
+ let Inst{13} = PredNew;
+ let Inst{12-8} = !if(OpsRev,Rs,Rt);
+ let Inst{7} = PredNot;
+ let Inst{6-5} = Pu;
+ let Inst{4-0} = Rd;
+}
+
+multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev> {
+ def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
+ def f : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 0>;
+ def tnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 1>;
+ def fnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 1>;
+}
+
+multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
+ bit OpsRev, bit IsComm> {
+ let isPredicable = 1 in
+ def A2_#NAME : T_ALU32_3op <mnemonic, MajOp, MinOp, OpsRev, IsComm>;
+ defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
+}
+
+let isCodeGenOnly = 0 in
+defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
+defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
+defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>;
+defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
+defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
+
+// Pats for instruction selection.
+class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
+ : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: BinOp32_pat<add, A2_add, i32>;
+def: BinOp32_pat<and, A2_and, i32>;
+def: BinOp32_pat<or, A2_or, i32>;
+def: BinOp32_pat<sub, A2_sub, i32>;
+def: BinOp32_pat<xor, A2_xor, i32>;
+
multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
let isPredicatedNew = isPredNew in
@@ -127,13 +213,6 @@ multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
}
}
-let isCommutable = 1 in {
- defm ADD_rr : ALU32_base<"add", "ADD", add>, ImmRegRel, PredNewRel;
- defm AND_rr : ALU32_base<"and", "AND", and>, ImmRegRel, PredNewRel;
- defm XOR_rr : ALU32_base<"xor", "XOR", xor>, ImmRegRel, PredNewRel;
- defm OR_rr : ALU32_base<"or", "OR", or>, ImmRegRel, PredNewRel;
-}
-
defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
// Combines the two integer registers SRC1 and SRC2 into a double register.
@@ -225,7 +304,7 @@ def AND_ri : ALU32_ri<(outs IntRegs:$dst),
s10ExtPred:$src2))]>, ImmRegRel;
// Nop.
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, isCodeGenOnly = 0 in
def NOP : ALU32_rr<(outs), (ins),
"nop",
[]>;
@@ -753,7 +832,7 @@ def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
let InputType = "imm", isBarrier = 1, isPredicable = 1,
Defs = [PC], isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
-opExtentBits = 24 in
+opExtentBits = 24, isCodeGenOnly = 0 in
class T_JMP <dag InsDag, list<dag> JumpList = []>
: JInst<(outs), InsDag,
"jump $dst" , JumpList> {
@@ -2212,7 +2291,7 @@ def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
let AddedComplexity = 10 in
def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
- (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
+ (i32 (A2_and (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index db5b7ea..d39f7d7 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -2130,6 +2130,42 @@ let Predicates = [HasV4T, UseMEMOP] in {
// incorrect code for negative numbers.
// Pd=cmpb.eq(Rs,#u8)
+let isCompare = 1, isExtendable = 1, opExtendable = 2, hasSideEffects = 0,
+ validSubTargets = HasV4SubT in
+class CMP_NOT_REG_IMM<string OpName, bits<2> op, Operand ImmOp,
+ list<dag> Pattern>
+ : ALU32Inst <(outs PredRegs:$dst), (ins IntRegs:$src1, ImmOp:$src2),
+ "$dst = !cmp."#OpName#"($src1, #$src2)",
+ Pattern,
+ "", ALU32_2op_tc_2early_SLOT0123> {
+ bits<2> dst;
+ bits<5> src1;
+ bits<10> src2;
+
+ let IClass = 0b0111;
+ let Inst{27-24} = 0b0101;
+ let Inst{23-22} = op;
+ let Inst{20-16} = src1;
+ let Inst{21} = !if (!eq(OpName, "gtu"), 0b0, src2{9});
+ let Inst{13-5} = src2{8-0};
+ let Inst{4-2} = 0b100;
+ let Inst{1-0} = dst;
+}
+
+let opExtentBits = 10, isExtentSigned = 1 in {
+def C4_cmpneqi : CMP_NOT_REG_IMM <"eq", 0b00, s10Ext, [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), s10ExtPred:$src2))]>;
+
+def C4_cmpltei : CMP_NOT_REG_IMM <"gt", 0b01, s10Ext, [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), s10ExtPred:$src2)))]>;
+
+}
+let opExtentBits = 9 in
+def C4_cmplteui : CMP_NOT_REG_IMM <"gtu", 0b10, u9Ext, [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)))]>;
+
+
+
// p=!cmp.eq(r1,r2)
let isCompare = 1, validSubTargets = HasV4SubT in
def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
@@ -2139,15 +2175,6 @@ def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
(setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>,
Requires<[HasV4T]>;
-// p=!cmp.eq(r1,#s10)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s10Ext:$src2),
- "$dst = !cmp.eq($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
// p=!cmp.gt(r1,r2)
let isCompare = 1, validSubTargets = HasV4SubT in
def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
@@ -2157,14 +2184,6 @@ def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
(not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
Requires<[HasV4T]>;
-// p=!cmp.gt(r1,#s10)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s10Ext:$src2),
- "$dst = !cmp.gt($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>,
- Requires<[HasV4T]>;
// p=!cmp.gtu(r1,r2)
let isCompare = 1, validSubTargets = HasV4SubT in
@@ -2175,15 +2194,6 @@ def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst),
(not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
Requires<[HasV4T]>;
-// p=!cmp.gtu(r1,#u9)
-let isCompare = 1, validSubTargets = HasV4SubT in
-def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u9Ext:$src2),
- "$dst = !cmp.gtu($src1, #$src2)",
- [(set (i1 PredRegs:$dst),
- (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>,
- Requires<[HasV4T]>;
-
let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, u8Imm:$src2),
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index 99f59d5..b3385d8 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -1843,6 +1843,11 @@ class si_MInst_didi<string opc, Intrinsic IntID>
!strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
[(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class T_RI_pat <InstHexagon MI, Intrinsic IntID>
+ : Pat<(IntID (i32 IntRegs:$Rs), imm:$It),
+ (MI IntRegs:$Rs, imm:$It)>;
+
//
// LDInst classes.
//
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index dd28ebb..77b148b 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -217,12 +217,13 @@ def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
// ALU32 / PRED / Conditional Sign Extend.
// ALU32 / PRED / Conditional Zero Extend.
// ALU32 / PRED / Compare.
-def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>;
-def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>;
-def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
+def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
-def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>;
+
+def: T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi>;
+def: T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei>;
+def: T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui>;
// ALU32 / PRED / compare To General Register.
def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index d799bdb..cb18df6 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonMACHINEFUNCTIONINFO_H
-#define HexagonMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 6fcaa20..97c626f 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -145,7 +145,7 @@ void VLIWMachineScheduler::schedule() {
<< "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
<< " " << BB->getName()
<< " in_func " << BB->getParent()->getFunction()->getName()
- << " at loop depth " << MLI.getLoopDepth(BB)
+ << " at loop depth " << MLI->getLoopDepth(BB)
<< " \n");
buildDAGWithRegPressure();
@@ -208,8 +208,12 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
const TargetMachine &TM = DAG->MF.getTarget();
delete Top.HazardRec;
delete Bot.HazardRec;
- Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
- Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Top.HazardRec =
+ TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ Bot.HazardRec =
+ TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
delete Top.ResourceModel;
delete Bot.ResourceModel;
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 8c41086..1e023c3 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONASMPRINTER_H
-#define HEXAGONASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -56,7 +56,9 @@ class VLIWResourceModel {
public:
VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
SchedModel(SM), TotalPackets(0) {
- ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM, nullptr);
+ ResourcesModel =
+ TM.getSubtargetImpl()->getInstrInfo()->CreateTargetScheduleState(
+ *TM.getSubtargetImpl());
// This hard requirement could be relaxed,
// but for now do not let it proceed.
@@ -99,7 +101,7 @@ public:
/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
/// time to do some work.
- virtual void schedule() override;
+ void schedule() override;
/// Perform platform-specific DAG postprocessing.
void postprocessDAG();
};
@@ -207,15 +209,15 @@ public:
: DAG(nullptr), SchedModel(nullptr), Top(TopQID, "TopQ"),
Bot(BotQID, "BotQ") {}
- virtual void initialize(ScheduleDAGMI *dag) override;
+ void initialize(ScheduleDAGMI *dag) override;
- virtual SUnit *pickNode(bool &IsTopNode) override;
+ SUnit *pickNode(bool &IsTopNode) override;
- virtual void schedNode(SUnit *SU, bool IsTopNode) override;
+ void schedNode(SUnit *SU, bool IsTopNode) override;
- virtual void releaseTopNode(SUnit *SU) override;
+ void releaseTopNode(SUnit *SU) override;
- virtual void releaseBottomNode(SUnit *SU) override;
+ void releaseBottomNode(SUnit *SU) override;
unsigned ReportPackets() {
return Top.ResourceModel->getTotalPackets() +
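The scheduler header also drops the redundant virtual keyword on functions already marked override, matching prevailing LLVM style (override alone documents that a base-class virtual is being overridden). A tiny standalone illustration:

struct Strategy {
  virtual ~Strategy() = default;
  virtual void schedule() = 0;
};

struct VLIWStrategy : Strategy {
  // 'override' already implies the function is virtual; repeating 'virtual'
  // adds nothing, so the patch keeps only 'override'.
  void schedule() override {}
};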
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index b7c03a7..782c979 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -362,9 +362,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
LiveVariables &LVs = getAnalysis<LiveVariables>();
#endif
- QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
- QRI =
- static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ QRI = static_cast<const HexagonRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
if (!QRI->Subtarget.hasV4TOps() ||
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 48b6159..8912152 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -111,10 +111,8 @@ INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
false, false)
bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
- QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().
- getInstrInfo());
- QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().
- getRegisterInfo());
+ QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ QRI = MF.getTarget().getSubtarget<HexagonSubtarget>().getRegisterInfo();
MRI = &MF.getRegInfo();
DenseMap<unsigned, unsigned> PeepholeMap;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index fb466d3..2b6741c 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -128,12 +128,12 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Addressable stack objects are accessed using neg. offsets from %fp.
MachineFunction &MF = *MI.getParent()->getParent();
const HexagonInstrInfo &TII =
- *static_cast<const HexagonInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned FrameReg = getFrameRegister(MF);
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (!TFI->hasFP(MF)) {
// We will not reserve space on the stack for the lr and fp registers.
Offset -= 2 * Hexagon_WordSize;
@@ -176,7 +176,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
+ TII.get(Hexagon::A2_add),
dstReg).addReg(FrameReg).addReg(dstReg);
} else {
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
@@ -205,7 +205,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
+ TII.get(Hexagon::A2_add),
resReg).addReg(FrameReg).addReg(resReg);
} else {
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
@@ -237,7 +237,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg).
+ TII.get(Hexagon::A2_add), ResReg).addReg(FrameReg).
addReg(ResReg);
MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
true);
@@ -256,7 +256,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
+ TII.get(Hexagon::A2_add),
dstReg).addReg(FrameReg).addReg(dstReg);
// Can we delete MI??? r2 = add (r2, #0).
MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
@@ -278,7 +278,7 @@ unsigned HexagonRegisterInfo::getRARegister() const {
unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
&MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
return Hexagon::R30;
}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 648b4af..a83b502 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonREGISTERINFO_H
-#define HexagonREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONREGISTERINFO_H
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index 8ea1b7e..9750984 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -13,46 +13,48 @@
let Namespace = "Hexagon" in {
- class HexagonReg<string n> : Register<n> {
+ class HexagonReg<bits<5> num, string n> : Register<n> {
field bits<5> Num;
+ let HWEncoding{4-0} = num;
}
- class HexagonDoubleReg<string n, list<Register> subregs> :
+ class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs> :
RegisterWithSubRegs<n, subregs> {
field bits<5> Num;
+ let HWEncoding{4-0} = num;
}
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers.
- class Ri<bits<5> num, string n> : HexagonReg<n> {
+ class Ri<bits<5> num, string n> : HexagonReg<num, n> {
let Num = num;
}
// Rf - 32-bit floating-point registers.
- class Rf<bits<5> num, string n> : HexagonReg<n> {
+ class Rf<bits<5> num, string n> : HexagonReg<num, n> {
let Num = num;
}
// Rd - 64-bit registers.
class Rd<bits<5> num, string n, list<Register> subregs> :
- HexagonDoubleReg<n, subregs> {
+ HexagonDoubleReg<num, n, subregs> {
let Num = num;
let SubRegs = subregs;
}
// Rp - predicate registers
- class Rp<bits<5> num, string n> : HexagonReg<n> {
+ class Rp<bits<5> num, string n> : HexagonReg<num, n> {
let Num = num;
}
// Rc - control registers
- class Rc<bits<5> num, string n> : HexagonReg<n> {
+ class Rc<bits<5> num, string n> : HexagonReg<num, n> {
let Num = num;
}
// Rj - aliased integer registers
- class Rj<string n, Ri R>: HexagonReg<n> {
+ class Rj<string n, Ri R>: HexagonReg<R.Num, n> {
let Num = R.Num;
let Aliases = [R];
}
@@ -61,38 +63,9 @@ let Namespace = "Hexagon" in {
def subreg_hireg : SubRegIndex<32, 32>;
// Integer registers.
- def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
- def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
- def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
- def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
- def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
- def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
- def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
- def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
- def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
- def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
- def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
- def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
- def R12 : Ri<12, "r12">, DwarfRegNum<[12]>;
- def R13 : Ri<13, "r13">, DwarfRegNum<[13]>;
- def R14 : Ri<14, "r14">, DwarfRegNum<[14]>;
- def R15 : Ri<15, "r15">, DwarfRegNum<[15]>;
- def R16 : Ri<16, "r16">, DwarfRegNum<[16]>;
- def R17 : Ri<17, "r17">, DwarfRegNum<[17]>;
- def R18 : Ri<18, "r18">, DwarfRegNum<[18]>;
- def R19 : Ri<19, "r19">, DwarfRegNum<[19]>;
- def R20 : Ri<20, "r20">, DwarfRegNum<[20]>;
- def R21 : Ri<21, "r21">, DwarfRegNum<[21]>;
- def R22 : Ri<22, "r22">, DwarfRegNum<[22]>;
- def R23 : Ri<23, "r23">, DwarfRegNum<[23]>;
- def R24 : Ri<24, "r24">, DwarfRegNum<[24]>;
- def R25 : Ri<25, "r25">, DwarfRegNum<[25]>;
- def R26 : Ri<26, "r26">, DwarfRegNum<[26]>;
- def R27 : Ri<27, "r27">, DwarfRegNum<[27]>;
- def R28 : Ri<28, "r28">, DwarfRegNum<[28]>;
- def R29 : Ri<29, "r29">, DwarfRegNum<[29]>;
- def R30 : Ri<30, "r30">, DwarfRegNum<[30]>;
- def R31 : Ri<31, "r31">, DwarfRegNum<[31]>;
+ foreach I = 0-31 in {
+ def R#I : Ri<I, "r"#I>, DwarfRegNum<[I]>;
+ }
def SP : Rj<"sp", R29>, DwarfRegNum<[29]>;
def FP : Rj<"fp", R30>, DwarfRegNum<[30]>;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index b40b303..8ac2e43 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonSELECTIONDAGINFO_H
-#define HexagonSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 247207f..8fdd493 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -68,12 +68,13 @@ char HexagonSplitConst32AndConst64::ID = 0;
bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
const HexagonTargetObjectFile &TLOF =
- (const HexagonTargetObjectFile &)
- QTM.getTargetLowering()->getObjFileLowering();
+ (const HexagonTargetObjectFile &)QTM.getSubtargetImpl()
+ ->getTargetLowering()
+ ->getObjFileLowering();
if (TLOF.IsSmallDataEnabled())
return true;
- const TargetInstrInfo *TII = QTM.getInstrInfo();
+ const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
// Loop over all of the basic blocks
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -138,10 +139,10 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
else if (Opc == Hexagon::CONST64_Int_Real) {
int DestReg = MI->getOperand(0).getReg();
int64_t ImmValue = MI->getOperand(1).getImm ();
- unsigned DestLo =
- QTM.getRegisterInfo()->getSubReg (DestReg, Hexagon::subreg_loreg);
- unsigned DestHi =
- QTM.getRegisterInfo()->getSubReg (DestReg, Hexagon::subreg_hireg);
+ unsigned DestLo = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
+ DestReg, Hexagon::subreg_loreg);
+ unsigned DestHi = QTM.getSubtargetImpl()->getRegisterInfo()->getSubReg(
+ DestReg, Hexagon::subreg_hireg);
int32_t LowWord = (ImmValue & 0xFFFFFFFF);
int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index 9601090..1052b80 100644
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -80,7 +80,7 @@ char HexagonSplitTFRCondSets::ID = 0;
bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = QTM.getInstrInfo();
+ const TargetInstrInfo *TII = QTM.getSubtargetImpl()->getInstrInfo();
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index b184e62..10776ae 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef Hexagon_SUBTARGET_H
-#define Hexagon_SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H
#include "HexagonFrameLowering.h"
#include "HexagonInstrInfo.h"
@@ -56,19 +56,25 @@ public:
HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
const TargetMachine &TM);
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- const HexagonInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const HexagonRegisterInfo *getRegisterInfo() const {
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+ const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const HexagonRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const HexagonTargetLowering *getTargetLowering() const { return &TLInfo; }
- const HexagonFrameLowering *getFrameLowering() const {
+ const HexagonTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const HexagonFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- const HexagonSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
+ const HexagonSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
StringRef FS);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 7831410..cd18dfb 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -70,10 +70,13 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<HexagonTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
+HexagonTargetMachine::~HexagonTargetMachine() {}
+
namespace {
/// Hexagon Code Generator Pass Configuration Options.
class HexagonPassConfig : public TargetPassConfig {
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index d88178e..4a9f447 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonTARGETMACHINE_H
-#define HexagonTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETMACHINE_H
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
@@ -23,6 +23,7 @@ namespace llvm {
class Module;
class HexagonTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
HexagonSubtarget Subtarget;
public:
@@ -30,34 +31,18 @@ public:
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~HexagonTargetMachine() override;
- const HexagonInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
const HexagonSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const HexagonRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const InstrItineraryData* getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
- const HexagonTargetLowering* getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const HexagonFrameLowering* getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const HexagonSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
static unsigned getModuleMatchQuality(const Module &M);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
extern bool flag_aligned_memcpy;
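HexagonTargetMachine now owns its TargetLoweringObjectFile directly: a unique_ptr created in the constructor and handed out through getObjFileLowering(), instead of TargetLowering allocating one with new. A self-contained mock of that ownership pattern follows; the class names are stand-ins, not LLVM API.

#include <memory>

// Mock types standing in for TargetLoweringObjectFile / HexagonTargetObjectFile.
struct ObjFileLowering { virtual ~ObjFileLowering() = default; };
struct HexagonObjFileLowering : ObjFileLowering {};

class MockTargetMachine {
  // The machine owns the object-file lowering and destroys it exactly once.
  // The patch also adds an out-of-line ~HexagonTargetMachine(), likely so the
  // unique_ptr's destructor is instantiated where the pointee type is complete.
  std::unique_ptr<ObjFileLowering> TLOF;

public:
  MockTargetMachine() : TLOF(std::make_unique<HexagonObjFileLowering>()) {}
  ObjFileLowering *getObjFileLowering() const { return TLOF.get(); }
};

int main() {
  MockTargetMachine TM;
  return TM.getObjFileLowering() != nullptr ? 0 : 1;
}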
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index c97526e..f4ab5e2 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -31,7 +31,7 @@ static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
-
+ InitializeELF(TM.Options.UseInitArray);
SmallDataSection =
getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
@@ -79,7 +79,8 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty));
}
return false;
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index 1bd1272..c974204 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonTARGETOBJECTFILE_H
-#define HexagonTARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCSectionELF.h"
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 87ce960..e7296d6 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -118,7 +118,6 @@ namespace {
public:
// Ctor.
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT,
const MachineBranchProbabilityInfo *MBPI);
// initPacketizerState - initialize some internal flags.
@@ -146,23 +145,23 @@ namespace {
bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC);
- bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
+ bool CanPromoteToDotNew(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit,
MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
- bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
- MachineBasicBlock::iterator &MII);
- bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit);
- bool DemoteToDotOld(MachineInstr* MI);
- bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
- std::map <MachineInstr*, SUnit*> MIToSUnit);
- bool RestrictingDepExistInPacket(MachineInstr*,
- unsigned, std::map <MachineInstr*, SUnit*>);
+ const TargetRegisterClass *RC);
+ bool
+ CanPromoteToNewValue(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit,
+ MachineBasicBlock::iterator &MII);
+ bool CanPromoteToNewValueStore(
+ MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit);
+ bool DemoteToDotOld(MachineInstr *MI);
+ bool ArePredicatesComplements(
+ MachineInstr *MI1, MachineInstr *MI2,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit);
+ bool RestrictingDepExistInPacket(MachineInstr *, unsigned,
+ const std::map<MachineInstr *, SUnit *> &);
bool isNewifiable(MachineInstr* MI);
bool isCondInst(MachineInstr* MI);
bool tryAllocateResourcesForConstExt(MachineInstr* MI);
@@ -184,20 +183,19 @@ INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
// HexagonPacketizerList Ctor.
HexagonPacketizerList::HexagonPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT,
- const MachineBranchProbabilityInfo *MBPI)
- : VLIWPacketizerList(MF, MLI, MDT, true){
+ MachineFunction &MF, MachineLoopInfo &MLI,
+ const MachineBranchProbabilityInfo *MBPI)
+ : VLIWPacketizerList(MF, MLI, true) {
this->MBPI = MBPI;
}
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
const MachineBranchProbabilityInfo *MBPI =
&getAnalysis<MachineBranchProbabilityInfo>();
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(Fn, MLI, MDT, MBPI);
+ HexagonPacketizerList Packetizer(Fn, MLI, MBPI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -324,8 +322,8 @@ bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
unsigned DepReg) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const HexagonRegisterInfo* QRI =
- (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
// Check for lr dependence
if (DepReg == QRI->getRARegister()) {
@@ -536,9 +534,9 @@ static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
// if there is a new value store in the packet. Corollary, if there is
// already a store in a packet, there can not be a new value store.
// Arch Spec: 3.4.4.2
-bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
- MachineInstr *PacketMI, unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit) {
+bool HexagonPacketizerList::CanPromoteToNewValueStore(
+ MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Make sure we are looking at the store, that can be promoted.
if (!QII->mayBeNewStore(MI))
@@ -549,8 +547,8 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
GetStoreValueOperand(MI).getReg() != DepReg)
return false;
- const HexagonRegisterInfo* QRI =
- (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
const MCInstrDesc& MCID = PacketMI->getDesc();
// first operand is always the result
@@ -561,7 +559,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end();
(VI != VE); ++VI) {
- SUnit* PacketSU = MIToSUnit[*VI];
+ SUnit *PacketSU = MIToSUnit.find(*VI)->second;
if (PacketSU->getInstr()->getDesc().mayStore() ||
// if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
// then we don't need this
@@ -661,7 +659,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
(VI != VE); ++VI) {
- SUnit* TempSU = MIToSUnit[*VI];
+ SUnit *TempSU = MIToSUnit.find(*VI)->second;
MachineInstr* TempMI = TempSU->getInstr();
// Following condition is true for all the instructions until PacketMI is
@@ -717,15 +715,14 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
// Can this MI be promoted to either a
// new value store or a new value jump?
-bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
- SUnit *PacketSU, unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
- MachineBasicBlock::iterator &MII)
-{
+bool HexagonPacketizerList::CanPromoteToNewValue(
+ MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit,
+ MachineBasicBlock::iterator &MII) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const HexagonRegisterInfo* QRI =
- (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
if (!QRI->Subtarget.hasV4TOps() ||
!QII->mayBeNewStore(MI))
return false;
@@ -746,12 +743,10 @@ bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
// 1. dot new on predicate - V2/V3/V4
// 2. dot new on stores NV/ST - V4
// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
-bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
- SUnit *PacketSU, unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
- MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC )
-{
+bool HexagonPacketizerList::CanPromoteToDotNew(
+ MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit,
+ MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Already a dot new instruction.
if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI))
@@ -803,12 +798,12 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
// The P3 from a) and d) will be complements after
// a)'s P3 is converted to .new form
// Anti Dep between c) and b) is irrelevant for this case
-bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit) {
+bool HexagonPacketizerList::RestrictingDepExistInPacket(
+ MachineInstr *MI, unsigned DepReg,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- SUnit* PacketSUDep = MIToSUnit[MI];
+ SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
@@ -817,7 +812,7 @@ bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
if(!QII->isPredicated(*VIN)) continue;
// Scheduling Unit for current insn in the packet
- SUnit* PacketSU = MIToSUnit[*VIN];
+ SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
// Look at dependencies between current members of the packet
// and predicate defining instruction MI.
@@ -861,8 +856,9 @@ static unsigned getPredicatedRegister(MachineInstr *MI,
// Given two predicated instructions, this function detects whether
// the predicates are complements
-bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
- MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) {
+bool HexagonPacketizerList::ArePredicatesComplements(
+ MachineInstr *MI1, MachineInstr *MI2,
+ const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
@@ -873,7 +869,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
return false;
// Scheduling unit for candidate
- SUnit* SU = MIToSUnit[MI1];
+ SUnit *SU = MIToSUnit.find(MI1)->second;
// One corner case deals with the following scenario:
// Trying to add
@@ -898,7 +894,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
// Scheduling Unit for current insn in the packet
- SUnit* PacketSU = MIToSUnit[*VIN];
+ SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
// If this instruction in the packet is succeeded by the candidate...
if (PacketSU->isSucc(SU)) {
@@ -1007,8 +1003,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
MachineBasicBlock::iterator II = I;
const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
- const HexagonRegisterInfo* QRI =
- (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
// Inline asm cannot go in the packet.
@@ -1103,7 +1099,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end();
(VI != VE && maintainNewValueJump); ++VI) {
- SUnit* PacketSU = MIToSUnit[*VI];
+ SUnit *PacketSU = MIToSUnit.find(*VI)->second;
// NVJ can not be part of the dual jump - Arch Spec: section 7.8
if (PacketSU->getInstr()->getDesc().isCall()) {
@@ -1278,9 +1274,9 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
}
// For V4, special case ALLOCFRAME. Even though there is dependency
- // between ALLOCAFRAME and subsequent store, allow it to be
+ // between ALLOCFRAME and subsequent store, allow it to be
// packetized in a same packet. This implies that the store is using
- // caller's SP. Hense, offset needs to be updated accordingly.
+ // caller's SP. Hence, offset needs to be updated accordingly.
else if (DepType == SDep::Data
&& QRI->Subtarget.hasV4TOps()
&& J->getOpcode() == Hexagon::ALLOCFRAME
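Passing MIToSUnit by const reference is what forces the lookups in this file to switch from operator[] to find(): std::map::operator[] is non-const and default-inserts a value for a missing key, while find() on a const map only performs a lookup. A small standalone illustration of the difference:

#include <cassert>
#include <map>

// With a const map, operator[] is unavailable; find() is a pure lookup and
// never inserts, which is the behavior the packetizer relies on here.
static int lookup(const std::map<int, int> &M, int Key) {
  auto It = M.find(Key);
  assert(It != M.end() && "caller guarantees the key is present");
  return It->second;
}

int main() {
  std::map<int, int> M;
  M[1] = 10;          // non-const operator[] may insert; here it creates key 1
  (void)lookup(M, 1);
  return 0;
}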
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
index 668ca98..edbe29a 100644
--- a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
+++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -74,10 +74,14 @@ static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
}
const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
- unsigned Alignment =
- State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy);
+ unsigned Alignment = State.getTarget()
+ .getSubtargetImpl()
+ ->getDataLayout()
+ ->getABITypeAlignment(ArgTy);
unsigned Size =
- State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8;
+ State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
+ ArgTy) /
+ 8;
// If it's passed by value, then we need the size of the aggregate not of
// the pointer.
@@ -129,10 +133,14 @@ static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
}
const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
- unsigned Alignment =
- State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy);
+ unsigned Alignment = State.getTarget()
+ .getSubtargetImpl()
+ ->getDataLayout()
+ ->getABITypeAlignment(ArgTy);
unsigned Size =
- State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8;
+ State.getTarget().getSubtargetImpl()->getDataLayout()->getTypeSizeInBits(
+ ArgTy) /
+ 8;
unsigned Offset3 = State.AllocateStack(Size, Alignment);
State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
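
The argument-lowering change above keeps the same stack math, just reached through the subtarget: bytes are the bit size divided by eight, and the slot is carved out at the type's ABI alignment. A self-contained sketch with assumed numbers (a 64-bit value with 8-byte ABI alignment; these values are illustrative, not read from the Hexagon ABI):

#include <cassert>

// Round Offset up to the next multiple of Align (Align is a power of two).
static unsigned alignTo(unsigned Offset, unsigned Align) {
  return (Offset + Align - 1) & ~(Align - 1);
}

int main() {
  unsigned SizeInBits = 64; // assumed getTypeSizeInBits(ArgTy)
  unsigned Align = 8;       // assumed getABITypeAlignment(ArgTy)
  unsigned Size = SizeInBits / 8;      // bytes reserved, as in the code above
  unsigned Offset = alignTo(6, Align); // 6 bytes already used -> slot at 8
  assert(Size == 8 && Offset == 8);
  return 0;
}
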
diff --git a/lib/Target/Hexagon/InstPrinter/CMakeLists.txt b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt
deleted file mode 100644
index 1ddaf9b..0000000
--- a/lib/Target/Hexagon/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_library(LLVMHexagonAsmPrinter
- HexagonInstPrinter.cpp
- )
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
index 0cf9a06..6ffd26a 100644
--- a/lib/Target/Hexagon/LLVMBuild.txt
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = InstPrinter MCTargetDesc TargetInfo
+subdirectories = Disassembler MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
@@ -28,5 +28,5 @@ has_asmprinter = 1
type = Library
name = HexagonCodeGen
parent = Hexagon
-required_libraries = Analysis AsmPrinter CodeGen Core HexagonAsmPrinter HexagonDesc HexagonInfo MC Scalar SelectionDAG Support Target TransformUtils
+required_libraries = Analysis AsmPrinter CodeGen Core HexagonDesc HexagonInfo MC SelectionDAG Support Target
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
index eeef3ef..2a6124e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -1,5 +1,11 @@
add_llvm_library(LLVMHexagonDesc
+ HexagonAsmBackend.cpp
+ HexagonELFObjectWriter.cpp
+ HexagonInstPrinter.cpp
HexagonMCAsmInfo.cpp
+ HexagonMCCodeEmitter.cpp
HexagonMCInst.cpp
HexagonMCTargetDesc.cpp
)
+
+add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
new file mode 100644
index 0000000..bdccf88
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -0,0 +1,74 @@
+//===-- HexagonAsmBackend.cpp - Hexagon Assembler Backend -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonAsmBackend : public MCAsmBackend {
+public:
+ HexagonAsmBackend(Target const & /*T*/) {}
+
+ unsigned getNumFixupKinds() const override { return 0; }
+
+ void applyFixup(MCFixup const & /*Fixup*/, char * /*Data*/,
+ unsigned /*DataSize*/, uint64_t /*Value*/,
+ bool /*IsPCRel*/) const override {
+ return;
+ }
+
+ bool mayNeedRelaxation(MCInst const & /*Inst*/) const override {
+ return false;
+ }
+
+ bool fixupNeedsRelaxation(MCFixup const & /*Fixup*/, uint64_t /*Value*/,
+ MCRelaxableFragment const * /*DF*/,
+ MCAsmLayout const & /*Layout*/) const override {
+ llvm_unreachable("fixupNeedsRelaxation() unimplemented");
+ }
+
+ void relaxInstruction(MCInst const & /*Inst*/,
+ MCInst & /*Res*/) const override {
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+ bool writeNopData(uint64_t /*Count*/,
+ MCObjectWriter * /*OW*/) const override {
+ return true;
+ }
+};
+} // end anonymous namespace
+
+namespace {
+class ELFHexagonAsmBackend : public HexagonAsmBackend {
+ uint8_t OSABI;
+
+public:
+ ELFHexagonAsmBackend(Target const &T, uint8_t OSABI)
+ : HexagonAsmBackend(T), OSABI(OSABI) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ StringRef CPU("HexagonV4");
+ return createHexagonELFObjectWriter(OS, OSABI, CPU);
+ }
+};
+} // end anonymous namespace
+
+namespace llvm {
+MCAsmBackend *createHexagonAsmBackend(Target const &T,
+ MCRegisterInfo const & /*MRI*/,
+ StringRef TT, StringRef /*CPU*/) {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFHexagonAsmBackend(T, OSABI);
+}
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index f8be77c..c0a3fae 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -14,12 +14,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONBASEINFO_H
-#define HEXAGONBASEINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
#include "HexagonMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
+#include <stdint.h>
+
namespace llvm {
/// HexagonII - This namespace holds all of the target specific flags that
@@ -189,6 +191,15 @@ namespace HexagonII {
MO_GPREL
};
+ enum class InstParseBits : uint32_t {
+ INST_PARSE_MASK = 0x0000c000,
+ INST_PARSE_PACKET_END = 0x0000c000,
+ INST_PARSE_LOOP_END = 0x00008000,
+ INST_PARSE_NOT_END = 0x00004000,
+ INST_PARSE_DUPLEX = 0x00000000,
+ INST_PARSE_EXTENDER = 0x00000000
+ };
+
} // End namespace HexagonII.
} // End namespace llvm.
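
All of the new InstParseBits values live in bits 15:14 of a 32-bit instruction word (INST_PARSE_MASK is 0x0000c000). A small sketch of testing that field with the same constants; the helper name is made up for illustration:

#include <cassert>
#include <cstdint>

enum class InstParseBits : uint32_t {
  INST_PARSE_MASK       = 0x0000c000,
  INST_PARSE_PACKET_END = 0x0000c000,
  INST_PARSE_LOOP_END   = 0x00008000,
  INST_PARSE_NOT_END    = 0x00004000,
};

static bool isPacketEnd(uint32_t Insn) {
  uint32_t Field = Insn & static_cast<uint32_t>(InstParseBits::INST_PARSE_MASK);
  return Field == static_cast<uint32_t>(InstParseBits::INST_PARSE_PACKET_END);
}

int main() {
  assert(isPacketEnd(0x0000c000u));  // 0b11 in bits 15:14 ends the packet
  assert(!isPacketEnd(0x00004000u)); // 0b01 marks "not end"
  return 0;
}
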
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
new file mode 100644
index 0000000..56c9dc7
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -0,0 +1,62 @@
+//===-- HexagonELFObjectWriter.cpp - Hexagon Target Descriptions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "hexagon-elf-writer"
+
+using namespace llvm;
+using namespace Hexagon;
+
+namespace {
+
+class HexagonELFObjectWriter : public MCELFObjectTargetWriter {
+private:
+ StringRef CPU;
+
+public:
+ HexagonELFObjectWriter(uint8_t OSABI, StringRef C);
+
+ virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
+ bool IsPCRel) const override;
+};
+}
+
+HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C)
+ : MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON,
+ /*HasRelocationAddend*/ true),
+ CPU(C) {}
+
+unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/,
+ MCFixup const &Fixup,
+ bool IsPCRel) const {
+ unsigned Type = (unsigned)ELF::R_HEX_NONE;
+ llvm::MCFixupKind Kind = Fixup.getKind();
+
+ switch (Kind) {
+ default:
+ DEBUG(dbgs() << "unrecognized relocation " << Fixup.getKind() << "\n");
+ llvm_unreachable("Unimplemented Fixup kind!");
+ break;
+ case FK_Data_4:
+ Type = (IsPCRel) ? ELF::R_HEX_32_PCREL : ELF::R_HEX_32;
+ break;
+ }
+ return Type;
+}
+
+MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI,
+ StringRef CPU) {
+ MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true);
+}
\ No newline at end of file
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 9942a60..1fd8d70 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -29,6 +29,45 @@ using namespace llvm;
#include "HexagonGenAsmWriter.inc"
const char HexagonInstPrinter::PacketPadding = '\t';
+// Return the minimum value that a constant extendable operand can have
+// without being extended.
+static int getMinValue(uint64_t TSFlags) {
+ unsigned isSigned =
+ (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits =
+ (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned)
+ return -1U << (bits - 1);
+
+ return 0;
+}
+
+// Return the maximum value that a constant extendable operand can have
+// without being extended.
+static int getMaxValue(uint64_t TSFlags) {
+ unsigned isSigned =
+ (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+ unsigned bits =
+ (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+
+ if (isSigned)
+ return ~(-1U << (bits - 1));
+
+ return ~(-1U << bits);
+}
+
+// Return true if the instruction must be extended.
+static bool isExtended(uint64_t TSFlags) {
+ return (TSFlags >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Currently just used in an assert statement
+static bool isExtendable(uint64_t TSFlags) LLVM_ATTRIBUTE_UNUSED;
+// Return true if the instruction may be extended based on the operand value.
+static bool isExtendable(uint64_t TSFlags) {
+ return (TSFlags >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
@@ -116,9 +155,20 @@ void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
- const HexagonMCInst *HMCI = static_cast<const HexagonMCInst*>(MI);
- if (HMCI->isConstExtended())
+ const MCOperand &MO = MI->getOperand(OpNo);
+ const MCInstrDesc &MII = getMII().get(MI->getOpcode());
+
+ assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
+ "Expecting an extendable operand");
+
+ if (MO.isExpr() || isExtended(MII.TSFlags)) {
O << "#";
+ } else if (MO.isImm()) {
+ int ImmValue = MO.getImm();
+ if (ImmValue < getMinValue(MII.TSFlags) ||
+ ImmValue > getMaxValue(MII.TSFlags))
+ O << "#";
+ }
printOperand(MI, OpNo, O);
}
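
getMinValue and getMaxValue above derive the in-range window of a constant-extendable operand from the extent's signedness and width, and printExtOperand emits an extra '#' when the immediate falls outside it. A worked sketch of the same arithmetic for an assumed signed 8-bit and unsigned 6-bit extent (the widths are illustrative, not read from the .td files); the unsigned -1U shift mirrors the source and avoids shifting a negative signed value:

#include <cassert>

// Signed N-bit extent: [-2^(N-1), 2^(N-1)-1]. Unsigned: [0, 2^N-1].
static int minValue(bool IsSigned, unsigned Bits) {
  return IsSigned ? -1U << (Bits - 1) : 0;
}
static int maxValue(bool IsSigned, unsigned Bits) {
  return IsSigned ? ~(-1U << (Bits - 1)) : ~(-1U << Bits);
}

int main() {
  assert(minValue(true, 8) == -128 && maxValue(true, 8) == 127);
  assert(minValue(false, 6) == 0 && maxValue(false, 6) == 63);
  int Imm = 200;                    // outside the signed 8-bit window,
  assert(Imm > maxValue(true, 8));  // so the printer adds the '#' marker
  return 0;
}
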
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 09e3f88..55ae95c 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONINSTPRINTER_H
-#define HEXAGONINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H
+#define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 141e514..ad5e0fb 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -24,7 +24,6 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) {
Data64bitsDirective = nullptr; // .xword is only supported by V9.
ZeroDirective = "\t.skip\t";
CommentString = "//";
- HasLEB128 = true;
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
InlineAsmStart = "# InlineAsm Start";
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index 953d804..ab18f0b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HexagonMCASMINFO_H
-#define HexagonMCASMINFO_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCASMINFO_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
new file mode 100644
index 0000000..4471977
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -0,0 +1,88 @@
+//===-- HexagonMCCodeEmitter.cpp - Hexagon Target Descriptions ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCCodeEmitter.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "mccodeemitter"
+
+using namespace llvm;
+using namespace Hexagon;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+/// \brief 10.6 Instruction Packets
+/// Possible values for instruction packet parse field.
+enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 };
+/// \brief Returns the packet bits based on instruction position.
+uint32_t getPacketBits(HexagonMCInst const &HMI) {
+ unsigned const ParseFieldOffset = 14;
+ ParseField Field = HMI.isPacketEnd() ? ParseField::end : ParseField::last0;
+ return static_cast <uint32_t> (Field) << ParseFieldOffset;
+}
+void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
+ OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff);
+ OS << static_cast<uint8_t>((Binary >> 0x08) & 0xff);
+ OS << static_cast<uint8_t>((Binary >> 0x10) & 0xff);
+ OS << static_cast<uint8_t>((Binary >> 0x18) & 0xff);
+}
+}
+
+HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
+ MCSubtargetInfo const &aMST,
+ MCContext &aMCT)
+ : MST(aMST), MCT(aMCT) {}
+
+void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const {
+ HexagonMCInst const &HMB = static_cast<HexagonMCInst const &>(MI);
+ uint64_t Binary = getBinaryCodeForInstr(HMB, Fixups, STI) | getPacketBits(HMB);
+ assert(HMB.getDesc().getSize() == 4 && "All instructions should be 32bit");
+ emitLittleEndian(Binary, OS);
+ ++MCNumEmitted;
+}
+
+unsigned
+HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const {
+ if (MO.isReg())
+ return MCT.getRegisterInfo()->getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ llvm_unreachable("Only Immediates and Registers implemented right now");
+}
+
+MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const {
+ return MST;
+}
+
+MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI,
+ MCSubtargetInfo const &MST,
+ MCContext &MCT) {
+ return new HexagonMCCodeEmitter(MII, MST, MCT);
+}
+
+#include "HexagonGenMCCodeEmitter.inc"
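
getPacketBits and emitLittleEndian above reduce to OR-ing the parse field into bits 15:14 of the encoded word and streaming it byte by byte, lowest byte first. A standalone sketch with an assumed raw encoding of 0x00001234 (the value is made up for illustration):

#include <cassert>
#include <cstdint>
#include <ostream>
#include <sstream>
#include <string>

enum class ParseField { duplex = 0x0, last0 = 0x1, last1 = 0x2, end = 0x3 };

static uint32_t packetBits(bool IsPacketEnd) {
  const unsigned ParseFieldOffset = 14;
  ParseField F = IsPacketEnd ? ParseField::end : ParseField::last0;
  return static_cast<uint32_t>(F) << ParseFieldOffset;
}

static void emitLittleEndian(uint64_t Binary, std::ostream &OS) {
  OS << static_cast<uint8_t>((Binary >> 0x00) & 0xff)
     << static_cast<uint8_t>((Binary >> 0x08) & 0xff)
     << static_cast<uint8_t>((Binary >> 0x10) & 0xff)
     << static_cast<uint8_t>((Binary >> 0x18) & 0xff);
}

int main() {
  // A packet-ending instruction gets 0xc000 folded into the parse field.
  uint32_t Word = 0x00001234u | packetBits(true);
  assert(Word == 0x0000d234u);
  std::ostringstream OS;
  emitLittleEndian(Word, OS);
  assert(OS.str() == std::string("\x34\xd2\x00\x00", 4));
  return 0;
}
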
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
new file mode 100644
index 0000000..96048ad
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -0,0 +1,60 @@
+//===-- HexagonMCCodeEmitter.h - Hexagon Target Descriptions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Definition for classes that emit Hexagon machine code from MCInsts
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCCODEEMITTER_H
+#define HEXAGONMCCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class HexagonMCCodeEmitter : public MCCodeEmitter {
+ MCSubtargetInfo const &MST;
+ MCContext &MCT;
+
+public:
+ HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST,
+ MCContext &aMCT);
+
+ MCSubtargetInfo const &getSubtargetInfo() const;
+
+ void EncodeInstruction(MCInst const &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const override;
+
+ // \brief TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(MCInst const &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const;
+
+ /// \brief Return binary encoding of operand.
+ unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ MCSubtargetInfo const &STI) const;
+
+private:
+ HexagonMCCodeEmitter(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION;
+ void operator=(HexagonMCCodeEmitter const &) LLVM_DELETED_FUNCTION;
+}; // class HexagonMCCodeEmitter
+
+} // namespace llvm
+
+#endif /* HEXAGONMCCODEEMITTER_H */
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
index 9260b4a..c842b9b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
@@ -20,8 +20,9 @@ using namespace llvm;
// Return the slots used by the insn.
unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const {
- const HexagonInstrInfo* QII = TM->getInstrInfo();
- const InstrItineraryData* II = TM->getInstrItineraryData();
+ const HexagonInstrInfo *QII = TM->getSubtargetImpl()->getInstrInfo();
+ const InstrItineraryData *II =
+ TM->getSubtargetImpl()->getInstrItineraryData();
const InstrStage*
IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass());
@@ -154,7 +155,7 @@ int HexagonMCInst::getMinValue(void) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return -1 << (bits - 1);
+ return -1U << (bits - 1);
else
return 0;
}
@@ -169,7 +170,7 @@ int HexagonMCInst::getMaxValue(void) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return ~(-1 << (bits - 1));
+ return ~(-1U << (bits - 1));
else
- return ~(-1 << bits);
+ return ~(-1U << bits);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
index 3c52d45..90fbbf3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONMCINST_H
-#define HEXAGONMCINST_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINST_H
#include "HexagonTargetMachine.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 581674d..14ddd9d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -13,8 +13,9 @@
#include "HexagonMCTargetDesc.h"
#include "HexagonMCAsmInfo.h"
-#include "InstPrinter/HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -46,9 +47,17 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
- StringRef CPU,
- StringRef FS) {
+static MCStreamer *
+createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE);
+ return ES;
+}
+
+
+static MCSubtargetInfo *
+createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
return X;
@@ -59,22 +68,40 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
// VirtualFP = (R30 + #0).
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(
- nullptr, Hexagon::R30, 0);
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(nullptr, Hexagon::R30, 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
+static MCStreamer *createMCStreamer(Target const &T, StringRef TT,
+ MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ MCSubtargetInfo const &STI, bool RelaxAll) {
+ MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
+ new MCTargetStreamer(*ES);
+ return ES;
+}
+
+
static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// For the time being, use static relocations, since there's really no
// support for PIC yet.
X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
+static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new HexagonInstPrinter(MAI, MII, MRI);
+}
// Force static initialization.
extern "C" void LLVMInitializeHexagonTargetMC() {
@@ -86,7 +113,8 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
createHexagonMCCodeGenInfo);
// Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, createHexagonMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
+ createHexagonMCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
@@ -95,4 +123,19 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
createHexagonMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget,
+ createHexagonMCCodeEmitter);
+
+ // Register the MC Inst Printer
+ TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
+ createHexagonMCInstPrinter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
+ createHexagonAsmBackend);
+
+ // Register the obj streamer
+ TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer);
}
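
LLVMInitializeHexagonTargetMC above registers one factory per MC component so that tools can construct them later by querying the target. A simplified, self-contained model of that register-then-lookup pattern; this illustrates the idea only and is not LLVM's actual TargetRegistry API:

#include <cassert>

// Stand-ins for the MC components being registered.
struct MCCodeEmitter {};
struct MCAsmBackend {};

// A stripped-down "Target" holding one factory pointer per component,
// roughly what RegisterMCCodeEmitter / RegisterMCAsmBackend record.
struct Target {
  MCCodeEmitter *(*CodeEmitterCtor)() = nullptr;
  MCAsmBackend *(*AsmBackendCtor)() = nullptr;
};

static MCCodeEmitter *makeCodeEmitter() { return new MCCodeEmitter(); }
static MCAsmBackend *makeAsmBackend() { return new MCAsmBackend(); }

int main() {
  Target TheHexagonTarget;
  // "Force static initialization": record the factories on the target.
  TheHexagonTarget.CodeEmitterCtor = makeCodeEmitter;
  TheHexagonTarget.AsmBackendCtor = makeAsmBackend;
  // Later a tool such as llvm-mc asks the target to build the components.
  MCCodeEmitter *CE = TheHexagonTarget.CodeEmitterCtor();
  MCAsmBackend *MAB = TheHexagonTarget.AsmBackendCtor();
  assert(CE && MAB);
  delete CE;
  delete MAB;
  return 0;
}
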
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 2238b1a..02fd516 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -11,15 +11,37 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONMCTARGETDESC_H
-#define HEXAGONMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
+
+#include <cstdint>
namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class StringRef;
+class raw_ostream;
extern Target TheHexagonTarget;
+MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
+ MCRegisterInfo const &MRI,
+ MCSubtargetInfo const &MST,
+ MCContext &MCT);
+
+MCAsmBackend *createHexagonAsmBackend(Target const &T,
+ MCRegisterInfo const &MRI, StringRef TT,
+ StringRef CPU);
+
+MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+ StringRef CPU);
+
} // End llvm namespace
// Define symbolic names for Hexagon registers. This defines a mapping from
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
index 73c7e01..f559a21 100644
--- a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = HexagonDesc
parent = Hexagon
-required_libraries = HexagonInfo MC
+required_libraries = HexagonInfo MC Support
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
index dc387c5..329c9d3 100644
--- a/lib/Target/Hexagon/Makefile
+++ b/lib/Target/Hexagon/Makefile
@@ -14,10 +14,12 @@ TARGET = Hexagon
BUILT_SOURCES = HexagonGenRegisterInfo.inc \
HexagonGenInstrInfo.inc \
HexagonGenAsmWriter.inc \
- HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
- HexagonGenCallingConv.inc \
- HexagonGenDFAPacketizer.inc
-
-DIRS = InstPrinter TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonGenDFAPacketizer.inc \
+ HexagonGenMCCodeEmitter.inc \
+ HexagonGenDisassemblerTables.inc
+
+DIRS = TargetInfo MCTargetDesc Disassembler
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 5afbd20..7fae505 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430INSTPRINTER_H
-#define MSP430INSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_MSP430_INSTPRINTER_MSP430INSTPRINTER_H
+#define LLVM_LIB_TARGET_MSP430_INSTPRINTER_MSP430INSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index ef805bb..2c9532d 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430TARGETASMINFO_H
-#define MSP430TARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCASMINFO_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 72adb45..4c70803 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -39,7 +39,7 @@ static MCInstrInfo *createMSP430MCInstrInfo() {
static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitMSP430MCRegisterInfo(X, MSP430::PCW);
+ InitMSP430MCRegisterInfo(X, MSP430::PC);
return X;
}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 7f3505c..586f5d9 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430MCTARGETDESC_H
-#define MSP430MCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
namespace llvm {
class Target;
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index 4574ce5..796f252 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430_H
-#define LLVM_TARGET_MSP430_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430_H
+#define LLVM_LIB_TARGET_MSP430_MSP430_H
#include "MCTargetDesc/MSP430MCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index a96930a..ffcf222 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -17,6 +17,7 @@
#include "MSP430.h"
#include "MSP430InstrInfo.h"
+#include "MSP430Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -54,7 +55,7 @@ FunctionPass *llvm::createMSP430BranchSelectionPass() {
bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
const MSP430InstrInfo *TII =
- static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
+ static_cast<const MSP430InstrInfo *>(Fn.getSubtarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index 8a69d1e..b38f578 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -17,7 +17,7 @@ def RetCC_MSP430 : CallingConv<[
CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
// i16 are returned in registers R15, R14, R13, R12
- CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+ CCIfType<[i16], CCAssignToReg<[R15, R14, R13, R12]>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index 82c8b29..d6cb9f6 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -14,6 +14,7 @@
#include "MSP430FrameLowering.h"
#include "MSP430InstrInfo.h"
#include "MSP430MachineFunctionInfo.h"
+#include "MSP430Subtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -43,7 +44,7 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
const MSP430InstrInfo &TII =
- *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -62,18 +63,18 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
// Update the frame offset adjustment.
MFI->setOffsetAdjustment(-NumBytes);
- // Save FPW into the appropriate stack slot...
+ // Save FP into the appropriate stack slot...
BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
- .addReg(MSP430::FPW, RegState::Kill);
+ .addReg(MSP430::FP, RegState::Kill);
- // Update FPW with the new base value...
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
- .addReg(MSP430::SPW);
+ // Update FP with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FP)
+ .addReg(MSP430::SP);
// Mark the FramePtr as live-in in every block except the entry.
for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
I != E; ++I)
- I->addLiveIn(MSP430::FPW);
+ I->addLiveIn(MSP430::FP);
} else
NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
@@ -85,18 +86,18 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- if (NumBytes) { // adjust stack pointer: SPW -= numbytes
- // If there is an SUB16ri of SPW immediately before this instruction, merge
+ if (NumBytes) { // adjust stack pointer: SP -= numbytes
+ // If there is an SUB16ri of SP immediately before this instruction, merge
// the two.
//NumBytes -= mergeSPUpdates(MBB, MBBI, true);
- // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // If there is an ADD16ri or SUB16ri of SP immediately after this
// instruction, merge the two instructions.
// mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
if (NumBytes) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(NumBytes);
// The SRW implicit def is dead.
MI->getOperand(3).setIsDead();
}
@@ -108,7 +109,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
const MachineFrameInfo *MFI = MF.getFrameInfo();
MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
const MSP430InstrInfo &TII =
- *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
unsigned RetOpcode = MBBI->getOpcode();
@@ -131,8 +132,8 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
uint64_t FrameSize = StackSize - 2;
NumBytes = FrameSize - CSSize;
- // pop FPW.
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ // pop FP.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FP);
} else
NumBytes = StackSize - CSSize;
@@ -147,28 +148,28 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
DL = MBBI->getDebugLoc();
- // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // If there is an ADD16ri or SUB16ri of SP immediately before this
// instruction, merge the two instructions.
//if (NumBytes || MFI->hasVarSizedObjects())
// mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
if (MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+ TII.get(MSP430::MOV16rr), MSP430::SP).addReg(MSP430::FP);
if (CSSize) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(CSSize);
+ TII.get(MSP430::SUB16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(CSSize);
// The SRW implicit def is dead.
MI->getOperand(3).setIsDead();
}
} else {
- // adjust stack pointer back: SPW += numbytes
+ // adjust stack pointer back: SP += numbytes
if (NumBytes) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(NumBytes);
// The SRW implicit def is dead.
MI->getOperand(3).setIsDead();
}
@@ -188,7 +189,7 @@ MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
MFI->setCalleeSavedFrameSize(CSI.size() * 2);
@@ -214,7 +215,7 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i)
BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
@@ -226,13 +227,13 @@ void MSP430FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const MSP430InstrInfo &TII =
- *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
unsigned StackAlign = getStackAlignment();
if (!hasReservedCallFrame(MF)) {
// If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub SPW, <amt>' and the
- // adjcallstackdown instruction into 'add SPW, <amt>'
+ // adjcallstackup instruction into a 'sub SP, <amt>' and the
+ // adjcallstackdown instruction into 'add SP, <amt>'
// TODO: consider using push / pop instead of sub + store / add
MachineInstr *Old = I;
uint64_t Amount = Old->getOperand(0).getImm();
@@ -245,8 +246,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *New = nullptr;
if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
+ TII.get(MSP430::SUB16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(Amount);
} else {
assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
// factor out the amount the callee already popped.
@@ -254,8 +255,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount -= CalleeAmt;
if (Amount)
New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::ADD16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
+ TII.get(MSP430::ADD16ri), MSP430::SP)
+ .addReg(MSP430::SP).addImm(Amount);
}
if (New) {
@@ -273,7 +274,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *Old = I;
MachineInstr *New =
BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
- MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ MSP430::SP).addReg(MSP430::SP).addImm(CalleeAmt);
// The SRW implicit def is dead.
New->getOperand(3).setIsDead();
@@ -287,11 +288,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
void
MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *) const {
- // Create a frame entry for the FPW register that must be saved.
+ // Create a frame entry for the FP register that must be saved.
if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
(void)FrameIdx;
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
- "Slot for FPW register must be last in order to be found!");
+ "Slot for FP register must be last in order to be found!");
}
}
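
The prologue/epilogue changes above only touch register naming and how TII is obtained; the stack arithmetic is unchanged. A small worked sketch of the frame-pointer path in emitEpilogue, with assumed sizes (16 bytes of stack, two 2-byte callee-saved slots; the numbers are illustrative):

#include <cassert>

int main() {
  unsigned StackSize = 16; // assumed MFI->getStackSize()
  unsigned CSSize = 2 * 2; // assumed callee-saved frame size
  // With a frame pointer, the saved FP slot (2 bytes) is carved out first,
  // mirroring "FrameSize = StackSize - 2" in emitEpilogue.
  unsigned FrameSize = StackSize - 2;
  unsigned NumBytes = FrameSize - CSSize; // operand of "add.w #NumBytes, SP"
  assert(NumBytes == 10);
  return 0;
}
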
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index fadfeed..1941af2 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430_FRAMEINFO_H
-#define MSP430_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430FRAMELOWERING_H
+#define LLVM_LIB_TARGET_MSP430_MSP430FRAMELOWERING_H
#include "MSP430.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index a9b9035..81c176b 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -97,9 +97,9 @@ namespace {
public:
MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) { }
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getSubtargetImpl()->getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) {}
const char *getPassName() const override {
return "MSP430 DAG->DAG Pattern Instruction Selection";
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 3d3ee92..22936dd 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -58,7 +58,7 @@ HWMultMode("msp430-hwmult-mode", cl::Hidden,
clEnumValEnd));
MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ : TargetLowering(TM) {
// Set up the register classes.
addRegisterClass(MVT::i8, &MSP430::GR8RegClass);
@@ -72,7 +72,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
// Division is expensive
setIntDivIsCheap(false);
- setStackPointerRegisterToSaveRestore(MSP430::SPW);
+ setStackPointerRegisterToSaveRestore(MSP430::SP);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -282,7 +282,7 @@ static void AnalyzeArguments(CCState &State,
SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<ArgT> &Args) {
static const MCPhysReg RegList[] = {
- MSP430::R15W, MSP430::R14W, MSP430::R13W, MSP430::R12W
+ MSP430::R15, MSP430::R14, MSP430::R13, MSP430::R12
};
static const unsigned NbRegs = array_lengthof(RegList);
@@ -437,8 +437,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
AnalyzeArguments(CCInfo, ArgLocs, Ins);
// Create frame index for the start of the first vararg value
@@ -533,8 +533,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
report_fatal_error("ISRs cannot return any value");
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analize return values.
AnalyzeReturnValues(CCInfo, RVLocs, Outs);
@@ -583,8 +583,8 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
SmallVectorImpl<SDValue> &InVals) const {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
AnalyzeArguments(CCInfo, ArgLocs, Outs);
// Get a count of how many bytes are to be pushed on the stack.
@@ -627,7 +627,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
assert(VA.isMemLoc());
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SP, getPointerTy());
SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
StackPtr,
@@ -719,8 +719,8 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
AnalyzeReturnValues(CCInfo, RVLocs, Ins);
@@ -941,31 +941,31 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
Convert = false;
break;
case MSP430CC::COND_HS:
- // Res = SRW & 1, no processing is required
+ // Res = SR & 1, no processing is required
break;
case MSP430CC::COND_LO:
- // Res = ~(SRW & 1)
+ // Res = ~(SR & 1)
Invert = true;
break;
case MSP430CC::COND_NE:
if (andCC) {
- // C = ~Z, thus Res = SRW & 1, no processing is required
+ // C = ~Z, thus Res = SR & 1, no processing is required
} else {
- // Res = ~((SRW >> 1) & 1)
+ // Res = ~((SR >> 1) & 1)
Shift = true;
Invert = true;
}
break;
case MSP430CC::COND_E:
Shift = true;
- // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however,
- // Res = (SRW >> 1) & 1 is 1 word shorter.
+ // C = ~Z for AND instruction, thus we can put Res = ~(SR & 1), however,
+ // Res = (SR >> 1) & 1 is 1 word shorter.
break;
}
EVT VT = Op.getValueType();
SDValue One = DAG.getConstant(1, VT);
if (Convert) {
- SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+ SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SR,
MVT::i16, Flag);
if (Shift)
// FIXME: somewhere this is turned into a SRL, lower it MSP specific?
@@ -1074,7 +1074,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- MSP430::FPW, VT);
+ MSP430::FP, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
@@ -1199,7 +1199,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned Opc;
const TargetRegisterClass * RC;
@@ -1310,7 +1311,8 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
return EmitShiftInstr(MI, BB);
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
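
The LowerSETCC comments above describe the result as a one-bit read of the carry or zero flag in SR, optionally shifted and inverted. A sketch of that Shift/Invert bookkeeping with MSP430's usual SR layout assumed (C in bit 0, Z in bit 1):

#include <cassert>

static unsigned flagResult(unsigned SR, bool Shift, bool Invert) {
  unsigned Res = SR;
  if (Shift)
    Res >>= 1; // move Z into bit 0
  Res &= 1;
  if (Invert)
    Res ^= 1;  // the "~(... & 1)" cases, truncated to one bit
  return Res;
}

int main() {
  unsigned CarrySet = 0x1; // C = 1, Z = 0
  unsigned ZeroSet = 0x2;  // Z = 1, C = 0
  assert(flagResult(CarrySet, false, false) == 1); // COND_HS: SR & 1
  assert(flagResult(CarrySet, false, true) == 0);  // COND_LO: ~(SR & 1)
  assert(flagResult(ZeroSet, true, false) == 1);   // COND_E: (SR >> 1) & 1
  return 0;
}
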
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 3e2f344..073ddc9 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H
-#define LLVM_TARGET_MSP430_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430ISELLOWERING_H
+#define LLVM_LIB_TARGET_MSP430_MSP430ISELLOWERING_H
#include "MSP430.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -170,4 +170,4 @@ namespace llvm {
};
} // namespace llvm
-#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
+#endif
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index ccb6c09..27681aa 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -307,7 +307,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0;
case TargetOpcode::INLINEASM: {
const MachineFunction *MF = MI->getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(),
*MF->getTarget().getMCAsmInfo());
}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index e6baaef..f9b25b6 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430INSTRINFO_H
-#define LLVM_TARGET_MSP430INSTRINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430INSTRINFO_H
+#define LLVM_LIB_TARGET_MSP430_MSP430INSTRINFO_H
#include "MSP430RegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 50e3fda..7c5aa11 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -111,8 +111,8 @@ def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber SRW.
-let Defs = [SPW, SRW], Uses = [SPW] in {
+// sub / add which can clobber SR.
+let Defs = [SP, SR], Uses = [SP] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt),
"#ADJCALLSTACKDOWN",
[(MSP430callseq_start timm:$amt)]>;
@@ -130,7 +130,7 @@ let usesCustomInserter = 1 in {
"# Select16 PSEUDO",
[(set GR16:$dst,
(MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
- let Defs = [SRW] in {
+ let Defs = [SR] in {
def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Shl8 PSEUDO",
[(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
@@ -192,7 +192,7 @@ let isBarrier = 1 in {
}
// Conditional branches
-let Uses = [SRW] in
+let Uses = [SR] in
def JCC : CJForm<0, 0,
(outs), (ins jmptarget:$dst, cc:$cc),
"j$cc\t$dst",
@@ -207,8 +207,8 @@ let isCall = 1 in
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
- let Defs = [R12W, R13W, R14W, R15W, SRW],
- Uses = [SPW] in {
+ let Defs = [R12, R13, R14, R15, SR],
+ Uses = [SP] in {
def CALLi : II16i<0x0,
(outs), (ins i16imm:$dst),
"call\t$dst", [(MSP430call imm:$dst)]>;
@@ -224,7 +224,7 @@ let isCall = 1 in
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions...
//
-let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in {
+let Defs = [SP], Uses = [SP], neverHasSideEffects=1 in {
let mayLoad = 1 in
def POP16r : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$reg), (ins), "pop.w\t$reg", []>;
@@ -337,7 +337,7 @@ def MOV16mm : I16mm<0x0,
let Constraints = "$src = $dst" in {
-let Defs = [SRW] in {
+let Defs = [SR] in {
let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
@@ -345,24 +345,24 @@ def ADD8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"add.b\t{$src2, $dst}",
[(set GR8:$dst, (add GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"add.w\t{$src2, $dst}",
[(set GR16:$dst, (add GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
def ADD8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"add.b\t{$src2, $dst}",
[(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"add.w\t{$src2, $dst}",
[(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
Constraints = "$base = $base_wb, $src = $dst" in {
@@ -381,160 +381,160 @@ def ADD8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"add.b\t{$src2, $dst}",
[(set GR8:$dst, (add GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"add.w\t{$src2, $dst}",
[(set GR16:$dst, (add GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
let Constraints = "" in {
def ADD8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"add.b\t{$src, $dst}",
[(store (add (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"add.w\t{$src, $dst}",
[(store (add (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"add.b\t{$src, $dst}",
[(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"add.w\t{$src, $dst}",
[(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"add.b\t{$src, $dst}",
[(store (add (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADD16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"add.w\t{$src, $dst}",
[(store (add (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
-let Uses = [SRW] in {
+let Uses = [SR] in {
let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
def ADC8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"addc.b\t{$src2, $dst}",
[(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"addc.w\t{$src2, $dst}",
[(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
} // isCommutable
def ADC8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"addc.b\t{$src2, $dst}",
[(set GR8:$dst, (adde GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"addc.w\t{$src2, $dst}",
[(set GR16:$dst, (adde GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"addc.b\t{$src2, $dst}",
[(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"addc.w\t{$src2, $dst}",
[(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let Constraints = "" in {
def ADC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"addc.b\t{$src, $dst}",
[(store (adde (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"addc.w\t{$src, $dst}",
[(store (adde (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"addc.b\t{$src, $dst}",
[(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"addc.w\t{$src, $dst}",
[(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"addc.b\t{$src, $dst}",
[(store (adde (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def ADC16mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"addc.w\t{$src, $dst}",
[(store (adde (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
-} // Uses = [SRW]
+} // Uses = [SR]
let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
def AND8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"and.b\t{$src2, $dst}",
[(set GR8:$dst, (and GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"and.w\t{$src2, $dst}",
[(set GR16:$dst, (and GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
def AND8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"and.b\t{$src2, $dst}",
[(set GR8:$dst, (and GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"and.w\t{$src2, $dst}",
[(set GR16:$dst, (and GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"and.b\t{$src2, $dst}",
[(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"and.w\t{$src2, $dst}",
[(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
Constraints = "$base = $base_wb, $src = $dst" in {
@@ -553,36 +553,36 @@ def AND8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"and.b\t{$src, $dst}",
[(store (and (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"and.w\t{$src, $dst}",
[(store (and (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"and.b\t{$src, $dst}",
[(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"and.w\t{$src, $dst}",
[(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"and.b\t{$src, $dst}",
[(store (and (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def AND16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"and.w\t{$src, $dst}",
[(store (and (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
@@ -703,35 +703,35 @@ def XOR8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"xor.b\t{$src2, $dst}",
[(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"xor.w\t{$src2, $dst}",
[(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
def XOR8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"xor.b\t{$src2, $dst}",
[(set GR8:$dst, (xor GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"xor.w\t{$src2, $dst}",
[(set GR16:$dst, (xor GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"xor.b\t{$src2, $dst}",
[(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"xor.w\t{$src2, $dst}",
[(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
Constraints = "$base = $base_wb, $src = $dst" in {
@@ -750,34 +750,34 @@ def XOR8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"xor.b\t{$src, $dst}",
[(store (xor (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"xor.w\t{$src, $dst}",
[(store (xor (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"xor.b\t{$src, $dst}",
[(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"xor.w\t{$src, $dst}",
[(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"xor.b\t{$src, $dst}",
[(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def XOR16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"xor.w\t{$src, $dst}",
[(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
@@ -785,34 +785,34 @@ def SUB8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"sub.b\t{$src2, $dst}",
[(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"sub.w\t{$src2, $dst}",
[(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"sub.b\t{$src2, $dst}",
[(set GR8:$dst, (sub GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"sub.w\t{$src2, $dst}",
[(set GR16:$dst, (sub GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"sub.b\t{$src2, $dst}",
[(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"sub.w\t{$src2, $dst}",
[(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
Constraints = "$base = $base_wb, $src = $dst" in {
@@ -831,153 +831,153 @@ def SUB8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"sub.b\t{$src, $dst}",
[(store (sub (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"sub.w\t{$src, $dst}",
[(store (sub (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"sub.b\t{$src, $dst}",
[(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"sub.w\t{$src, $dst}",
[(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"sub.b\t{$src, $dst}",
[(store (sub (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SUB16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"sub.w\t{$src, $dst}",
[(store (sub (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
-let Uses = [SRW] in {
+let Uses = [SR] in {
def SBC8rr : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"subc.b\t{$src2, $dst}",
[(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16rr : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"subc.w\t{$src2, $dst}",
[(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC8ri : I8ri<0x0,
(outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"subc.b\t{$src2, $dst}",
[(set GR8:$dst, (sube GR8:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16ri : I16ri<0x0,
(outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"subc.w\t{$src2, $dst}",
[(set GR16:$dst, (sube GR16:$src, imm:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC8rm : I8rm<0x0,
(outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"subc.b\t{$src2, $dst}",
[(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16rm : I16rm<0x0,
(outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"subc.w\t{$src2, $dst}",
[(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
- (implicit SRW)]>;
+ (implicit SR)]>;
let Constraints = "" in {
def SBC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"subc.b\t{$src, $dst}",
[(store (sube (load addr:$dst), GR8:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16mr : I16mr<0x0,
(outs), (ins memdst:$dst, GR16:$src),
"subc.w\t{$src, $dst}",
[(store (sube (load addr:$dst), GR16:$src), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC8mi : I8mi<0x0,
(outs), (ins memdst:$dst, i8imm:$src),
"subc.b\t{$src, $dst}",
[(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16mi : I16mi<0x0,
(outs), (ins memdst:$dst, i16imm:$src),
"subc.w\t{$src, $dst}",
[(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC8mm : I8mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"subc.b\t{$src, $dst}",
[(store (sube (load addr:$dst),
(i8 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SBC16mm : I16mm<0x0,
(outs), (ins memdst:$dst, memsrc:$src),
"subc.w\t{$src, $dst}",
[(store (sube (load addr:$dst),
(i16 (load addr:$src))), addr:$dst),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
-} // Uses = [SRW]
+} // Uses = [SR]
// FIXME: memory variant!
def SAR8r1 : II8r<0x0,
(outs GR8:$dst), (ins GR8:$src),
"rra.b\t$dst",
[(set GR8:$dst, (MSP430rra GR8:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SAR16r1 : II16r<0x0,
(outs GR16:$dst), (ins GR16:$src),
"rra.w\t$dst",
[(set GR16:$dst, (MSP430rra GR16:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SHL8r1 : I8rr<0x0,
(outs GR8:$dst), (ins GR8:$src),
"rla.b\t$dst",
[(set GR8:$dst, (MSP430rla GR8:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SHL16r1 : I16rr<0x0,
(outs GR16:$dst), (ins GR16:$src),
"rla.w\t$dst",
[(set GR16:$dst, (MSP430rla GR16:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
"clrc\n\t"
"rrc.b\t$dst",
[(set GR8:$dst, (MSP430rrc GR8:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
"clrc\n\t"
"rrc.w\t$dst",
[(set GR16:$dst, (MSP430rrc GR16:$src)),
- (implicit SRW)]>;
+ (implicit SR)]>;
// FIXME: Memory sext's ?
def SEXT16r : II16r<0x0,
(outs GR16:$dst), (ins GR16:$src),
"sxt\t$dst",
[(set GR16:$dst, (sext_inreg GR16:$src, i8)),
- (implicit SRW)]>;
+ (implicit SR)]>;
-} // Defs = [SRW]
+} // Defs = [SR]
def ZEXT16r : I8rr<0x0,
(outs GR16:$dst), (ins GR16:$src),
@@ -993,57 +993,57 @@ def SWPB16r : II16r<0x0,
} // Constraints = "$src = $dst"
// Integer comparisons
-let Defs = [SRW] in {
+let Defs = [SR] in {
def CMP8rr : I8rr<0x0,
(outs), (ins GR8:$src, GR8:$src2),
"cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
+ [(MSP430cmp GR8:$src, GR8:$src2), (implicit SR)]>;
def CMP16rr : I16rr<0x0,
(outs), (ins GR16:$src, GR16:$src2),
"cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
+ [(MSP430cmp GR16:$src, GR16:$src2), (implicit SR)]>;
def CMP8ri : I8ri<0x0,
(outs), (ins GR8:$src, i8imm:$src2),
"cmp.b\t{$src2, $src}",
- [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
+ [(MSP430cmp GR8:$src, imm:$src2), (implicit SR)]>;
def CMP16ri : I16ri<0x0,
(outs), (ins GR16:$src, i16imm:$src2),
"cmp.w\t{$src2, $src}",
- [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
+ [(MSP430cmp GR16:$src, imm:$src2), (implicit SR)]>;
def CMP8mi : I8mi<0x0,
(outs), (ins memsrc:$src, i8imm:$src2),
"cmp.b\t{$src2, $src}",
[(MSP430cmp (load addr:$src),
- (i8 imm:$src2)), (implicit SRW)]>;
+ (i8 imm:$src2)), (implicit SR)]>;
def CMP16mi : I16mi<0x0,
(outs), (ins memsrc:$src, i16imm:$src2),
"cmp.w\t{$src2, $src}",
[(MSP430cmp (load addr:$src),
- (i16 imm:$src2)), (implicit SRW)]>;
+ (i16 imm:$src2)), (implicit SR)]>;
def CMP8rm : I8rm<0x0,
(outs), (ins GR8:$src, memsrc:$src2),
"cmp.b\t{$src2, $src}",
[(MSP430cmp GR8:$src, (load addr:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def CMP16rm : I16rm<0x0,
(outs), (ins GR16:$src, memsrc:$src2),
"cmp.w\t{$src2, $src}",
[(MSP430cmp GR16:$src, (load addr:$src2)),
- (implicit SRW)]>;
+ (implicit SR)]>;
def CMP8mr : I8mr<0x0,
(outs), (ins memsrc:$src, GR8:$src2),
"cmp.b\t{$src2, $src}",
[(MSP430cmp (load addr:$src), GR8:$src2),
- (implicit SRW)]>;
+ (implicit SR)]>;
def CMP16mr : I16mr<0x0,
(outs), (ins memsrc:$src, GR16:$src2),
"cmp.w\t{$src2, $src}",
[(MSP430cmp (load addr:$src), GR16:$src2),
- (implicit SRW)]>;
+ (implicit SR)]>;
// BIT TESTS, just sets condition codes
@@ -1053,56 +1053,56 @@ def BIT8rr : I8rr<0x0,
(outs), (ins GR8:$src, GR8:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16rr : I16rr<0x0,
(outs), (ins GR16:$src, GR16:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
}
def BIT8ri : I8ri<0x0,
(outs), (ins GR8:$src, i8imm:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16ri : I16ri<0x0,
(outs), (ins GR16:$src, i16imm:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT8rm : I8rm<0x0,
(outs), (ins GR8:$src, memdst:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16rm : I16rm<0x0,
(outs), (ins GR16:$src, memdst:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT8mr : I8mr<0x0,
(outs), (ins memsrc:$src, GR8:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16mr : I16mr<0x0,
(outs), (ins memsrc:$src, GR16:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT8mi : I8mi<0x0,
(outs), (ins memsrc:$src, i8imm:$src2),
"bit.b\t{$src2, $src}",
[(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16mi : I16mi<0x0,
(outs), (ins memsrc:$src, i16imm:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT8mm : I8mm<0x0,
(outs), (ins memsrc:$src, memsrc:$src2),
@@ -1110,15 +1110,15 @@ def BIT8mm : I8mm<0x0,
[(MSP430cmp (and_su (i8 (load addr:$src)),
(load addr:$src2)),
0),
- (implicit SRW)]>;
+ (implicit SR)]>;
def BIT16mm : I16mm<0x0,
(outs), (ins memsrc:$src, memsrc:$src2),
"bit.w\t{$src2, $src}",
[(MSP430cmp (and_su (i16 (load addr:$src)),
(load addr:$src2)),
0),
- (implicit SRW)]>;
-} // Defs = [SRW]
+ (implicit SR)]>;
+} // Defs = [SR]
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 05352a2..77b91b7 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
MCSymbol *MSP430MCInstLower::
@@ -50,7 +51,7 @@ GetExternalSymbolSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetJumpTableSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getDataLayout();
+ const DataLayout *DL = Printer.TM.getSubtargetImpl()->getDataLayout();
SmallString<256> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
<< Printer.getFunctionNumber() << '_'
@@ -67,7 +68,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getDataLayout();
+ const DataLayout *DL = Printer.TM.getSubtargetImpl()->getDataLayout();
SmallString<256> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "CPI"
<< Printer.getFunctionNumber() << '_'
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index 794aa56..ebd6397 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430_MCINSTLOWER_H
-#define MSP430_MCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430MCINSTLOWER_H
+#define LLVM_LIB_TARGET_MSP430_MSP430MCINSTLOWER_H
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index d1697f4..fcc5f5b 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430MACHINEFUNCTIONINFO_H
-#define MSP430MACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_MSP430_MSP430MACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 691bcee..614467b 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -33,32 +33,32 @@ using namespace llvm;
// FIXME: Provide proper call frame setup / destroy opcodes.
MSP430RegisterInfo::MSP430RegisterInfo()
- : MSP430GenRegisterInfo(MSP430::PCW) {}
+ : MSP430GenRegisterInfo(MSP430::PC) {}
const MCPhysReg*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
const Function* F = MF->getFunction();
static const MCPhysReg CalleeSavedRegs[] = {
- MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
- MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
0
};
static const MCPhysReg CalleeSavedRegsFP[] = {
- MSP430::R5W, MSP430::R6W, MSP430::R7W,
- MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
0
};
static const MCPhysReg CalleeSavedRegsIntr[] = {
- MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
- MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
- MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15,
0
};
static const MCPhysReg CalleeSavedRegsIntrFP[] = {
- MSP430::R5W, MSP430::R6W, MSP430::R7W,
- MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
- MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ MSP430::R5, MSP430::R6, MSP430::R7,
+ MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15,
0
};
@@ -73,22 +73,22 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// Mark 4 special registers with subregisters as reserved.
Reserved.set(MSP430::PCB);
Reserved.set(MSP430::SPB);
Reserved.set(MSP430::SRB);
Reserved.set(MSP430::CGB);
- Reserved.set(MSP430::PCW);
- Reserved.set(MSP430::SPW);
- Reserved.set(MSP430::SRW);
- Reserved.set(MSP430::CGW);
+ Reserved.set(MSP430::PC);
+ Reserved.set(MSP430::SP);
+ Reserved.set(MSP430::SR);
+ Reserved.set(MSP430::CG);
// Mark frame pointer as reserved if needed.
if (TFI->hasFP(MF)) {
Reserved.set(MSP430::FPB);
- Reserved.set(MSP430::FPW);
+ Reserved.set(MSP430::FP);
}
return Reserved;
@@ -109,11 +109,11 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FP : MSP430::SP);
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
// Skip the saved PC
@@ -122,7 +122,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!TFI->hasFP(MF))
Offset += MF.getFrameInfo()->getStackSize();
else
- Offset += 2; // Skip the saved FPW
+ Offset += 2; // Skip the saved FP
// Fold imm into offset
Offset += MI.getOperand(FIOperandNum + 1).getImm();
@@ -131,7 +131,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// This is actually "load effective address" of the stack slot
// instruction. We have only two-address instructions, thus we need to
// expand it into mov + add
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MI.setDesc(TII.get(MSP430::MOV16rr));
MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
@@ -156,7 +156,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+ return TFI->hasFP(MF) ? MSP430::FP : MSP430::SP;
}
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index cb01961..3f88a69 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430REGISTERINFO_H
-#define LLVM_TARGET_MSP430REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430REGISTERINFO_H
+#define LLVM_LIB_TARGET_MSP430_MSP430REGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
@@ -44,4 +44,4 @@ public:
} // end namespace llvm
-#endif // LLVM_TARGET_MSP430REGISTERINFO_H
+#endif
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 4010781..b5a6ed0 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -46,22 +46,22 @@ def R15B : MSP430Reg<15, "r15">;
def subreg_8bit : SubRegIndex<8> { let Namespace = "MSP430"; }
let SubRegIndices = [subreg_8bit] in {
-def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
-def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
-def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
-def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
-def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
-def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
-def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
-def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
-def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
-def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
-def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
-def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
-def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
-def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
-def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
-def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+def PC : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SP : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SR : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CG : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FP : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5 : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6 : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7 : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8 : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9 : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10 : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11 : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12 : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13 : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14 : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15 : MSP430RegWithSubregs<15, "r15", [R15B]>;
}
def GR8 : RegisterClass<"MSP430", [i8], 8,
@@ -74,8 +74,8 @@ def GR8 : RegisterClass<"MSP430", [i8], 8,
def GR16 : RegisterClass<"MSP430", [i16], 16,
// Volatile registers
- (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ (add R12, R13, R14, R15, R11, R10, R9, R8, R7, R6, R5,
// Frame pointer, sometimes allocable
- FPW,
+ FP,
// Volatile, but not allocable
- PCW, SPW, SRW, CGW)>;
+ PC, SP, SR, CG)>;
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index cb04adc..61a6b19 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MSP430SELECTIONDAGINFO_H
-#define MSP430SELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index dbddc52..cb83b92 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -34,6 +34,6 @@ MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: MSP430GenSubtargetInfo(TT, CPU, FS),
// FIXME: Check DataLayout string.
- DL("e-m:e-p:16:16-i32:16:32-n8:16"), FrameLowering(),
+ DL("e-m:e-p:16:16-i32:16:32-a:16-n8:16"), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
TSInfo(DL) {}
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 0152ad1..d1845db 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
-#define LLVM_TARGET_MSP430_SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430SUBTARGET_H
+#define LLVM_LIB_TARGET_MSP430_MSP430SUBTARGET_H
#include "MSP430FrameLowering.h"
#include "MSP430InstrInfo.h"
@@ -51,14 +51,20 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const TargetRegisterInfo *getRegisterInfo() const {
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const TargetRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const MSP430TargetLowering *getTargetLowering() const { return &TLInfo; }
- const MSP430SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const MSP430TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
};
} // End llvm namespace
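
Not part of the patch itself: a minimal, self-contained C++ sketch of the refactoring pattern the MSP430Subtarget.h hunk above and the MSP430TargetMachine.h hunk further below follow. The per-target accessors become virtual overrides on the subtarget, and callers reach them via getSubtarget() instead of via the TargetMachine (as in the MSP430RegisterInfo.cpp changes earlier). All class names here are invented stand-ins, not LLVM types.

    // Sketch only: hypothetical classes mirroring the accessor move.
    struct FrameLoweringBase { virtual ~FrameLoweringBase() = default; };
    struct MyFrameLowering : FrameLoweringBase {};

    struct SubtargetBase {
      virtual ~SubtargetBase() = default;
      virtual const FrameLoweringBase *getFrameLowering() const = 0;
    };

    struct MySubtarget : SubtargetBase {
      MyFrameLowering FrameLowering;
      // The concrete subtarget now owns and exposes the per-target objects.
      const FrameLoweringBase *getFrameLowering() const override {
        return &FrameLowering;
      }
    };

    struct MachineFunctionLike {
      MySubtarget ST;
      // Callers go through the subtarget, mirroring
      // MF.getSubtarget().getFrameLowering() in the patch.
      const SubtargetBase &getSubtarget() const { return ST; }
    };

    int main() {
      MachineFunctionLike MF;
      const FrameLoweringBase *TFI = MF.getSubtarget().getFrameLowering();
      return TFI ? 0 : 1;
    }
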
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 5ca36f2..8cee016 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MSP430TargetMachine.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "MSP430.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -30,10 +31,13 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
+MSP430TargetMachine::~MSP430TargetMachine() {}
+
namespace {
/// MSP430 Code Generator Pass Configuration Options.
class MSP430PassConfig : public TargetPassConfig {
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index efa8403..0e54ed6 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
-#define LLVM_TARGET_MSP430_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_MSP430_MSP430TARGETMACHINE_H
+#define LLVM_LIB_TARGET_MSP430_MSP430TARGETMACHINE_H
#include "MSP430Subtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -24,6 +24,7 @@ namespace llvm {
/// MSP430TargetMachine
///
class MSP430TargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
MSP430Subtarget Subtarget;
public:
@@ -31,31 +32,18 @@ public:
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~MSP430TargetMachine() override;
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const MSP430InstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
const MSP430Subtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const TargetRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const MSP430TargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
}; // MSP430TargetMachine.
} // end namespace llvm
-#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H
+#endif
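
An aside rather than part of the change: holding TLOF in a std::unique_ptr while only declaring ~MSP430TargetMachine() in the header (its empty definition lives in the .cpp above) matches a common C++ idiom. One plausible reason, stated here as an assumption about intent, is that a unique_ptr member may point at a type that is incomplete in the header, so the destructor has to be emitted where the full type is visible. A self-contained sketch with made-up names:

    // widget.h -- hypothetical example, not the LLVM classes.
    #include <memory>

    class Impl;                      // deliberately incomplete here

    class Widget {
      std::unique_ptr<Impl> P;       // fine: unique_ptr to an incomplete type
    public:
      Widget();
      ~Widget();                     // defined out of line, where Impl is complete
      Impl *get() const { return P.get(); }
    };

    // widget.cpp
    class Impl { public: int X = 0; };

    Widget::Widget() : P(std::make_unique<Impl>()) {}
    Widget::~Widget() = default;     // Impl is complete here, so deleting P compiles

    int main() { Widget W; return W.get()->X; }
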
diff --git a/lib/Target/Mips/Android.mk b/lib/Target/Mips/Android.mk
index 9f437f8..18d1177 100644
--- a/lib/Target/Mips/Android.mk
+++ b/lib/Target/Mips/Android.mk
@@ -20,9 +20,10 @@ mips_codegen_SRC_FILES := \
Mips16ISelLowering.cpp \
Mips16InstrInfo.cpp \
Mips16RegisterInfo.cpp \
+ MipsABIInfo.cpp \
MipsAnalyzeImmediate.cpp \
MipsAsmPrinter.cpp \
- MipsCodeEmitter.cpp \
+ MipsCCState.cpp \
MipsConstantIslandPass.cpp \
MipsDelaySlotFiller.cpp \
MipsFastISel.cpp \
@@ -30,7 +31,6 @@ mips_codegen_SRC_FILES := \
MipsInstrInfo.cpp \
MipsISelDAGToDAG.cpp \
MipsISelLowering.cpp \
- MipsJITInfo.cpp \
MipsLongBranch.cpp \
MipsMachineFunction.cpp \
MipsMCInstLower.cpp \
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 0c06be8..0c5b41f 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -13,6 +13,7 @@
#include "MipsTargetStreamer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -26,6 +27,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/SourceMgr.h"
+#include <memory>
using namespace llvm;
@@ -38,36 +41,69 @@ class MCInstrInfo;
namespace {
class MipsAssemblerOptions {
public:
- MipsAssemblerOptions() : aTReg(1), reorder(true), macro(true) {}
+ MipsAssemblerOptions(uint64_t Features_) :
+ ATReg(1), Reorder(true), Macro(true), Features(Features_) {}
- unsigned getATRegNum() { return aTReg; }
- bool setATReg(unsigned Reg);
+ MipsAssemblerOptions(const MipsAssemblerOptions *Opts) {
+ ATReg = Opts->getATRegNum();
+ Reorder = Opts->isReorder();
+ Macro = Opts->isMacro();
+ Features = Opts->getFeatures();
+ }
- bool isReorder() { return reorder; }
- void setReorder() { reorder = true; }
- void setNoreorder() { reorder = false; }
+ unsigned getATRegNum() const { return ATReg; }
+ bool setATReg(unsigned Reg);
- bool isMacro() { return macro; }
- void setMacro() { macro = true; }
- void setNomacro() { macro = false; }
+ bool isReorder() const { return Reorder; }
+ void setReorder() { Reorder = true; }
+ void setNoReorder() { Reorder = false; }
+
+ bool isMacro() const { return Macro; }
+ void setMacro() { Macro = true; }
+ void setNoMacro() { Macro = false; }
+
+ uint64_t getFeatures() const { return Features; }
+ void setFeatures(uint64_t Features_) { Features = Features_; }
+
+ // Set of features that are either architecture features or referenced
+ // by them (e.g.: FeatureNaN2008 implied by FeatureMips32r6).
+ // The full table can be found in MipsGenSubtargetInfo.inc (MipsFeatureKV[]).
+ // The reason we need this mask is explained in the selectArch function.
+ // FIXME: Ideally we would like TableGen to generate this information.
+ static const uint64_t AllArchRelatedMask =
+ Mips::FeatureMips1 | Mips::FeatureMips2 | Mips::FeatureMips3 |
+ Mips::FeatureMips3_32 | Mips::FeatureMips3_32r2 | Mips::FeatureMips4 |
+ Mips::FeatureMips4_32 | Mips::FeatureMips4_32r2 | Mips::FeatureMips5 |
+ Mips::FeatureMips5_32r2 | Mips::FeatureMips32 | Mips::FeatureMips32r2 |
+ Mips::FeatureMips32r6 | Mips::FeatureMips64 | Mips::FeatureMips64r2 |
+ Mips::FeatureMips64r6 | Mips::FeatureCnMips | Mips::FeatureFP64Bit |
+ Mips::FeatureGP64Bit | Mips::FeatureNaN2008;
private:
- unsigned aTReg;
- bool reorder;
- bool macro;
+ unsigned ATReg;
+ bool Reorder;
+ bool Macro;
+ uint64_t Features;
};
}
namespace {
class MipsAsmParser : public MCTargetAsmParser {
MipsTargetStreamer &getTargetStreamer() {
- MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer();
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<MipsTargetStreamer &>(TS);
}
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
- MipsAssemblerOptions Options;
+ SmallVector<std::unique_ptr<MipsAssemblerOptions>, 2> AssemblerOptions;
+ MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a
+ // nullptr, which indicates that no function is currently
+ // selected. This usually happens after an '.end func'
+ // directive.
+
+ // Print a warning along with its fix-it message at the given range.
+ void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg,
+ SMRange Range, bool ShowColors = true);
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
@@ -76,15 +112,15 @@ class MipsAsmParser : public MCTargetAsmParser {
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
/// Parse a register as used in CFI directives
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool ParseParenSuffix(StringRef Name, OperandVector &Operands);
+ bool parseParenSuffix(StringRef Name, OperandVector &Operands);
- bool ParseBracketSuffix(StringRef Name, OperandVector &Operands);
+ bool parseBracketSuffix(StringRef Name, OperandVector &Operands);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -94,25 +130,28 @@ class MipsAsmParser : public MCTargetAsmParser {
MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands);
MipsAsmParser::OperandMatchResultTy
- MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+ matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
StringRef Identifier, SMLoc S);
MipsAsmParser::OperandMatchResultTy
- MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
+ matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
- MipsAsmParser::OperandMatchResultTy ParseAnyRegister(OperandVector &Operands);
+ MipsAsmParser::OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy ParseImm(OperandVector &Operands);
+ MipsAsmParser::OperandMatchResultTy parseImm(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy ParseJumpTarget(OperandVector &Operands);
+ MipsAsmParser::OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy ParseLSAImm(OperandVector &Operands);
+ MipsAsmParser::OperandMatchResultTy parseLSAImm(OperandVector &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseRegisterList (OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
- bool ParseOperand(OperandVector &, StringRef Mnemonic);
+ bool parseOperand(OperandVector &, StringRef Mnemonic);
bool needsExpansion(MCInst &Inst);
@@ -130,6 +169,9 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void expandLoadAddressSym(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
void expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions, bool isLoad,
bool isImmOpnd);
@@ -142,8 +184,10 @@ class MipsAsmParser : public MCTargetAsmParser {
const MCExpr *evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
bool isEvaluated(const MCExpr *Expr);
+ bool parseSetMips0Directive();
+ bool parseSetArchDirective();
bool parseSetFeature(uint64_t Feature);
- bool parseDirectiveCPLoad(SMLoc Loc);
+ bool parseDirectiveCpLoad(SMLoc Loc);
bool parseDirectiveCPSetup();
bool parseDirectiveNaN();
bool parseDirectiveSet();
@@ -153,10 +197,16 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetNoAtDirective();
bool parseSetMacroDirective();
bool parseSetNoMacroDirective();
+ bool parseSetMsaDirective();
+ bool parseSetNoMsaDirective();
+ bool parseSetNoDspDirective();
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
+ bool parseSetMips16Directive();
bool parseSetNoMips16Directive();
bool parseSetFpDirective();
+ bool parseSetPopDirective();
+ bool parseSetPushDirective();
bool parseSetAssignment();
@@ -174,6 +224,8 @@ class MipsAsmParser : public MCTargetAsmParser {
int matchCPURegisterName(StringRef Symbol);
+ int matchHWRegsRegisterName(StringRef Symbol);
+
int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
int matchFPURegisterName(StringRef Name);
@@ -200,18 +252,51 @@ class MipsAsmParser : public MCTargetAsmParser {
// Example: INSERT.B $w0[n], $1 => 16 > n >= 0
bool validateMSAIndex(int Val, int RegKind);
- void setFeatureBits(unsigned Feature, StringRef FeatureString) {
+ // Selects a new architecture by updating the FeatureBits with the necessary
+ // info including implied dependencies.
+ // Internally, it clears all the feature bits related to *any* architecture
+ // and selects the new one using the ToggleFeature functionality of the
+ // MCSubtargetInfo object that handles implied dependencies. The reason we
+ // clear all the arch related bits manually is because ToggleFeature only
+ // clears the features that imply the feature being cleared and not the
+ // features implied by the feature being cleared. This is easier to see
+ // with an example:
+ // --------------------------------------------------
+ // | Feature | Implies |
+ // | -------------------------------------------------|
+ // | FeatureMips1 | None |
+ // | FeatureMips2 | FeatureMips1 |
+ // | FeatureMips3 | FeatureMips2 | FeatureMipsGP64 |
+ // | FeatureMips4 | FeatureMips3 |
+ // | ... | |
+ // --------------------------------------------------
+ //
+ // Setting Mips3 is equivalent to set: (FeatureMips3 | FeatureMips2 |
+ // FeatureMipsGP64 | FeatureMips1)
+ // Clearing Mips3 is equivalent to clear (FeatureMips3 | FeatureMips4).
+ void selectArch(StringRef ArchFeature) {
+ uint64_t FeatureBits = STI.getFeatureBits();
+ FeatureBits &= ~MipsAssemblerOptions::AllArchRelatedMask;
+ STI.setFeatureBits(FeatureBits);
+ setAvailableFeatures(
+ ComputeAvailableFeatures(STI.ToggleFeature(ArchFeature)));
+ AssemblerOptions.back()->setFeatures(getAvailableFeatures());
+ }
+
+ void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (!(STI.getFeatureBits() & Feature)) {
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
+ AssemblerOptions.back()->setFeatures(getAvailableFeatures());
}
- void clearFeatureBits(unsigned Feature, StringRef FeatureString) {
+ void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (STI.getFeatureBits() & Feature) {
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
+ AssemblerOptions.back()->setFeatures(getAvailableFeatures());
}
public:
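
To make the selectArch comment above concrete, here is a small self-contained C++ sketch. The bit values and the implication chain are invented for illustration (they are not the real MipsFeatureKV entries), but the arithmetic mirrors the comment: toggling Mips3 off clears Mips3 and the features that imply it (Mips4), while Mips2, Mips1 and GP64 stay behind, which is why selectArch wipes the whole arch-related mask first.

    #include <cstdint>
    #include <cstdio>

    // Invented feature bits, only for the sketch.
    enum : uint64_t {
      Mips1 = 1, Mips2 = 2, Mips3 = 4, Mips4 = 8, GP64 = 16,
      AllArch = Mips1 | Mips2 | Mips3 | Mips4 | GP64
    };

    int main() {
      // Start from a MIPS4 configuration (implies Mips3, Mips2, Mips1, GP64).
      uint64_t Bits = Mips4 | Mips3 | Mips2 | Mips1 | GP64;

      // Naive toggle of Mips3: only Mips3 and what implies it (Mips4) go away.
      uint64_t Naive = Bits & ~(Mips3 | Mips4);

      // selectArch-style: clear every arch-related bit, then re-enable the new
      // architecture plus its implications (here: switch to MIPS2).
      uint64_t Clean = (Bits & ~AllArch) | Mips2 | Mips1;

      std::printf("naive = 0x%llx, clean = 0x%llx\n",
                  (unsigned long long)Naive, (unsigned long long)Clean);
      return 0;
    }
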
@@ -225,9 +310,19 @@ public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(), STI(sti) {
+ MCAsmParserExtension::Initialize(parser);
+
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ // Remember the initial assembler options. The user can not modify these.
+ AssemblerOptions.push_back(
+ make_unique<MipsAssemblerOptions>(getAvailableFeatures()));
+
+ // Create an assembler options environment for the user to modify.
+ AssemblerOptions.push_back(
+ make_unique<MipsAssemblerOptions>(getAvailableFeatures()));
getTargetStreamer().updateABIInfo(*this);
@@ -237,12 +332,11 @@ public:
((STI.getFeatureBits() & Mips::FeatureN32) != 0) +
((STI.getFeatureBits() & Mips::FeatureN64) != 0)) == 1);
- if (!isABI_O32() && !allowOddSPReg() != 0)
+ if (!isABI_O32() && !useOddSPReg() != 0)
report_fatal_error("-mno-odd-spreg requires the O32 ABI");
- }
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+ CurrentFn = nullptr;
+ }
/// True if all of $fcc0 - $fcc7 exist for the current ISA.
bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); }
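
A sketch of the snapshot stack the two push_back calls in the constructor above establish and that the new parseSetPushDirective / parseSetPopDirective hooks can work against. This is simplified, non-LLVM code with invented types, and it ignores the feature-bit restoration the real directives would also have to perform: element 0 holds the initial, never-modified options, directives mutate the top element, ".set push" copies the top, and ".set pop" discards it.

    #include <cassert>
    #include <memory>
    #include <vector>

    // Cut-down stand-in for MipsAssemblerOptions.
    struct Options {
      bool Reorder = true;
      bool Macro = true;
    };

    struct OptionStack {
      std::vector<std::unique_ptr<Options>> Stack;

      OptionStack() {
        Stack.push_back(std::make_unique<Options>());   // initial, immutable
        Stack.push_back(std::make_unique<Options>());   // working environment
      }
      Options &top() { return *Stack.back(); }

      void push() {                      // ".set push": snapshot current options
        Stack.push_back(std::make_unique<Options>(top()));
      }
      bool pop() {                       // ".set pop": drop the snapshot,
        if (Stack.size() <= 2)           // but never the two base entries
          return false;
        Stack.pop_back();
        return true;
      }
    };

    int main() {
      OptionStack S;
      S.top().Reorder = false;           // ".set noreorder"
      S.push();                          // ".set push"
      S.top().Macro = false;             // ".set nomacro"
      if (!S.pop())                      // ".set pop"
        return 1;
      assert(S.top().Macro && !S.top().Reorder);
      return 0;
    }
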
@@ -252,9 +346,9 @@ public:
bool isABI_N32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
bool isABI_N64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
bool isABI_O32() const { return STI.getFeatureBits() & Mips::FeatureO32; }
- bool isABI_FPXX() const { return false; } // TODO: add check for FeatureXX
+ bool isABI_FPXX() const { return STI.getFeatureBits() & Mips::FeatureFPXX; }
- bool allowOddSPReg() const {
+ bool useOddSPReg() const {
return !(STI.getFeatureBits() & Mips::FeatureNoOddSPReg);
}
@@ -292,10 +386,10 @@ public:
return STI.getFeatureBits() & Mips::FeatureMips16;
}
// TODO: see how can we get this info.
- bool mipsSEUsesSoftFloat() const { return false; }
+ bool abiUsesSoftFloat() const { return false; }
/// Warn if RegNo is the current assembler temporary.
- void WarnIfAssemblerTemporary(int RegNo, SMLoc Loc);
+ void warnIfAssemblerTemporary(int RegNo, SMLoc Loc);
};
}
@@ -333,7 +427,8 @@ private:
k_Memory, /// Base + Offset Memory Address
k_PhysRegister, /// A physical register from the Mips namespace
k_RegisterIndex, /// A register index in one or more RegKind.
- k_Token /// A simple token
+ k_Token, /// A simple token
+ k_RegList /// A physical register list
} Kind;
public:
@@ -368,12 +463,17 @@ private:
const MCExpr *Off;
};
+ struct RegListOp {
+ SmallVector<unsigned, 10> *List;
+ };
+
union {
struct Token Tok;
struct PhysRegOp PhysReg;
struct RegIdxOp RegIdx;
struct ImmOp Imm;
struct MemOp Mem;
+ struct RegListOp RegList;
};
SMLoc StartLoc, EndLoc;
@@ -397,7 +497,15 @@ public:
/// target.
unsigned getGPR32Reg() const {
assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
- AsmParser.WarnIfAssemblerTemporary(RegIdx.Index, StartLoc);
+ AsmParser.warnIfAssemblerTemporary(RegIdx.Index, StartLoc);
+ unsigned ClassID = Mips::GPR32RegClassID;
+ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+ }
+
+ /// Coerce the register to GPR32 and return the real register for the current
+ /// target.
+ unsigned getGPRMM16Reg() const {
+ assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
unsigned ClassID = Mips::GPR32RegClassID;
return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
}
@@ -550,6 +658,11 @@ public:
Inst.addOperand(MCOperand::CreateReg(getGPR32Reg()));
}
+ void addGPRMM16AsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getGPRMM16Reg()));
+ }
+
/// Render the operand to an MCInst as a GPR64
/// Asserts if the wrong number of operands are requested, or the operand
/// is not a k_RegisterIndex compatible with RegKind_GPR
@@ -572,7 +685,7 @@ public:
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getFGR32Reg()));
// FIXME: We ought to do this for -integrated-as without -via-file-asm too.
- if (!AsmParser.allowOddSPReg() && RegIdx.Index & 1)
+ if (!AsmParser.useOddSPReg() && RegIdx.Index & 1)
AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU "
"registers");
}
@@ -647,6 +760,13 @@ public:
addExpr(Inst, Expr);
}
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ for (auto RegNo : getRegList())
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+ }
+
bool isReg() const override {
// As a special case until we sort out the definition of div/divu, pretend
// that $0/$zero are k_PhysRegister so that MCK_ZERO works correctly.
@@ -679,6 +799,7 @@ public:
int64_t Val = getConstantImm();
return 1 <= Val && Val <= 4;
}
+ bool isRegList() const { return Kind == k_RegList; }
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
@@ -720,6 +841,11 @@ public:
return static_cast<const MCConstantExpr *>(getMemOff())->getValue();
}
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == k_RegList) && "Invalid access!");
+ return *(RegList.List);
+ }
+
static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
MipsAsmParser &Parser) {
auto Op = make_unique<MipsOperand>(k_Token, Parser);
@@ -733,16 +859,16 @@ public:
/// Create a numeric register (e.g. $1). The exact register remains
/// unresolved until an instruction successfully matches
static std::unique_ptr<MipsOperand>
- CreateNumericReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ createNumericReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
- DEBUG(dbgs() << "CreateNumericReg(" << Index << ", ...)\n");
+ DEBUG(dbgs() << "createNumericReg(" << Index << ", ...)\n");
return CreateReg(Index, RegKind_Numeric, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a GPR.
/// This is typically only used for named registers such as $gp.
static std::unique_ptr<MipsOperand>
- CreateGPRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ createGPRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_GPR, RegInfo, S, E, Parser);
}
@@ -750,15 +876,23 @@ public:
/// Create a register that is definitely a FGR.
/// This is typically only used for named registers such as $f0.
static std::unique_ptr<MipsOperand>
- CreateFGRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ createFGRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_FGR, RegInfo, S, E, Parser);
}
+ /// Create a register that is definitely a HWReg.
+ /// This is typically only used for named registers such as $hwr_cpunum.
+ static std::unique_ptr<MipsOperand>
+ createHWRegsReg(unsigned Index, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ return CreateReg(Index, RegKind_HWRegs, RegInfo, S, E, Parser);
+ }
+
/// Create a register that is definitely an FCC.
/// This is typically only used for named registers such as $fcc0.
static std::unique_ptr<MipsOperand>
- CreateFCCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ createFCCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_FCC, RegInfo, S, E, Parser);
}
@@ -766,7 +900,7 @@ public:
/// Create a register that is definitely an ACC.
/// This is typically only used for named registers such as $ac0.
static std::unique_ptr<MipsOperand>
- CreateACCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ createACCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_ACC, RegInfo, S, E, Parser);
}
@@ -774,7 +908,7 @@ public:
/// Create a register that is definitely an MSA128.
/// This is typically only used for named registers such as $w0.
static std::unique_ptr<MipsOperand>
- CreateMSA128Reg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ createMSA128Reg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_MSA128, RegInfo, S, E, Parser);
}
@@ -782,7 +916,7 @@ public:
/// Create a register that is definitely an MSACtrl.
/// This is typically only used for named registers such as $msaaccess.
static std::unique_ptr<MipsOperand>
- CreateMSACtrlReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ createMSACtrlReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_MSACtrl, RegInfo, S, E, Parser);
}
@@ -807,9 +941,29 @@ public:
return Op;
}
+ static std::unique_ptr<MipsOperand>
+ CreateRegList(SmallVectorImpl<unsigned> &Regs, SMLoc StartLoc, SMLoc EndLoc,
+ MipsAsmParser &Parser) {
+ assert (Regs.size() > 0 && "Empty list not allowed");
+
+ auto Op = make_unique<MipsOperand>(k_RegList, Parser);
+ Op->RegList.List = new SmallVector<unsigned, 10>();
+ for (auto Reg : Regs)
+ Op->RegList.List->push_back(Reg);
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
bool isGPRAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31;
}
+ bool isMM16AsmReg() const {
+ if (!(isRegIdx() && RegIdx.Kind))
+ return false;
+ return ((RegIdx.Index >= 2 && RegIdx.Index <= 7)
+ || RegIdx.Index == 16 || RegIdx.Index == 17);
+ }
bool isFGRAsmReg() const {
// AFGR64 is $0-$15 but we handle this in getAFGR64()
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
@@ -855,6 +1009,8 @@ public:
case k_Memory:
delete Mem.Base;
break;
+ case k_RegList:
+ delete RegList.List;
case k_PhysRegister:
case k_RegisterIndex:
case k_Token:
@@ -885,6 +1041,12 @@ public:
case k_Token:
OS << Tok.Data;
break;
+ case k_RegList:
+ OS << "RegList< ";
+ for (auto Reg : (*RegList.List))
+ OS << Reg << " ";
+ OS << ">";
+ break;
}
}
}; // class MipsOperand
@@ -897,6 +1059,19 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return MipsInsts[Opcode];
}
+static bool hasShortDelaySlot(unsigned Opcode) {
+ switch (Opcode) {
+ case Mips::JALS_MM:
+ case Mips::JALRS_MM:
+ case Mips::JALRS16_MM:
+ case Mips::BGEZALS_MM:
+ case Mips::BLTZALS_MM:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
@@ -961,15 +1136,21 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
"nop instruction");
}
- if (MCID.hasDelaySlot() && Options.isReorder()) {
+ if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
Instructions.push_back(Inst);
MCInst NopInst;
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateImm(0));
+ if (hasShortDelaySlot(Inst.getOpcode())) {
+ NopInst.setOpcode(Mips::MOVE16_MM);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ } else {
+ NopInst.setOpcode(Mips::SLL);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateImm(0));
+ }
Instructions.push_back(NopInst);
return false;
}
@@ -1008,6 +1189,80 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
} // for
} // if load/store
+ // TODO: Handle this with the AsmOperandClass.PredicateMethod.
+ if (inMicroMipsMode()) {
+ MCOperand Opnd;
+ int Imm;
+
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case Mips::ADDIUS5_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < -8 || Imm > 7)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::ADDIUSP_MM:
+ Opnd = Inst.getOperand(0);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < -1032 || Imm > 1028 || (Imm < 8 && Imm > -12) ||
+ Imm % 4 != 0)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::SLL16_MM:
+ case Mips::SRL16_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < 1 || Imm > 8)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::LI16_MM:
+ Opnd = Inst.getOperand(1);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (Imm < -1 || Imm > 126)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::ADDIUR2_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (!(Imm == 1 || Imm == -1 ||
+ ((Imm % 4 == 0) && Imm < 28 && Imm > 0)))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::ADDIUR1SP_MM:
+ Opnd = Inst.getOperand(1);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (OffsetToAlignment(Imm, 4LL))
+ return Error(IDLoc, "misaligned immediate operand value");
+ if (Imm < 0 || Imm > 255)
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ case Mips::ANDI16_MM:
+ Opnd = Inst.getOperand(2);
+ if (!Opnd.isImm())
+ return Error(IDLoc, "expected immediate operand kind");
+ Imm = Opnd.getImm();
+ if (!(Imm == 128 || (Imm >= 1 && Imm <= 4) || Imm == 7 || Imm == 8 ||
+ Imm == 15 || Imm == 16 || Imm == 31 || Imm == 32 || Imm == 63 ||
+ Imm == 64 || Imm == 255 || Imm == 32768 || Imm == 65535))
+ return Error(IDLoc, "immediate operand value out of range");
+ break;
+ }
+ }
+
if (needsExpansion(Inst))
return expandInstruction(Inst, IDLoc, Instructions);
else
@@ -1039,7 +1294,7 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
return expandLoadImm(Inst, IDLoc, Instructions);
case Mips::LoadImm64Reg:
if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
}
return expandLoadImm(Inst, IDLoc, Instructions);
@@ -1051,8 +1306,8 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
}
namespace {
-template <int Shift, bool PerformShift>
-void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
+template <bool PerformShift>
+void createShiftOr(MCOperand Operand, unsigned RegNo, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
if (PerformShift) {
@@ -1067,11 +1322,18 @@ void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegNo));
tmpInst.addOperand(MCOperand::CreateReg(RegNo));
- tmpInst.addOperand(
- MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift)));
+ tmpInst.addOperand(Operand);
tmpInst.setLoc(IDLoc);
Instructions.push_back(tmpInst);
}
+
+template <int Shift, bool PerformShift>
+void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ createShiftOr<PerformShift>(
+ MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift)), RegNo,
+ IDLoc, Instructions);
+}
}
bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
@@ -1114,7 +1376,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
} else if ((ImmValue & (0xffffLL << 48)) == 0) {
if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
}
@@ -1141,7 +1403,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
} else {
if (!isGP64bit()) {
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
}
@@ -1176,7 +1438,12 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(2);
- assert(ImmOp.isImm() && "expected immediate operand kind");
+ assert((ImmOp.isImm() || ImmOp.isExpr()) &&
+ "expected immediate operand kind");
+ if (!ImmOp.isImm()) {
+ expandLoadAddressSym(Inst, IDLoc, Instructions);
+ return false;
+ }
const MCOperand &SrcRegOp = Inst.getOperand(1);
assert(SrcRegOp.isReg() && "expected register operand kind");
const MCOperand &DstRegOp = Inst.getOperand(0);
@@ -1220,7 +1487,12 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(1);
- assert(ImmOp.isImm() && "expected immediate operand kind");
+ assert((ImmOp.isImm() || ImmOp.isExpr()) &&
+ "expected immediate operand kind");
+ if (!ImmOp.isImm()) {
+ expandLoadAddressSym(Inst, IDLoc, Instructions);
+ return false;
+ }
const MCOperand &RegOp = Inst.getOperand(0);
assert(RegOp.isReg() && "expected register operand kind");
int ImmValue = ImmOp.getImm();
@@ -1250,6 +1522,71 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
return false;
}
+void
+MipsAsmParser::expandLoadAddressSym(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ // FIXME: If we do have a valid at register to use, we should generate a
+ // slightly shorter sequence here.
+ MCInst tmpInst;
+ int ExprOperandNo = 1;
+ // Sometimes the assembly parser will get the immediate expression as
+ // a $zero + an immediate.
+ if (Inst.getNumOperands() == 3) {
+ assert(Inst.getOperand(1).getReg() ==
+ (isGP64bit() ? Mips::ZERO_64 : Mips::ZERO));
+ ExprOperandNo = 2;
+ }
+ const MCOperand &SymOp = Inst.getOperand(ExprOperandNo);
+ assert(SymOp.isExpr() && "expected symbol operand kind");
+ const MCOperand &RegOp = Inst.getOperand(0);
+ unsigned RegNo = RegOp.getReg();
+ const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymOp.getExpr());
+ const MCSymbolRefExpr *HiExpr =
+ MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
+ const MCSymbolRefExpr *LoExpr =
+ MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+ if (isGP64bit()) {
+ // If it's a 64-bit architecture, expand to:
+ // la d,sym => lui d,highest(sym)
+ // ori d,d,higher(sym)
+ // dsll d,d,16
+ // ori d,d,hi16(sym)
+ // dsll d,d,16
+ // ori d,d,lo16(sym)
+ const MCSymbolRefExpr *HighestExpr =
+ MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_HIGHEST, getContext());
+ const MCSymbolRefExpr *HigherExpr =
+ MCSymbolRefExpr::Create(Symbol->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
+
+ tmpInst.setOpcode(Mips::LUi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateExpr(HighestExpr));
+ Instructions.push_back(tmpInst);
+
+ createShiftOr<false>(MCOperand::CreateExpr(HigherExpr), RegNo, SMLoc(),
+ Instructions);
+ createShiftOr<true>(MCOperand::CreateExpr(HiExpr), RegNo, SMLoc(),
+ Instructions);
+ createShiftOr<true>(MCOperand::CreateExpr(LoExpr), RegNo, SMLoc(),
+ Instructions);
+ } else {
+ // Otherwise, expand to:
+ // la d,sym => lui d,hi16(sym)
+ // ori d,d,lo16(sym)
+ tmpInst.setOpcode(Mips::LUi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ Instructions.push_back(tmpInst);
+
+ createShiftOr<false>(MCOperand::CreateExpr(LoExpr), RegNo, SMLoc(),
+ Instructions);
+ }
+}
+
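The 64-bit branch above rebuilds the symbol address from four 16-bit pieces with lui/ori/dsll; assuming the %highest/%higher/%hi/%lo operands resolve to the plain 16-bit fields of the address (lui's sign extension is shifted out again by the two dsll's), the arithmetic being mirrored is simply:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Sym = 0x1234567890ABCDEFULL; // hypothetical symbol address
      uint16_t Highest = (Sym >> 48) & 0xffff, Higher = (Sym >> 32) & 0xffff;
      uint16_t Hi = (Sym >> 16) & 0xffff, Lo = Sym & 0xffff;

      // lui d,Highest; ori d,d,Higher; dsll d,d,16; ori d,d,Hi; dsll d,d,16; ori d,d,Lo
      uint64_t Reg = (uint64_t)Highest << 16;
      Reg |= Higher;
      Reg <<= 16;
      Reg |= Hi;
      Reg <<= 16;
      Reg |= Lo;
      std::printf("reassembled: 0x%016llx\n", (unsigned long long)Reg);
      return 0; // prints the original Sym value
    }
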
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions,
bool isLoad, bool isImmOpnd) {
@@ -1381,7 +1718,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
@@ -1404,7 +1741,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -1423,16 +1760,25 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
-void MipsAsmParser::WarnIfAssemblerTemporary(int RegIndex, SMLoc Loc) {
- if ((RegIndex != 0) && ((int)Options.getATRegNum() == RegIndex)) {
+void MipsAsmParser::warnIfAssemblerTemporary(int RegIndex, SMLoc Loc) {
+ if ((RegIndex != 0) &&
+ ((int)AssemblerOptions.back()->getATRegNum() == RegIndex)) {
if (RegIndex == 1)
- Warning(Loc, "Used $at without \".set noat\"");
+ Warning(Loc, "used $at without \".set noat\"");
else
- Warning(Loc, Twine("Used $") + Twine(RegIndex) + " with \".set at=$" +
+ Warning(Loc, Twine("used $") + Twine(RegIndex) + " with \".set at=$" +
Twine(RegIndex) + "\"");
}
}
+void
+MipsAsmParser::printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg,
+ SMRange Range, bool ShowColors) {
+ getSourceManager().PrintMessage(Range.Start, SourceMgr::DK_Warning, Msg,
+ Range, SMFixIt(Range, FixMsg),
+ ShowColors);
+}
+
int MipsAsmParser::matchCPURegisterName(StringRef Name) {
int CC;
@@ -1472,23 +1818,55 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
.Case("t9", 25)
.Default(-1);
- if (isABI_N32() || isABI_N64()) {
- // Although SGI documentation just cuts out t0-t3 for n32/n64,
- // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
- // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
- if (8 <= CC && CC <= 11)
- CC += 4;
+ if (!(isABI_N32() || isABI_N64()))
+ return CC;
+
+ if (12 <= CC && CC <= 15) {
+ // Name is one of t4-t7
+ AsmToken RegTok = getLexer().peekTok();
+ SMRange RegRange = RegTok.getLocRange();
+
+ StringRef FixedName = StringSwitch<StringRef>(Name)
+ .Case("t4", "t0")
+ .Case("t5", "t1")
+ .Case("t6", "t2")
+ .Case("t7", "t3")
+ .Default("");
+ assert(FixedName != "" && "Register name is not one of t4-t7.");
+
+ printWarningWithFixIt("register names $t4-$t7 are only available in O32.",
+ "Did you mean $" + FixedName + "?", RegRange);
+ }
+
+ // Although SGI documentation just cuts out t0-t3 for n32/n64,
+ // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
+ // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
+ if (8 <= CC && CC <= 11)
+ CC += 4;
+
+ if (CC == -1)
+ CC = StringSwitch<unsigned>(Name)
+ .Case("a4", 8)
+ .Case("a5", 9)
+ .Case("a6", 10)
+ .Case("a7", 11)
+ .Case("kt0", 26)
+ .Case("kt1", 27)
+ .Default(-1);
- if (CC == -1)
- CC = StringSwitch<unsigned>(Name)
- .Case("a4", 8)
- .Case("a5", 9)
- .Case("a6", 10)
- .Case("a7", 11)
- .Case("kt0", 26)
- .Case("kt1", 27)
- .Default(-1);
- }
+ return CC;
+}
+
+int MipsAsmParser::matchHWRegsRegisterName(StringRef Name) {
+ int CC;
+
+ CC = StringSwitch<unsigned>(Name)
+ .Case("hwr_cpunum", 0)
+ .Case("hwr_synci_step", 1)
+ .Case("hwr_cc", 2)
+ .Case("hwr_ccres", 3)
+ .Case("hwr_ulr", 29)
+ .Default(-1);
return CC;
}
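For reference (not part of the patch), a standalone lookup equivalent to the StringSwitch above; reading these numbers as the conventional RDHWR register indices is our gloss:

    #include <cstdio>
    #include <cstring>

    static int matchHWReg(const char *Name) {
      static const struct { const char *Name; int Num; } Table[] = {
          {"hwr_cpunum", 0}, {"hwr_synci_step", 1}, {"hwr_cc", 2},
          {"hwr_ccres", 3},  {"hwr_ulr", 29}};
      for (const auto &E : Table)
        if (std::strcmp(Name, E.Name) == 0)
          return E.Num;
      return -1; // unknown name, same as StringSwitch's Default(-1)
    }

    int main() {
      std::printf("hwr_ulr -> %d\n", matchHWReg("hwr_ulr")); // prints 29
      return 0;
    }
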
@@ -1568,15 +1946,15 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
if (Reg > 31)
return false;
- aTReg = Reg;
+ ATReg = Reg;
return true;
}
int MipsAsmParser::getATReg(SMLoc Loc) {
- int AT = Options.getATRegNum();
+ int AT = AssemblerOptions.back()->getATRegNum();
if (AT == 0)
reportParseError(Loc,
- "Pseudo instruction requires $at, which is not available");
+ "pseudo-instruction requires $at, which is not available");
return AT;
}
@@ -1597,8 +1975,9 @@ int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
return getReg(RegClass, RegNum);
}
-bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) {
- DEBUG(dbgs() << "ParseOperand\n");
+bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+ MCAsmParser &Parser = getParser();
+ DEBUG(dbgs() << "parseOperand\n");
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
@@ -1626,7 +2005,7 @@ bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) {
// for div, divu, and similar instructions because it is not an operand
// to the instruction definition but an explicit register. Special case
// this situation for now.
- if (ParseAnyRegister(Operands) != MatchOperand_NoMatch)
+ if (parseAnyRegister(Operands) != MatchOperand_NoMatch)
return false;
// Maybe it is a symbol reference.
@@ -1651,7 +2030,7 @@ bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) {
case AsmToken::Tilde:
case AsmToken::String: {
DEBUG(dbgs() << ".. generic integer\n");
- OperandMatchResultTy ResTy = ParseImm(Operands);
+ OperandMatchResultTy ResTy = parseImm(Operands);
return ResTy != MatchOperand_Success;
}
case AsmToken::Percent: {
@@ -1696,7 +2075,7 @@ const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
Val = ((MCE->getValue() + 0x800080008000LL) >> 48) & 0xffff;
break;
default:
- report_fatal_error("Unsupported reloc value!");
+ report_fatal_error("unsupported reloc value");
}
return MCConstantExpr::Create(Val, getContext());
}
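The 0x800080008000 bias in the %highest case above is easy to misread; our understanding (an assumption, not stated in this hunk) is that it pre-folds the carries that arise when the lower %higher/%hi/%lo pieces are later consumed as signed 16-bit addends, for example:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t X = 0x0000FFFFFFFF8000ULL; // every lower piece carries upward
      unsigned long long Plain = (X >> 48) & 0xffff;
      unsigned long long Highest = ((X + 0x800080008000ULL) >> 48) & 0xffff;
      std::printf("plain top field = 0x%04llx, %%highest = 0x%04llx\n",
                  Plain, Highest); // prints 0x0000 vs 0x0001
      return 0;
    }
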
@@ -1753,6 +2132,7 @@ bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
}
bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+ MCAsmParser &Parser = getParser();
Parser.Lex(); // Eat the % token.
const AsmToken &Tok = Parser.getTok(); // Get next token, operation.
if (Tok.isNot(AsmToken::Identifier))
@@ -1797,7 +2177,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
- OperandMatchResultTy ResTy = ParseAnyRegister(Operands);
+ OperandMatchResultTy ResTy = parseAnyRegister(Operands);
if (ResTy == MatchOperand_Success) {
assert(Operands.size() == 1);
MipsOperand &Operand = static_cast<MipsOperand &>(*Operands.front());
@@ -1821,6 +2201,7 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
}
bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
+ MCAsmParser &Parser = getParser();
SMLoc S;
bool Result = true;
@@ -1850,6 +2231,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseMemOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
DEBUG(dbgs() << "parseMemOperand\n");
const MCExpr *IdVal = nullptr;
SMLoc S;
@@ -1882,7 +2264,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
// Zero register assumed, add a memory operand with ZERO as its base.
// "Base" will be managed by k_Memory.
- auto Base = MipsOperand::CreateGPRReg(0, getContext().getRegisterInfo(),
+ auto Base = MipsOperand::createGPRReg(0, getContext().getRegisterInfo(),
S, E, *this);
Operands.push_back(
MipsOperand::CreateMem(std::move(Base), IdVal, S, E, *this));
@@ -1895,7 +2277,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
Parser.Lex(); // Eat the '(' token.
}
- Res = ParseAnyRegister(Operands);
+ Res = parseAnyRegister(Operands);
if (Res != MatchOperand_Success)
return Res;
@@ -1932,7 +2314,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
}
bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
-
+ MCAsmParser &Parser = getParser();
MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
if (Sym) {
SMLoc S = Parser.getTok().getLoc();
@@ -1943,10 +2325,10 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
return false;
if (Expr->getKind() == MCExpr::SymbolRef) {
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
- const StringRef DefSymbol = Ref->getSymbol().getName();
+ StringRef DefSymbol = Ref->getSymbol().getName();
if (DefSymbol.startswith("$")) {
OperandMatchResultTy ResTy =
- MatchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S);
+ matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S);
if (ResTy == MatchOperand_Success) {
Parser.Lex();
return true;
@@ -1966,47 +2348,54 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
StringRef Identifier,
SMLoc S) {
int Index = matchCPURegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateGPRReg(
+ Operands.push_back(MipsOperand::createGPRReg(
+ Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ return MatchOperand_Success;
+ }
+
+ Index = matchHWRegsRegisterName(Identifier);
+ if (Index != -1) {
+ Operands.push_back(MipsOperand::createHWRegsReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchFPURegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateFGRReg(
+ Operands.push_back(MipsOperand::createFGRReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchFCCRegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateFCCReg(
+ Operands.push_back(MipsOperand::createFCCReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchACRegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateACCReg(
+ Operands.push_back(MipsOperand::createACCReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchMSA128RegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateMSA128Reg(
+ Operands.push_back(MipsOperand::createMSA128Reg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchMSA128CtrlRegisterName(Identifier);
if (Index != -1) {
- Operands.push_back(MipsOperand::CreateMSACtrlReg(
+ Operands.push_back(MipsOperand::createMSACtrlReg(
Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
return MatchOperand_Success;
}
@@ -2015,18 +2404,19 @@ MipsAsmParser::MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
+MipsAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
+ MCAsmParser &Parser = getParser();
auto Token = Parser.getLexer().peekTok(false);
if (Token.is(AsmToken::Identifier)) {
DEBUG(dbgs() << ".. identifier\n");
StringRef Identifier = Token.getIdentifier();
OperandMatchResultTy ResTy =
- MatchAnyRegisterNameWithoutDollar(Operands, Identifier, S);
+ matchAnyRegisterNameWithoutDollar(Operands, Identifier, S);
return ResTy;
} else if (Token.is(AsmToken::Integer)) {
DEBUG(dbgs() << ".. integer\n");
- Operands.push_back(MipsOperand::CreateNumericReg(
+ Operands.push_back(MipsOperand::createNumericReg(
Token.getIntVal(), getContext().getRegisterInfo(), S, Token.getLoc(),
*this));
return MatchOperand_Success;
@@ -2038,8 +2428,9 @@ MipsAsmParser::MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseAnyRegister(OperandVector &Operands) {
- DEBUG(dbgs() << "ParseAnyRegister\n");
+MipsAsmParser::parseAnyRegister(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ DEBUG(dbgs() << "parseAnyRegister\n");
auto Token = Parser.getTok();
@@ -2056,7 +2447,7 @@ MipsAsmParser::ParseAnyRegister(OperandVector &Operands) {
}
DEBUG(dbgs() << ".. $\n");
- OperandMatchResultTy ResTy = MatchAnyRegisterWithoutDollar(Operands, S);
+ OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S);
if (ResTy == MatchOperand_Success) {
Parser.Lex(); // $
Parser.Lex(); // identifier
@@ -2065,7 +2456,8 @@ MipsAsmParser::ParseAnyRegister(OperandVector &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseImm(OperandVector &Operands) {
+MipsAsmParser::parseImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
@@ -2089,18 +2481,19 @@ MipsAsmParser::ParseImm(OperandVector &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseJumpTarget(OperandVector &Operands) {
- DEBUG(dbgs() << "ParseJumpTarget\n");
+MipsAsmParser::parseJumpTarget(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ DEBUG(dbgs() << "parseJumpTarget\n");
SMLoc S = getLexer().getLoc();
// Integers and expressions are acceptable
- OperandMatchResultTy ResTy = ParseImm(Operands);
+ OperandMatchResultTy ResTy = parseImm(Operands);
if (ResTy != MatchOperand_NoMatch)
return ResTy;
// Registers are a valid target and have priority over symbols.
- ResTy = ParseAnyRegister(Operands);
+ ResTy = parseAnyRegister(Operands);
if (ResTy != MatchOperand_NoMatch)
return ResTy;
@@ -2116,6 +2509,7 @@ MipsAsmParser::ParseJumpTarget(OperandVector &Operands) {
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseInvNum(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
const MCExpr *IdVal;
// If the first token is '$' we may have register operand.
if (Parser.getTok().is(AsmToken::Dollar))
@@ -2133,7 +2527,8 @@ MipsAsmParser::parseInvNum(OperandVector &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseLSAImm(OperandVector &Operands) {
+MipsAsmParser::parseLSAImm(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
@@ -2171,6 +2566,82 @@ MipsAsmParser::ParseLSAImm(OperandVector &Operands) {
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseRegisterList(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SmallVector<unsigned, 10> Regs;
+ unsigned RegNo;
+ unsigned PrevReg = Mips::NoRegister;
+ bool RegRange = false;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> TmpOperands;
+
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_ParseFail;
+
+ SMLoc S = Parser.getTok().getLoc();
+ while (parseAnyRegister(TmpOperands) == MatchOperand_Success) {
+ SMLoc E = getLexer().getLoc();
+ MipsOperand &Reg = static_cast<MipsOperand &>(*TmpOperands.back());
+ RegNo = isGP64bit() ? Reg.getGPR64Reg() : Reg.getGPR32Reg();
+ if (RegRange) {
+ // Remove last register operand because registers from register range
+ // should be inserted first.
+ if (RegNo == Mips::RA) {
+ Regs.push_back(RegNo);
+ } else {
+ unsigned TmpReg = PrevReg + 1;
+ while (TmpReg <= RegNo) {
+ if ((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) {
+ Error(E, "invalid register operand");
+ return MatchOperand_ParseFail;
+ }
+
+ PrevReg = TmpReg;
+ Regs.push_back(TmpReg++);
+ }
+ }
+
+ RegRange = false;
+ } else {
+ if ((PrevReg == Mips::NoRegister) && (RegNo != Mips::S0) &&
+ (RegNo != Mips::RA)) {
+ Error(E, "$16 or $31 expected");
+ return MatchOperand_ParseFail;
+ } else if (((RegNo < Mips::S0) || (RegNo > Mips::S7)) &&
+ (RegNo != Mips::FP) && (RegNo != Mips::RA)) {
+ Error(E, "invalid register operand");
+ return MatchOperand_ParseFail;
+ } else if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) &&
+ (RegNo != Mips::FP) && (RegNo != Mips::RA)) {
+ Error(E, "consecutive register numbers expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Regs.push_back(RegNo);
+ }
+
+ if (Parser.getTok().is(AsmToken::Minus))
+ RegRange = true;
+
+ if (!Parser.getTok().isNot(AsmToken::Minus) &&
+ !Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(E, "',' or '-' expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Lex(); // Consume comma or minus
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ break;
+
+ PrevReg = RegNo;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(MipsOperand::CreateRegList(Regs, S, E, *this));
+ parseMemOperand(Operands);
+ return MatchOperand_Success;
+}
+
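The loop above accepts register-list operands such as "$16-$19, $31": the list must start at $s0 or $ra, ranges are filled with consecutive s-registers, and $fp/$ra may follow out of sequence. A minimal model of the range expansion (raw register numbers 16..23 stand in for Mips::S0..S7, whose consecutiveness the TmpReg++ walk also assumes):

    #include <cstdio>
    #include <vector>

    int main() {
      // Models "$16-$19, $31".
      const unsigned S0 = 16, S7 = 23, RA = 31;
      unsigned PrevReg = S0, RangeEnd = 19;
      std::vector<unsigned> Regs{PrevReg};
      for (unsigned R = PrevReg + 1; R <= RangeEnd; ++R) {
        if (R < S0 || R > S7) {
          std::puts("invalid register operand");
          return 1;
        }
        Regs.push_back(R);
      }
      Regs.push_back(RA); // trailing $ra is accepted outside the s-range
      for (unsigned R : Regs)
        std::printf("$%u ", R); // $16 $17 $18 $19 $31
      std::printf("\n");
      return 0;
    }
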
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
MCSymbolRefExpr::VariantKind VK =
@@ -2212,12 +2683,13 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
/// ::= '(', register, ')'
/// handle it before we iterate so we don't get tripped up by the lack of
/// a comma.
-bool MipsAsmParser::ParseParenSuffix(StringRef Name, OperandVector &Operands) {
+bool MipsAsmParser::parseParenSuffix(StringRef Name, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
if (getLexer().is(AsmToken::LParen)) {
Operands.push_back(
MipsOperand::CreateToken("(", getLexer().getLoc(), *this));
Parser.Lex();
- if (ParseOperand(Operands, Name)) {
+ if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
@@ -2240,13 +2712,14 @@ bool MipsAsmParser::ParseParenSuffix(StringRef Name, OperandVector &Operands) {
/// ::= '[', integer, ']'
/// handle it before we iterate so we don't get tripped up by the lack of
/// a comma.
-bool MipsAsmParser::ParseBracketSuffix(StringRef Name,
+bool MipsAsmParser::parseBracketSuffix(StringRef Name,
OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
if (getLexer().is(AsmToken::LBrac)) {
Operands.push_back(
MipsOperand::CreateToken("[", getLexer().getLoc(), *this));
Parser.Lex();
- if (ParseOperand(Operands, Name)) {
+ if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
@@ -2265,14 +2738,16 @@ bool MipsAsmParser::ParseBracketSuffix(StringRef Name,
bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
DEBUG(dbgs() << "ParseInstruction\n");
- // We have reached first instruction, module directive after
- // this is forbidden.
- getTargetStreamer().setCanHaveModuleDir(false);
+
+  // We have reached the first instruction; module directives are now forbidden.
+ getTargetStreamer().forbidModuleDirective();
+
// Check if we have valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
Parser.eatToEndOfStatement();
- return Error(NameLoc, "Unknown instruction");
+ return Error(NameLoc, "unknown instruction");
}
// First operand in MCInst is instruction mnemonic.
Operands.push_back(MipsOperand::CreateToken(Name, NameLoc, *this));
@@ -2280,29 +2755,29 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, Name)) {
+ if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
- if (getLexer().is(AsmToken::LBrac) && ParseBracketSuffix(Name, Operands))
+ if (getLexer().is(AsmToken::LBrac) && parseBracketSuffix(Name, Operands))
return true;
// AFAIK, parenthesis suffixes are never on the first operand
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
- if (ParseOperand(Operands, Name)) {
+ if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
// Parse bracket and parenthesis suffixes before we iterate
if (getLexer().is(AsmToken::LBrac)) {
- if (ParseBracketSuffix(Name, Operands))
+ if (parseBracketSuffix(Name, Operands))
return true;
} else if (getLexer().is(AsmToken::LParen) &&
- ParseParenSuffix(Name, Operands))
+ parseParenSuffix(Name, Operands))
return true;
}
}
@@ -2316,6 +2791,7 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
bool MipsAsmParser::reportParseError(Twine ErrorMsg) {
+ MCAsmParser &Parser = getParser();
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, ErrorMsg);
@@ -2326,14 +2802,15 @@ bool MipsAsmParser::reportParseError(SMLoc Loc, Twine ErrorMsg) {
}
bool MipsAsmParser::parseSetNoAtDirective() {
+ MCAsmParser &Parser = getParser();
// Line should look like: ".set noat".
// set at reg to 0.
- Options.setATReg(0);
+ AssemblerOptions.back()->setATReg(0);
// eat noat
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
Parser.Lex(); // Consume the EndOfStatement.
@@ -2341,18 +2818,19 @@ bool MipsAsmParser::parseSetNoAtDirective() {
}
bool MipsAsmParser::parseSetAtDirective() {
+ MCAsmParser &Parser = getParser();
// Line can be .set at - defaults to $1
// or .set at=$reg
int AtRegNo;
getParser().Lex();
if (getLexer().is(AsmToken::EndOfStatement)) {
- Options.setATReg(1);
+ AssemblerOptions.back()->setATReg(1);
Parser.Lex(); // Consume the EndOfStatement.
return false;
} else if (getLexer().is(AsmToken::Equal)) {
getParser().Lex(); // Eat the '='.
if (getLexer().isNot(AsmToken::Dollar)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected dollar sign '$'");
return false;
}
Parser.Lex(); // Eat the '$'.
@@ -2362,7 +2840,7 @@ bool MipsAsmParser::parseSetAtDirective() {
} else if (Reg.is(AsmToken::Integer)) {
AtRegNo = Reg.getIntVal();
} else {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected identifier or integer");
return false;
}
@@ -2371,14 +2849,14 @@ bool MipsAsmParser::parseSetAtDirective() {
return false;
}
- if (!Options.setATReg(AtRegNo)) {
- reportParseError("unexpected token in statement");
+ if (!AssemblerOptions.back()->setATReg(AtRegNo)) {
+ reportParseError("invalid register");
return false;
}
getParser().Lex(); // Eat the register.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
Parser.Lex(); // Consume the EndOfStatement.
@@ -2390,72 +2868,138 @@ bool MipsAsmParser::parseSetAtDirective() {
}
bool MipsAsmParser::parseSetReorderDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- Options.setReorder();
+ AssemblerOptions.back()->setReorder();
getTargetStreamer().emitDirectiveSetReorder();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetNoReorderDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- Options.setNoreorder();
+ AssemblerOptions.back()->setNoReorder();
getTargetStreamer().emitDirectiveSetNoReorder();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetMacroDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- Options.setMacro();
+ AssemblerOptions.back()->setMacro();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetNoMacroDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("`noreorder' must be set before `nomacro'");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- if (Options.isReorder()) {
+ if (AssemblerOptions.back()->isReorder()) {
reportParseError("`noreorder' must be set before `nomacro'");
return false;
}
- Options.setNomacro();
+ AssemblerOptions.back()->setNoMacro();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
-bool MipsAsmParser::parseSetNoMips16Directive() {
+bool MipsAsmParser::parseSetMsaDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ setFeatureBits(Mips::FeatureMSA, "msa");
+ getTargetStreamer().emitDirectiveSetMsa();
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoMsaDirective() {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ clearFeatureBits(Mips::FeatureMSA, "msa");
+ getTargetStreamer().emitDirectiveSetNoMsa();
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoDspDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "nodsp".
+
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
- // For now do nothing.
+
+ clearFeatureBits(Mips::FeatureDSP, "dsp");
+ getTargetStreamer().emitDirectiveSetNoDsp();
+ return false;
+}
+
+bool MipsAsmParser::parseSetMips16Directive() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "mips16".
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ setFeatureBits(Mips::FeatureMips16, "mips16");
+ getTargetStreamer().emitDirectiveSetMips16();
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MipsAsmParser::parseSetNoMips16Directive() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex(); // Eat "nomips16".
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ clearFeatureBits(Mips::FeatureMips16, "mips16");
+ getTargetStreamer().emitDirectiveSetNoMips16();
Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetFpDirective() {
+ MCAsmParser &Parser = getParser();
MipsABIFlagsSection::FpABIKind FpAbiVal;
// Line can be: .set fp=32
// .set fp=xx
@@ -2463,7 +3007,7 @@ bool MipsAsmParser::parseSetFpDirective() {
Parser.Lex(); // Eat fp token
AsmToken Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Equal)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected equals sign '='");
return false;
}
Parser.Lex(); // Eat '=' token.
@@ -2473,7 +3017,7 @@ bool MipsAsmParser::parseSetFpDirective() {
return false;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
getTargetStreamer().emitDirectiveSetFp(FpAbiVal);
@@ -2481,15 +3025,50 @@ bool MipsAsmParser::parseSetFpDirective() {
return false;
}
+bool MipsAsmParser::parseSetPopDirective() {
+ MCAsmParser &Parser = getParser();
+ SMLoc Loc = getLexer().getLoc();
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ // Always keep an element on the options "stack" to prevent the user
+ // from changing the initial options. This is how we remember them.
+ if (AssemblerOptions.size() == 2)
+ return reportParseError(Loc, ".set pop with no .set push");
+
+ AssemblerOptions.pop_back();
+ setAvailableFeatures(AssemblerOptions.back()->getFeatures());
+
+ getTargetStreamer().emitDirectiveSetPop();
+ return false;
+}
+
+bool MipsAsmParser::parseSetPushDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ // Create a copy of the current assembler options environment and push it.
+ AssemblerOptions.push_back(
+ make_unique<MipsAssemblerOptions>(AssemblerOptions.back().get()));
+
+ getTargetStreamer().emitDirectiveSetPush();
+ return false;
+}
+
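Taken together, the pop/push handlers above maintain a stack whose bottom entries are never removed; our reading (an assumption based on the size() == 2 check, not spelled out in the hunk) is that the bottom two entries are the initial module-level options and the options currently in effect. A toy model with plain integers:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> Options = {0 /*initial*/, 0 /*current*/};
      auto Push = [&] { Options.push_back(Options.back()); };
      auto Pop = [&] {
        if (Options.size() == 2) {
          std::puts(".set pop with no .set push");
          return;
        }
        Options.pop_back();
      };

      Pop();              // rejected: nothing has been pushed yet
      Push();             // .set push
      Options.back() = 1; // e.g. a .set noreorder inside the pushed scope
      Pop();              // .set pop restores the previous options
      std::printf("current options = %d\n", Options.back()); // prints 0
      return 0;
    }
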
bool MipsAsmParser::parseSetAssignment() {
StringRef Name;
const MCExpr *Value;
+ MCAsmParser &Parser = getParser();
if (Parser.parseIdentifier(Name))
reportParseError("expected identifier after .set");
if (getLexer().isNot(AsmToken::Comma))
- return reportParseError("unexpected token in .set directive");
+ return reportParseError("unexpected token, expected comma");
Lex(); // Eat comma
if (Parser.parseExpression(Value))
@@ -2505,10 +3084,61 @@ bool MipsAsmParser::parseSetAssignment() {
return false;
}
+bool MipsAsmParser::parseSetMips0Directive() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return reportParseError("unexpected token, expected end of statement");
+
+ // Reset assembler options to their initial values.
+ setAvailableFeatures(AssemblerOptions.front()->getFeatures());
+ AssemblerOptions.back()->setFeatures(AssemblerOptions.front()->getFeatures());
+
+ getTargetStreamer().emitDirectiveSetMips0();
+ return false;
+}
+
+bool MipsAsmParser::parseSetArchDirective() {
+ MCAsmParser &Parser = getParser();
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Equal))
+ return reportParseError("unexpected token, expected equals sign");
+
+ Parser.Lex();
+ StringRef Arch;
+ if (Parser.parseIdentifier(Arch))
+ return reportParseError("expected arch identifier");
+
+ StringRef ArchFeatureName =
+ StringSwitch<StringRef>(Arch)
+ .Case("mips1", "mips1")
+ .Case("mips2", "mips2")
+ .Case("mips3", "mips3")
+ .Case("mips4", "mips4")
+ .Case("mips5", "mips5")
+ .Case("mips32", "mips32")
+ .Case("mips32r2", "mips32r2")
+ .Case("mips32r6", "mips32r6")
+ .Case("mips64", "mips64")
+ .Case("mips64r2", "mips64r2")
+ .Case("mips64r6", "mips64r6")
+ .Case("cnmips", "cnmips")
+ .Case("r4000", "mips3") // This is an implementation of Mips3.
+ .Default("");
+
+ if (ArchFeatureName.empty())
+ return reportParseError("unsupported architecture");
+
+ selectArch(ArchFeatureName);
+ getTargetStreamer().emitDirectiveSetArch(Arch);
+ return false;
+}
+
bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
+ MCAsmParser &Parser = getParser();
Parser.Lex();
if (getLexer().isNot(AsmToken::EndOfStatement))
- return reportParseError("unexpected token in .set directive");
+ return reportParseError("unexpected token, expected end of statement");
switch (Feature) {
default:
@@ -2520,26 +3150,56 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
case Mips::FeatureMicroMips:
getTargetStreamer().emitDirectiveSetMicroMips();
break;
- case Mips::FeatureMips16:
- getTargetStreamer().emitDirectiveSetMips16();
+ case Mips::FeatureMips1:
+ selectArch("mips1");
+ getTargetStreamer().emitDirectiveSetMips1();
+ break;
+ case Mips::FeatureMips2:
+ selectArch("mips2");
+ getTargetStreamer().emitDirectiveSetMips2();
+ break;
+ case Mips::FeatureMips3:
+ selectArch("mips3");
+ getTargetStreamer().emitDirectiveSetMips3();
+ break;
+ case Mips::FeatureMips4:
+ selectArch("mips4");
+ getTargetStreamer().emitDirectiveSetMips4();
+ break;
+ case Mips::FeatureMips5:
+ selectArch("mips5");
+ getTargetStreamer().emitDirectiveSetMips5();
+ break;
+ case Mips::FeatureMips32:
+ selectArch("mips32");
+ getTargetStreamer().emitDirectiveSetMips32();
break;
case Mips::FeatureMips32r2:
- setFeatureBits(Mips::FeatureMips32r2, "mips32r2");
+ selectArch("mips32r2");
getTargetStreamer().emitDirectiveSetMips32R2();
break;
+ case Mips::FeatureMips32r6:
+ selectArch("mips32r6");
+ getTargetStreamer().emitDirectiveSetMips32R6();
+ break;
case Mips::FeatureMips64:
- setFeatureBits(Mips::FeatureMips64, "mips64");
+ selectArch("mips64");
getTargetStreamer().emitDirectiveSetMips64();
break;
case Mips::FeatureMips64r2:
- setFeatureBits(Mips::FeatureMips64r2, "mips64r2");
+ selectArch("mips64r2");
getTargetStreamer().emitDirectiveSetMips64R2();
break;
+ case Mips::FeatureMips64r6:
+ selectArch("mips64r6");
+ getTargetStreamer().emitDirectiveSetMips64R6();
+ break;
}
return false;
}
bool MipsAsmParser::eatComma(StringRef ErrorStr) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
@@ -2550,14 +3210,17 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) {
return true;
}
-bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) {
- if (Options.isReorder())
- Warning(Loc, ".cpload in reorder section");
+bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
+ if (AssemblerOptions.back()->isReorder())
+ Warning(Loc, ".cpload should be inside a noreorder section");
- // FIXME: Warn if cpload is used in Mips16 mode.
+ if (inMips16Mode()) {
+ reportParseError(".cpload is not supported in Mips16 mode");
+ return false;
+ }
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
- OperandMatchResultTy ResTy = ParseAnyRegister(Reg);
+ OperandMatchResultTy ResTy = parseAnyRegister(Reg);
if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
reportParseError("expected register containing function address");
return false;
@@ -2569,17 +3232,24 @@ bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) {
return false;
}
- getTargetStreamer().emitDirectiveCpload(RegOpnd.getGPR32Reg());
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ getTargetStreamer().emitDirectiveCpLoad(RegOpnd.getGPR32Reg());
return false;
}
bool MipsAsmParser::parseDirectiveCPSetup() {
+ MCAsmParser &Parser = getParser();
unsigned FuncReg;
unsigned Save;
bool SaveIsReg = true;
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg;
- OperandMatchResultTy ResTy = ParseAnyRegister(TmpReg);
+ OperandMatchResultTy ResTy = parseAnyRegister(TmpReg);
if (ResTy == MatchOperand_NoMatch) {
reportParseError("expected register containing function address");
Parser.eatToEndOfStatement();
@@ -2596,10 +3266,10 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
FuncReg = FuncRegOpnd.getGPR32Reg();
TmpReg.clear();
- if (!eatComma("expected comma parsing directive"))
+ if (!eatComma("unexpected token, expected comma"))
return true;
- ResTy = ParseAnyRegister(TmpReg);
+ ResTy = parseAnyRegister(TmpReg);
if (ResTy == MatchOperand_NoMatch) {
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Integer)) {
@@ -2621,7 +3291,7 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
Save = SaveOpnd.getGPR32Reg();
}
- if (!eatComma("expected comma parsing directive"))
+ if (!eatComma("unexpected token, expected comma"))
return true;
StringRef Name;
@@ -2634,6 +3304,7 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
}
bool MipsAsmParser::parseDirectiveNaN() {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
const AsmToken &Tok = Parser.getTok();
@@ -2654,7 +3325,7 @@ bool MipsAsmParser::parseDirectiveNaN() {
}
bool MipsAsmParser::parseDirectiveSet() {
-
+ MCAsmParser &Parser = getParser();
// Get the next token.
const AsmToken &Tok = Parser.getTok();
@@ -2662,8 +3333,14 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetNoAtDirective();
} else if (Tok.getString() == "at") {
return parseSetAtDirective();
+ } else if (Tok.getString() == "arch") {
+ return parseSetArchDirective();
} else if (Tok.getString() == "fp") {
return parseSetFpDirective();
+ } else if (Tok.getString() == "pop") {
+ return parseSetPopDirective();
+ } else if (Tok.getString() == "push") {
+ return parseSetPushDirective();
} else if (Tok.getString() == "reorder") {
return parseSetReorderDirective();
} else if (Tok.getString() == "noreorder") {
@@ -2673,7 +3350,7 @@ bool MipsAsmParser::parseDirectiveSet() {
} else if (Tok.getString() == "nomacro") {
return parseSetNoMacroDirective();
} else if (Tok.getString() == "mips16") {
- return parseSetFeature(Mips::FeatureMips16);
+ return parseSetMips16Directive();
} else if (Tok.getString() == "nomips16") {
return parseSetNoMips16Directive();
} else if (Tok.getString() == "nomicromips") {
@@ -2682,14 +3359,38 @@ bool MipsAsmParser::parseDirectiveSet() {
return false;
} else if (Tok.getString() == "micromips") {
return parseSetFeature(Mips::FeatureMicroMips);
+ } else if (Tok.getString() == "mips0") {
+ return parseSetMips0Directive();
+ } else if (Tok.getString() == "mips1") {
+ return parseSetFeature(Mips::FeatureMips1);
+ } else if (Tok.getString() == "mips2") {
+ return parseSetFeature(Mips::FeatureMips2);
+ } else if (Tok.getString() == "mips3") {
+ return parseSetFeature(Mips::FeatureMips3);
+ } else if (Tok.getString() == "mips4") {
+ return parseSetFeature(Mips::FeatureMips4);
+ } else if (Tok.getString() == "mips5") {
+ return parseSetFeature(Mips::FeatureMips5);
+ } else if (Tok.getString() == "mips32") {
+ return parseSetFeature(Mips::FeatureMips32);
} else if (Tok.getString() == "mips32r2") {
return parseSetFeature(Mips::FeatureMips32r2);
+ } else if (Tok.getString() == "mips32r6") {
+ return parseSetFeature(Mips::FeatureMips32r6);
} else if (Tok.getString() == "mips64") {
return parseSetFeature(Mips::FeatureMips64);
} else if (Tok.getString() == "mips64r2") {
return parseSetFeature(Mips::FeatureMips64r2);
+ } else if (Tok.getString() == "mips64r6") {
+ return parseSetFeature(Mips::FeatureMips64r6);
} else if (Tok.getString() == "dsp") {
return parseSetFeature(Mips::FeatureDSP);
+ } else if (Tok.getString() == "nodsp") {
+ return parseSetNoDspDirective();
+ } else if (Tok.getString() == "msa") {
+ return parseSetMsaDirective();
+ } else if (Tok.getString() == "nomsa") {
+ return parseSetNoMsaDirective();
} else {
// It is just an identifier, look for an assignment.
parseSetAssignment();
@@ -2702,6 +3403,7 @@ bool MipsAsmParser::parseDirectiveSet() {
/// parseDataDirective
/// ::= .word [ expression (, expression)* ]
bool MipsAsmParser::parseDataDirective(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -2713,9 +3415,8 @@ bool MipsAsmParser::parseDataDirective(unsigned Size, SMLoc L) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
- // FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
+ return Error(L, "unexpected token, expected comma");
Parser.Lex();
}
}
@@ -2727,6 +3428,7 @@ bool MipsAsmParser::parseDataDirective(unsigned Size, SMLoc L) {
/// parseDirectiveGpWord
/// ::= .gpword local_sym
bool MipsAsmParser::parseDirectiveGpWord() {
+ MCAsmParser &Parser = getParser();
const MCExpr *Value;
// EmitGPRel32Value requires an expression, so we are using base class
// method to evaluate the expression.
@@ -2735,7 +3437,8 @@ bool MipsAsmParser::parseDirectiveGpWord() {
getParser().getStreamer().EmitGPRel32Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(getLexer().getLoc(), "unexpected token in directive");
+ return Error(getLexer().getLoc(),
+ "unexpected token, expected end of statement");
Parser.Lex(); // Eat EndOfStatement token.
return false;
}
@@ -2743,6 +3446,7 @@ bool MipsAsmParser::parseDirectiveGpWord() {
/// parseDirectiveGpDWord
/// ::= .gpdword local_sym
bool MipsAsmParser::parseDirectiveGpDWord() {
+ MCAsmParser &Parser = getParser();
const MCExpr *Value;
// EmitGPRel64Value requires an expression, so we are using base class
// method to evaluate the expression.
@@ -2751,17 +3455,19 @@ bool MipsAsmParser::parseDirectiveGpDWord() {
getParser().getStreamer().EmitGPRel64Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(getLexer().getLoc(), "unexpected token in directive");
+ return Error(getLexer().getLoc(),
+ "unexpected token, expected end of statement");
Parser.Lex(); // Eat EndOfStatement token.
return false;
}
bool MipsAsmParser::parseDirectiveOption() {
+ MCAsmParser &Parser = getParser();
// Get the option token.
AsmToken Tok = Parser.getTok();
// At the moment only identifiers are supported.
if (Tok.isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(), "unexpected token in .option directive");
+ Error(Parser.getTok().getLoc(), "unexpected token, expected identifier");
Parser.eatToEndOfStatement();
return false;
}
@@ -2773,7 +3479,7 @@ bool MipsAsmParser::parseDirectiveOption() {
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
Error(Parser.getTok().getLoc(),
- "unexpected token in .option pic0 directive");
+ "unexpected token, expected end of statement");
Parser.eatToEndOfStatement();
}
return false;
@@ -2784,14 +3490,15 @@ bool MipsAsmParser::parseDirectiveOption() {
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
Error(Parser.getTok().getLoc(),
- "unexpected token in .option pic2 directive");
+ "unexpected token, expected end of statement");
Parser.eatToEndOfStatement();
}
return false;
}
// Unknown option.
- Warning(Parser.getTok().getLoc(), "unknown option in .option directive");
+ Warning(Parser.getTok().getLoc(),
+ "unknown option, expected 'pic0' or 'pic2'");
Parser.eatToEndOfStatement();
return false;
}
@@ -2801,10 +3508,11 @@ bool MipsAsmParser::parseDirectiveOption() {
/// ::= .module nooddspreg
/// ::= .module fp=value
bool MipsAsmParser::parseDirectiveModule() {
+ MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
SMLoc L = Lexer.getLoc();
- if (!getTargetStreamer().getCanHaveModuleDir()) {
+ if (!getTargetStreamer().isModuleDirectiveAllowed()) {
// TODO : get a better message.
reportParseError(".module directive must appear before any code");
return false;
@@ -2819,7 +3527,7 @@ bool MipsAsmParser::parseDirectiveModule() {
clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("Expected end of statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
@@ -2834,7 +3542,7 @@ bool MipsAsmParser::parseDirectiveModule() {
setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("Expected end of statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
@@ -2854,10 +3562,11 @@ bool MipsAsmParser::parseDirectiveModule() {
/// ::= =xx
/// ::= =64
bool MipsAsmParser::parseDirectiveModuleFP() {
+ MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
if (Lexer.isNot(AsmToken::Equal)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected equals sign '='");
return false;
}
Parser.Lex(); // Eat '=' token.
@@ -2867,7 +3576,7 @@ bool MipsAsmParser::parseDirectiveModuleFP() {
return false;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
+ reportParseError("unexpected token, expected end of statement");
return false;
}
@@ -2879,6 +3588,7 @@ bool MipsAsmParser::parseDirectiveModuleFP() {
bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
StringRef Directive) {
+ MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
if (Lexer.is(AsmToken::Identifier)) {
@@ -2925,30 +3635,160 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
}
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
+ MCAsmParser &Parser = getParser();
StringRef IDVal = DirectiveID.getString();
if (IDVal == ".cpload")
- return parseDirectiveCPLoad(DirectiveID.getLoc());
+ return parseDirectiveCpLoad(DirectiveID.getLoc());
if (IDVal == ".dword") {
parseDataDirective(8, DirectiveID.getLoc());
return false;
}
-
if (IDVal == ".ent") {
- // Ignore this directive for now.
- Parser.Lex();
+ StringRef SymbolName;
+
+ if (Parser.parseIdentifier(SymbolName)) {
+ reportParseError("expected identifier after .ent");
+ return false;
+ }
+
+ // There's an undocumented extension that allows an integer to
+ // follow the name of the procedure which AFAICS is ignored by GAS.
+ // Example: .ent foo,2
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma)) {
+ // Even though we accept this undocumented extension for compatibility
+ // reasons, the additional integer argument does not actually change
+ // the behaviour of the '.ent' directive, so we would like to discourage
+ // its use. We do this by not referring to the extended version in
+ // error messages which are not directly related to its use.
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+ Parser.Lex(); // Eat the comma.
+ const MCExpr *DummyNumber;
+ int64_t DummyNumberVal;
+ // If the user was explicitly trying to use the extended version,
+ // we still give helpful extension-related error messages.
+ if (Parser.parseExpression(DummyNumber)) {
+ reportParseError("expected number after comma");
+ return false;
+ }
+ if (!DummyNumber->EvaluateAsAbsolute(DummyNumberVal)) {
+ reportParseError("expected an absolute expression after comma");
+ return false;
+ }
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
+ getTargetStreamer().emitDirectiveEnt(*Sym);
+ CurrentFn = Sym;
return false;
}
if (IDVal == ".end") {
- // Ignore this directive for now.
- Parser.Lex();
+ StringRef SymbolName;
+
+ if (Parser.parseIdentifier(SymbolName)) {
+ reportParseError("expected identifier after .end");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ if (CurrentFn == nullptr) {
+ reportParseError(".end used without .ent");
+ return false;
+ }
+
+ if ((SymbolName != CurrentFn->getName())) {
+ reportParseError(".end symbol does not match .ent symbol");
+ return false;
+ }
+
+ getTargetStreamer().emitDirectiveEnd(SymbolName);
+ CurrentFn = nullptr;
return false;
}
if (IDVal == ".frame") {
- // Ignore this directive for now.
- Parser.eatToEndOfStatement();
+ // .frame $stack_reg, frame_size_in_bytes, $return_reg
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg;
+ OperandMatchResultTy ResTy = parseAnyRegister(TmpReg);
+ if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ reportParseError("expected stack register");
+ return false;
+ }
+
+ MipsOperand &StackRegOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
+ if (!StackRegOpnd.isGPRAsmReg()) {
+ reportParseError(StackRegOpnd.getStartLoc(),
+ "expected general purpose register");
+ return false;
+ }
+ unsigned StackReg = StackRegOpnd.getGPR32Reg();
+
+ if (Parser.getTok().is(AsmToken::Comma))
+ Parser.Lex();
+ else {
+ reportParseError("unexpected token, expected comma");
+ return false;
+ }
+
+ // Parse the frame size.
+ const MCExpr *FrameSize;
+ int64_t FrameSizeVal;
+
+ if (Parser.parseExpression(FrameSize)) {
+ reportParseError("expected frame size value");
+ return false;
+ }
+
+ if (!FrameSize->EvaluateAsAbsolute(FrameSizeVal)) {
+ reportParseError("frame size not an absolute expression");
+ return false;
+ }
+
+ if (Parser.getTok().is(AsmToken::Comma))
+ Parser.Lex();
+ else {
+ reportParseError("unexpected token, expected comma");
+ return false;
+ }
+
+ // Parse the return register.
+ TmpReg.clear();
+ ResTy = parseAnyRegister(TmpReg);
+ if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ reportParseError("expected return register");
+ return false;
+ }
+
+ MipsOperand &ReturnRegOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
+ if (!ReturnRegOpnd.isGPRAsmReg()) {
+ reportParseError(ReturnRegOpnd.getStartLoc(),
+ "expected general purpose register");
+ return false;
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ getTargetStreamer().emitFrame(StackReg, FrameSizeVal,
+ ReturnRegOpnd.getGPR32Reg());
return false;
}
@@ -2956,15 +3796,61 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveSet();
}
- if (IDVal == ".fmask") {
- // Ignore this directive for now.
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (IDVal == ".mask" || IDVal == ".fmask") {
+ // .mask bitmask, frame_offset
+ // bitmask: One bit for each register used.
+ // frame_offset: Offset from Canonical Frame Address ($sp on entry) where
+ // first register is expected to be saved.
+ // Examples:
+ // .mask 0x80000000, -4
+ // .fmask 0x80000000, -4
+ //
- if (IDVal == ".mask") {
- // Ignore this directive for now.
- Parser.eatToEndOfStatement();
+ // Parse the bitmask
+ const MCExpr *BitMask;
+ int64_t BitMaskVal;
+
+ if (Parser.parseExpression(BitMask)) {
+ reportParseError("expected bitmask value");
+ return false;
+ }
+
+ if (!BitMask->EvaluateAsAbsolute(BitMaskVal)) {
+ reportParseError("bitmask not an absolute expression");
+ return false;
+ }
+
+ if (Parser.getTok().is(AsmToken::Comma))
+ Parser.Lex();
+ else {
+ reportParseError("unexpected token, expected comma");
+ return false;
+ }
+
+ // Parse the frame_offset
+ const MCExpr *FrameOffset;
+ int64_t FrameOffsetVal;
+
+ if (Parser.parseExpression(FrameOffset)) {
+ reportParseError("expected frame offset value");
+ return false;
+ }
+
+ if (!FrameOffset->EvaluateAsAbsolute(FrameOffsetVal)) {
+ reportParseError("frame offset not an absolute expression");
+ return false;
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ if (IDVal == ".mask")
+ getTargetStreamer().emitMask(BitMaskVal, FrameOffsetVal);
+ else
+ getTargetStreamer().emitFMask(BitMaskVal, FrameOffsetVal);
return false;
}
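A small sketch (not part of the patch) of how a bitmask like the 0x80000000 in the comment above is formed: one bit per saved register, indexed by register number. The -4 offset is simply echoed from the example; the real offset depends on where the first register is stored in the frame.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned SavedRegs[] = {16, 17, 31}; // $s0, $s1, $ra (hypothetical)
      uint32_t BitMask = 0;
      for (unsigned Reg : SavedRegs)
        BitMask |= 1u << Reg;
      std::printf(".mask 0x%08x, -4\n", BitMask); // prints ".mask 0x80030000, -4"
      return 0;
    }
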
@@ -2992,7 +3878,8 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".abicalls") {
getTargetStreamer().emitDirectiveAbiCalls();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token in directive");
+ Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
// Clear line
Parser.eatToEndOfStatement();
}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index bf67d71..1f201b0 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -3,8 +3,7 @@ set(LLVM_TARGET_DEFINITIONS Mips.td)
tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM MipsGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter)
-tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MipsGenFastISel.inc -gen-fast-isel)
@@ -22,13 +21,13 @@ add_llvm_target(MipsCodeGen
Mips16ISelDAGToDAG.cpp
Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
+ MipsABIInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
- MipsCodeEmitter.cpp
+ MipsCCState.cpp
MipsConstantIslandPass.cpp
MipsDelaySlotFiller.cpp
MipsFastISel.cpp
- MipsJITInfo.cpp
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
MipsISelLowering.cpp
diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt
index bb70fd3..414b4f7 100644
--- a/lib/Target/Mips/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = MipsDisassembler
parent = Mips
-required_libraries = MC MipsInfo Support
+required_libraries = MCDisassembler MipsInfo Support
add_to_library_groups = Mips
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 902b877..48904ce 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -20,7 +20,6 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -31,15 +30,14 @@ typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
-/// MipsDisassemblerBase - a disasembler class for Mips.
+/// A disassembler class for Mips.
class MipsDisassemblerBase : public MCDisassembler {
public:
- /// Constructor - Initializes the disassembler.
- ///
MipsDisassemblerBase(const MCSubtargetInfo &STI, MCContext &Ctx,
- bool bigEndian) :
- MCDisassembler(STI, Ctx),
- IsN64(STI.getFeatureBits() & Mips::FeatureN64), isBigEndian(bigEndian) {}
+ bool IsBigEndian)
+ : MCDisassembler(STI, Ctx),
+ IsN64(STI.getFeatureBits() & Mips::FeatureN64),
+ IsBigEndian(IsBigEndian) {}
virtual ~MipsDisassemblerBase() {}
@@ -48,15 +46,13 @@ public:
private:
bool IsN64;
protected:
- bool isBigEndian;
+ bool IsBigEndian;
};
-/// MipsDisassembler - a disasembler class for Mips32.
+/// A disassembler class for Mips32.
class MipsDisassembler : public MipsDisassemblerBase {
bool IsMicroMips;
public:
- /// Constructor - Initializes the disassembler.
- ///
MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian)
: MipsDisassemblerBase(STI, Ctx, bigEndian) {
IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
@@ -75,32 +71,23 @@ public:
return !hasMips32() && !hasMips3();
}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
-
-/// Mips64Disassembler - a disasembler class for Mips64.
+/// A disassembler class for Mips64.
class Mips64Disassembler : public MipsDisassemblerBase {
public:
- /// Constructor - Initializes the disassembler.
- ///
Mips64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
bool bigEndian) :
MipsDisassemblerBase(STI, Ctx, bigEndian) {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -117,6 +104,11 @@ static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -142,11 +134,6 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeFGRH32RegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-
static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -255,6 +242,11 @@ static DecodeStatus DecodeMem(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCacheOp(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
@@ -272,6 +264,14 @@ static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFMem2(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -341,6 +341,10 @@ static DecodeStatus
DecodeBlezGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
namespace llvm {
extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
TheMips64elTarget;
@@ -456,7 +460,7 @@ static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
if (Rs >= Rt) {
@@ -495,7 +499,7 @@ static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
if (Rs >= Rt) {
@@ -535,7 +539,7 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
if (Rt == 0)
@@ -580,7 +584,7 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
if (Rt == 0)
return MCDisassembler::Fail;
@@ -622,7 +626,7 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
bool HasRt = false;
@@ -671,7 +675,7 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4;
bool HasRs = false;
if (Rt == 0)
@@ -696,43 +700,31 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
return MCDisassembler::Success;
}
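Throughout these branch-decoder hunks the scaling of sign-extended fields changes from "Imm << 2" to "Imm * 4". A minimal standalone sketch of the presumed motivation, namely that multiplying a negative signed value is well defined while left-shifting one is undefined behaviour in C++ (the function name and values below are illustrative only):

#include <cassert>
#include <cstdint>

// Scale a sign-extended instruction field to a byte offset. Multiplication is
// well defined for negative operands, whereas a left shift of a negative
// signed value is undefined behaviour.
static int64_t scaleByFour(int64_t Field) { return Field * 4; }

int main() {
  assert(scaleByFour(3) == 12);    // positive fields: same result as << 2
  assert(scaleByFour(-4) == -16);  // negative fields: now well defined
  return 0;
}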
- /// readInstruction - read four bytes from the MemoryObject
- /// and return 32 bit word sorted according to the given endianess
-static DecodeStatus readInstruction32(const MemoryObject &region,
- uint64_t address,
- uint64_t &size,
- uint32_t &insn,
- bool isBigEndian,
- bool IsMicroMips) {
- uint8_t Bytes[4];
-
+/// Read four bytes from the ArrayRef and return the 32-bit word sorted
+/// according to the given endianness.
+static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn,
+ bool IsBigEndian, bool IsMicroMips) {
// We want to read exactly 4 Bytes of data.
- if (region.readBytes(address, 4, Bytes) == -1) {
- size = 0;
+ if (Bytes.size() < 4) {
+ Size = 0;
return MCDisassembler::Fail;
}
- if (isBigEndian) {
+ if (IsBigEndian) {
// Encoded as a big-endian 32-bit word in the stream.
- insn = (Bytes[3] << 0) |
- (Bytes[2] << 8) |
- (Bytes[1] << 16) |
- (Bytes[0] << 24);
- }
- else {
+ Insn =
+ (Bytes[3] << 0) | (Bytes[2] << 8) | (Bytes[1] << 16) | (Bytes[0] << 24);
+ } else {
// Encoded as a little-endian 32-bit word in the stream.
// Little-endian byte ordering:
// mips32r2: 4 | 3 | 2 | 1
// microMIPS: 2 | 1 | 4 | 3
if (IsMicroMips) {
- insn = (Bytes[2] << 0) |
- (Bytes[3] << 8) |
- (Bytes[0] << 16) |
+ Insn = (Bytes[2] << 0) | (Bytes[3] << 8) | (Bytes[0] << 16) |
(Bytes[1] << 24);
} else {
- insn = (Bytes[0] << 0) |
- (Bytes[1] << 8) |
- (Bytes[2] << 16) |
+ Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
(Bytes[3] << 24);
}
}
@@ -740,24 +732,22 @@ static DecodeStatus readInstruction32(const MemoryObject &region,
return MCDisassembler::Success;
}
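A minimal sketch, using hypothetical byte values, of how the three orderings handled by readInstruction32 assemble the same four bytes into a 32-bit word:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Bytes[4] = {0x12, 0x34, 0x56, 0x78}; // hypothetical stream contents
  // Big-endian stream: most significant byte comes first.
  uint32_t BE = (Bytes[3] << 0) | (Bytes[2] << 8) | (Bytes[1] << 16) | (Bytes[0] << 24);
  // Little-endian MIPS32: plain byte reversal of the word.
  uint32_t LE = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | (Bytes[3] << 24);
  // Little-endian microMIPS: the two 16-bit halves are swapped relative to MIPS32.
  uint32_t MM = (Bytes[2] << 0) | (Bytes[3] << 8) | (Bytes[0] << 16) | (Bytes[1] << 24);
  printf("BE=0x%08x LE=0x%08x microMIPS=0x%08x\n",
         (unsigned)BE, (unsigned)LE, (unsigned)MM);
  // Prints: BE=0x12345678 LE=0x78563412 microMIPS=0x34127856
  return 0;
}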
-DecodeStatus
-MipsDisassembler::getInstruction(MCInst &instr,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
+DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
uint32_t Insn;
- DecodeStatus Result = readInstruction32(Region, Address, Size,
- Insn, isBigEndian, IsMicroMips);
+ DecodeStatus Result =
+ readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, IsMicroMips);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
if (IsMicroMips) {
DEBUG(dbgs() << "Trying MicroMips32 table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMicroMips32, instr, Insn, Address,
+ Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
@@ -769,7 +759,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
if (hasCOP3()) {
DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n");
Result =
- decodeInstruction(DecoderTableCOP3_32, instr, Insn, Address, this, STI);
+ decodeInstruction(DecoderTableCOP3_32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -778,7 +768,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
if (hasMips32r6() && isGP64()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 (GPR64) table (32-bit opcodes):\n");
- Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, instr, Insn,
+ Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, Instr, Insn,
Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
@@ -788,7 +778,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
if (hasMips32r6()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 table (32-bit opcodes):\n");
- Result = decodeInstruction(DecoderTableMips32r6_64r632, instr, Insn,
+ Result = decodeInstruction(DecoderTableMips32r6_64r632, Instr, Insn,
Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
@@ -798,8 +788,8 @@ MipsDisassembler::getInstruction(MCInst &instr,
DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
- this, STI);
+ Result =
+ decodeInstruction(DecoderTableMips32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -808,30 +798,28 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
}
-DecodeStatus
-Mips64Disassembler::getInstruction(MCInst &instr,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
+DecodeStatus Mips64Disassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
uint32_t Insn;
- DecodeStatus Result = readInstruction32(Region, Address, Size,
- Insn, isBigEndian, false);
+ DecodeStatus Result =
+ readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address,
- this, STI);
+ Result =
+ decodeInstruction(DecoderTableMips6432, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
// If we fail to decode in Mips64 decoder space we can try in Mips32
- Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
- this, STI);
+ Result =
+ decodeInstruction(DecoderTableMips32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
@@ -862,6 +850,13 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeGPRMM16RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return MCDisassembler::Fail;
+}
+
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -914,18 +909,6 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFGRH32RegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- if (RegNo > 31)
- return MCDisassembler::Fail;
-
- unsigned Reg = getReg(Decoder, Mips::FGRH32RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Reg));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -981,6 +964,23 @@ static DecodeStatus DecodeMem(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeCacheOp(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Hint = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ Inst.addOperand(MCOperand::CreateImm(Hint));
+
+ return MCDisassembler::Success;
+}
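For readers unfamiliar with the field positions used above, a self-contained sketch of the extraction with a hypothetical (not necessarily valid) 32-bit instruction word:

#include <cstdint>
#include <cstdio>

// Extract Width bits starting at bit Pos, mirroring fieldFromInstruction.
static uint32_t extractField(uint32_t Insn, unsigned Pos, unsigned Width) {
  return (Insn >> Pos) & ((1u << Width) - 1);
}

int main() {
  uint32_t Insn = 0xBC450004;                 // hypothetical word
  int Offset = (int16_t)(Insn & 0xffff);      // sign-extended low 16 bits
  unsigned Hint = extractField(Insn, 16, 5);  // bits 20..16
  unsigned Base = extractField(Insn, 21, 5);  // bits 25..21
  printf("base=%u hint=%u offset=%d\n", Base, Hint, Offset); // base=2 hint=5 offset=4
  return 0;
}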
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
@@ -1012,15 +1012,15 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
break;
case Mips::LD_H:
case Mips::ST_H:
- Inst.addOperand(MCOperand::CreateImm(Offset << 1));
+ Inst.addOperand(MCOperand::CreateImm(Offset * 2));
break;
case Mips::LD_W:
case Mips::ST_W:
- Inst.addOperand(MCOperand::CreateImm(Offset << 2));
+ Inst.addOperand(MCOperand::CreateImm(Offset * 4));
break;
case Mips::LD_D:
case Mips::ST_D:
- Inst.addOperand(MCOperand::CreateImm(Offset << 3));
+ Inst.addOperand(MCOperand::CreateImm(Offset * 8));
break;
}
@@ -1038,12 +1038,23 @@ static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- if (Inst.getOpcode() == Mips::SC_MM)
+ switch (Inst.getOpcode()) {
+ case Mips::SWM32_MM:
+ case Mips::LWM32_MM:
+ if (DecodeRegListOperand(Inst, Insn, Address, Decoder)
+ == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ break;
+ case Mips::SC_MM:
Inst.addOperand(MCOperand::CreateReg(Reg));
-
- Inst.addOperand(MCOperand::CreateReg(Reg));
- Inst.addOperand(MCOperand::CreateReg(Base));
- Inst.addOperand(MCOperand::CreateImm(Offset));
+ // fallthrough
+ default:
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+ }
return MCDisassembler::Success;
}
@@ -1084,6 +1095,42 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFMem2(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMem3(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::COP3RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1242,7 +1289,7 @@ static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = (SignExtend32<16>(Offset) << 2) + 4;
+ int32_t BranchOffset = (SignExtend32<16>(Offset) * 4) + 4;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -1261,7 +1308,7 @@ static DecodeStatus DecodeBranchTarget21(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = SignExtend32<21>(Offset) << 2;
+ int32_t BranchOffset = SignExtend32<21>(Offset) * 4;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
@@ -1271,7 +1318,7 @@ static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = SignExtend32<26>(Offset) << 2;
+ int32_t BranchOffset = SignExtend32<26>(Offset) * 4;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
@@ -1281,7 +1328,7 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = SignExtend32<16>(Offset) << 1;
+ int32_t BranchOffset = SignExtend32<16>(Offset) * 2;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -1334,12 +1381,35 @@ static DecodeStatus DecodeExtSize(MCInst &Inst,
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) << 2));
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) * 4));
return MCDisassembler::Success;
}
static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) << 3));
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) * 8));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRegListOperand(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
+ Mips::S6, Mips::FP};
+ unsigned RegNum;
+
+ unsigned RegLst = fieldFromInstruction(Insn, 21, 5);
+ // Empty register lists are not allowed.
+ if (RegLst == 0)
+ return MCDisassembler::Fail;
+
+ RegNum = RegLst & 0xf;
+ for (unsigned i = 0; i < RegNum; i++)
+ Inst.addOperand(MCOperand::CreateReg(Regs[i]));
+
+ if (RegLst & 0x10)
+ Inst.addOperand(MCOperand::CreateReg(Mips::RA));
+
return MCDisassembler::Success;
}
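A standalone sketch of the register-list decoding above: the low four bits of the 5-bit field give how many of s0..s6, fp are included, and bit 4 adds ra. The field value below is hypothetical and is assumed to stay within the eight-entry table, as in the decoder:

#include <cstdio>

int main() {
  const char *Regs[] = {"s0", "s1", "s2", "s3", "s4", "s5", "s6", "fp"};
  unsigned RegLst = 0x13;          // hypothetical 5-bit field value
  unsigned RegNum = RegLst & 0xf;  // 3 -> s0, s1, s2
  for (unsigned i = 0; i < RegNum; i++)
    printf("%s ", Regs[i]);
  if (RegLst & 0x10)               // bit 4 -> ra is part of the list
    printf("ra");
  printf("\n");                    // prints: s0 s1 s2 ra
  return 0;
}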
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 8c79751..ab6b225 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -225,6 +225,18 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
// If PIC target the target is loaded as the
// pattern lw $25,%call16($28)
+
+ // opNum can be invalid if the instruction has a register list as an operand.
+ // The memory operand (base + offset) always occupies the last two operands.
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case Mips::SWM32_MM:
+ case Mips::LWM32_MM:
+ opNum = MI->getNumOperands() - 2;
+ break;
+ }
+
printOperand(MI, opNum+1, O);
O << "(";
printOperand(MI, opNum, O);
@@ -324,3 +336,13 @@ void MipsInstPrinter::printSaveRestore(const MCInst *MI, raw_ostream &O) {
}
}
+void MipsInstPrinter::
+printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) {
+ // - 2 because the register list is always the first operand of the instruction
+ // and it is always followed by the memory operand (base + offset).
+ for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) {
+ if (i != opNum)
+ O << ", ";
+ printRegName(O, MI->getOperand(i).getReg());
+ }
+}
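A small sketch of what the loop above produces for a hypothetical lwm32/swm32 operand layout of {register list..., base, offset}:

#include <cstdio>

int main() {
  // Hypothetical operands: three list registers followed by base + offset.
  const char *Ops[] = {"$s0", "$s1", "$ra", "$sp", "8"};
  int NumOperands = 5, opNum = 0;
  // Stop two operands early so the trailing base + offset pair is skipped.
  for (int i = opNum, e = NumOperands - 2; i != e; ++i) {
    if (i != opNum)
      printf(", ");
    printf("%s", Ops[i]);
  }
  printf("\n"); // prints: $s0, $s1, $ra
  return 0;
}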
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 550a0f1..42df013 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSINSTPRINTER_H
-#define MIPSINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_MIPS_INSTPRINTER_MIPSINSTPRINTER_H
+#define LLVM_LIB_TARGET_MIPS_INSTPRINTER_MIPSINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -107,6 +107,7 @@ private:
unsigned OpNo1, raw_ostream &OS);
bool printAlias(const MCInst &MI, raw_ostream &OS);
void printSaveRestore(const MCInst *MI, raw_ostream &O);
+ void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt
index e6d3a42..0e8d902 100644
--- a/lib/Target/Mips/LLVMBuild.txt
+++ b/lib/Target/Mips/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = MipsCodeGen
parent = Mips
-required_libraries = Analysis AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo Scalar SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target
add_to_library_groups = Mips
diff --git a/lib/Target/Mips/MCTargetDesc/Android.mk b/lib/Target/Mips/MCTargetDesc/Android.mk
index c8d18fc..89e132d 100644
--- a/lib/Target/Mips/MCTargetDesc/Android.mk
+++ b/lib/Target/Mips/MCTargetDesc/Android.mk
@@ -15,6 +15,7 @@ mips_mc_desc_SRC_FILES := \
MipsMCCodeEmitter.cpp \
MipsMCExpr.cpp \
MipsMCTargetDesc.cpp \
+ MipsOptionRecord.cpp \
MipsNaClELFStreamer.cpp \
MipsTargetStreamer.cpp
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index c14ee35..6b3788c 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -8,5 +8,6 @@ add_llvm_library(LLVMMipsDesc
MipsMCExpr.cpp
MipsMCTargetDesc.cpp
MipsNaClELFStreamer.cpp
+ MipsOptionRecord.cpp
MipsTargetStreamer.cpp
)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 52d5dd3..5b0f950 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -41,6 +41,12 @@ StringRef MipsABIFlagsSection::getFpABIString(FpABIKind Value) {
}
}
+uint8_t MipsABIFlagsSection::getCPR1SizeValue() {
+ if (FpABI == FpABIKind::XX)
+ return (uint8_t)AFL_REG_32;
+ return (uint8_t)CPR1Size;
+}
+
namespace llvm {
MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
// Write out a Elf_Internal_ABIFlags_v0 struct
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index ab18c44..8bcfb0f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSABIFLAGSSECTION_H
-#define MIPSABIFLAGSSECTION_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
#include "llvm/MC/MCStreamer.h"
@@ -115,7 +115,7 @@ public:
uint8_t getISALevelValue() { return (uint8_t)ISALevel; }
uint8_t getISARevisionValue() { return (uint8_t)ISARevision; }
uint8_t getGPRSizeValue() { return (uint8_t)GPRSize; }
- uint8_t getCPR1SizeValue() { return (uint8_t)CPR1Size; }
+ uint8_t getCPR1SizeValue();
uint8_t getCPR2SizeValue() { return (uint8_t)CPR2Size; }
uint8_t getFpABIValue();
uint32_t getISAExtensionSetValue() { return (uint32_t)ISAExtensionSet; }
@@ -181,7 +181,7 @@ public:
template <class PredicateLibrary>
void setCPR1SizeFromPredicates(const PredicateLibrary &P) {
- if (P.mipsSEUsesSoftFloat())
+ if (P.abiUsesSoftFloat())
CPR1Size = AFL_REG_NONE;
else if (P.hasMSA())
CPR1Size = AFL_REG_128;
@@ -212,10 +212,10 @@ public:
if (P.isABI_N32() || P.isABI_N64())
FpABI = FpABIKind::S64;
else if (P.isABI_O32()) {
- if (P.isFP64bit())
- FpABI = FpABIKind::S64;
- else if (P.isABI_FPXX())
+ if (P.isABI_FPXX())
FpABI = FpABIKind::XX;
+ else if (P.isFP64bit())
+ FpABI = FpABIKind::S64;
else
FpABI = FpABIKind::S32;
}
@@ -228,6 +228,7 @@ public:
setCPR1SizeFromPredicates(P);
setASESetFromPredicates(P);
setFpAbiFromPredicates(P);
+ OddSPReg = P.useOddSPReg();
}
};
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index d8e6128..efeb54d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -367,7 +367,12 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// Check for a less than instruction size number of bytes
// FIXME: 16 bit instructions are not handled yet here.
// We shouldn't be using a hard coded number for instruction size.
- if (Count % 4) return false;
+
+ // If the count is not 4-byte aligned, we must be writing data into the text
+ // section (otherwise we have unaligned instructions, and thus have far
+ // bigger problems), so just write zeros instead.
+ for (uint64_t i = 0, e = Count % 4; i != e; ++i)
+ OW->Write8(0);
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index d5c3dbc..d4f4983 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
//
-#ifndef MIPSASMBACKEND_H
-#define MIPSASMBACKEND_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSASMBACKEND_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSASMBACKEND_H
#include "MCTargetDesc/MipsFixupKinds.h"
#include "llvm/MC/MCAsmBackend.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index d2323dc..ff7779e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -11,8 +11,8 @@
// the Mips target useful for the compiler back-end and the MC libraries.
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSBASEINFO_H
-#define MIPSBASEINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSBASEINFO_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSBASEINFO_H
#include "MipsFixupKinds.h"
#include "MipsMCTargetDesc.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 49ac256..4ea7846 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -30,7 +30,8 @@ namespace {
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
- bool needsRelocateWithSymbol(unsigned Type) const override;
+ bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const override;
};
}
@@ -216,7 +217,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
}
bool
-MipsELFObjectWriter::needsRelocateWithSymbol(unsigned Type) const {
+MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const {
// FIXME: This is extremely conservative. This really needs to use a
// whitelist with a clear explanation for why each relocation needs to
// point to the symbol, not to the section.
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index fe37829..18c4a20 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -8,12 +8,72 @@
//===----------------------------------------------------------------------===//
#include "MipsELFStreamer.h"
+#include "MipsTargetStreamer.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ MCELFStreamer::EmitInstruction(Inst, STI);
+
+ MCContext &Context = getContext();
+ const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo();
+ MipsTargetELFStreamer *ELFTargetStreamer =
+ static_cast<MipsTargetELFStreamer *>(getTargetStreamer());
+
+ for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) {
+ const MCOperand &Op = Inst.getOperand(OpIndex);
+
+ if (!Op.isReg())
+ continue;
+
+ unsigned Reg = Op.getReg();
+ RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo);
+ }
+
+ if (ELFTargetStreamer->isMicroMipsEnabled()) {
+ for (auto Label : Labels) {
+ MCSymbolData &Data = getOrCreateSymbolData(Label);
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ MCELF::setOther(Data, ELF::STO_MIPS_MICROMIPS >> 2);
+ }
+ }
+
+ Labels.clear();
+}
+
+void MipsELFStreamer::EmitLabel(MCSymbol *Symbol) {
+ MCELFStreamer::EmitLabel(Symbol);
+ Labels.push_back(Symbol);
+}
+
+void MipsELFStreamer::SwitchSection(const MCSection * Section,
+ const MCExpr *Subsection) {
+ MCELFStreamer::SwitchSection(Section, Subsection);
+ Labels.clear();
+}
+
+void MipsELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) {
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
+ Labels.clear();
+}
+
+void MipsELFStreamer::EmitMipsOptionRecords() {
+ for (const auto &I : MipsOptionRecords)
+ I->EmitMipsOptionRecord();
+}
namespace llvm {
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack) {
+ const MCSubtargetInfo &STI,
+ bool RelaxAll) {
return new MipsELFStreamer(Context, MAB, OS, Emitter, STI);
}
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 641f8cf..136146b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -12,11 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSELFSTREAMER_H
-#define MIPSELFSTREAMER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H
+#include "MipsOptionRecord.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/Support/raw_ostream.h"
+#include <memory>
namespace llvm {
class MCAsmBackend;
@@ -25,18 +27,48 @@ class MCContext;
class MCSubtargetInfo;
class MipsELFStreamer : public MCELFStreamer {
+ SmallVector<std::unique_ptr<MipsOptionRecord>, 8> MipsOptionRecords;
+ MipsRegInfoRecord *RegInfoRecord;
+ SmallVector<MCSymbol*, 4> Labels;
+
public:
MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
MCCodeEmitter *Emitter, const MCSubtargetInfo &STI)
- : MCELFStreamer(Context, MAB, OS, Emitter) {}
+ : MCELFStreamer(Context, MAB, OS, Emitter) {
+
+ RegInfoRecord = new MipsRegInfoRecord(this, Context, STI);
+ MipsOptionRecords.push_back(
+ std::unique_ptr<MipsRegInfoRecord>(RegInfoRecord));
+ }
+
+ /// Overriding this function allows us to add arbitrary behaviour before the
+ /// \p Inst is actually emitted. For example, we can inspect the operands and
+ /// gather sufficient information that allows us to reason about the register
+ /// usage for the translation unit.
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+
+ /// Overriding this function allows us to record all labels that should be
+ /// marked as microMIPS. The actual marking is done in EmitInstruction,
+ /// based on this data.
+ void EmitLabel(MCSymbol *Symbol) override;
+
+ /// Overriding this function allows us to dismiss all labels that are
+ /// candidates for marking as microMIPS when a .section directive is processed.
+ void SwitchSection(const MCSection *Section,
+ const MCExpr *Subsection = nullptr) override;
+
+ /// Overriding this function allows us to dismiss all labels that are
+ /// candidates for marking as microMIPS when a .word directive is emitted.
+ void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) override;
- virtual ~MipsELFStreamer() {}
+ /// Emits all the option records stored up until the point it's called.
+ void EmitMipsOptionRecords();
};
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack);
+ const MCSubtargetInfo &STI, bool RelaxAll);
} // namespace llvm.
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 05080f0..317db16 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MIPS_MIPSFIXUPKINDS_H
-#define LLVM_MIPS_MIPSFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSFIXUPKINDS_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
@@ -199,4 +199,4 @@ namespace Mips {
} // namespace llvm
-#endif // LLVM_MIPS_MIPSFIXUPKINDS_H
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index e415412..2f5d196 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -41,6 +41,5 @@ MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
UseAssignmentForEHBegin = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
- HasLEB128 = true;
DwarfRegNumForCFI = true;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 37ba0c4..59ff1c4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSTARGETASMINFO_H
-#define MIPSTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCASMINFO_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 43fc521..d632c27 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -345,6 +345,67 @@ getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getUImm5Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ // The immediate is encoded as 'immediate << 2'.
+ unsigned Res = getMachineOpValue(MI, MO, Fixups, STI);
+ assert((Res & 3) == 0);
+ return Res >> 2;
+ }
+
+ assert(MO.isExpr() &&
+ "getUImm5Lsl2Encoding expects only expressions or an immediate");
+
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
+getSImm3Lsa2Value(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ int Value = MO.getImm();
+ return Value >> 2;
+ }
+
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
+getUImm6Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ unsigned Value = MO.getImm();
+ return Value >> 2;
+ }
+
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
+getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ unsigned Binary = (MO.getImm() >> 2) & 0x0000ffff;
+ return (((Binary & 0x8000) >> 7) | (Binary & 0x00ff));
+ }
+
+ return 0;
+}
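A purely mechanical trace of the getSImm9AddiuspValue arithmetic with hypothetical immediates; it follows the shifts and masks above (assuming the usual arithmetic right shift of negative values) without asserting anything about the addiusp instruction itself:

#include <cassert>

static unsigned encodeAddiuspLike(int Imm) {
  unsigned Binary = (Imm >> 2) & 0x0000ffff;           // assumes arithmetic shift
  return ((Binary & 0x8000) >> 7) | (Binary & 0x00ff);
}

int main() {
  assert(encodeAddiuspLike(1020) == 0x0ff);  // positive: low 8 bits of Imm / 4
  assert(encodeAddiuspLike(-1024) == 0x100); // negative: bit 8 carries the sign
  return 0;
}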
+
+unsigned MipsMCCodeEmitter::
getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
int64_t Res;
@@ -577,6 +638,17 @@ unsigned MipsMCCodeEmitter::
getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ // OpNo can be invalid if the instruction has a register list as an operand.
+ // The memory operand (base + offset) always occupies the last two operands.
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case Mips::SWM32_MM:
+ case Mips::LWM32_MM:
+ OpNo = MI.getNumOperands() - 2;
+ break;
+ }
+
// Base register is encoded in bits 20-16, offset is encoded in bits 11-0.
assert(MI.getOperand(OpNo).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 16;
@@ -659,4 +731,61 @@ MipsMCCodeEmitter::getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
return 0;
}
+unsigned
+MipsMCCodeEmitter::getUImm3Mod8Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(OpNo).isImm());
+ const MCOperand &MO = MI.getOperand(OpNo);
+ return MO.getImm() % 8;
+}
+
+unsigned
+MipsMCCodeEmitter::getUImm4AndValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(OpNo).isImm());
+ const MCOperand &MO = MI.getOperand(OpNo);
+ unsigned Value = MO.getImm();
+ switch (Value) {
+ case 128: return 0x0;
+ case 1: return 0x1;
+ case 2: return 0x2;
+ case 3: return 0x3;
+ case 4: return 0x4;
+ case 7: return 0x5;
+ case 8: return 0x6;
+ case 15: return 0x7;
+ case 16: return 0x8;
+ case 31: return 0x9;
+ case 32: return 0xa;
+ case 63: return 0xb;
+ case 64: return 0xc;
+ case 255: return 0xd;
+ case 32768: return 0xe;
+ case 65535: return 0xf;
+ }
+ llvm_unreachable("Unexpected value");
+}
+
+unsigned
+MipsMCCodeEmitter::getRegisterListOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned res = 0;
+
+ // The register list operand is always the first operand of the instruction and
+ // it is placed before the memory operand (register + imm).
+
+ for (unsigned I = OpNo, E = MI.getNumOperands() - 2; I < E; ++I) {
+ unsigned Reg = MI.getOperand(I).getReg();
+ unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg);
+ if (RegNo != 31)
+ res++;
+ else
+ res |= 0x10;
+ }
+ return res;
+}
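This is the encoder-side counterpart of DecodeRegListOperand earlier in the patch: count the s0..s6/fp registers in the low four bits and set bit 4 when ra appears. A small sketch of the packing with a hypothetical register list (hardware encodings 16..18 for s0..s2 and 31 for ra):

#include <cassert>

int main() {
  unsigned Encodings[] = {16, 17, 18, 31}; // hypothetical list: s0, s1, s2, ra
  unsigned res = 0;
  for (unsigned RegNo : Encodings) {
    if (RegNo != 31)
      res++;       // one more register from the s0..s6/fp run
    else
      res |= 0x10; // ra is flagged in bit 4
  }
  assert(res == 0x13); // the value decoded in the earlier register-list sketch
  return 0;
}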
+
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 304167f..9016fcf 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
//
-#ifndef MIPS_MC_CODE_EMITTER_H
-#define MIPS_MC_CODE_EMITTER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/Support/DataTypes.h"
@@ -60,7 +60,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- // getBranchJumpOpValue - Return binary encoding of the jump
+ // getJumpTargetOpValue - Return binary encoding of the jump
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
@@ -74,6 +74,26 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ // getUImm5Lsl2Encoding - Return binary encoding of the microMIPS uimm5
+ // operand; the operand value is a multiple of 4 and is encoded shifted right by 2.
+ unsigned getUImm5Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getSImm3Lsa2Value(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getUImm6Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getSImm9AddiuspValue - Return binary encoding of the microMIPS addiusp
+ // instruction immediate operand.
+ unsigned getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
// getBranchTargetOpValue - Return binary encoding of the branch
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
@@ -145,9 +165,19 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getUImm3Mod8Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getUImm4AndValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
}; // class MipsMCCodeEmitter
} // namespace llvm.
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 5bba3e5..74490f3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -80,8 +80,9 @@ void MipsMCExpr::PrintImpl(raw_ostream &OS) const {
bool
MipsMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return getSubExpr()->EvaluateAsRelocatable(Res, Layout);
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup);
}
void MipsMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index f193dc9..2b8f0c8 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMCEXPR_H
-#define MIPSMCEXPR_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCEXPR_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCEXPR_H
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
@@ -48,7 +48,8 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index 01d5363..e756b47 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMCNACL_H
-#define MIPSMCNACL_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCNACL_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCNACL_H
#include "llvm/MC/MCELFStreamer.h"
@@ -26,8 +26,7 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
const MCSubtargetInfo &STI,
- bool RelaxAll, bool NoExecStack);
-
+ bool RelaxAll);
}
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index d2b929b..bab4254 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -109,15 +109,12 @@ static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll, bool NoExecStack) {
+ const MCSubtargetInfo &STI, bool RelaxAll) {
MCStreamer *S;
if (!Triple(TT).isOSNaCl())
- S = createMipsELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll,
- NoExecStack);
+ S = createMipsELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
else
- S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll,
- NoExecStack);
+ S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
new MipsTargetELFStreamer(*S, STI);
return S;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 161d1ea..f08a8f4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMCTARGETDESC_H
-#define MIPSMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCTARGETDESC_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 6cde8f9..92b8455 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -255,13 +255,11 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
const MCSubtargetInfo &STI,
- bool RelaxAll, bool NoExecStack) {
+ bool RelaxAll) {
MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter,
STI);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
// Set bundle-alignment as required by the NaCl ABI for the target.
S->EmitBundleAlignMode(MIPS_NACL_BUNDLE_ALIGN);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
new file mode 100644
index 0000000..0ef2208
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -0,0 +1,92 @@
+//===-- MipsOptionRecord.cpp - Abstraction for storing information --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsOptionRecord.h"
+#include "MipsELFStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+
+using namespace llvm;
+
+void MipsRegInfoRecord::EmitMipsOptionRecord() {
+ MCAssembler &MCA = Streamer->getAssembler();
+ Triple T(STI.getTargetTriple());
+ uint64_t Features = STI.getFeatureBits();
+
+ Streamer->PushSection();
+
+ // We need to distinguish between N64 and the rest because at the moment
+ // we don't emit .MIPS.options for ELFs other than N64.
+ // Since .reginfo has the same information as .MIPS.options (ODK_REGINFO),
+ // we can use the same abstraction (the MipsRegInfoRecord class) to handle both.
+ if (Features & Mips::FeatureN64) {
+ // The EntrySize value of 1 seems strange since the records are neither
+ // 1-byte long nor fixed length but it matches the value GAS emits.
+ const MCSectionELF *Sec =
+ Context.getELFSection(".MIPS.options", ELF::SHT_MIPS_OPTIONS,
+ ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP,
+ SectionKind::getMetadata(), 1, "");
+ MCA.getOrCreateSectionData(*Sec).setAlignment(8);
+ Streamer->SwitchSection(Sec);
+
+ Streamer->EmitIntValue(1, 1); // kind
+ Streamer->EmitIntValue(40, 1); // size
+ Streamer->EmitIntValue(0, 2); // section
+ Streamer->EmitIntValue(0, 4); // info
+ Streamer->EmitIntValue(ri_gprmask, 4);
+ Streamer->EmitIntValue(0, 4); // pad
+ Streamer->EmitIntValue(ri_cprmask[0], 4);
+ Streamer->EmitIntValue(ri_cprmask[1], 4);
+ Streamer->EmitIntValue(ri_cprmask[2], 4);
+ Streamer->EmitIntValue(ri_cprmask[3], 4);
+ Streamer->EmitIntValue(ri_gp_value, 8);
+ } else {
+ const MCSectionELF *Sec =
+ Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC,
+ SectionKind::getMetadata(), 24, "");
+ MCA.getOrCreateSectionData(*Sec)
+ .setAlignment(Features & Mips::FeatureN32 ? 8 : 4);
+ Streamer->SwitchSection(Sec);
+
+ Streamer->EmitIntValue(ri_gprmask, 4);
+ Streamer->EmitIntValue(ri_cprmask[0], 4);
+ Streamer->EmitIntValue(ri_cprmask[1], 4);
+ Streamer->EmitIntValue(ri_cprmask[2], 4);
+ Streamer->EmitIntValue(ri_cprmask[3], 4);
+ assert((ri_gp_value & 0xffffffff) == ri_gp_value);
+ Streamer->EmitIntValue(ri_gp_value, 4);
+ }
+
+ Streamer->PopSection();
+}
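The byte counts written above correspond to the usual record layouts; a sketch with field names following the Elf_Options / RegInfo conventions (40 bytes for the .MIPS.options ODK_REGINFO record, 24 bytes for a .reginfo entry):

#include <cstdint>

struct OdkRegInfo64 {     // .MIPS.options record as emitted above
  uint8_t  kind;          // ODK_REGINFO
  uint8_t  size;          // 40
  uint16_t section;       // 0
  uint32_t info;          // 0
  uint32_t ri_gprmask;
  uint32_t ri_pad;
  uint32_t ri_cprmask[4];
  uint64_t ri_gp_value;
};

struct RegInfo32 {        // one .reginfo entry as emitted above
  uint32_t ri_gprmask;
  uint32_t ri_cprmask[4];
  uint32_t ri_gp_value;
};

static_assert(sizeof(OdkRegInfo64) == 40, "matches the size byte emitted above");
static_assert(sizeof(RegInfo32) == 24, "matches the 24-byte .reginfo entry size");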
+
+void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg,
+ const MCRegisterInfo *MCRegInfo) {
+ unsigned Value = 0;
+
+ for (MCSubRegIterator SubRegIt(Reg, MCRegInfo, true); SubRegIt.isValid();
+ ++SubRegIt) {
+ unsigned CurrentSubReg = *SubRegIt;
+
+ unsigned EncVal = MCRegInfo->getEncodingValue(CurrentSubReg);
+ Value |= 1 << EncVal;
+
+ if (GPR32RegClass->contains(CurrentSubReg) ||
+ GPR64RegClass->contains(CurrentSubReg))
+ ri_gprmask |= Value;
+ else if (FGR32RegClass->contains(CurrentSubReg) ||
+ FGR64RegClass->contains(CurrentSubReg) ||
+ AFGR64RegClass->contains(CurrentSubReg) ||
+ MSA128BRegClass->contains(CurrentSubReg))
+ ri_cprmask[1] |= Value;
+ else if (COP2RegClass->contains(CurrentSubReg))
+ ri_cprmask[2] |= Value;
+ else if (COP3RegClass->contains(CurrentSubReg))
+ ri_cprmask[3] |= Value;
+ }
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index fbe375b..1e092f2 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsELFStreamer.h"
#include "MipsMCTargetDesc.h"
#include "MipsTargetObjectFile.h"
#include "MipsTargetStreamer.h"
@@ -28,17 +29,21 @@
using namespace llvm;
MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
- : MCTargetStreamer(S), canHaveModuleDirective(true) {}
+ : MCTargetStreamer(S), ModuleDirectiveAllowed(true) {
+ GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
+}
void MipsTargetStreamer::emitDirectiveSetMicroMips() {}
void MipsTargetStreamer::emitDirectiveSetNoMicroMips() {}
void MipsTargetStreamer::emitDirectiveSetMips16() {}
-void MipsTargetStreamer::emitDirectiveSetNoMips16() {}
-void MipsTargetStreamer::emitDirectiveSetReorder() {}
+void MipsTargetStreamer::emitDirectiveSetNoMips16() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetReorder() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoReorder() {}
-void MipsTargetStreamer::emitDirectiveSetMacro() {}
-void MipsTargetStreamer::emitDirectiveSetNoMacro() {}
-void MipsTargetStreamer::emitDirectiveSetAt() {}
-void MipsTargetStreamer::emitDirectiveSetNoAt() {}
+void MipsTargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMsa() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoMsa() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoAt() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveEnd(StringRef Name) {}
void MipsTargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {}
void MipsTargetStreamer::emitDirectiveAbiCalls() {}
@@ -51,11 +56,26 @@ void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {}
void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) {
}
-void MipsTargetStreamer::emitDirectiveSetMips32R2() {}
-void MipsTargetStreamer::emitDirectiveSetMips64() {}
-void MipsTargetStreamer::emitDirectiveSetMips64R2() {}
-void MipsTargetStreamer::emitDirectiveSetDsp() {}
-void MipsTargetStreamer::emitDirectiveCpload(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveSetArch(StringRef Arch) {
+ forbidModuleDirective();
+}
+void MipsTargetStreamer::emitDirectiveSetMips0() {}
+void MipsTargetStreamer::emitDirectiveSetMips1() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips2() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips3() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips4() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips5() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips32() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips32R2() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips32R6() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips64() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips64R2() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetPop() {}
+void MipsTargetStreamer::emitDirectiveSetPush() {}
+void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) {
}
@@ -71,52 +91,62 @@ MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
void MipsTargetAsmStreamer::emitDirectiveSetMicroMips() {
OS << "\t.set\tmicromips\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMicroMips() {
OS << "\t.set\tnomicromips\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveSetMips16() {
OS << "\t.set\tmips16\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMips16() {
OS << "\t.set\tnomips16\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetNoMips16();
}
void MipsTargetAsmStreamer::emitDirectiveSetReorder() {
OS << "\t.set\treorder\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetReorder();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoReorder() {
OS << "\t.set\tnoreorder\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveSetMacro() {
OS << "\t.set\tmacro\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetMacro();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMacro() {
OS << "\t.set\tnomacro\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetNoMacro();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMsa() {
+ OS << "\t.set\tmsa\n";
+ MipsTargetStreamer::emitDirectiveSetMsa();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetNoMsa() {
+ OS << "\t.set\tnomsa\n";
+ MipsTargetStreamer::emitDirectiveSetNoMsa();
}
void MipsTargetAsmStreamer::emitDirectiveSetAt() {
OS << "\t.set\tat\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetAt();
}
void MipsTargetAsmStreamer::emitDirectiveSetNoAt() {
OS << "\t.set\tnoat\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetNoAt();
}
void MipsTargetAsmStreamer::emitDirectiveEnd(StringRef Name) {
@@ -151,25 +181,82 @@ void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
<< StringRef(MipsInstPrinter::getRegisterName(ReturnReg)).lower() << '\n';
}
+void MipsTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) {
+ OS << "\t.set arch=" << Arch << "\n";
+ MipsTargetStreamer::emitDirectiveSetArch(Arch);
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips0() { OS << "\t.set\tmips0\n"; }
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips1() {
+ OS << "\t.set\tmips1\n";
+ MipsTargetStreamer::emitDirectiveSetMips1();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips2() {
+ OS << "\t.set\tmips2\n";
+ MipsTargetStreamer::emitDirectiveSetMips2();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips3() {
+ OS << "\t.set\tmips3\n";
+ MipsTargetStreamer::emitDirectiveSetMips3();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips4() {
+ OS << "\t.set\tmips4\n";
+ MipsTargetStreamer::emitDirectiveSetMips4();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips5() {
+ OS << "\t.set\tmips5\n";
+ MipsTargetStreamer::emitDirectiveSetMips5();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips32() {
+ OS << "\t.set\tmips32\n";
+ MipsTargetStreamer::emitDirectiveSetMips32();
+}
+
void MipsTargetAsmStreamer::emitDirectiveSetMips32R2() {
OS << "\t.set\tmips32r2\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetMips32R2();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips32R6() {
+ OS << "\t.set\tmips32r6\n";
+ MipsTargetStreamer::emitDirectiveSetMips32R6();
}
void MipsTargetAsmStreamer::emitDirectiveSetMips64() {
OS << "\t.set\tmips64\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetMips64();
}
void MipsTargetAsmStreamer::emitDirectiveSetMips64R2() {
OS << "\t.set\tmips64r2\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetMips64R2();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetMips64R6() {
+ OS << "\t.set\tmips64r6\n";
+ MipsTargetStreamer::emitDirectiveSetMips64R6();
}
void MipsTargetAsmStreamer::emitDirectiveSetDsp() {
OS << "\t.set\tdsp\n";
- setCanHaveModuleDir(false);
+ MipsTargetStreamer::emitDirectiveSetDsp();
}
+
+void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() {
+ OS << "\t.set\tnodsp\n";
+ MipsTargetStreamer::emitDirectiveSetNoDsp();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetPop() { OS << "\t.set\tpop\n"; }
+
+void MipsTargetAsmStreamer::emitDirectiveSetPush() { OS << "\t.set\tpush\n"; }
+
// Print a 32-bit hex number, including leading zeros.
static void printHex32(unsigned Value, raw_ostream &OS) {
OS << "0x";
@@ -191,10 +278,10 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask,
OS << "," << FPUTopSavedRegOff << '\n';
}
-void MipsTargetAsmStreamer::emitDirectiveCpload(unsigned RegNo) {
+void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) {
OS << "\t.cpload\t$"
<< StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -213,7 +300,7 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
OS << ", ";
OS << Sym.getName() << "\n";
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetAsmStreamer::emitDirectiveModuleFP(
@@ -281,27 +368,29 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
else
EFlags |= ELF::EF_MIPS_ARCH_1;
- if (T.isArch64Bit()) {
- if (Features & Mips::FeatureN32)
- EFlags |= ELF::EF_MIPS_ABI2;
- else if (Features & Mips::FeatureO32) {
- EFlags |= ELF::EF_MIPS_ABI_O32;
- EFlags |= ELF::EF_MIPS_32BITMODE; /* Compatibility Mode */
- }
- // No need to set any bit for N64 which is the default ABI at the moment
- // for 64-bit Mips architectures.
- } else {
- if (Features & Mips::FeatureMips64r2 || Features & Mips::FeatureMips64)
- EFlags |= ELF::EF_MIPS_32BITMODE;
-
- // ABI
+ // ABI
+ // N64 does not require any ABI bits.
+ if (Features & Mips::FeatureO32)
EFlags |= ELF::EF_MIPS_ABI_O32;
- }
+ else if (Features & Mips::FeatureN32)
+ EFlags |= ELF::EF_MIPS_ABI2;
+
+ if (Features & Mips::FeatureGP64Bit) {
+ if (Features & Mips::FeatureO32)
+ EFlags |= ELF::EF_MIPS_32BITMODE; /* Compatibility Mode */
+ } else if (Features & Mips::FeatureMips64r2 || Features & Mips::FeatureMips64)
+ EFlags |= ELF::EF_MIPS_32BITMODE;
// Other options.
if (Features & Mips::FeatureNaN2008)
EFlags |= ELF::EF_MIPS_NAN2008;
+ // -mabicalls and -mplt are not implemented but we should act as if they were
+ // given.
+ EFlags |= ELF::EF_MIPS_CPIC;
+ if (Features & Mips::FeatureN64)
+ EFlags |= ELF::EF_MIPS_PIC;
+
MCA.setELFHeaderEFlags(EFlags);
}
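A minimal sketch (illustration only, not from the patch) of the e_flags selection the restructured constructor above performs. Hypothetical boolean parameters stand in for the real FeatureBits tests, and placeholder bit values stand in for the ELF::EF_MIPS_* constants from llvm/Support/ELF.h.

  #include <cstdint>

  // Placeholder values for illustration; the real constants are
  // ELF::EF_MIPS_ABI_O32, ELF::EF_MIPS_ABI2 and ELF::EF_MIPS_32BITMODE.
  enum : uint32_t { kAbiO32 = 1u << 0, kAbiN32 = 1u << 1, k32BitMode = 1u << 2 };

  // N64 sets no ABI bit. 32-bit compatibility mode is set either for O32 on
  // a core with 64-bit GPRs, or for a MIPS64/MIPS64r2 ISA without 64-bit GPRs.
  uint32_t selectAbiFlags(bool isO32, bool isN32, bool hasGP64,
                          bool hasMips64OrR2) {
    uint32_t flags = 0;
    if (isO32)
      flags |= kAbiO32;
    else if (isN32)
      flags |= kAbiN32;
    if (hasGP64 ? isO32 : hasMips64OrR2)
      flags |= k32BitMode;
    return flags;
  }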
@@ -321,41 +410,26 @@ void MipsTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
void MipsTargetELFStreamer::finish() {
MCAssembler &MCA = getStreamer().getAssembler();
- MCContext &Context = MCA.getContext();
- MCStreamer &OS = getStreamer();
- Triple T(STI.getTargetTriple());
- uint64_t Features = STI.getFeatureBits();
+ const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo();
+
+ // .bss, .text and .data are always at least 16-byte aligned.
+ MCSectionData &TextSectionData =
+ MCA.getOrCreateSectionData(*OFI.getTextSection());
+ MCSectionData &DataSectionData =
+ MCA.getOrCreateSectionData(*OFI.getDataSection());
+ MCSectionData &BSSSectionData =
+ MCA.getOrCreateSectionData(*OFI.getBSSSection());
+
+ TextSectionData.setAlignment(std::max(16u, TextSectionData.getAlignment()));
+ DataSectionData.setAlignment(std::max(16u, DataSectionData.getAlignment()));
+ BSSSectionData.setAlignment(std::max(16u, BSSSectionData.getAlignment()));
+
+ // Emit all the option records.
+ // At the moment we are only emitting .Mips.options (ODK_REGINFO) and
+ // .reginfo.
+ MipsELFStreamer &MEF = static_cast<MipsELFStreamer &>(Streamer);
+ MEF.EmitMipsOptionRecords();
- if (T.isArch64Bit() && (Features & Mips::FeatureN64)) {
- const MCSectionELF *Sec = Context.getELFSection(
- ".MIPS.options", ELF::SHT_MIPS_OPTIONS,
- ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, SectionKind::getMetadata());
- OS.SwitchSection(Sec);
-
- OS.EmitIntValue(1, 1); // kind
- OS.EmitIntValue(40, 1); // size
- OS.EmitIntValue(0, 2); // section
- OS.EmitIntValue(0, 4); // info
- OS.EmitIntValue(0, 4); // ri_gprmask
- OS.EmitIntValue(0, 4); // pad
- OS.EmitIntValue(0, 4); // ri_cpr[0]mask
- OS.EmitIntValue(0, 4); // ri_cpr[1]mask
- OS.EmitIntValue(0, 4); // ri_cpr[2]mask
- OS.EmitIntValue(0, 4); // ri_cpr[3]mask
- OS.EmitIntValue(0, 8); // ri_gp_value
- } else {
- const MCSectionELF *Sec =
- Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC,
- SectionKind::getMetadata());
- OS.SwitchSection(Sec);
-
- OS.EmitIntValue(0, 4); // ri_gprmask
- OS.EmitIntValue(0, 4); // ri_cpr[0]mask
- OS.EmitIntValue(0, 4); // ri_cpr[1]mask
- OS.EmitIntValue(0, 4); // ri_cpr[2]mask
- OS.EmitIntValue(0, 4); // ri_cpr[3]mask
- OS.EmitIntValue(0, 4); // ri_gp_value
- }
emitMipsAbiFlags();
}
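A minimal sketch (illustration only) of the std::max clamp used in finish() above: each of .text, .data and .bss is raised to at least 16-byte alignment, while any stricter alignment a section already has is preserved.

  #include <algorithm>

  // 4 -> 16, 16 -> 16, 32 -> 32 (hypothetical helper, not in the patch).
  unsigned clampSectionAlignment(unsigned current) {
    return std::max(16u, current);
  }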
@@ -390,11 +464,12 @@ void MipsTargetELFStreamer::emitDirectiveSetMicroMips() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_MICROMIPS;
MCA.setELFHeaderEFlags(Flags);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitDirectiveSetNoMicroMips() {
MicroMipsEnabled = false;
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitDirectiveSetMips16() {
@@ -402,17 +477,7 @@ void MipsTargetELFStreamer::emitDirectiveSetMips16() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_ARCH_ASE_M16;
MCA.setELFHeaderEFlags(Flags);
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetNoMips16() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetReorder() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitDirectiveSetNoReorder() {
@@ -420,35 +485,49 @@ void MipsTargetELFStreamer::emitDirectiveSetNoReorder() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_NOREORDER;
MCA.setELFHeaderEFlags(Flags);
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
-void MipsTargetELFStreamer::emitDirectiveSetMacro() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
+void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCContext &Context = MCA.getContext();
+ MCStreamer &OS = getStreamer();
-void MipsTargetELFStreamer::emitDirectiveSetNoMacro() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
+ const MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHT_REL,
+ SectionKind::getMetadata());
-void MipsTargetELFStreamer::emitDirectiveSetAt() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
+ const MCSymbolRefExpr *ExprRef =
+ MCSymbolRefExpr::Create(Name, MCSymbolRefExpr::VK_None, Context);
-void MipsTargetELFStreamer::emitDirectiveSetNoAt() {
- // FIXME: implement.
- setCanHaveModuleDir(false);
-}
+ MCSectionData &SecData = MCA.getOrCreateSectionData(*Sec);
+ SecData.setAlignment(4);
-void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
- // FIXME: implement.
+ OS.PushSection();
+
+ OS.SwitchSection(Sec);
+
+ OS.EmitValueImpl(ExprRef, 4);
+
+ OS.EmitIntValue(GPRInfoSet ? GPRBitMask : 0, 4); // reg_mask
+ OS.EmitIntValue(GPRInfoSet ? GPROffset : 0, 4); // reg_offset
+
+ OS.EmitIntValue(FPRInfoSet ? FPRBitMask : 0, 4); // fpreg_mask
+ OS.EmitIntValue(FPRInfoSet ? FPROffset : 0, 4); // fpreg_offset
+
+ OS.EmitIntValue(FrameInfoSet ? FrameOffset : 0, 4); // frame_offset
+ OS.EmitIntValue(FrameInfoSet ? FrameReg : 0, 4); // frame_reg
+ OS.EmitIntValue(FrameInfoSet ? ReturnReg : 0, 4); // return_reg
+
+ // The .end directive marks the end of a procedure. Invalidate
+ // the information gathered up until this point.
+ GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
+
+ OS.PopSection();
}
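Illustration only: the .pdr record written by emitDirectiveEnd above is a 4-byte symbol reference followed by seven 4-byte words. A rough struct sketch of that layout (the type and field names are hypothetical and follow the comments in the hunk):

  #include <cstdint>

  struct PdrRecord {        // 32 bytes, 4-byte aligned (SecData alignment above)
    uint32_t sym;           // relocated reference to the procedure symbol
    uint32_t reg_mask;      // GPR save mask; 0 unless .mask was seen
    uint32_t reg_offset;
    uint32_t fpreg_mask;    // FPR save mask; 0 unless .fmask was seen
    uint32_t fpreg_offset;
    uint32_t frame_offset;  // stack size from .frame
    uint32_t frame_reg;
    uint32_t return_reg;
  };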
void MipsTargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {
- // FIXME: implement.
+ GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
}
void MipsTargetELFStreamer::emitDirectiveAbiCalls() {
@@ -494,37 +573,31 @@ void MipsTargetELFStreamer::emitDirectiveOptionPic2() {
}
void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
- unsigned ReturnReg) {
- // FIXME: implement.
+ unsigned ReturnReg_) {
+ MCContext &Context = getStreamer().getAssembler().getContext();
+ const MCRegisterInfo *RegInfo = Context.getRegisterInfo();
+
+ FrameInfoSet = true;
+ FrameReg = RegInfo->getEncodingValue(StackReg);
+ FrameOffset = StackSize;
+ ReturnReg = RegInfo->getEncodingValue(ReturnReg_);
}
void MipsTargetELFStreamer::emitMask(unsigned CPUBitmask,
int CPUTopSavedRegOff) {
- // FIXME: implement.
+ GPRInfoSet = true;
+ GPRBitMask = CPUBitmask;
+ GPROffset = CPUTopSavedRegOff;
}
void MipsTargetELFStreamer::emitFMask(unsigned FPUBitmask,
int FPUTopSavedRegOff) {
- // FIXME: implement.
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetMips32R2() {
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetMips64() {
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetMips64R2() {
- setCanHaveModuleDir(false);
-}
-
-void MipsTargetELFStreamer::emitDirectiveSetDsp() {
- setCanHaveModuleDir(false);
+ FPRInfoSet = true;
+ FPRBitMask = FPUBitmask;
+ FPROffset = FPUTopSavedRegOff;
}
-void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
+void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
// .cpload $reg
// This directive expands to:
// lui $gp, %hi(_gp_disp)
@@ -572,7 +645,7 @@ void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
TmpInst.addOperand(MCOperand::CreateReg(RegNo));
getStreamer().EmitInstruction(TmpInst, STI);
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -629,7 +702,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
Inst.addOperand(MCOperand::CreateReg(RegNo));
getStreamer().EmitInstruction(Inst, STI);
- setCanHaveModuleDir(false);
+ forbidModuleDirective();
}
void MipsTargetELFStreamer::emitMipsAbiFlags() {
@@ -638,7 +711,7 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() {
MCStreamer &OS = getStreamer();
const MCSectionELF *Sec =
Context.getELFSection(".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS,
- ELF::SHF_ALLOC, SectionKind::getMetadata());
+ ELF::SHF_ALLOC, SectionKind::getMetadata(), 24, "");
MCSectionData &ABIShndxSD = MCA.getOrCreateSectionData(*Sec);
ABIShndxSD.setAlignment(8);
OS.SwitchSection(Sec);
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index 41efa47..56db450 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -13,7 +13,7 @@ TARGET = Mips
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
- MipsGenAsmWriter.inc MipsGenFastISel.inc MipsGenCodeEmitter.inc \
+ MipsGenAsmWriter.inc MipsGenFastISel.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
MipsGenDisassemblerTables.inc \
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index b93017a..fae7059 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -123,10 +123,10 @@ def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd,
II_MFC1, bitconvert>, MFC1_FM_MM<0x80>;
def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd,
II_MTC1, bitconvert>, MFC1_FM_MM<0xa0>;
-def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>,
- MFC1_FM_MM<3>, ISA_MIPS32R2;
-def MTHC1_MM : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM_MM<7>, ISA_MIPS32R2;
+def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>,
+ MFC1_FM_MM<0xc0>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+def MTHC1_MM : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
+ MFC1_FM_MM<0xe0>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
def MADD_S_MM : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
MADDS_FM_MM<0x1>;
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
index 15b951d..59bf949 100644
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -41,6 +41,95 @@ class MicroMipsInst16<dag outs, dag ins, string asmstr, list<dag> pattern,
// MicroMIPS 16-bit Instruction Formats
//===----------------------------------------------------------------------===//
+class ARITH_FM_MM16<bit funct> {
+ bits<3> rd;
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x01;
+ let Inst{9-7} = rd;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = rs;
+ let Inst{0} = funct;
+}
+
+class ANDI_FM_MM16<bits<6> funct> {
+ bits<3> rd;
+ bits<3> rs;
+ bits<4> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = funct;
+ let Inst{9-7} = rd;
+ let Inst{6-4} = rs;
+ let Inst{3-0} = imm;
+}
+
+class LOGIC_FM_MM16<bits<4> funct> {
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-6} = funct;
+ let Inst{5-3} = rt;
+ let Inst{2-0} = rs;
+}
+
+class SHIFT_FM_MM16<bits<1> funct> {
+ bits<3> rd;
+ bits<3> rt;
+ bits<3> shamt;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x09;
+ let Inst{9-7} = rd;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = shamt;
+ let Inst{0} = funct;
+}
+
+class ADDIUR2_FM_MM16 {
+ bits<3> rd;
+ bits<3> rs;
+ bits<3> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x1b;
+ let Inst{9-7} = rd;
+ let Inst{6-4} = rs;
+ let Inst{3-1} = imm;
+ let Inst{0} = 0;
+}
+
+class ADDIUS5_FM_MM16 {
+ bits<5> rd;
+ bits<4> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x13;
+ let Inst{9-5} = rd;
+ let Inst{4-1} = imm;
+ let Inst{0} = 0;
+}
+
+class ADDIUSP_FM_MM16 {
+ bits<9> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x13;
+ let Inst{9-1} = imm;
+ let Inst{0} = 1;
+}
+
class MOVE_FM_MM16<bits<6> funct> {
bits<5> rs;
bits<5> rd;
@@ -52,6 +141,17 @@ class MOVE_FM_MM16<bits<6> funct> {
let Inst{4-0} = rs;
}
+class LI_FM_MM16 {
+ bits<3> rd;
+ bits<7> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x3b;
+ let Inst{9-7} = rd;
+ let Inst{6-0} = imm;
+}
+
class JALR_FM_MM16<bits<5> op> {
bits<5> rs;
@@ -72,6 +172,29 @@ class MFHILO_FM_MM16<bits<5> funct> {
let Inst{4-0} = rd;
}
+class JRADDIUSP_FM_MM16<bits<5> op> {
+ bits<5> rs;
+ bits<5> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-5} = op;
+ let Inst{4-0} = imm;
+}
+
+class ADDIUR1SP_FM_MM16 {
+ bits<3> rd;
+ bits<6> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x1b;
+ let Inst{9-7} = rd;
+ let Inst{6-1} = imm;
+ let Inst{0} = 1;
+}
+
//===----------------------------------------------------------------------===//
// MicroMIPS 32-bit Instruction Formats
//===----------------------------------------------------------------------===//
@@ -621,3 +744,76 @@ class MADDS_FM_MM<bits<6> funct>: MMArch {
let Inst{10-6} = fr;
let Inst{5-0} = funct;
}
+
+class COMPACT_BRANCH_FM_MM<bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
+class COP0_TLB_FM_MM<bits<10> op> : MMArch {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-16} = 0x0;
+ let Inst{15-6} = op;
+ let Inst{5-0} = 0x3c;
+}
+
+class SDBBP_FM_MM : MMArch {
+ bits<10> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-16} = code_;
+ let Inst{15-6} = 0x36d;
+ let Inst{5-0} = 0x3c;
+}
+
+class RDHWR_FM_MM : MMArch {
+ bits<5> rt;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rd;
+ let Inst{15-6} = 0x1ac;
+ let Inst{5-0} = 0x3c;
+}
+
+class LWXS_FM_MM<bits<10> funct> {
+ bits<5> rd;
+ bits<5> base;
+ bits<5> index;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x0;
+ let Inst{25-21} = index;
+ let Inst{20-16} = base;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0;
+ let Inst{9-0} = funct;
+}
+
+class LWM_FM_MM<bits<4> funct> : MMArch {
+ bits<5> rt;
+ bits<21> addr;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x8;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-12} = funct;
+ let Inst{11-0} = addr{11-0};
+}
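Illustration only: a small encoder mirroring the ADDIUS5_FM_MM16 bit layout added above (Inst{15-10} = 0x13, Inst{9-5} = rd, Inst{4-1} = imm, Inst{0} = 0). rd is the 5-bit register number and imm the 4-bit encoded immediate; this is a hypothetical helper, not how the TableGen-generated encoders are written.

  #include <cstdint>

  uint16_t encodeAddius5(unsigned rd, unsigned imm) {
    return static_cast<uint16_t>((0x13u << 10) |      // major opcode field
                                 ((rd & 0x1fu) << 5) |
                                 ((imm & 0xfu) << 1)); // bit 0 stays 0
  }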
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 87a3a3e..e854620 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1,9 +1,51 @@
def addrimm12 : ComplexPattern<iPTR, 2, "selectIntAddrMM", [frameindex]>;
+def simm4 : Operand<i32>;
+def simm7 : Operand<i32>;
+
def simm12 : Operand<i32> {
let DecoderMethod = "DecodeSimm12";
}
+def uimm5_lsl2 : Operand<OtherVT> {
+ let EncoderMethod = "getUImm5Lsl2Encoding";
+}
+
+def uimm6_lsl2 : Operand<i32> {
+ let EncoderMethod = "getUImm6Lsl2Encoding";
+}
+
+def simm9_addiusp : Operand<i32> {
+ let EncoderMethod = "getSImm9AddiuspValue";
+}
+
+def uimm3_shift : Operand<i32> {
+ let EncoderMethod = "getUImm3Mod8Encoding";
+}
+
+def simm3_lsa2 : Operand<i32> {
+ let EncoderMethod = "getSImm3Lsa2Value";
+}
+
+def uimm4_andi : Operand<i32> {
+ let EncoderMethod = "getUImm4AndValue";
+}
+
+def immSExtAddiur2 : ImmLeaf<i32, [{return Imm == 1 || Imm == -1 ||
+ ((Imm % 4 == 0) &&
+ Imm < 28 && Imm > 0);}]>;
+
+def immSExtAddius5 : ImmLeaf<i32, [{return Imm >= -8 && Imm <= 7;}]>;
+
+def immZExtAndi16 : ImmLeaf<i32,
+ [{return (Imm == 128 || (Imm >= 1 && Imm <= 4) || Imm == 7 || Imm == 8 ||
+ Imm == 15 || Imm == 16 || Imm == 31 || Imm == 32 || Imm == 63 ||
+ Imm == 64 || Imm == 255 || Imm == 32768 || Imm == 65535 );}]>;
+
+def immZExt2Shift : ImmLeaf<i32, [{return Imm >= 1 && Imm <= 8;}]>;
+
+def immLi16 : ImmLeaf<i32, [{return Imm >= -1 && Imm <= 126;}]>;
+
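The immediate predicates above restrict which constants the new 16-bit forms accept. A standalone sketch (illustration only) of immSExtAddiur2's test, with the accepted set spelled out:

  // Accepts exactly: 1, -1, 4, 8, 12, 16, 20, 24 (hypothetical helper).
  bool isAddiur2Imm(int imm) {
    return imm == 1 || imm == -1 || (imm % 4 == 0 && imm > 0 && imm < 28);
  }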
def mem_mm_12 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops GPR32, simm12);
@@ -26,6 +68,16 @@ def brtarget_mm : Operand<OtherVT> {
let DecoderMethod = "DecodeBranchTargetMM";
}
+class CompactBranchMM<string opstr, DAGOperand opnd, PatFrag cond_op,
+ RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, opnd:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 0;
+ let Defs = [AT];
+}
+
let canFoldAsLoad = 1 in
class LoadLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
Operand MemOpnd> :
@@ -70,6 +122,61 @@ class LoadMM<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
let mayLoad = 1;
}
+class ArithRMM16<string opstr, RegisterOperand RO, bit isComm = 0,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag> :
+ MicroMipsInst16<(outs RO:$rd), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
+ let isCommutable = isComm;
+}
+
+class AndImmMM16<string opstr, RegisterOperand RO,
+ InstrItinClass Itin = NoItinerary> :
+ MicroMipsInst16<(outs RO:$rd), (ins RO:$rs, uimm4_andi:$imm),
+ !strconcat(opstr, "\t$rd, $rs, $imm"), [], Itin, FrmI>;
+
+class LogicRMM16<string opstr, RegisterOperand RO,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag> :
+ MicroMipsInst16<(outs RO:$dst), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$rt, $rs"),
+ [(set RO:$dst, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
+ let isCommutable = 1;
+ let Constraints = "$rt = $dst";
+}
+
+class NotMM16<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$rt), (ins RO:$rs),
+ !strconcat(opstr, "\t$rt, $rs"),
+ [(set RO:$rt, (not RO:$rs))], NoItinerary, FrmR>;
+
+class ShiftIMM16<string opstr, Operand ImmOpnd, RegisterOperand RO,
+ InstrItinClass Itin = NoItinerary> :
+ MicroMipsInst16<(outs RO:$rd), (ins RO:$rt, ImmOpnd:$shamt),
+ !strconcat(opstr, "\t$rd, $rt, $shamt"), [], Itin, FrmR>;
+
+class AddImmUR2<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$rd), (ins RO:$rs, simm3_lsa2:$imm),
+ !strconcat(opstr, "\t$rd, $rs, $imm"),
+ [], NoItinerary, FrmR> {
+ let isCommutable = 1;
+}
+
+class AddImmUS5<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$dst), (ins RO:$rd, simm4:$imm),
+ !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmR> {
+ let Constraints = "$rd = $dst";
+}
+
+class AddImmUR1SP<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs RO:$rd), (ins uimm6_lsl2:$imm),
+ !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmR>;
+
+class AddImmUSP<string opstr> :
+ MicroMipsInst16<(outs), (ins simm9_addiusp:$imm),
+ !strconcat(opstr, "\t$imm"), [], NoItinerary, FrmI>;
+
class MoveFromHILOMM<string opstr, RegisterOperand RO, Register UseReg> :
MicroMipsInst16<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"),
[], II_MFHI_MFLO, FrmR> {
@@ -85,6 +192,13 @@ class MoveMM16<string opstr, RegisterOperand RO, bit isComm = 0,
let isReMaterializable = 1;
}
+class LoadImmMM16<string opstr, Operand Od, RegisterOperand RO,
+ SDPatternOperator imm_type = null_frag> :
+ MicroMipsInst16<(outs RO:$rd), (ins Od:$imm),
+ !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmI> {
+ let isReMaterializable = 1;
+}
+
// 16-bit Jump and Link (Call)
class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
@@ -94,16 +208,140 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
let Defs = [RA];
}
+// 16-bit Jump Reg
+class JumpRegMM16<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [], IIBranch, FrmR> {
+ let hasDelaySlot = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Base class for JRADDIUSP instruction.
+class JumpRAddiuStackMM16 :
+ MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jraddiusp\t$imm",
+ [], IIBranch, FrmR> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// 16-bit Jump and Link (Call) - Short Delay Slot
+class JumpLinkRegSMM16<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [], IIBranch, FrmR> {
+ let isCall = 1;
+ let hasDelaySlot = 1;
+ let Defs = [RA];
+}
+
+// 16-bit Jump Register Compact - No delay slot
+class JumpRegCMM16<string opstr, RegisterOperand RO> :
+ MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [], IIBranch, FrmR> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// MicroMIPS Jump and Link (Call) - Short Delay Slot
+let isCall = 1, hasDelaySlot = 1, Defs = [RA] in {
+ class JumpLinkMM<string opstr, DAGOperand opnd> :
+ InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
+ [], IIBranch, FrmJ, opstr> {
+ let DecoderMethod = "DecodeJumpTargetMM";
+ }
+
+ class JumpLinkRegMM<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [], IIBranch, FrmR>;
+
+ class BranchCompareToZeroLinkMM<string opstr, DAGOperand opnd,
+ RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, opnd:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>;
+}
+
+class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RO:$rd), (ins PtrRC:$base, PtrRC:$index),
+ !strconcat(opstr, "\t$rd, ${index}(${base})"), [], Itin, FrmFI>;
+
+/// A list of registers used by load/store multiple instructions.
+def RegListAsmOperand : AsmOperandClass {
+ let Name = "RegList";
+ let ParserMethod = "parseRegisterList";
+}
+
+def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeRegListOperand";
+}
+
+class StoreMultMM<string opstr,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ InstSE<(outs), (ins reglist:$rt, mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
+ let DecoderMethod = "DecodeMemMMImm12";
+ let mayStore = 1;
+}
+
+class LoadMultMM<string opstr,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ InstSE<(outs reglist:$rt), (ins mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
+ let DecoderMethod = "DecodeMemMMImm12";
+ let mayLoad = 1;
+}
+
+def ADDU16_MM : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>,
+ ARITH_FM_MM16<0>;
+def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
+ ARITH_FM_MM16<1>;
+def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>;
+def AND16_MM : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>,
+ LOGIC_FM_MM16<0x2>;
+def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>,
+ LOGIC_FM_MM16<0x3>;
+def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
+ LOGIC_FM_MM16<0x1>;
+def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>;
+def SLL16_MM : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>,
+ SHIFT_FM_MM16<0>;
+def SRL16_MM : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>,
+ SHIFT_FM_MM16<1>;
+def ADDIUR1SP_MM : AddImmUR1SP<"addiur1sp", GPRMM16Opnd>, ADDIUR1SP_FM_MM16;
+def ADDIUR2_MM : AddImmUR2<"addiur2", GPRMM16Opnd>, ADDIUR2_FM_MM16;
+def ADDIUS5_MM : AddImmUS5<"addius5", GPR32Opnd>, ADDIUS5_FM_MM16;
+def ADDIUSP_MM : AddImmUSP<"addiusp">, ADDIUSP_FM_MM16;
def MFHI16_MM : MoveFromHILOMM<"mfhi", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x10>;
def MFLO16_MM : MoveFromHILOMM<"mflo", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x12>;
def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>;
+def LI16_MM : LoadImmMM16<"li16", simm7, GPRMM16Opnd, immLi16>,
+ LI_FM_MM16, IsAsCheapAsAMove;
def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>;
+def JALRS16_MM : JumpLinkRegSMM16<"jalrs16", GPR32Opnd>, JALR_FM_MM16<0x0f>;
+def JRC16_MM : JumpRegCMM16<"jrc", GPR32Opnd>, JALR_FM_MM16<0x0d>;
+def JRADDIUSP : JumpRAddiuStackMM16, JRADDIUSP_FM_MM16<0x18>;
+def JR16_MM : JumpRegMM16<"jr16", GPR32Opnd>, JALR_FM_MM16<0x0c>;
class WaitMM<string opstr> :
InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
NoItinerary, FrmOther, opstr>;
let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
+ /// Compact Branch Instructions
+ def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, seteq, GPR32Opnd>,
+ COMPACT_BRANCH_FM_MM<0x7>;
+ def BNEZC_MM : CompactBranchMM<"bnezc", brtarget_mm, setne, GPR32Opnd>,
+ COMPACT_BRANCH_FM_MM<0x5>;
+
/// Arithmetic Instructions (ALU Immediate)
def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd>,
ADDI_FM_MM<0xc>;
@@ -179,6 +417,8 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
}
+ def LWXS_MM : LoadWordIndexedScaledMM<"lwxs", GPR32Opnd>, LWXS_FM_MM<0x118>;
+
def LWU_MM : LoadMM<"lwu", GPR32Opnd, zextloadi32, II_LWU>, LL_FM_MM<0xe>;
/// Load and Store Instructions - unaligned
@@ -191,6 +431,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>,
LWL_FM_MM<0x9>;
+ /// Load and Store Instructions - multiple
+ def SWM32_MM : StoreMultMM<"swm32">, LWM_FM_MM<0xd>;
+ def LWM32_MM : LoadMultMM<"lwm32">, LWM_FM_MM<0x5>;
+
/// Move Conditional
def MOVZ_I_MM : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd,
NoItinerary>, ADD_FM_MM<0, 0x58>;
@@ -247,6 +491,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>;
def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>;
+ /// Jump Instructions - Short Delay Slot
+ def JALS_MM : JumpLinkMM<"jals", calltarget_mm>, J_FM_MM<0x1d>;
+ def JALRS_MM : JumpLinkRegMM<"jalrs", GPR32Opnd>, JALR_FM_MM<0x13c>;
+
/// Branch Instructions
def BEQ_MM : MMRel, CBranch<"beq", brtarget_mm, seteq, GPR32Opnd>,
BEQ_FM_MM<0x25>;
@@ -265,6 +513,12 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def BLTZAL_MM : MMRel, BGEZAL_FT<"bltzal", brtarget_mm, GPR32Opnd>,
BGEZAL_FM_MM<0x01>;
+ /// Branch Instructions - Short Delay Slot
+ def BGEZALS_MM : BranchCompareToZeroLinkMM<"bgezals", brtarget_mm,
+ GPR32Opnd>, BGEZAL_FM_MM<0x13>;
+ def BLTZALS_MM : BranchCompareToZeroLinkMM<"bltzals", brtarget_mm,
+ GPR32Opnd>, BGEZAL_FM_MM<0x11>;
+
/// Control Instructions
def SYNC_MM : MMRel, SYNC_FT<"sync">, SYNC_FM_MM;
def BREAK_MM : MMRel, BRK_FT<"break">, BRK_FM_MM;
@@ -295,12 +549,47 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
/// Load-linked, Store-conditional
def LL_MM : LLBaseMM<"ll", GPR32Opnd>, LL_FM_MM<0x3>;
def SC_MM : SCBaseMM<"sc", GPR32Opnd>, LL_FM_MM<0xb>;
+
+ def TLBP_MM : MMRel, TLB<"tlbp">, COP0_TLB_FM_MM<0x0d>;
+ def TLBR_MM : MMRel, TLB<"tlbr">, COP0_TLB_FM_MM<0x4d>;
+ def TLBWI_MM : MMRel, TLB<"tlbwi">, COP0_TLB_FM_MM<0x8d>;
+ def TLBWR_MM : MMRel, TLB<"tlbwr">, COP0_TLB_FM_MM<0xcd>;
+
+ def SDBBP_MM : MMRel, SYS_FT<"sdbbp">, SDBBP_FM_MM;
+ def RDHWR_MM : MMRel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM_MM;
}
+let Predicates = [InMicroMips] in {
+
+//===----------------------------------------------------------------------===//
+// MicroMips arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+def : MipsPat<(add GPRMM16:$src, immSExtAddiur2:$imm),
+ (ADDIUR2_MM GPRMM16:$src, immSExtAddiur2:$imm)>;
+def : MipsPat<(add GPR32:$src, immSExtAddius5:$imm),
+ (ADDIUS5_MM GPR32:$src, immSExtAddius5:$imm)>;
+def : MipsPat<(add GPR32:$src, immSExt16:$imm),
+ (ADDiu_MM GPR32:$src, immSExt16:$imm)>;
+
+def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm),
+ (ANDI16_MM GPRMM16:$src, immZExtAndi16:$imm)>;
+def : MipsPat<(and GPR32:$src, immZExt16:$imm),
+ (ANDi_MM GPR32:$src, immZExt16:$imm)>;
+
+def : MipsPat<(shl GPRMM16:$src, immZExt2Shift:$imm),
+ (SLL16_MM GPRMM16:$src, immZExt2Shift:$imm)>;
+def : MipsPat<(shl GPR32:$src, immZExt5:$imm),
+ (SLL_MM GPR32:$src, immZExt5:$imm)>;
+
+def : MipsPat<(srl GPRMM16:$src, immZExt2Shift:$imm),
+ (SRL16_MM GPRMM16:$src, immZExt2Shift:$imm)>;
+def : MipsPat<(srl GPR32:$src, immZExt5:$imm),
+ (SRL_MM GPR32:$src, immZExt5:$imm)>;
+
//===----------------------------------------------------------------------===//
// MicroMips instruction aliases
//===----------------------------------------------------------------------===//
-let Predicates = [InMicroMips] in {
def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>;
}
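The MipsPat entries above prefer the 16-bit MicroMIPS encodings when both the register class and the immediate range allow it, and fall back to the 32-bit forms otherwise. A rough sketch (illustration only; isGPRMM16 and imm stand in for the real register-class and immediate checks) of that ordering for a left shift:

  enum class ShiftOpc { SLL16_MM, SLL_MM };

  ShiftOpc selectShift(bool isGPRMM16, unsigned imm) {
    if (isGPRMM16 && imm >= 1 && imm <= 8) // immZExt2Shift range
      return ShiftOpc::SLL16_MM;
    return ShiftOpc::SLL_MM;               // 32-bit form covers 0..31
  }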
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index d512d65..87f1b04 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_MIPS_H
-#define TARGET_MIPS_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS_H
+#define LLVM_LIB_TARGET_MIPS_MIPS_H
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
@@ -26,8 +26,6 @@ namespace llvm {
FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
- FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
- JITCodeEmitter &JCE);
FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
} // end namespace llvm;
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index dd3bc9b..3e1d047 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -57,6 +57,8 @@ def MipsInstrInfo : InstrInfo;
// Mips Subtarget features //
//===----------------------------------------------------------------------===//
+def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true",
+ "Disable SVR4-style position-independent code.">;
def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
"General Purpose Registers are 64-bit wide.">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
@@ -67,13 +69,13 @@ def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
"IEEE 754-2008 NaN encoding.">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
-def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
+def FeatureO32 : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()",
"Enable o32 ABI">;
-def FeatureN32 : SubtargetFeature<"n32", "MipsABI", "N32",
+def FeatureN32 : SubtargetFeature<"n32", "ABI", "MipsABIInfo::N32()",
"Enable n32 ABI">;
-def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
+def FeatureN64 : SubtargetFeature<"n64", "ABI", "MipsABIInfo::N64()",
"Enable n64 ABI">;
-def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
+def FeatureEABI : SubtargetFeature<"eabi", "ABI", "MipsABIInfo::EABI()",
"Enable eabi ABI">;
def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 93706c2..6070276 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -36,7 +36,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -84,7 +84,7 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -154,7 +154,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount = -Amount;
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
}
@@ -174,7 +174,7 @@ void Mips16FrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
const MipsRegisterInfo &RI = TII.getRegisterInfo();
const BitVector Reserved = RI.getReservedRegs(MF);
bool SaveS2 = Reserved[Mips::S2];
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 1fb7eda..012d558 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16_FRAMEINFO_H
-#define MIPS16_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16FRAMELOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16FRAMELOWERING_H
#include "MipsFrameLowering.h"
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 14055d6..9488e63 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -403,7 +403,7 @@ static bool fixupFPReturnAndCall
Attribute::ReadNone);
A = A.addAttribute(C, AttributeSet::FunctionIndex,
Attribute::NoInline);
- Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, NULL));
+ Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, nullptr));
CallInst::Create(F, Params, "", &Inst );
} else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
const Value* V = CI->getCalledValue();
diff --git a/lib/Target/Mips/Mips16HardFloat.h b/lib/Target/Mips/Mips16HardFloat.h
index 826887e..19b7bf2 100644
--- a/lib/Target/Mips/Mips16HardFloat.h
+++ b/lib/Target/Mips/Mips16HardFloat.h
@@ -12,15 +12,14 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
+
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsTargetMachine.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
-
-#ifndef MIPS16HARDFLOAT_H
-#define MIPS16HARDFLOAT_H
-
using namespace llvm;
namespace llvm {
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.h b/lib/Target/Mips/Mips16HardFloatInfo.h
index 02444d9..7295c28 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.h
+++ b/lib/Target/Mips/Mips16HardFloatInfo.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16HARDFLOATINFO_H
-#define MIPS16HARDFLOATINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOATINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOATINFO_H
namespace llvm {
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 6672aef..7732be4 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -37,6 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
if (!Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
@@ -71,7 +72,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC =
@@ -102,7 +103,7 @@ void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
@@ -134,8 +135,9 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
switch (SD->getMemoryVT().getSizeInBits()) {
case 8:
case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
+ AliasReg = TM.getSubtargetImpl()->getFrameLowering()->hasFP(*MF)
+ ? AliasFPReg
+ : getMips16SPAliasReg();
return;
}
break;
@@ -145,8 +147,9 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
switch (SD->getMemoryVT().getSizeInBits()) {
case 8:
case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
+ AliasReg = TM.getSubtargetImpl()->getFrameLowering()->hasFP(*MF)
+ ? AliasFPReg
+ : getMips16SPAliasReg();
return;
}
break;
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index e653b39..ae0e61e 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16ISELDAGTODAG_H
-#define MIPS16ISELDAGTODAG_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16ISELDAGTODAG_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16ISELDAGTODAG_H
#include "MipsISelDAGToDAG.h"
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 81a05df..d4852c4 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "Mips16ISelLowering.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips16HardFloatInfo.h"
+#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/StringRef.h"
@@ -27,7 +29,7 @@ using namespace llvm;
static cl::opt<bool> DontExpandCondPseudos16(
"mips16-dont-expand-cond-pseudo",
cl::init(false),
- cl::desc("Dont expand conditional move related "
+ cl::desc("Don't expand conditional move related "
"pseudos for Mips 16"),
cl::Hidden);
@@ -118,13 +120,14 @@ static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = {
{"truncf", "__mips16_call_stub_sf_1"},
};
-Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
- : MipsTargetLowering(TM) {
+Mips16TargetLowering::Mips16TargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI)
+ : MipsTargetLowering(TM, STI) {
// Set up the register classes
addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
- if (Subtarget->inMips16HardFloat())
+ if (!TM.Options.UseSoftFloat)
setMips16HardFloatLibCalls();
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
@@ -150,14 +153,16 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
}
const MipsTargetLowering *
-llvm::createMips16TargetLowering(MipsTargetMachine &TM) {
- return new Mips16TargetLowering(TM);
+llvm::createMips16TargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI) {
+ return new Mips16TargetLowering(TM, STI);
}
bool
-Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+Mips16TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
return false;
}
@@ -239,10 +244,9 @@ Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
}
-bool Mips16TargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
+bool Mips16TargetLowering::isEligibleForTailCallOptimization(
+ const CCState &CCInfo, unsigned NextStackOffset,
+ const MipsFunctionInfo &FI) const {
// No tail call optimization for mips16.
return false;
}
@@ -317,7 +321,7 @@ unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
}
//
-// prefixs are attached to stub numbers depending on the return type .
+// Prefixes are attached to stub numbers depending on the return type.
// return type: float sf_
// double df_
// single complex sc_
@@ -328,17 +332,16 @@ unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
// The full name of a helper function is __mips16_call_stub +
// return type dependent prefix + stub number
//
-//
-// This is something that probably should be in a different source file and
-// perhaps done differently but my main purpose is to not waste runtime
+// FIXME: This is something that probably should be in a different source file
+// and perhaps done differently but my main purpose is to not waste runtime
// on something that we can enumerate in the source. Another possibility is
// to have a python script to generate these mapping tables. This will do
// for now. There are a whole series of helper function mapping arrays, one
// for each return type class as outlined above. There are 11 possible
-// entries. Ones with 0 are ones which should never be selected
+// entries. Ones with 0 are ones which should never be selected.
//
// All the arrays are similar except for ones which return neither
-// sf, df, sc, dc, in which only care about ones which have sf or df as a
+// sf, df, sc, dc, in which we only care about ones which have sf or df as a
// first parameter.
//
#define P_ "__mips16_call_stub_"
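Illustration only: how the full helper name described in the comment above is put together. With prefix "sf_" and stub number 1 this produces "__mips16_call_stub_sf_1", matching the truncf entry in the Mips16IntrinsicHelper table earlier in this file. The function below is a hypothetical sketch, not code from the patch.

  #include <string>

  std::string mips16StubName(const std::string &prefix, unsigned stubNum) {
    return "__mips16_call_stub_" + prefix + std::to_string(stubNum);
  }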
@@ -420,14 +423,15 @@ void Mips16TargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const {
SelectionDAG &DAG = CLI.DAG;
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
const char* Mips16HelperFunction = nullptr;
bool NeedMips16Helper = false;
- if (Subtarget->inMips16HardFloat()) {
+ if (Subtarget.inMips16HardFloat()) {
//
// currently we don't have symbols tagged with the mips16 or mips32
// qualifier so we will assume that we don't know what kind it is.
@@ -510,14 +514,16 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
Ops.push_back(JumpTarget);
MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
- InternalLinkage, CLI, Callee, Chain);
+ InternalLinkage, IsCallReloc, CLI, Callee,
+ Chain);
}
MachineBasicBlock *Mips16TargetLowering::
emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -579,7 +585,8 @@ MachineBasicBlock *Mips16TargetLowering::emitSelT16
MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -643,7 +650,8 @@ MachineBasicBlock *Mips16TargetLowering::emitSeliT16
MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -708,7 +716,8 @@ MachineBasicBlock
MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned regX = MI->getOperand(0).getReg();
unsigned regY = MI->getOperand(1).getReg();
MachineBasicBlock *target = MI->getOperand(2).getMBB();
@@ -724,7 +733,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned regX = MI->getOperand(0).getReg();
int64_t imm = MI->getOperand(1).getImm();
MachineBasicBlock *target = MI->getOperand(2).getMBB();
@@ -758,7 +768,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
MachineInstr *MI, MachineBasicBlock *BB) const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned CC = MI->getOperand(0).getReg();
unsigned regX = MI->getOperand(1).getReg();
unsigned regY = MI->getOperand(2).getReg();
@@ -775,7 +786,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
MachineInstr *MI, MachineBasicBlock *BB )const {
if (DontExpandCondPseudos16)
return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
unsigned CC = MI->getOperand(0).getReg();
unsigned regX = MI->getOperand(1).getReg();
int64_t Imm = MI->getOperand(2).getImm();
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index 2a5eec5..d3b9f75 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -11,27 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16ISELLOWERING_H
-#define MIPS16ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16ISELLOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16ISELLOWERING_H
#include "MipsISelLowering.h"
namespace llvm {
class Mips16TargetLowering : public MipsTargetLowering {
public:
- explicit Mips16TargetLowering(MipsTargetMachine &TM);
+ explicit Mips16TargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI);
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- bool *Fast) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+ unsigned Align,
+ bool *Fast) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
private:
- bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const override;
+ bool isEligibleForTailCallOptimization(
+ const CCState &CCInfo, unsigned NextStackOffset,
+ const MipsFunctionInfo &FI) const override;
void setMips16HardFloatLibCalls();
@@ -45,7 +47,7 @@ namespace llvm {
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee,
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
SDValue Chain) const override;
MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
@@ -77,4 +79,4 @@ namespace llvm {
};
}
-#endif // Mips16ISELLOWERING_H
+#endif
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
index da3a1f1..4ff68be 100644
--- a/lib/Target/Mips/Mips16InstrFormats.td
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -591,7 +591,7 @@ class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
bits<3> funct;
let funct = _funct;
- let I8 = 0b0110;
+ let I8 = 0b00110;
let Inst{26-21} = imm16{10-5};
let Inst{20-16} = imm16{15-11};
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 79607de..4dd9af2 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -31,9 +31,8 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-instrinfo"
-Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
- : MipsInstrInfo(tm, Mips::Bimm16),
- RI(*tm.getSubtargetImpl()) {}
+Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI)
+ : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
return RI;
@@ -44,9 +43,8 @@ const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than loading from the stack slot.
-unsigned Mips16InstrInfo::
-isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
+unsigned Mips16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
return 0;
}
@@ -55,9 +53,8 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than storing to the stack slot.
-unsigned Mips16InstrInfo::
-isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
-{
+unsigned Mips16InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
return 0;
}
@@ -93,11 +90,12 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
-void Mips16InstrInfo::
-storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
- int64_t Offset) const {
+void Mips16InstrInfo::storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -110,10 +108,12 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addMemOperand(MMO);
}
-void Mips16InstrInfo::
-loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, int64_t Offset) const {
+void Mips16InstrInfo::loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -171,7 +171,8 @@ unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const {
}
static void addSaveRestoreRegs(MachineInstrBuilder &MIB,
- const std::vector<CalleeSavedInfo> &CSI, unsigned Flags=0) {
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned Flags = 0) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
// RA and return address is taken, because it has already been added in
@@ -195,8 +196,8 @@ static void addSaveRestoreRegs(MachineInstrBuilder &MIB,
}
// Adjust SP by FrameSize bytes. Save RA, S0, S1
void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -265,9 +266,6 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
MachineBasicBlock::iterator I,
unsigned Reg1, unsigned Reg2) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
-// MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
-// unsigned Reg1 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass);
-// unsigned Reg2 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass);
//
// li reg1, constant
// move reg2, sp
@@ -287,9 +285,9 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
MIB4.addReg(Reg1, RegState::Kill);
}
-void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+void Mips16InstrInfo::adjustStackPtrBigUnrestricted(
+ unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
assert(false && "adjust stack pointer amount exceeded");
}
@@ -305,11 +303,10 @@ void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
/// This function generates the sequence of instructions needed to get the
/// result of adding register REG and immediate IMM.
-unsigned
-Mips16InstrInfo::loadImmediate(unsigned FrameReg,
- int64_t Imm, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator II, DebugLoc DL,
- unsigned &NewImm) const {
+unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II,
+ DebugLoc DL, unsigned &NewImm) const {
//
// given original instruction is:
// Instr rx, T[offset] where offset is too big.
@@ -345,7 +342,7 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg,
!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
Candidates.reset(MO.getReg());
}
- //
+
// If the same register was used and defined in an instruction, then
// it will not be in the list of candidates.
//
@@ -354,7 +351,6 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg,
// present as an operand of the instruction. This tells
// whether the register is live before the instruction. If it's not,
// then we don't need to save it in case there are no free registers.
- //
int DefReg = 0;
for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
MachineOperand &MO = II->getOperand(i);
@@ -363,9 +359,8 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg,
break;
}
}
- //
- BitVector Available = rs.getRegsAvailable(&Mips::CPU16RegsRegClass);
+ BitVector Available = rs.getRegsAvailable(&Mips::CPU16RegsRegClass);
Available &= Candidates;
//
// we use T0 for the first register, if we need to save something away.
@@ -374,7 +369,6 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg,
unsigned FirstRegSaved =0, SecondRegSaved=0;
unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0;
-
Reg = Available.find_first();
if (Reg == -1) {
@@ -442,7 +436,6 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
}
-
const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const {
if (validSpImm8(Imm))
return get(Mips::AddiuSpImm16);
@@ -456,8 +449,8 @@ void Mips16InstrInfo::BuildAddiuSpImm
BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm);
}
-const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
- return new Mips16InstrInfo(TM);
+const MipsInstrInfo *llvm::createMips16InstrInfo(const MipsSubtarget &STI) {
+ return new Mips16InstrInfo(STI);
}
bool Mips16InstrInfo::validImmediate(unsigned Opcode, unsigned Reg,
@@ -497,7 +490,6 @@ bool Mips16InstrInfo::validImmediate(unsigned Opcode, unsigned Reg,
unsigned Mips16InstrInfo::getInlineAsmLength(const char *Str,
const MCAsmInfo &MAI) const {
-
// Count the number of instructions in the asm.
bool atInsnStart = true;
unsigned Length = 0;
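As context for the loadImmediate() changes above: when a frame offset no longer fits the instruction's 16-bit immediate field, the value is materialized in a scavenged register and the memory instruction keeps only a small residual immediate (NewImm). The following standalone sketch shows the arithmetic of such a split; it uses illustrative names and is not LLVM API code.

#include <cassert>
#include <cstdint>

// Illustrative only: split an offset into a part that goes into a scratch
// register and a residual that still fits a signed 16-bit field.
static void splitOffset(int32_t Offset, int32_t &ScratchPart, int16_t &NewImm) {
  int64_t Rounded =
      (static_cast<int64_t>(Offset) + 0x8000) & ~static_cast<int64_t>(0xFFFF);
  ScratchPart = static_cast<int32_t>(Rounded);
  NewImm = static_cast<int16_t>(Offset - ScratchPart);
}

int main() {
  int32_t Scratch;
  int16_t Lo;
  splitOffset(0x123456, Scratch, Lo);
  assert(Scratch + Lo == 0x123456); // the two pieces recombine exactly
  return 0;
}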
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 0dc0046..e7d0c07 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16INSTRUCTIONINFO_H
-#define MIPS16INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16INSTRINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16INSTRINFO_H
#include "Mips16RegisterInfo.h"
#include "MipsInstrInfo.h"
@@ -23,7 +23,7 @@ class Mips16InstrInfo : public MipsInstrInfo {
const Mips16RegisterInfo RI;
public:
- explicit Mips16InstrInfo(MipsTargetMachine &TM);
+ explicit Mips16InstrInfo(const MipsSubtarget &STI);
const MipsRegisterInfo &getRegisterInfo() const override;
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 5e4eebb..2364f4d 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -1771,9 +1771,9 @@ def: Mips16Pat
//
// For constants, llvm transforms this to:
-// x > (k -1) and then reverses the operands to use setlt. So this pattern
+// x > (k - 1) and then reverses the operands to use setlt. So this pattern
// is not used now by the compiler. (Presumably checking that k-1 does not
-// overflow). The compiler never uses this at a the current time, due to
+// overflow). The compiler never uses this at the current time, due to
// other optimizations.
//
//def: Mips16Pat
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index dbee774..0bb452a 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -65,7 +65,7 @@ bool Mips16RegisterInfo::saveScavengerRegister
const TargetRegisterClass *RC,
unsigned Reg) const {
DebugLoc DL;
- const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true);
return true;
@@ -106,7 +106,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Mips::SP;
else {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
FrameReg = Mips::S0;
}
@@ -140,8 +140,8 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DebugLoc DL = II->getDebugLoc();
unsigned NewImm;
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo*>(
- MBB.getParent()->getTarget().getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
Offset = SignExtend64<16>(NewImm);
IsKill = true;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index f59f1a7..3cdf836 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS16REGISTERINFO_H
-#define MIPS16REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPS16REGISTERINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPS16REGISTERINFO_H
#include "MipsRegisterInfo.h"
diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td
index e4ec96a..e9a4289 100644
--- a/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -403,7 +403,7 @@ class JMP_IDX_COMPACT_FM<bits<6> funct> : MipsR6Inst {
bits<32> Inst;
let Inst{31-26} = funct;
- let Inst{25-21} = 0b000000;
+ let Inst{25-21} = 0b00000;
let Inst{20-16} = rt;
let Inst{15-0} = offset;
}
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index d06e5ca..6d6735b 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -796,8 +796,8 @@ def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
(SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
ISA_MIPS32R6;
def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
- (OR (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)),
- (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)))>,
+ (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)),
+ (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
ISA_MIPS32R6;
def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t,
i32:$f),
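The pattern change above fixes swapped operands: for a setne condition the true value must feed SELNEZ and the false value SELEQZ. A minimal standalone sketch of the R6 select lowering semantics (illustrative only, not compiler code):

#include <cassert>
#include <cstdint>

// SELNEZ d, s, c -> d = (c != 0) ? s : 0
// SELEQZ d, s, c -> d = (c == 0) ? s : 0
static uint32_t selnez(uint32_t S, uint32_t C) { return C != 0 ? S : 0; }
static uint32_t seleqz(uint32_t S, uint32_t C) { return C == 0 ? S : 0; }

int main() {
  uint32_t Cond = 5, Imm = 7, T = 111, F = 222;
  // select (setne Cond, Imm), T, F
  uint32_t Expected = (Cond != Imm) ? T : F;
  // Corrected pattern: T feeds SELNEZ, F feeds SELEQZ.
  uint32_t Lowered = selnez(T, Cond ^ Imm) | seleqz(F, Cond ^ Imm);
  assert(Lowered == Expected);
  return 0;
}

The same reasoning applies to the 64-bit pattern fixed in Mips64r6InstrInfo.td further below.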
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index f0b6814..4e2dcd8 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -419,6 +419,10 @@ defm : SetgePats<GPR64, SLT64, SLTu64>;
defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;
// truncate
+def : MipsPat<(trunc (assertsext GPR64:$src)),
+ (EXTRACT_SUBREG GPR64:$src, sub_32)>;
+def : MipsPat<(trunc (assertzext GPR64:$src)),
+ (EXTRACT_SUBREG GPR64:$src, sub_32)>;
def : MipsPat<(i32 (trunc GPR64:$src)),
(SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;
@@ -442,28 +446,22 @@ def : MipsInstAlias<"move $dst, $src",
GPR_64;
def : MipsInstAlias<"daddu $rs, $rt, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
- 0>;
+ 0>, ISA_MIPS3;
def : MipsInstAlias<"dadd $rs, $rt, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"daddu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
- 0>;
+ 0>, ISA_MIPS3;
def : MipsInstAlias<"dadd $rs, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
0>, ISA_MIPS3_NOT_32R6_64R6;
-def : MipsInstAlias<"add $rs, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
- 0>;
-def : MipsInstAlias<"addu $rs, $imm",
- (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
- 0>;
def : MipsInstAlias<"dsll $rd, $rt, $rs",
(DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
def : MipsInstAlias<"dsubu $rt, $rs, $imm",
(DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs,
- InvertedImOperand64:$imm), 0>;
+ InvertedImOperand64:$imm), 0>, ISA_MIPS3;
def : MipsInstAlias<"dsubi $rs, $rt, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rt,
InvertedImOperand64:$imm),
@@ -483,7 +481,7 @@ def : MipsInstAlias<"dsub $rs, $imm",
def : MipsInstAlias<"dsubu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs,
InvertedImOperand64:$imm),
- 0>;
+ 0>, ISA_MIPS3;
def : MipsInstAlias<"dsra $rd, $rt, $rs",
(DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
@@ -510,3 +508,9 @@ def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0
def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+let Predicates = [HasMips64, HasCnMips] in {
+def : MipsInstAlias<"synciobdma", (SYNC 0x2), 0>;
+def : MipsInstAlias<"syncs", (SYNC 0x6), 0>;
+def : MipsInstAlias<"syncw", (SYNC 0x4), 0>;
+def : MipsInstAlias<"syncws", (SYNC 0x5), 0>;
+}
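Regarding the new truncate patterns added above: when the 64-bit source is known (via assertsext/assertzext) to already be an extension of its low 32 bits, extracting the 32-bit subregister is enough and the usual re-sign-extending "SLL ..., 0" adds nothing. A small standalone sketch of that equivalence (illustrative, not compiler code):

#include <cassert>
#include <cstdint>

int main() {
  int32_t Low = -1234567890;
  int64_t Sext = static_cast<int64_t>(Low);         // what assertsext guarantees
  uint32_t ViaSubreg = static_cast<uint32_t>(Sext); // EXTRACT_SUBREG alone
  uint32_t ViaSll = static_cast<uint32_t>(          // SLL ..., 0 re-extends first
      static_cast<int64_t>(static_cast<int32_t>(Sext)));
  assert(ViaSubreg == ViaSll && ViaSubreg == static_cast<uint32_t>(Low));
  return 0;
}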
diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td
index 63cf60b..6b546e8 100644
--- a/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -191,9 +191,9 @@ def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i64:$t, i64:$f),
immZExt16:$imm))))>,
ISA_MIPS64R6;
def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i64:$t, i64:$f),
- (OR64 (SELNEZ64 i64:$f, (SLL64_32 (XORi i32:$cond,
+ (OR64 (SELNEZ64 i64:$t, (SLL64_32 (XORi i32:$cond,
immZExt16:$imm))),
- (SELEQZ64 i64:$t, (SLL64_32 (XORi i32:$cond,
+ (SELEQZ64 i64:$f, (SLL64_32 (XORi i32:$cond,
immZExt16:$imm))))>,
ISA_MIPS64R6;
diff --git a/lib/Target/Mips/MipsABIInfo.cpp b/lib/Target/Mips/MipsABIInfo.cpp
new file mode 100644
index 0000000..f885369
--- /dev/null
+++ b/lib/Target/Mips/MipsABIInfo.cpp
@@ -0,0 +1,45 @@
+//===---- MipsABIInfo.cpp - Information about MIPS ABI's ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsABIInfo.h"
+#include "MipsRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3};
+
+static const MCPhysReg Mips64IntRegs[8] = {
+ Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
+ Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
+}
+
+const ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
+ if (IsO32())
+ return makeArrayRef(O32IntRegs);
+ if (IsN32() || IsN64())
+ return makeArrayRef(Mips64IntRegs);
+ llvm_unreachable("Unhandled ABI");
+}
+
+const ArrayRef<MCPhysReg> MipsABIInfo::GetVarArgRegs() const {
+ if (IsO32())
+ return makeArrayRef(O32IntRegs);
+ if (IsN32() || IsN64())
+ return makeArrayRef(Mips64IntRegs);
+ llvm_unreachable("Unhandled ABI");
+}
+
+unsigned MipsABIInfo::GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const {
+ if (IsO32())
+ return CC != CallingConv::Fast ? 16 : 0;
+ if (IsN32() || IsN64() || IsEABI())
+ return 0;
+ llvm_unreachable("Unhandled ABI");
+}
diff --git a/lib/Target/Mips/MipsABIInfo.h b/lib/Target/Mips/MipsABIInfo.h
new file mode 100644
index 0000000..bea585e
--- /dev/null
+++ b/lib/Target/Mips/MipsABIInfo.h
@@ -0,0 +1,61 @@
+//===---- MipsABIInfo.h - Information about MIPS ABI's --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSABIINFO_H
+#define MIPSABIINFO_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+class MipsABIInfo {
+public:
+ enum class ABI { Unknown, O32, N32, N64, EABI };
+
+protected:
+ ABI ThisABI;
+
+public:
+ MipsABIInfo(ABI ThisABI) : ThisABI(ThisABI) {}
+
+ static MipsABIInfo Unknown() { return MipsABIInfo(ABI::Unknown); }
+ static MipsABIInfo O32() { return MipsABIInfo(ABI::O32); }
+ static MipsABIInfo N32() { return MipsABIInfo(ABI::N32); }
+ static MipsABIInfo N64() { return MipsABIInfo(ABI::N64); }
+ static MipsABIInfo EABI() { return MipsABIInfo(ABI::EABI); }
+
+ bool IsKnown() const { return ThisABI != ABI::Unknown; }
+ bool IsO32() const { return ThisABI == ABI::O32; }
+ bool IsN32() const { return ThisABI == ABI::N32; }
+ bool IsN64() const { return ThisABI == ABI::N64; }
+ bool IsEABI() const { return ThisABI == ABI::EABI; }
+ ABI GetEnumValue() const { return ThisABI; }
+
+ /// The registers to use for byval arguments.
+ const ArrayRef<MCPhysReg> GetByValArgRegs() const;
+
+ /// The registers to use for the variable argument list.
+ const ArrayRef<MCPhysReg> GetVarArgRegs() const;
+
+ /// Obtain the size of the area allocated by the callee for arguments.
+ /// CallingConv::FastCall affects the value for O32.
+ unsigned GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const;
+
+ /// Ordering of ABI's
+ /// MipsGenSubtargetInfo.inc will use this to resolve conflicts when given
+ /// multiple ABI options.
+ bool operator<(const MipsABIInfo Other) const {
+ return ThisABI < Other.GetEnumValue();
+ }
+};
+}
+
+#endif
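Since MipsABIInfo is introduced in full above, here is a brief usage sketch of its query-style interface. It assumes the surrounding LLVM tree and is shown for illustration only; the helper name is made up.

#include "MipsABIInfo.h"
using namespace llvm;

// Illustrative helper (not part of the patch): the O32 ABI reserves a 16-byte
// argument area for ordinary calls, while N32/N64/EABI reserve none.
static unsigned reservedArgArea(const MipsABIInfo &ABI) {
  return ABI.GetCalleeAllocdArgSizeInBytes(CallingConv::C);
}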
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index 31a9b7d..161345d 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -72,7 +72,8 @@ void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize,
if (Imm & 0x8000) {
InstSeqLs SeqLsORi;
GetInstSeqLsORi(Imm, RemSize, SeqLsORi);
- SeqLs.insert(SeqLs.end(), SeqLsORi.begin(), SeqLsORi.end());
+ SeqLs.append(std::make_move_iterator(SeqLsORi.begin()),
+ std::make_move_iterator(SeqLsORi.end()));
}
}
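The change above switches from a copying insert to a move-based append. A standalone sketch of the same iterator idiom with std::vector (the patch uses SmallVector::append, but the make_move_iterator trick is identical):

#include <iterator>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Dst, Src{"lui", "ori"};
  // Elements are moved out of Src rather than copied, so each element's own
  // heap storage is reused instead of duplicated.
  Dst.insert(Dst.end(), std::make_move_iterator(Src.begin()),
             std::make_move_iterator(Src.end()));
  return Dst.size() == 2 ? 0 : 1;
}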
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index cc09034..ae3c38c 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -6,8 +6,8 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS_ANALYZE_IMMEDIATE_H
-#define MIPS_ANALYZE_IMMEDIATE_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSANALYZEIMMEDIATE_H
+#define LLVM_LIB_TARGET_MIPS_MIPSANALYZEIMMEDIATE_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1fb75a2..832fa05 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -58,10 +58,12 @@ MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() {
}
bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
// Initialize TargetLoweringObjectFile.
- if (Subtarget->allowMixed16_32())
- const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
+
MipsFI = MF.getInfo<MipsFunctionInfo>();
if (Subtarget->inMips16Mode())
for (std::map<
@@ -129,7 +131,7 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MipsTargetStreamer &TS = getTargetStreamer();
- TS.setCanHaveModuleDir(false);
+ TS.forbidModuleDirective();
if (MI->isDebugValue()) {
SmallString<128> Str;
@@ -264,7 +266,8 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
if (Mips::GPR32RegClass.contains(Reg))
break;
- unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
+ unsigned RegNum =
+ TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
@@ -279,7 +282,8 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
// Set CPU Bitmask.
for (; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
+ unsigned RegNum =
+ TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
CPUBitmask |= (1 << RegNum);
}
@@ -304,7 +308,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
/// Frame Directive
void MipsAsmPrinter::emitFrameDirective() {
- const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ const TargetRegisterInfo &RI = *TM.getSubtargetImpl()->getRegisterInfo();
unsigned stackReg = RI.getFrameRegister(*MF);
unsigned returnReg = RI.getRARegister();
@@ -315,11 +319,11 @@ void MipsAsmPrinter::emitFrameDirective() {
/// Emit Set directives.
const char *MipsAsmPrinter::getCurrentABIString() const {
- switch (Subtarget->getTargetABI()) {
- case MipsSubtarget::O32: return "abi32";
- case MipsSubtarget::N32: return "abiN32";
- case MipsSubtarget::N64: return "abi64";
- case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+ switch (Subtarget->getABI().GetEnumValue()) {
+ case MipsABIInfo::ABI::O32: return "abi32";
+ case MipsABIInfo::ABI::N32: return "abiN32";
+ case MipsABIInfo::ABI::N64: return "abi64";
+ case MipsABIInfo::ABI::EABI: return "eabi32"; // TODO: handle eabi64
default: llvm_unreachable("Unknown Mips ABI");
}
}
@@ -469,14 +473,12 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
case 'z': {
// $0 if zero, regular printing otherwise
- if (MO.getType() != MachineOperand::MO_Immediate)
- return true;
- int64_t Val = MO.getImm();
- if (Val)
- O << Val;
- else
+ if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) {
O << "$0";
- return false;
+ return false;
+ }
+ // If not, call printOperand as normal.
+ break;
}
case 'D': // Second part of a double word register operand
case 'L': // Low order register of a double word register operand
@@ -558,7 +560,7 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
bool closeP = false;
@@ -643,6 +645,18 @@ printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
// If PIC target the target is loaded as the
// pattern lw $25,%call16($28)
+
+ // opNum can be invalid if instruction has reglist as operand.
+ // MemOperand is always last operand of instruction (base + offset).
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case Mips::SWM32_MM:
+ case Mips::LWM32_MM:
+ opNum = MI->getNumOperands() - 2;
+ break;
+ }
+
printOperand(MI, opNum+1, O);
O << "(";
printOperand(MI, opNum, O);
@@ -666,13 +680,19 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
}
+void MipsAsmPrinter::
+printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) {
+ for (int i = opNum, e = MI->getNumOperands(); i != e; ++i) {
+ if (i != opNum) O << ", ";
+ printOperand(MI, i, O);
+ }
+}
+
void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
- // TODO: Need to add -mabicalls and -mno-abicalls flags.
- // Currently we assume that -mabicalls is the default.
- bool IsABICalls = true;
+ bool IsABICalls = Subtarget->isABICalls();
if (IsABICalls) {
getTargetStreamer().emitDirectiveAbiCalls();
- Reloc::Model RM = Subtarget->getRelocationModel();
+ Reloc::Model RM = TM.getRelocationModel();
// FIXME: This condition should be a lot more complicated than it is here.
// Ideally it should test for properties of the ABI and not the ABI
// itself.
@@ -706,9 +726,19 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
getTargetStreamer().updateABIInfo(*Subtarget);
- getTargetStreamer().emitDirectiveModuleFP();
- if (Subtarget->isABI_O32())
+ // We should always emit a '.module fp=...' but binutils 2.24 does not accept
+ // it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or
+ // -mfp64) and omit it otherwise.
+ if (Subtarget->isABI_O32() && (Subtarget->isABI_FPXX() ||
+ Subtarget->isFP64bit()))
+ getTargetStreamer().emitDirectiveModuleFP();
+
+ // We should always emit a '.module [no]oddspreg' but binutils 2.24 does not
+ // accept it. We therefore emit it when it contradicts the default or an
+ // option has changed the default (i.e. FPXX) and omit it otherwise.
+ if (Subtarget->isABI_O32() && (!Subtarget->useOddSPReg() ||
+ Subtarget->isABI_FPXX()))
getTargetStreamer().emitDirectiveModuleOddSPReg(Subtarget->useOddSPReg(),
Subtarget->isABI_O32());
}
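One behavioural fix in the AsmPrinter changes above is the 'z' inline-asm print modifier: it now emits "$0" only for an immediate zero and falls back to ordinary operand printing for everything else, instead of rejecting non-immediates. A standalone sketch of the corrected control flow, with illustrative names rather than the LLVM API:

#include <cstdio>

// Stand-in for printOperand().
static void printNormally(long Val) { std::printf("%ld", Val); }

static void printZModifier(bool IsImm, long Val) {
  if (IsImm && Val == 0) {
    std::printf("$0"); // immediate zero becomes the $zero register
    return;
  }
  printNormally(Val);  // everything else prints as usual
}

int main() {
  printZModifier(true, 0);
  std::printf("\n");
  printZModifier(true, 42);
  std::printf("\n");
  return 0;
}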
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 967aa0b..0582e21 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSASMPRINTER_H
-#define MIPSASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSASMPRINTER_H
+#define LLVM_LIB_TARGET_MIPS_MIPSASMPRINTER_H
#include "Mips16HardFloatInfo.h"
#include "MipsMCInstLower.h"
@@ -89,11 +89,14 @@ public:
const MipsFunctionInfo *MipsFI;
MipsMCInstLower MCInstLowering;
- explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), MCP(nullptr), InConstantPool(false),
- MCInstLowering(*this) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
- }
+ // We initialize the subtarget here and in runOnMachineFunction
+ // since there are certain target specific flags (ABI) that could
+ // reside on the TargetMachine, but are on the subtarget currently
+ // and we need them for the beginning of file output before we've
+ // seen a single function.
+ explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), MCP(nullptr), InConstantPool(false),
+ Subtarget(&TM.getSubtarget<MipsSubtarget>()), MCInstLowering(*this) {}
const char *getPassName() const override {
return "Mips Assembly Printer";
@@ -131,6 +134,7 @@ public:
void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = nullptr);
+ void printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O);
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
new file mode 100644
index 0000000..e18cc8b
--- /dev/null
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -0,0 +1,142 @@
+//===---- MipsCCState.cpp - CCState with Mips specific extensions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsCCState.h"
+#include "MipsSubtarget.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+/// This function returns true if CallSym is a long double emulation routine.
+static bool isF128SoftLibCall(const char *CallSym) {
+ const char *const LibCalls[] = {
+ "__addtf3", "__divtf3", "__eqtf2", "__extenddftf2",
+ "__extendsftf2", "__fixtfdi", "__fixtfsi", "__fixtfti",
+ "__fixunstfdi", "__fixunstfsi", "__fixunstfti", "__floatditf",
+ "__floatsitf", "__floattitf", "__floatunditf", "__floatunsitf",
+ "__floatuntitf", "__getf2", "__gttf2", "__letf2",
+ "__lttf2", "__multf3", "__netf2", "__powitf2",
+ "__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2",
+ "ceill", "copysignl", "cosl", "exp2l",
+ "expl", "floorl", "fmal", "fmodl",
+ "log10l", "log2l", "logl", "nearbyintl",
+ "powl", "rintl", "sinl", "sqrtl",
+ "truncl"};
+
+ const char *const *End = LibCalls + array_lengthof(LibCalls);
+
+ // Check that LibCalls is sorted alphabetically.
+ MipsTargetLowering::LTStr Comp;
+
+#ifndef NDEBUG
+ for (const char *const *I = LibCalls; I < End - 1; ++I)
+ assert(Comp(*I, *(I + 1)));
+#endif
+
+ return std::binary_search(LibCalls, End, CallSym, Comp);
+}
+
+/// This function returns true if Ty is fp128, {f128} or i128 which was
+/// originally a fp128.
+static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+ if (Ty->isFP128Ty())
+ return true;
+
+ if (Ty->isStructTy() && Ty->getStructNumElements() == 1 &&
+ Ty->getStructElementType(0)->isFP128Ty())
+ return true;
+
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
+
+ // If the Ty is i128 and the function being called is a long double emulation
+ // routine, then the original type is f128.
+ return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
+}
+
+MipsCCState::SpecialCallingConvType
+MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
+ const MipsSubtarget &Subtarget) {
+ MipsCCState::SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv;
+ if (Subtarget.inMips16HardFloat()) {
+ if (const GlobalAddressSDNode *G =
+ dyn_cast<const GlobalAddressSDNode>(Callee)) {
+ llvm::StringRef Sym = G->getGlobal()->getName();
+ Function *F = G->getGlobal()->getParent()->getFunction(Sym);
+ if (F && F->hasFnAttribute("__Mips16RetHelper")) {
+ SpecialCallingConv = Mips16RetHelperConv;
+ }
+ }
+ }
+ return SpecialCallingConv;
+}
+
+void MipsCCState::PreAnalyzeCallResultForF128(
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const TargetLowering::CallLoweringInfo &CLI) {
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ OriginalArgWasF128.push_back(
+ originalTypeIsF128(CLI.RetTy, CLI.Callee.getNode()));
+ OriginalArgWasFloat.push_back(CLI.RetTy->isFloatingPointTy());
+ }
+}
+
+/// Identify lowered values that originated from f128 arguments and record
+/// this for use by RetCC_MipsN.
+void MipsCCState::PreAnalyzeReturnForF128(
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ const MachineFunction &MF = getMachineFunction();
+ for (unsigned i = 0; i < Outs.size(); ++i) {
+ OriginalArgWasF128.push_back(
+ originalTypeIsF128(MF.getFunction()->getReturnType(), nullptr));
+ OriginalArgWasFloat.push_back(
+ MF.getFunction()->getReturnType()->isFloatingPointTy());
+ }
+}
+
+/// Identify lowered values that originated from f128 arguments and record
+/// this.
+void MipsCCState::PreAnalyzeCallOperands(
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+ const SDNode *CallNode) {
+ for (unsigned i = 0; i < Outs.size(); ++i) {
+ OriginalArgWasF128.push_back(
+ originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode));
+ OriginalArgWasFloat.push_back(
+ FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+ CallOperandIsFixed.push_back(Outs[i].IsFixed);
+ }
+}
+
+/// Identify lowered values that originated from f128 arguments and record
+/// this.
+void MipsCCState::PreAnalyzeFormalArgumentsForF128(
+ const SmallVectorImpl<ISD::InputArg> &Ins) {
+ const MachineFunction &MF = getMachineFunction();
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+
+ // SRet arguments cannot originate from f128 or {f128} returns so we just
+ // push false. We have to handle this specially since SRet arguments
+ // aren't mapped to an original argument.
+ if (Ins[i].Flags.isSRet()) {
+ OriginalArgWasF128.push_back(false);
+ OriginalArgWasFloat.push_back(false);
+ continue;
+ }
+
+ assert(Ins[i].OrigArgIndex < MF.getFunction()->arg_size());
+ std::advance(FuncArg, Ins[i].OrigArgIndex);
+
+ OriginalArgWasF128.push_back(
+ originalTypeIsF128(FuncArg->getType(), nullptr));
+ OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+ }
+}
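isF128SoftLibCall() above relies on the table staying alphabetically sorted so that std::binary_search is valid (the NDEBUG-guarded loop asserts exactly that). A trimmed standalone sketch of the same lookup style, with a plain strcmp comparator standing in for MipsTargetLowering::LTStr:

#include <algorithm>
#include <cstring>
#include <iterator>

static bool lessStr(const char *A, const char *B) { return std::strcmp(A, B) < 0; }

static bool isSoftFloatCall(const char *Sym) {
  // Must remain alphabetically sorted for binary_search to be correct.
  static const char *const Calls[] = {"__addtf3", "__divtf3", "__multf3"};
  return std::binary_search(std::begin(Calls), std::end(Calls), Sym, lessStr);
}

int main() { return isSoftFloatCall("__multf3") ? 0 : 1; }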
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
new file mode 100644
index 0000000..cc4531d
--- /dev/null
+++ b/lib/Target/Mips/MipsCCState.h
@@ -0,0 +1,136 @@
+//===---- MipsCCState.h - CCState with Mips specific extensions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSCCSTATE_H
+#define MIPSCCSTATE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "MipsISelLowering.h"
+
+namespace llvm {
+class SDNode;
+class MipsSubtarget;
+
+class MipsCCState : public CCState {
+public:
+ enum SpecialCallingConvType { Mips16RetHelperConv, NoSpecialCallingConv };
+
+ /// Determine the SpecialCallingConvType for the given callee
+ static SpecialCallingConvType
+ getSpecialCallingConvForCallee(const SDNode *Callee,
+ const MipsSubtarget &Subtarget);
+
+private:
+ /// Identify lowered values that originated from f128 arguments and record
+ /// this for use by RetCC_MipsN.
+ void PreAnalyzeCallResultForF128(const SmallVectorImpl<ISD::InputArg> &Ins,
+ const TargetLowering::CallLoweringInfo &CLI);
+
+ /// Identify lowered values that originated from f128 arguments and record
+ /// this for use by RetCC_MipsN.
+ void PreAnalyzeReturnForF128(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
+ /// Identify lowered values that originated from f128 arguments and record
+ /// this.
+ void
+ PreAnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+ const SDNode *CallNode);
+
+ /// Identify lowered values that originated from f128 arguments and record
+ /// this.
+ void
+ PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
+
+ /// Records whether the value has been lowered from an f128.
+ SmallVector<bool, 4> OriginalArgWasF128;
+
+ /// Records whether the value has been lowered from float.
+ SmallVector<bool, 4> OriginalArgWasFloat;
+
+ /// Records whether the value was a fixed argument.
+ /// See ISD::OutputArg::IsFixed.
+ SmallVector<bool, 4> CallOperandIsFixed;
+
+ // Used to handle MIPS16-specific calling convention tweaks.
+ // FIXME: This should probably be a fully fledged calling convention.
+ SpecialCallingConvType SpecialCallingConv;
+
+public:
+ MipsCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ SpecialCallingConvType SpecialCC = NoSpecialCallingConv)
+ : CCState(CC, isVarArg, MF, locs, C), SpecialCallingConv(SpecialCC) {}
+
+ void
+ AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn,
+ std::vector<TargetLowering::ArgListEntry> &FuncArgs,
+ const SDNode *CallNode) {
+ PreAnalyzeCallOperands(Outs, FuncArgs, CallNode);
+ CCState::AnalyzeCallOperands(Outs, Fn);
+ OriginalArgWasF128.clear();
+ OriginalArgWasFloat.clear();
+ CallOperandIsFixed.clear();
+ }
+
+ // The AnalyzeCallOperands in the base class is not usable since we must
+ // provide a means of accessing ArgListEntry::IsFixed. Delete them from this
+ // class. This doesn't stop them being used via the base class though.
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) LLVM_DELETED_FUNCTION;
+ void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) LLVM_DELETED_FUNCTION;
+
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ PreAnalyzeFormalArgumentsForF128(Ins);
+ CCState::AnalyzeFormalArguments(Ins, Fn);
+ OriginalArgWasFloat.clear();
+ OriginalArgWasF128.clear();
+ }
+
+ void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn,
+ const TargetLowering::CallLoweringInfo &CLI) {
+ PreAnalyzeCallResultForF128(Ins, CLI);
+ CCState::AnalyzeCallResult(Ins, Fn);
+ OriginalArgWasFloat.clear();
+ OriginalArgWasF128.clear();
+ }
+
+ void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ PreAnalyzeReturnForF128(Outs);
+ CCState::AnalyzeReturn(Outs, Fn);
+ OriginalArgWasFloat.clear();
+ OriginalArgWasF128.clear();
+ }
+
+ bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ CCAssignFn Fn) {
+ PreAnalyzeReturnForF128(ArgsFlags);
+ bool Return = CCState::CheckReturn(ArgsFlags, Fn);
+ OriginalArgWasFloat.clear();
+ OriginalArgWasF128.clear();
+ return Return;
+ }
+
+ bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; }
+ bool WasOriginalArgFloat(unsigned ValNo) {
+ return OriginalArgWasFloat[ValNo];
+ }
+ bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
+ SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
+};
+}
+
+#endif
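The MipsCCState analysis wrappers above all follow the same shape: record per-operand facts, run the generic CCState analysis (during which the tablegen-erated conventions query those facts), then clear the scratch vectors. A stripped-down standalone sketch of that shape, using illustrative names only:

#include <vector>

struct BaseState {
  void analyze(const std::vector<int> &Args) {
    (void)Args; // stands in for CCState's generic assignment loop
  }
};

struct DerivedState : BaseState {
  std::vector<bool> WasSpecial; // parallels OriginalArgWasF128 and friends

  void analyzeWithPrePass(const std::vector<int> &Args) {
    for (int A : Args)
      WasSpecial.push_back(A == 128); // pre-analysis records per-operand facts
    analyze(Args);                    // generic analysis runs (the real code
                                      // consults the recorded facts via a cast)
    WasSpecial.clear();               // reset so the object can be reused
  }
};

int main() {
  DerivedState S;
  S.analyzeWithPrePass({32, 128, 64});
  return 0;
}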
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 007213c..7318de2 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -10,13 +10,60 @@
//===----------------------------------------------------------------------===//
/// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>:
- CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+class CCIfSubtarget<string F, CCAction A, string Invert = "">
+ : CCIf<!strconcat(Invert,
+ "static_cast<const MipsSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
+
+// The inverse of CCIfSubtarget
+class CCIfSubtargetNot<string F, CCAction A> : CCIfSubtarget<F, A, "!">;
+
+// For soft-float, f128 values are returned in A0_64 rather than V1_64.
+def RetCC_F128SoftFloat : CallingConv<[
+ CCAssignToReg<[V0_64, A0_64]>
+]>;
+
+// For hard-float, f128 values are returned as a pair of f64's rather than a
+// pair of i64's.
+def RetCC_F128HardFloat : CallingConv<[
+ CCBitConvertToType<f64>,
+
+ // Contrary to the ABI documentation, a struct containing a long double is
+ // returned in $f0 and $f1 instead of the usual $f0 and $f2. This is to
+ // match the de facto ABI as implemented by GCC.
+ CCIfInReg<CCAssignToReg<[D0_64, D1_64]>>,
+
+ CCAssignToReg<[D0_64, D2_64]>
+]>;
+
+// Handle F128 specially since we can't identify the original type during the
+// tablegen-erated code.
+def RetCC_F128 : CallingConv<[
+ CCIfSubtarget<"abiUsesSoftFloat()",
+ CCIfType<[i64], CCDelegateTo<RetCC_F128SoftFloat>>>,
+ CCIfSubtargetNot<"abiUsesSoftFloat()",
+ CCIfType<[i64], CCDelegateTo<RetCC_F128HardFloat>>>
+]>;
//===----------------------------------------------------------------------===//
// Mips O32 Calling Convention
//===----------------------------------------------------------------------===//
+def CC_MipsO32 : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Integer values get stored in stack slots that are 8 bytes in
+ // size and 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
// Only the return rules are defined here for O32. The rules for argument
// passing are defined in MipsISelLowering.cpp.
def RetCC_MipsO32 : CallingConv<[
@@ -26,26 +73,46 @@ def RetCC_MipsO32 : CallingConv<[
// f32 are returned in registers F0, F2
CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
- // f64 arguments are returned in D0_64 and D1_64 in FP64bit mode or
+ // f64 arguments are returned in D0_64 and D2_64 in FP64bit mode or
// in D0 and D1 in FP32bit mode.
- CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D1_64]>>>,
- CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()", CCAssignToReg<[D0, D1]>>>
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D2_64]>>>,
+ CCIfType<[f64], CCIfSubtargetNot<"isFP64bit()", CCAssignToReg<[D0, D1]>>>
+]>;
+
+def CC_MipsO32_FP32 : CustomCallingConv;
+def CC_MipsO32_FP64 : CustomCallingConv;
+
+def CC_MipsO32_FP : CallingConv<[
+ CCIfSubtargetNot<"isFP64bit()", CCDelegateTo<CC_MipsO32_FP32>>,
+ CCIfSubtarget<"isFP64bit()", CCDelegateTo<CC_MipsO32_FP64>>
]>;
//===----------------------------------------------------------------------===//
// Mips N32/64 Calling Convention
//===----------------------------------------------------------------------===//
+def CC_MipsN_SoftFloat : CallingConv<[
+ CCAssignToRegWithShadow<[A0, A1, A2, A3,
+ T0, T1, T2, T3],
+ [D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64, D19_64]>,
+ CCAssignToStack<4, 8>
+]>;
+
def CC_MipsN : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i8, i16, i32],
+ CCIfSubtargetNot<"isLittle()",
+ CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
- // Integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToRegWithShadow<[A0, A1, A2, A3,
- T0, T1, T2, T3],
- [F12, F13, F14, F15,
- F16, F17, F18, F19]>>,
+ // All integers (except soft-float integers) are promoted to 64-bit.
+ CCIfType<[i8, i16, i32],
+ CCIf<"!static_cast<MipsCCState *>(&State)->WasOriginalArgFloat(ValNo)",
+ CCPromoteToType<i64>>>,
+
+ // The only i32's we have left are soft-float arguments.
+ CCIfSubtarget<"abiUsesSoftFloat()", CCIfType<[i32], CCDelegateTo<CC_MipsN_SoftFloat>>>,
+ // Integer arguments are passed in integer registers.
CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
T0_64, T1_64, T2_64, T3_64],
[D12_64, D13_64, D14_64, D15_64,
@@ -64,29 +131,49 @@ def CC_MipsN : CallingConv<[
T0_64, T1_64, T2_64, T3_64]>>,
// All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
- CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[f32], CCAssignToStack<4, 8>>,
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
// N32/64 variable arguments.
// All arguments are passed in integer registers.
def CC_MipsN_VarArg : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // All integers are promoted to 64-bit.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
- CCIfType<[i32, f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+ CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
T0_64, T1_64, T2_64, T3_64]>>,
// All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
- CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[f32], CCAssignToStack<4, 8>>,
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
def RetCC_MipsN : CallingConv<[
- // i32 are returned in registers V0, V1
- CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+ // f128 needs to be handled similarly to f32 and f64. However, f128 is not
+ // legal and is lowered to i128 which is further lowered to a pair of i64's.
+ // This presents us with a problem for the calling convention since hard-float
+ // still needs to pass them in FPU registers, and soft-float needs to use $v0,
+ // and $a0 instead of the usual $v0, and $v1. We therefore resort to a
+ // pre-analyze (see PreAnalyzeReturnForF128()) step to pass information on
+ // whether the result was originally an f128 into the tablegen-erated code.
+ //
+ // f128 should only occur for the N64 ABI where long double is 128-bit. On
+ // N32, long double is equivalent to double.
+ CCIfType<[i64],
+ CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)",
+ CCDelegateTo<RetCC_F128>>>,
+
+ // Aggregate returns are positioned at the lowest address in the slot for
+ // both little and big-endian targets. When passing in registers, this
+ // requires that big-endian targets shift the value into the upper bits.
+ CCIfSubtarget<"isLittle()",
+ CCIfType<[i8, i16, i32, i64], CCIfInReg<CCPromoteToType<i64>>>>,
+ CCIfSubtargetNot<"isLittle()",
+ CCIfType<[i8, i16, i32, i64],
+ CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
// i64 are returned in registers V0_64, V1_64
CCIfType<[i64], CCAssignToReg<[V0_64, V1_64]>>,
@@ -98,12 +185,6 @@ def RetCC_MipsN : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
]>;
-// In soft-mode, register A0_64, instead of V1_64, is used to return a long
-// double value.
-def RetCC_F128Soft : CallingConv<[
- CCIfType<[i64], CCAssignToReg<[V0_64, A0_64]>>
-]>;
-
//===----------------------------------------------------------------------===//
// Mips EABI Calling Convention
//===----------------------------------------------------------------------===//
@@ -119,11 +200,11 @@ def CC_MipsEABI : CallingConv<[
CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
- CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCIfType<[f32], CCIfSubtargetNot<"isSingleFloat()",
CCAssignToReg<[F12, F14, F16, F18]>>>,
// The first 4 double fp arguments are passed in single fp registers.
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()",
CCAssignToReg<[D6, D7, D8, D9]>>>,
// Integer values get stored in stack slots that are 4 bytes in
@@ -132,7 +213,7 @@ def CC_MipsEABI : CallingConv<[
// Integer values get stored in stack slots that are 8 bytes in
// size and 8-byte aligned.
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+ CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()", CCAssignToStack<8, 8>>>
]>;
def RetCC_MipsEABI : CallingConv<[
@@ -143,7 +224,7 @@ def RetCC_MipsEABI : CallingConv<[
CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
// f64 are returned in register D0
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+ CCIfType<[f64], CCIfSubtargetNot<"isSingleFloat()", CCAssignToReg<[D0]>>>
]>;
//===----------------------------------------------------------------------===//
@@ -151,16 +232,20 @@ def RetCC_MipsEABI : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
// f64 arguments are passed in double-precision floating pointer registers.
- CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()",
- CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9]>>>,
- CCIfType<[f64], CCIfSubtarget<"isFP64bit()",
+ CCIfType<[f64], CCIfSubtargetNot<"isFP64bit()",
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6,
+ D7, D8, D9]>>>,
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCIfSubtarget<"useOddSPReg()",
CCAssignToReg<[D0_64, D1_64, D2_64, D3_64,
D4_64, D5_64, D6_64, D7_64,
D8_64, D9_64, D10_64, D11_64,
D12_64, D13_64, D14_64, D15_64,
D16_64, D17_64, D18_64,
- D19_64]>>>,
+ D19_64]>>>>,
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCIfSubtarget<"noOddSPReg()",
+ CCAssignToReg<[D0_64, D2_64, D4_64, D6_64,
+ D8_64, D10_64, D12_64, D14_64,
+ D16_64, D18_64]>>>>,
// Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
CCIfType<[f64], CCAssignToStack<8, 8>>
@@ -192,7 +277,7 @@ def CC_Mips_FastCC : CallingConv<[
// Integer arguments are passed in integer registers. All scratch registers,
// except for AT, V0 and T9, are available to be used as argument registers.
- CCIfType<[i32], CCIfSubtarget<"isNotTargetNaCl()",
+ CCIfType<[i32], CCIfSubtargetNot<"isTargetNaCl()",
CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, V1]>>>,
// In NaCl, T6, T7 and T8 are reserved and not available as argument
@@ -203,8 +288,13 @@ def CC_Mips_FastCC : CallingConv<[
CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, V1]>>>,
// f32 arguments are passed in single-precision floating pointer registers.
- CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10,
- F11, F12, F13, F14, F15, F16, F17, F18, F19]>>,
+ CCIfType<[f32], CCIfSubtarget<"useOddSPReg()",
+ CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13,
+ F14, F15, F16, F17, F18, F19]>>>,
+
+ // Don't use odd numbered single-precision registers for -mno-odd-spreg.
+ CCIfType<[f32], CCIfSubtarget<"noOddSPReg()",
+ CCAssignToReg<[F0, F2, F4, F6, F8, F10, F12, F14, F16, F18]>>>,
// Stack parameter slots for i32 and f32 are 32-bit words and 4-byte aligned.
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
@@ -214,13 +304,6 @@ def CC_Mips_FastCC : CallingConv<[
CCDelegateTo<CC_MipsN_FastCC>
]>;
-//==
-
-def CC_Mips16RetHelper : CallingConv<[
- // Integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>
-]>;
-
//===----------------------------------------------------------------------===//
// Mips Calling Convention Dispatch
//===----------------------------------------------------------------------===//
@@ -232,6 +315,66 @@ def RetCC_Mips : CallingConv<[
CCDelegateTo<RetCC_MipsO32>
]>;
+def CC_Mips_ByVal : CallingConv<[
+ CCIfSubtarget<"isABI_O32()", CCIfByVal<CCPassByVal<4, 4>>>,
+ CCIfByVal<CCPassByVal<8, 8>>
+]>;
+
+def CC_Mips16RetHelper : CallingConv<[
+ CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>
+]>;
+
+def CC_Mips_FixedArg : CallingConv<[
+ // Mips16 needs special handling on some functions.
+ CCIf<"State.getCallingConv() != CallingConv::Fast",
+ CCIf<"static_cast<MipsCCState *>(&State)->getSpecialCallingConv() == "
+ "MipsCCState::Mips16RetHelperConv",
+ CCDelegateTo<CC_Mips16RetHelper>>>,
+
+ CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+ // f128 needs to be handled similarly to f32 and f64 on hard-float. However,
+ // f128 is not legal and is lowered to i128 which is further lowered to a pair
+ // of i64's.
+ // This presents us with a problem for the calling convention since hard-float
+ // still needs to pass them in FPU registers. We therefore resort to a
+ // pre-analyze (see PreAnalyzeFormalArgsForF128()) step to pass information on
+ // whether the argument was originally an f128 into the tablegen-erated code.
+ //
+ // f128 should only occur for the N64 ABI where long double is 128-bit. On
+ // N32, long double is equivalent to double.
+ CCIfType<[i64],
+ CCIfSubtargetNot<"abiUsesSoftFloat()",
+ CCIf<"static_cast<MipsCCState *>(&State)->WasOriginalArgF128(ValNo)",
+ CCBitConvertToType<f64>>>>,
+
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_Mips_FastCC>>,
+
+ // FIXME: There wasn't an EABI case in the original code and it seems unlikely
+ // that it's the same as CC_MipsN
+ CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FP>>,
+ CCDelegateTo<CC_MipsN>
+]>;
+
+def CC_Mips_VarArg : CallingConv<[
+ CCIfByVal<CCDelegateTo<CC_Mips_ByVal>>,
+
+ // FIXME: There wasn't an EABI case in the original code and it seems unlikely
+ // that it's the same as CC_MipsN_VarArg
+ CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FP>>,
+ CCDelegateTo<CC_MipsN_VarArg>
+]>;
+
+def CC_Mips : CallingConv<[
+ CCIfVarArg<
+ CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)",
+ CCDelegateTo<CC_Mips_VarArg>>>,
+ CCDelegateTo<CC_Mips_FixedArg>
+]>;
+
//===----------------------------------------------------------------------===//
// Callee-saved register lists.
//===----------------------------------------------------------------------===//
@@ -247,8 +390,9 @@ def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
(sequence "S%u", 7, 0))>;
-def CSR_O32_FP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 20), RA, FP,
- (sequence "S%u", 7, 0))>;
+def CSR_O32_FP64 :
+ CalleeSavedRegs<(add (decimate (sequence "D%u_64", 30, 20), 2), RA, FP,
+ (sequence "S%u", 7, 0))>;
def CSR_N32 : CalleeSavedRegs<(add D20_64, D22_64, D24_64, D26_64, D28_64,
D30_64, RA_64, FP_64, GP_64,
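One point worth spelling out from the calling-convention changes above: CCPromoteToUpperBitsInType places a narrow value in the high bits of the 64-bit register on big-endian targets, so the in-memory image at the lowest address is unchanged when the register is stored. A standalone arithmetic sketch of the two promotions (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t V = 0xDEADBEEF;
  uint64_t BigEndianReg = static_cast<uint64_t>(V) << 32; // value in the upper bits
  uint64_t LittleEndianReg = V;                           // value in the lower bits
  assert((BigEndianReg >> 32) == V);
  assert((LittleEndianReg & 0xFFFFFFFF) == V);
  return 0;
}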
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
deleted file mode 100644
index 151ef13..0000000
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ /dev/null
@@ -1,434 +0,0 @@
-//===-- Mips/MipsCodeEmitter.cpp - Convert Mips Code to Machine Code ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===---------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the Mips machine instructions
-// into relocatable machine code.
-//
-//===---------------------------------------------------------------------===//
-
-#include "Mips.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "MipsInstrInfo.h"
-#include "MipsRelocations.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#ifndef NDEBUG
-#include <iomanip>
-#endif
-
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-STATISTIC(NumEmitted, "Number of machine instructions emitted");
-
-namespace {
-
-class MipsCodeEmitter : public MachineFunctionPass {
- MipsJITInfo *JTI;
- const MipsInstrInfo *II;
- const DataLayout *TD;
- const MipsSubtarget *Subtarget;
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- const std::vector<MachineConstantPoolEntry> *MCPEs;
- const std::vector<MachineJumpTableEntry> *MJTEs;
- bool IsPIC;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo> ();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
-
-public:
- MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr),
- TM(tm), MCE(mce), MCPEs(nullptr), MJTEs(nullptr),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "Mips Machine Code Emitter";
- }
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
- void emitInstruction(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB);
-
-private:
-
- void emitWord(unsigned Word);
-
- /// Routines that handle operands which add machine relocations which are
- /// fixed up by the relocation stage.
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getJumpTargetOpValueMM(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getBranchTargetOpValueMM(const MachineInstr &MI,
- unsigned OpNo) const;
-
- unsigned getBranchTarget21OpValue(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getBranchTarget26OpValue(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getJumpOffset16OpValue(const MachineInstr &MI, unsigned OpNo) const;
-
- unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMSAMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSimm18Lsl3Encoding(const MachineInstr &MI, unsigned OpNo) const;
-
- /// Expand pseudo instructions with accumulator register operands.
- void expandACCInstr(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB, unsigned Opc) const;
-
- /// \brief Expand pseudo instruction. Return true if MI was expanded.
- bool expandPseudos(MachineBasicBlock::instr_iterator &MI,
- MachineBasicBlock &MBB) const;
-};
-}
-
-char MipsCodeEmitter::ID = 0;
-
-bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- MipsTargetMachine &Target = static_cast<MipsTargetMachine &>(
- const_cast<TargetMachine &>(MF.getTarget()));
-
- JTI = Target.getJITInfo();
- II = Target.getInstrInfo();
- TD = Target.getDataLayout();
- Subtarget = &TM.getSubtarget<MipsSubtarget> ();
- MCPEs = &MF.getConstantPool()->getConstants();
- MJTEs = nullptr;
- if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
- JTI->Initialize(MF, IsPIC, Subtarget->isLittle());
- MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
-
- do {
- DEBUG(errs() << "JITTing function '"
- << MF.getName() << "'\n");
- MCE.startFunction(MF);
-
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB){
- MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
- E = MBB->instr_end(); I != E;)
- emitInstruction(*I++, *MBB);
- }
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const {
- // NOTE: This relocations are for static.
- uint64_t TSFlags = MI.getDesc().TSFlags;
- uint64_t Form = TSFlags & MipsII::FormMask;
- if (Form == MipsII::FrmJ)
- return Mips::reloc_mips_26;
- if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
- && MI.isBranch())
- return Mips::reloc_mips_pc16;
- if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
- return Mips::reloc_mips_hi;
- return Mips::reloc_mips_lo;
-}
-
-unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- MachineOperand MO = MI.getOperand(OpNo);
- if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
- else
- llvm_unreachable("Unexpected jump target operand kind.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getJumpTargetOpValueMM(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getBranchTargetOpValueMM(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getBranchTarget21OpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getBranchTarget26OpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getJumpOffset16OpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
- unsigned OpNo) const {
- MachineOperand MO = MI.getOperand(OpNo);
- emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
- return 0;
-}
-
-unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
- assert(MI.getOperand(OpNo).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16;
- return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
-}
-
-unsigned MipsCodeEmitter::getMemEncodingMMImm12(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getMSAMemEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // size is encoded as size-1.
- return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
-}
-
-unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // size is encoded as pos+size-1.
- return getMachineOpValue(MI, MI.getOperand(OpNo-1)) +
- getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
-}
-
-unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getSimm18Lsl3Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Unimplemented function.");
- return 0;
-}
-
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
-unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
- if (MO.isReg())
- return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
- else if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
- else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO));
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
- else
- llvm_unreachable("Unable to encode MachineOperand!");
- return 0;
-}
-
-void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const {
- MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), 0,
- MayNeedFarStub));
-}
-
-void MipsCodeEmitter::
-emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES, 0, 0));
-}
-
-void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, CPI, 0, false));
-}
-
-void MipsCodeEmitter::
-emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- Reloc, JTIndex, 0, false));
-}
-
-void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- Reloc, BB));
-}
-
-void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
-
- // Expand pseudo instruction. Skip if MI was not expanded.
- if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) &&
- !expandPseudos(MI, MBB))
- return;
-
- MCE.processDebugLoc(MI->getDebugLoc(), true);
-
- emitWord(getBinaryCodeForInstr(*MI));
- ++NumEmitted; // Keep track of the # of mi's emitted
-
- MCE.processDebugLoc(MI->getDebugLoc(), false);
-}
-
-void MipsCodeEmitter::emitWord(unsigned Word) {
- DEBUG(errs() << " 0x";
- errs().write_hex(Word) << "\n");
- if (Subtarget->isLittle())
- MCE.emitWordLE(Word);
- else
- MCE.emitWordBE(Word);
-}
-
-void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB,
- unsigned Opc) const {
- // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1".
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opc))
- .addReg(MI->getOperand(1).getReg()).addReg(MI->getOperand(2).getReg());
-}
-
-bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
- MachineBasicBlock &MBB) const {
- switch (MI->getOpcode()) {
- case Mips::NOP:
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO)
- .addReg(Mips::ZERO).addImm(0);
- break;
- case Mips::B:
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BEQ)).addReg(Mips::ZERO)
- .addReg(Mips::ZERO).addOperand(MI->getOperand(0));
- break;
- case Mips::TRAP:
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BREAK)).addImm(0)
- .addImm(0);
- break;
- case Mips::JALRPseudo:
- BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
- .addReg(MI->getOperand(0).getReg());
- break;
- case Mips::PseudoMULT:
- expandACCInstr(MI, MBB, Mips::MULT);
- break;
- case Mips::PseudoMULTu:
- expandACCInstr(MI, MBB, Mips::MULTu);
- break;
- case Mips::PseudoSDIV:
- expandACCInstr(MI, MBB, Mips::SDIV);
- break;
- case Mips::PseudoUDIV:
- expandACCInstr(MI, MBB, Mips::UDIV);
- break;
- case Mips::PseudoMADD:
- expandACCInstr(MI, MBB, Mips::MADD);
- break;
- case Mips::PseudoMADDU:
- expandACCInstr(MI, MBB, Mips::MADDU);
- break;
- case Mips::PseudoMSUB:
- expandACCInstr(MI, MBB, Mips::MSUB);
- break;
- case Mips::PseudoMSUBU:
- expandACCInstr(MI, MBB, Mips::MSUBU);
- break;
- default:
- return false;
- }
-
- (MI--)->eraseFromBundle();
- return true;
-}
-
-/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
-/// code to the specified MCE object.
-FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new MipsCodeEmitter(TM, JCE);
-}
-
-#include "MipsGenCodeEmitter.inc"
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index a37062f..c4e5ac0 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -28,6 +28,7 @@
#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -343,7 +344,6 @@ namespace {
const TargetMachine &TM;
bool IsPIC;
- unsigned ABI;
const MipsSubtarget *STI;
const Mips16InstrInfo *TII;
MipsFunctionInfo *MFI;
@@ -365,11 +365,9 @@ namespace {
public:
static char ID;
MipsConstantIslands(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_),
- ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
- STI(&TM.getSubtarget<MipsSubtarget>()), MF(nullptr), MCP(nullptr),
- PrescannedForConstants(false){}
+ : MachineFunctionPass(ID), TM(tm),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), STI(nullptr),
+ MF(nullptr), MCP(nullptr), PrescannedForConstants(false) {}
const char *getPassName() const override {
return "Mips Constant Islands";
@@ -450,12 +448,14 @@ bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// FIXME:
MF = &mf;
MCP = mf.getConstantPool();
+ STI = &mf.getTarget().getSubtarget<MipsSubtarget>();
DEBUG(dbgs() << "constant island machine function " << "\n");
- if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode() ||
- !MipsSubtarget::useConstantIslands()) {
+ if (!STI->inMips16Mode() || !MipsSubtarget::useConstantIslands()) {
return false;
}
- TII = (const Mips16InstrInfo*)MF->getTarget().getInstrInfo();
+ TII = (const Mips16InstrInfo *)MF->getTarget()
+ .getSubtargetImpl()
+ ->getInstrInfo();
MFI = MF->getInfo<MipsFunctionInfo>();
DEBUG(dbgs() << "constant island processing " << "\n");
//
@@ -562,7 +562,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = *MF->getSubtarget().getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -588,9 +588,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
if (InsPoint[a] == InsAt)
InsPoint[a] = CPEMI;
// Add a new CPEntry, but no corresponding CPUser yet.
- std::vector<CPEntry> CPEs;
- CPEs.push_back(CPEntry(CPEMI, i));
- CPEntries.push_back(CPEs);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, i));
++NumCPEs;
DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
<< Size << ", align = " << Align <<'\n');
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index bcfbc12..d7ba6d4 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -275,7 +275,11 @@ static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
#ifndef NDEBUG
const MachineFunction &MF = *MBB.getParent();
- assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
+ assert(MF.getTarget()
+ .getSubtargetImpl()
+ ->getRegisterInfo()
+ ->getAllocatableSet(MF)
+ .test(R) &&
"Shouldn't move an instruction with unallocatable registers across "
"basic block boundaries.");
#endif
@@ -286,8 +290,8 @@ static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
}
RegDefsUses::RegDefsUses(TargetMachine &TM)
- : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false),
- Uses(TRI.getNumRegs(), false) {}
+ : TRI(*TM.getSubtargetImpl()->getRegisterInfo()),
+ Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {}
void RegDefsUses::init(const MachineInstr &MI) {
// Add all register operands which are explicit and non-variadic.
@@ -451,7 +455,8 @@ bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) {
if (MayStore)
- return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad;
+ return !Defs.insert(V).second || Uses.count(V) || SeenNoObjStore ||
+ SeenNoObjLoad;
Uses.insert(V);
return Defs.count(V) || SeenNoObjStore;
@@ -493,30 +498,38 @@ getUnderlyingObjects(const MachineInstr &MI,
/// We assume there is only one delay slot per delayed instruction.
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ bool InMicroMipsMode = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode();
for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
if (!hasUnoccupiedSlot(&*I))
continue;
- ++FilledSlots;
- Changed = true;
-
- // Delay slot filling is disabled at -O0.
- if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
- if (searchBackward(MBB, I))
- continue;
+ // For microMIPS, at the moment, do not fill delay slots of call
+ // instructions.
+ //
+ // TODO: Support for replacing regular call instructions with corresponding
+ // short delay slot instructions should be implemented.
+ if (!InMicroMipsMode || !I->isCall()) {
+ ++FilledSlots;
+ Changed = true;
+
+ // Delay slot filling is disabled at -O0.
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
+ if (searchBackward(MBB, I))
+ continue;
- if (I->isTerminator()) {
- if (searchSuccBBs(MBB, I))
+ if (I->isTerminator()) {
+ if (searchSuccBBs(MBB, I))
+ continue;
+ } else if (searchForward(MBB, I)) {
continue;
- } else if (searchForward(MBB, I)) {
- continue;
+ }
}
}
// Bundle the NOP to the instruction with the delay slot.
- const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ const MipsInstrInfo *TII = static_cast<const MipsInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
MIBundleBuilder(MBB, I, std::next(I, 2));
}
@@ -554,9 +567,10 @@ bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
// branches are not checked because non-NaCl targets never put them in
// delay slots.
unsigned AddrIdx;
- if ((isBasePlusOffsetMemoryAccess(I->getOpcode(), &AddrIdx)
- && baseRegNeedsLoadStoreMask(I->getOperand(AddrIdx).getReg()))
- || I->modifiesRegister(Mips::SP, TM.getRegisterInfo()))
+ if ((isBasePlusOffsetMemoryAccess(I->getOpcode(), &AddrIdx) &&
+ baseRegNeedsLoadStoreMask(I->getOperand(AddrIdx).getReg())) ||
+ I->modifiesRegister(Mips::SP,
+ TM.getSubtargetImpl()->getRegisterInfo()))
continue;
}
@@ -667,7 +681,7 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
std::pair<MipsInstrInfo::BranchType, MachineInstr *>
Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
SmallVector<MachineInstr*, 2> BranchInstrs;
SmallVector<MachineOperand, 2> Cond;
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 617801b..2bb16e3 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -8,6 +8,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "MipsCCState.h"
#include "MipsRegisterInfo.h"
#include "MipsISelLowering.h"
#include "MipsMachineFunction.h"
@@ -18,22 +19,43 @@ using namespace llvm;
namespace {
-// All possible address modes.
-typedef struct Address {
- enum { RegBase, FrameIndexBase } BaseType;
+class MipsFastISel final : public FastISel {
- union {
- unsigned Reg;
- int FI;
- } Base;
+ // All possible address modes.
+ class Address {
+ public:
+ typedef enum { RegBase, FrameIndexBase } BaseKind;
- int64_t Offset;
+ private:
+ BaseKind Kind;
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
- // Innocuous defaults for our address.
- Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
-} Address;
+ int64_t Offset;
-class MipsFastISel final : public FastISel {
+ const GlobalValue *GV;
+
+ public:
+ // Innocuous defaults for our address.
+ Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; }
+ void setKind(BaseKind K) { Kind = K; }
+ BaseKind getKind() const { return Kind; }
+ bool isRegBase() const { return Kind == RegBase; }
+ void setReg(unsigned Reg) {
+ assert(isRegBase() && "Invalid base register access!");
+ Base.Reg = Reg;
+ }
+ unsigned getReg() const {
+ assert(isRegBase() && "Invalid base register access!");
+ return Base.Reg;
+ }
+ void setOffset(int64_t Offset_) { Offset = Offset_; }
+ int64_t getOffset() const { return Offset; }
+ void setGlobalValue(const GlobalValue *G) { GV = G; }
+ const GlobalValue *getGlobalValue() { return GV; }
+ };
/// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -47,74 +69,267 @@ class MipsFastISel final : public FastISel {
// Convenience variables to avoid some queries.
LLVMContext *Context;
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
+
bool TargetSupported;
+ bool UnsupportedFPMode; // To allow fast-isel to proceed without handling
+ // floating point, rather than rejecting fast-isel
+ // altogether in that configuration.
+
+private:
+ // Selection routines.
+ bool selectLoad(const Instruction *I);
+ bool selectStore(const Instruction *I);
+ bool selectBranch(const Instruction *I);
+ bool selectCmp(const Instruction *I);
+ bool selectFPExt(const Instruction *I);
+ bool selectFPTrunc(const Instruction *I);
+ bool selectFPToInt(const Instruction *I, bool IsSigned);
+ bool selectRet(const Instruction *I);
+ bool selectTrunc(const Instruction *I);
+ bool selectIntExt(const Instruction *I);
+
+ // Utility helper routines.
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool computeAddress(const Value *Obj, Address &Addr);
+ bool computeCallAddress(const Value *V, Address &Addr);
+
+ // Emit helper routines.
+ bool emitCmp(unsigned DestReg, const CmpInst *CI);
+ bool emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0);
+ bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
+ MachineMemOperand *MMO = nullptr);
+ bool emitStore(MVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
+ unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ bool emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg,
+ bool IsZExt);
+ bool emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg);
+
+ bool emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg);
+ bool emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg);
+ bool emitIntSExt32r2(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg);
+
+ unsigned getRegEnsuringSimpleIntegerWidening(const Value *, bool IsUnsigned);
+
+ unsigned materializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned materializeGV(const GlobalValue *GV, MVT VT);
+ unsigned materializeInt(const Constant *C, MVT VT);
+ unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
+
+ MachineInstrBuilder emitInst(unsigned Opc) {
+ return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ }
+ MachineInstrBuilder emitInst(unsigned Opc, unsigned DstReg) {
+ return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ DstReg);
+ }
+ MachineInstrBuilder emitInstStore(unsigned Opc, unsigned SrcReg,
+ unsigned MemReg, int64_t MemOffset) {
+ return emitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset);
+ }
+ MachineInstrBuilder emitInstLoad(unsigned Opc, unsigned DstReg,
+ unsigned MemReg, int64_t MemOffset) {
+ return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
+ }
+ // For some reason, this default is not generated by tablegen,
+ // so we explicitly generate it here.
+ //
+ unsigned fastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill, uint64_t imm1,
+ uint64_t imm2, unsigned Op3, bool Op3IsKill) {
+ return 0;
+ }
+
+ // Call handling routines.
+private:
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
+ bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
+ unsigned &NumBytes);
+ bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
public:
+ // Backend specific FastISel code.
explicit MipsFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo),
M(const_cast<Module &>(*funcInfo.Fn->getParent())),
- TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getSubtargetImpl()->getInstrInfo()),
+ TLI(*TM.getSubtargetImpl()->getTargetLowering()),
Subtarget(&TM.getSubtarget<MipsSubtarget>()) {
MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
Context = &funcInfo.Fn->getContext();
TargetSupported = ((Subtarget->getRelocationModel() == Reloc::PIC_) &&
- (Subtarget->hasMips32r2() && (Subtarget->isABI_O32())));
+ ((Subtarget->hasMips32r2() || Subtarget->hasMips32()) &&
+ (Subtarget->isABI_O32())));
+ UnsupportedFPMode = Subtarget->isFP64bit();
}
- bool TargetSelectInstruction(const Instruction *I) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ bool fastSelectInstruction(const Instruction *I) override;
- bool ComputeAddress(const Value *Obj, Address &Addr);
+#include "MipsGenFastISel.inc"
+};
+} // end anonymous namespace.
-private:
- bool EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
- unsigned Alignment = 0);
- bool EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
- unsigned Alignment = 0);
- bool SelectLoad(const Instruction *I);
- bool SelectRet(const Instruction *I);
- bool SelectStore(const Instruction *I);
+static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) LLVM_ATTRIBUTE_UNUSED;
- bool isTypeLegal(Type *Ty, MVT &VT);
- bool isLoadTypeLegal(Type *Ty, MVT &VT);
+static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ llvm_unreachable("should not be called");
+}
- unsigned MaterializeFP(const ConstantFP *CFP, MVT VT);
- unsigned MaterializeGV(const GlobalValue *GV, MVT VT);
- unsigned MaterializeInt(const Constant *C, MVT VT);
- unsigned Materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
+bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) {
+ llvm_unreachable("should not be called");
+}
- // for some reason, this default is not generated by tablegen
- // so we explicitly generate it here.
- //
- unsigned FastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill, uint64_t imm1,
- uint64_t imm2, unsigned Op3, bool Op3IsKill) {
+#include "MipsGenCallingConv.inc"
+
+CCAssignFn *MipsFastISel::CCAssignFnForCall(CallingConv::ID CC) const {
+ return CC_MipsO32;
+}
+
+unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) {
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return 0;
- }
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ int64_t Imm;
+ if ((VT != MVT::i1) && CI->isNegative())
+ Imm = CI->getSExtValue();
+ else
+ Imm = CI->getZExtValue();
+ return materialize32BitInt(Imm, RC);
+}
- MachineInstrBuilder EmitInst(unsigned Opc) {
- return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
- }
+unsigned MipsFastISel::materialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned ResultReg = createResultReg(RC);
- MachineInstrBuilder EmitInst(unsigned Opc, unsigned DstReg) {
- return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
- DstReg);
+ if (isInt<16>(Imm)) {
+ unsigned Opc = Mips::ADDiu;
+ emitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm);
+ return ResultReg;
+ } else if (isUInt<16>(Imm)) {
+ emitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm);
+ return ResultReg;
}
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+ if (Lo) {
+ // Both Lo and Hi have nonzero bits.
+ unsigned TmpReg = createResultReg(RC);
+ emitInst(Mips::LUi, TmpReg).addImm(Hi);
+ emitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo);
+ } else {
+ emitInst(Mips::LUi, ResultReg).addImm(Hi);
+ }
+ return ResultReg;
+}
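As a standalone aside (not part of the patch), the Hi/Lo split performed by materialize32BitInt above can be sanity-checked in plain C++; the constant and names below are purely illustrative:

#include <cassert>
#include <cstdint>

// Mirror of the LUi/ORi split used above: rebuild a 32-bit value from its
// upper and lower halves and check that it round-trips.
int main() {
  uint32_t Imm = 0x12345678;           // fits neither isInt<16> nor isUInt<16>
  uint32_t Lo = Imm & 0xFFFF;          // goes into ORi
  uint32_t Hi = (Imm >> 16) & 0xFFFF;  // goes into LUi (placed in the top half)
  uint32_t Rebuilt = (Hi << 16) | Lo;
  assert(Rebuilt == Imm);
  return 0;
}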
- MachineInstrBuilder EmitInstStore(unsigned Opc, unsigned SrcReg,
- unsigned MemReg, int64_t MemOffset) {
- return EmitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset);
+unsigned MipsFastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
+ if (UnsupportedFPMode)
+ return 0;
+ int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ if (VT == MVT::f32) {
+ const TargetRegisterClass *RC = &Mips::FGR32RegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned TempReg = materialize32BitInt(Imm, &Mips::GPR32RegClass);
+ emitInst(Mips::MTC1, DestReg).addReg(TempReg);
+ return DestReg;
+ } else if (VT == MVT::f64) {
+ const TargetRegisterClass *RC = &Mips::AFGR64RegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned TempReg1 = materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass);
+ unsigned TempReg2 =
+ materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass);
+ emitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1);
+ return DestReg;
}
+ return 0;
+}
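A similar standalone sketch for the f64 path above: the double's raw bit pattern is split into two 32-bit halves, which is what the two materialize32BitInt calls feed into BuildPairF64 (values and names here are illustrative only):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double D = 3.141592653589793;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));        // raw IEEE-754 bits of the constant
  uint32_t Hi = uint32_t(Bits >> 32);          // materialize32BitInt(Imm >> 32, ...)
  uint32_t Lo = uint32_t(Bits & 0xFFFFFFFF);   // materialize32BitInt(Imm & 0xFFFFFFFF, ...)
  uint64_t Rebuilt = (uint64_t(Hi) << 32) | Lo;
  double D2;
  std::memcpy(&D2, &Rebuilt, sizeof(D2));
  assert(D2 == D);
  return 0;
}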
- MachineInstrBuilder EmitInstLoad(unsigned Opc, unsigned DstReg,
- unsigned MemReg, int64_t MemOffset) {
- return EmitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
+unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32)
+ return 0;
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ unsigned DestReg = createResultReg(RC);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ bool IsThreadLocal = GVar && GVar->isThreadLocal();
+ // TLS not supported at this time.
+ if (IsThreadLocal)
+ return 0;
+ emitInst(Mips::LW, DestReg)
+ .addReg(MFI->getGlobalBaseReg())
+ .addGlobalAddress(GV, 0, MipsII::MO_GOT);
+ if ((GV->hasInternalLinkage() ||
+ (GV->hasLocalLinkage() && !isa<Function>(GV)))) {
+ unsigned TempReg = createResultReg(RC);
+ emitInst(Mips::ADDiu, TempReg)
+ .addReg(DestReg)
+ .addGlobalAddress(GV, 0, MipsII::MO_ABS_LO);
+ DestReg = TempReg;
}
+ return DestReg;
+}
-#include "MipsGenFastISel.inc"
-};
+// Materialize a constant into a register, and return the register
+// number (or zero if we failed to handle it).
+unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple())
+ return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return (UnsupportedFPMode) ? 0 : materializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return materializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return materializeInt(C, VT);
+
+ return 0;
+}
+
+bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
+ // This construct looks a bit awkward, but it is how other ports handle this,
+ // and as this function is fleshed out, the cases that currently
+ // return false will gain additional handling.
+ //
+ if (isa<Instruction>(Obj))
+ return false;
+ else if (isa<ConstantExpr>(Obj))
+ return false;
+ Addr.setReg(getRegForValue(Obj));
+ return Addr.getReg() != 0;
+}
+
+bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) {
+ const GlobalValue *GV = dyn_cast<GlobalValue>(V);
+ if (!GV)
+ return false;
+ if (isa<Function>(GV) && cast<Function>(GV)->isIntrinsic())
+ return false;
+ Addr.setGlobalValue(GV);
+ return true;
+}
bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
@@ -138,21 +353,134 @@ bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
return true;
return false;
}
-
-bool MipsFastISel::ComputeAddress(const Value *Obj, Address &Addr) {
- // This construct looks a big awkward but it is how other ports handle this
- // and as this function is more fully completed, these cases which
- // return false will have additional code in them.
- //
- if (isa<Instruction>(Obj))
+// Because of how emitCmp is called with fast-isel, you can
+// end up with redundant "andi" instructions after the sequences emitted below.
+// We should try to solve this issue in the future.
+//
+bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
+ const Value *Left = CI->getOperand(0), *Right = CI->getOperand(1);
+ bool IsUnsigned = CI->isUnsigned();
+ unsigned LeftReg = getRegEnsuringSimpleIntegerWidening(Left, IsUnsigned);
+ if (LeftReg == 0)
return false;
- else if (isa<ConstantExpr>(Obj))
+ unsigned RightReg = getRegEnsuringSimpleIntegerWidening(Right, IsUnsigned);
+ if (RightReg == 0)
return false;
- Addr.Base.Reg = getRegForValue(Obj);
- return Addr.Base.Reg != 0;
-}
+ CmpInst::Predicate P = CI->getPredicate();
-bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ switch (P) {
+ default:
+ return false;
+ case CmpInst::ICMP_EQ: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg);
+ emitInst(Mips::SLTiu, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::ICMP_NE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg);
+ emitInst(Mips::SLTu, ResultReg).addReg(Mips::ZERO).addReg(TempReg);
+ break;
+ }
+ case CmpInst::ICMP_UGT: {
+ emitInst(Mips::SLTu, ResultReg).addReg(RightReg).addReg(LeftReg);
+ break;
+ }
+ case CmpInst::ICMP_ULT: {
+ emitInst(Mips::SLTu, ResultReg).addReg(LeftReg).addReg(RightReg);
+ break;
+ }
+ case CmpInst::ICMP_UGE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLTu, TempReg).addReg(LeftReg).addReg(RightReg);
+ emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::ICMP_ULE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLTu, TempReg).addReg(RightReg).addReg(LeftReg);
+ emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::ICMP_SGT: {
+ emitInst(Mips::SLT, ResultReg).addReg(RightReg).addReg(LeftReg);
+ break;
+ }
+ case CmpInst::ICMP_SLT: {
+ emitInst(Mips::SLT, ResultReg).addReg(LeftReg).addReg(RightReg);
+ break;
+ }
+ case CmpInst::ICMP_SGE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLT, TempReg).addReg(LeftReg).addReg(RightReg);
+ emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::ICMP_SLE: {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLT, TempReg).addReg(RightReg).addReg(LeftReg);
+ emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
+ break;
+ }
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: {
+ if (UnsupportedFPMode)
+ return false;
+ bool IsFloat = Left->getType()->isFloatTy();
+ bool IsDouble = Left->getType()->isDoubleTy();
+ if (!IsFloat && !IsDouble)
+ return false;
+ unsigned Opc, CondMovOpc;
+ switch (P) {
+ case CmpInst::FCMP_OEQ:
+ Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32;
+ CondMovOpc = Mips::MOVT_I;
+ break;
+ case CmpInst::FCMP_UNE:
+ Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32;
+ CondMovOpc = Mips::MOVF_I;
+ break;
+ case CmpInst::FCMP_OLT:
+ Opc = IsFloat ? Mips::C_OLT_S : Mips::C_OLT_D32;
+ CondMovOpc = Mips::MOVT_I;
+ break;
+ case CmpInst::FCMP_OLE:
+ Opc = IsFloat ? Mips::C_OLE_S : Mips::C_OLE_D32;
+ CondMovOpc = Mips::MOVT_I;
+ break;
+ case CmpInst::FCMP_OGT:
+ Opc = IsFloat ? Mips::C_ULE_S : Mips::C_ULE_D32;
+ CondMovOpc = Mips::MOVF_I;
+ break;
+ case CmpInst::FCMP_OGE:
+ Opc = IsFloat ? Mips::C_ULT_S : Mips::C_ULT_D32;
+ CondMovOpc = Mips::MOVF_I;
+ break;
+ default:
+ llvm_unreachable("Only switching of a subset of CCs.");
+ }
+ unsigned RegWithZero = createResultReg(&Mips::GPR32RegClass);
+ unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0);
+ emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1);
+ emitInst(Opc).addReg(LeftReg).addReg(RightReg).addReg(
+ Mips::FCC0, RegState::ImplicitDefine);
+ MachineInstrBuilder MI = emitInst(CondMovOpc, ResultReg)
+ .addReg(RegWithOne)
+ .addReg(Mips::FCC0)
+ .addReg(RegWithZero, RegState::Implicit);
+ MI->tieOperands(0, 3);
+ break;
+ }
+ }
+ return true;
+}
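The integer cases above rely on simple boolean identities so that each predicate costs at most two instructions (SLT/SLTu plus XORi or SLTiu). A small standalone check of those identities, assuming nothing beyond standard C++:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// (a >= b) == ((a < b) ^ 1), which is SLTu followed by XORi ..., 1, and
// (a == b) == ((a ^ b) < 1u), which is XOR followed by SLTiu ..., 1.
int main() {
  for (uint32_t a : {0u, 1u, 7u, 0xFFFFFFFFu})
    for (uint32_t b : {0u, 1u, 7u, 0xFFFFFFFFu}) {
      uint32_t Slt = (a < b) ? 1u : 0u;                       // SLTu
      assert(((Slt ^ 1u) != 0u) == (a >= b));                 // XORi ..., 1
      uint32_t X = a ^ b;                                     // XOR
      assert(((X < 1u) ? 1u : 0u) == ((a == b) ? 1u : 0u));   // SLTiu ..., 1
    }
  return 0;
}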
+bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment) {
//
// more cases will be handled here in following patches.
@@ -175,11 +503,15 @@ bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
break;
}
case MVT::f32: {
+ if (UnsupportedFPMode)
+ return false;
ResultReg = createResultReg(&Mips::FGR32RegClass);
Opc = Mips::LWC1;
break;
}
case MVT::f64: {
+ if (UnsupportedFPMode)
+ return false;
ResultReg = createResultReg(&Mips::AFGR64RegClass);
Opc = Mips::LDC1;
break;
@@ -187,31 +519,11 @@ bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
default:
return false;
}
- EmitInstLoad(Opc, ResultReg, Addr.Base.Reg, Addr.Offset);
+ emitInstLoad(Opc, ResultReg, Addr.getReg(), Addr.getOffset());
return true;
}
-// Materialize a constant into a register, and return the register
-// number (or zero if we failed to handle it).
-unsigned MipsFastISel::TargetMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
-
- // Only handle simple types.
- if (!CEVT.isSimple())
- return 0;
- MVT VT = CEVT.getSimpleVT();
-
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
- return MaterializeFP(CFP, VT);
- else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return MaterializeGV(GV, VT);
- else if (isa<ConstantInt>(C))
- return MaterializeInt(C, VT);
-
- return 0;
-}
-
-bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment) {
//
// more cases will be handled here in following patches.
@@ -228,19 +540,23 @@ bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
Opc = Mips::SW;
break;
case MVT::f32:
+ if (UnsupportedFPMode)
+ return false;
Opc = Mips::SWC1;
break;
case MVT::f64:
+ if (UnsupportedFPMode)
+ return false;
Opc = Mips::SDC1;
break;
default:
return false;
}
- EmitInstStore(Opc, SrcReg, Addr.Base.Reg, Addr.Offset);
+ emitInstStore(Opc, SrcReg, Addr.getReg(), Addr.getOffset());
return true;
}
-bool MipsFastISel::SelectLoad(const Instruction *I) {
+bool MipsFastISel::selectLoad(const Instruction *I) {
// Atomic loads need special handling.
if (cast<LoadInst>(I)->isAtomic())
return false;
@@ -252,17 +568,17 @@ bool MipsFastISel::SelectLoad(const Instruction *I) {
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(0), Addr))
+ if (!computeAddress(I->getOperand(0), Addr))
return false;
unsigned ResultReg;
- if (!EmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ if (!emitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
-bool MipsFastISel::SelectStore(const Instruction *I) {
+bool MipsFastISel::selectStore(const Instruction *I) {
Value *Op0 = I->getOperand(0);
unsigned SrcReg = 0;
@@ -282,15 +598,394 @@ bool MipsFastISel::SelectStore(const Instruction *I) {
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(1), Addr))
+ if (!computeAddress(I->getOperand(1), Addr))
return false;
- if (!EmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ if (!emitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
return false;
return true;
}
-bool MipsFastISel::SelectRet(const Instruction *I) {
+//
+// This can cause a redundant sltiu to be generated.
+// FIXME: try to eliminate this in a future patch.
+//
+bool MipsFastISel::selectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *BrBB = FuncInfo.MBB;
+ //
+ // TBB is the basic block for the case where the comparison is true.
+ // FBB is the basic block for the case where the comparison is false.
+ // if (cond) goto TBB
+ // goto FBB
+ // TBB:
+ //
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+ BI->getCondition();
+ // For now, just try the simplest case where it's fed by a compare.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ unsigned CondReg = createResultReg(&Mips::GPR32RegClass);
+ if (!emitCmp(CondReg, CI))
+ return false;
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
+ .addReg(CondReg)
+ .addMBB(TBB);
+ fastEmitBranch(FBB, DbgLoc);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ return false;
+}
+
+bool MipsFastISel::selectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!emitCmp(ResultReg, CI))
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point extend instruction.
+bool MipsFastISel::selectFPExt(const Instruction *I) {
+ if (UnsupportedFPMode)
+ return false;
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f32 || DestVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg =
+ getRegForValue(Src); // This must be a 32-bit floating point register class;
+ // maybe we should handle this differently.
+ if (!SrcReg)
+ return false;
+
+ unsigned DestReg = createResultReg(&Mips::AFGR64RegClass);
+ emitInst(Mips::CVT_D32_S, DestReg).addReg(SrcReg);
+ updateValueMap(I, DestReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point truncate instruction.
+bool MipsFastISel::selectFPTrunc(const Instruction *I) {
+ if (UnsupportedFPMode)
+ return false;
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f64 || DestVT != MVT::f32)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ unsigned DestReg = createResultReg(&Mips::FGR32RegClass);
+ if (!DestReg)
+ return false;
+
+ emitInst(Mips::CVT_S_D32, DestReg).addReg(SrcReg);
+ updateValueMap(I, DestReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) {
+ if (UnsupportedFPMode)
+ return false;
+ MVT DstVT, SrcVT;
+ if (!IsSigned)
+ return false; // We don't handle this case yet. There is no native
+ // instruction for this but it can be synthesized.
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::i32)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+ if (!isTypeLegal(SrcTy, SrcVT))
+ return false;
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // Determine the opcode for the conversion, which takes place
+ // entirely within FPRs.
+ unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+ unsigned TempReg = createResultReg(&Mips::FGR32RegClass);
+ unsigned Opc;
+
+ if (SrcVT == MVT::f32)
+ Opc = Mips::TRUNC_W_S;
+ else
+ Opc = Mips::TRUNC_W_D32;
+
+ // Generate the convert.
+ emitInst(Opc, TempReg).addReg(SrcReg);
+
+ emitInst(Mips::MFC1, DestReg).addReg(TempReg);
+
+ updateValueMap(I, DestReg);
+ return true;
+}
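The TRUNC_W_S/TRUNC_W_D32 plus MFC1 sequence above performs the same round-toward-zero conversion as a C++ cast; a minimal standalone check of that behaviour:

#include <cassert>

int main() {
  assert(static_cast<int>(2.9) == 2);    // truncation, not rounding
  assert(static_cast<int>(-2.9) == -2);  // truncates toward zero for negatives too
  return 0;
}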
+//
+bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
+ SmallVectorImpl<MVT> &OutVTs,
+ unsigned &NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+ // This is the minimum argument area used for A0-A3.
+ if (NumBytes < 16)
+ NumBytes = 16;
+
+ emitInst(Mips::ADJCALLSTACKDOWN).addImm(16);
+ // Process the args.
+ MVT firstMVT;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ const Value *ArgVal = CLI.OutVals[VA.getValNo()];
+ MVT ArgVT = OutVTs[VA.getValNo()];
+
+ if (i == 0) {
+ firstMVT = ArgVT;
+ if (ArgVT == MVT::f32) {
+ VA.convertToReg(Mips::F12);
+ } else if (ArgVT == MVT::f64) {
+ VA.convertToReg(Mips::D6);
+ }
+ } else if (i == 1) {
+ if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) {
+ if (ArgVT == MVT::f32) {
+ VA.convertToReg(Mips::F14);
+ } else if (ArgVT == MVT::f64) {
+ VA.convertToReg(Mips::D7);
+ }
+ }
+ }
+ if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32)) && VA.isMemLoc()) {
+ switch (VA.getLocMemOffset()) {
+ case 0:
+ VA.convertToReg(Mips::A0);
+ break;
+ case 4:
+ VA.convertToReg(Mips::A1);
+ break;
+ case 8:
+ VA.convertToReg(Mips::A2);
+ break;
+ case 12:
+ VA.convertToReg(Mips::A3);
+ break;
+ default:
+ break;
+ }
+ }
+ unsigned ArgReg = getRegForValue(ArgVal);
+ if (!ArgReg)
+ return false;
+
+ // Handle arg promotion: SExt, ZExt, AExt.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::AExt:
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ MVT SrcVT = ArgVT;
+ ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
+ if (!ArgReg)
+ return false;
+ break;
+ }
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ MVT SrcVT = ArgVT;
+ ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
+ if (!ArgReg)
+ return false;
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
+ CLI.OutRegs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ llvm_unreachable("Mips does not use custom args.");
+ return false;
+ } else {
+ //
+ // FIXME: This path will currently return false. It was copied
+ // from the AArch64 port and should be essentially fine for Mips too.
+ // The work to finish up this path will be done in a follow-on patch.
+ //
+ assert(VA.isMemLoc() && "Assuming store on stack.");
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
+ // Need to store on the stack.
+ // FIXME: This alignment is incorrect but this path is disabled
+ // for now (will return false). We need to determine the right alignment
+ // based on the normal alignment for the underlying machine type.
+ //
+ unsigned ArgSize = RoundUpToAlignment(ArgVT.getSizeInBits(), 4);
+
+ unsigned BEAlign = 0;
+ if (ArgSize < 8 && !Subtarget->isLittle())
+ BEAlign = 8 - ArgSize;
+
+ Address Addr;
+ Addr.setKind(Address::RegBase);
+ Addr.setReg(Mips::SP);
+ Addr.setOffset(VA.getLocMemOffset() + BEAlign);
+
+ unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getStack(Addr.getOffset()),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+ (void)(MMO);
+ // if (!emitStore(ArgVT, ArgReg, Addr, MMO))
+ return false; // can't store on the stack yet.
+ }
+ }
+
+ return true;
+}
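For readers unfamiliar with O32, the routine above encodes two conventions: the outgoing argument area is never smaller than 16 bytes (home space for A0-A3), and stack offsets 0/4/8/12 for i32/f32 arguments correspond to A0-A3. A minimal sketch of that mapping, with purely illustrative helper names that are not LLVM API:

#include <algorithm>
#include <cassert>

// "This is the minimum argument area used for A0-A3."
static unsigned argAreaBytes(unsigned NextStackOffset) {
  return std::max(NextStackOffset, 16u);
}

// Offsets 0/4/8/12 are homed in A0/A1/A2/A3; anything beyond stays in memory.
static const char *gprForOffset(unsigned Off) {
  switch (Off) {
  case 0:  return "A0";
  case 4:  return "A1";
  case 8:  return "A2";
  case 12: return "A3";
  default: return nullptr;
  }
}

int main() {
  assert(argAreaBytes(8) == 16 && argAreaBytes(24) == 24);
  assert(gprForOffset(4) != nullptr && gprForOffset(16) == nullptr);
  return 0;
}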
+
+bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
+ unsigned NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+ emitInst(Mips::ADJCALLSTACKUP).addImm(16);
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_Mips);
+
+ // Only handle a single return value.
+ if (RVLocs.size() != 1)
+ return false;
+ // Copy all of the result registers out of their specified physreg.
+ MVT CopyVT = RVLocs[0].getValVT();
+ // Special handling for extended integers.
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
+ CopyVT = MVT::i32;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ CLI.InRegs.push_back(RVLocs[0].getLocReg());
+
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = 1;
+ }
+ return true;
+}
+
+bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
+ CallingConv::ID CC = CLI.CallConv;
+ bool IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ // const char *SymName = CLI.SymName;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (IsVarArg)
+ return false;
+
+ // FIXME: Only handle *simple* calls for now.
+ MVT RetVT;
+ if (CLI.RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(CLI.RetTy, RetVT))
+ return false;
+
+ for (auto Flag : CLI.OutFlags)
+ if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
+ return false;
+
+ // Set up the argument vectors.
+ SmallVector<MVT, 16> OutVTs;
+ OutVTs.reserve(CLI.OutVals.size());
+
+ for (auto *Val : CLI.OutVals) {
+ MVT VT;
+ if (!isTypeLegal(Val->getType(), VT) &&
+ !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
+ return false;
+
+ // We don't handle vector parameters yet.
+ if (VT.isVector() || VT.getSizeInBits() > 64)
+ return false;
+
+ OutVTs.push_back(VT);
+ }
+
+ Address Addr;
+ if (!computeCallAddress(Callee, Addr))
+ return false;
+
+ // Handle the arguments now that we've gotten them.
+ unsigned NumBytes;
+ if (!processCallArgs(CLI, OutVTs, NumBytes))
+ return false;
+
+ // Issue the call.
+ unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
+ emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress);
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR),
+ Mips::RA).addReg(Mips::T9);
+
+ // Add implicit physical register uses to the call.
+ for (auto Reg : CLI.OutRegs)
+ MIB.addReg(Reg, RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ CLI.Call = MIB;
+
+ // Add implicit physical register uses to the call.
+ for (auto Reg : CLI.OutRegs)
+ MIB.addReg(Reg, RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers. Proper
+ // defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ CLI.Call = MIB;
+ // Finish off the call including any return values.
+ return finishCall(CLI, RetVT, NumBytes);
+}
+
+bool MipsFastISel::selectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
if (!FuncInfo.CanLowerReturn)
@@ -298,98 +993,181 @@ bool MipsFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
return false;
}
- EmitInst(Mips::RetRA);
+ emitInst(Mips::RetRA);
return true;
}
-bool MipsFastISel::TargetSelectInstruction(const Instruction *I) {
- if (!TargetSupported)
+bool MipsFastISel::selectTrunc(const Instruction *I) {
+ // The high bits for a type smaller than the register size are assumed to be
+ // undefined.
+ Value *Op = I->getOperand(0);
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(Op->getType(), true);
+ DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
- switch (I->getOpcode()) {
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg)
+ return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ updateValueMap(I, SrcReg);
+ return true;
+}
+bool MipsFastISel::selectIntExt(const Instruction *I) {
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool isZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ if (!DestEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+
+ if (!emitIntExt(SrcVT, SrcReg, DestVT, ResultReg, isZExt))
+ return false;
+ updateValueMap(I, ResultReg);
+ return true;
+}
+bool MipsFastISel::emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg) {
+ unsigned ShiftAmt;
+ switch (SrcVT.SimpleTy) {
default:
+ return false;
+ case MVT::i8:
+ ShiftAmt = 24;
+ break;
+ case MVT::i16:
+ ShiftAmt = 16;
break;
- case Instruction::Load:
- return SelectLoad(I);
- case Instruction::Store:
- return SelectStore(I);
- case Instruction::Ret:
- return SelectRet(I);
}
- return false;
-}
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ emitInst(Mips::SLL, TempReg).addReg(SrcReg).addImm(ShiftAmt);
+ emitInst(Mips::SRA, DestReg).addReg(TempReg).addImm(ShiftAmt);
+ return true;
}
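The r1 fallback above synthesizes SEB/SEH with a shift pair: the value is moved to the top of the register (SLL) and arithmetic-shifted back down (SRA). A standalone C++ sketch of the same trick, assuming the usual two's-complement behaviour of int32_t right shifts:

#include <cassert>
#include <cstdint>

static int32_t sext(uint32_t V, unsigned Bits) {   // Bits is 8 or 16
  unsigned ShiftAmt = 32 - Bits;                   // 24 for i8, 16 for i16
  return int32_t(V << ShiftAmt) >> ShiftAmt;       // SLL then SRA
}

int main() {
  assert(sext(0x80, 8) == -128);
  assert(sext(0x7F, 8) == 127);
  assert(sext(0xFFFF, 16) == -1);
  return 0;
}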
-unsigned MipsFastISel::MaterializeFP(const ConstantFP *CFP, MVT VT) {
- int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- if (VT == MVT::f32) {
- const TargetRegisterClass *RC = &Mips::FGR32RegClass;
- unsigned DestReg = createResultReg(RC);
- unsigned TempReg = Materialize32BitInt(Imm, &Mips::GPR32RegClass);
- EmitInst(Mips::MTC1, DestReg).addReg(TempReg);
- return DestReg;
- } else if (VT == MVT::f64) {
- const TargetRegisterClass *RC = &Mips::AFGR64RegClass;
- unsigned DestReg = createResultReg(RC);
- unsigned TempReg1 = Materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass);
- unsigned TempReg2 =
- Materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass);
- EmitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1);
- return DestReg;
+bool MipsFastISel::emitIntSExt32r2(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg) {
+ switch (SrcVT.SimpleTy) {
+ default:
+ return false;
+ case MVT::i8:
+ emitInst(Mips::SEB, DestReg).addReg(SrcReg);
+ break;
+ case MVT::i16:
+ emitInst(Mips::SEH, DestReg).addReg(SrcReg);
+ break;
}
- return 0;
+ return true;
}
-unsigned MipsFastISel::MaterializeGV(const GlobalValue *GV, MVT VT) {
- // For now 32-bit only.
- if (VT != MVT::i32)
- return 0;
- const TargetRegisterClass *RC = &Mips::GPR32RegClass;
- unsigned DestReg = createResultReg(RC);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- bool IsThreadLocal = GVar && GVar->isThreadLocal();
- // TLS not supported at this time.
- if (IsThreadLocal)
- return 0;
- EmitInst(Mips::LW, DestReg).addReg(MFI->getGlobalBaseReg()).addGlobalAddress(
- GV, 0, MipsII::MO_GOT);
- return DestReg;
+bool MipsFastISel::emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg) {
+ if ((DestVT != MVT::i32) && (DestVT != MVT::i16))
+ return false;
+ if (Subtarget->hasMips32r2())
+ return emitIntSExt32r2(SrcVT, SrcReg, DestVT, DestReg);
+ return emitIntSExt32r1(SrcVT, SrcReg, DestVT, DestReg);
}
-unsigned MipsFastISel::MaterializeInt(const Constant *C, MVT VT) {
- if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
- return 0;
- const TargetRegisterClass *RC = &Mips::GPR32RegClass;
- const ConstantInt *CI = cast<ConstantInt>(C);
- int64_t Imm;
- if (CI->isNegative())
- Imm = CI->getSExtValue();
- else
- Imm = CI->getZExtValue();
- return Materialize32BitInt(Imm, RC);
+
+bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg) {
+ switch (SrcVT.SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(1);
+ break;
+ case MVT::i8:
+ emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xff);
+ break;
+ case MVT::i16:
+ emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xffff);
+ break;
+ }
+ return true;
}
-unsigned MipsFastISel::Materialize32BitInt(int64_t Imm,
- const TargetRegisterClass *RC) {
- unsigned ResultReg = createResultReg(RC);
+bool MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt) {
+ if (IsZExt)
+ return emitIntZExt(SrcVT, SrcReg, DestVT, DestReg);
+ return emitIntSExt(SrcVT, SrcReg, DestVT, DestReg);
+}
- if (isInt<16>(Imm)) {
- unsigned Opc = Mips::ADDiu;
- EmitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm);
- return ResultReg;
- } else if (isUInt<16>(Imm)) {
- EmitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm);
- return ResultReg;
+unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ bool isZExt) {
+ unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+ return emitIntExt(SrcVT, SrcReg, DestVT, DestReg, isZExt);
+}
+
+bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
+ if (!TargetSupported)
+ return false;
+ switch (I->getOpcode()) {
+ default:
+ break;
+ case Instruction::Load:
+ return selectLoad(I);
+ case Instruction::Store:
+ return selectStore(I);
+ case Instruction::Br:
+ return selectBranch(I);
+ case Instruction::Ret:
+ return selectRet(I);
+ case Instruction::Trunc:
+ return selectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return selectIntExt(I);
+ case Instruction::FPTrunc:
+ return selectFPTrunc(I);
+ case Instruction::FPExt:
+ return selectFPExt(I);
+ case Instruction::FPToSI:
+ return selectFPToInt(I, /*isSigned*/ true);
+ case Instruction::FPToUI:
+ return selectFPToInt(I, /*isSigned*/ false);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return selectCmp(I);
}
- unsigned Lo = Imm & 0xFFFF;
- unsigned Hi = (Imm >> 16) & 0xFFFF;
- if (Lo) {
- // Both Lo and Hi have nonzero bits.
- unsigned TmpReg = createResultReg(RC);
- EmitInst(Mips::LUi, TmpReg).addImm(Hi);
- EmitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo);
- } else {
- EmitInst(Mips::LUi, ResultReg).addImm(Hi);
+ return false;
+}
+
+unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V,
+ bool IsUnsigned) {
+ unsigned VReg = getRegForValue(V);
+ if (VReg == 0)
+ return 0;
+ MVT VMVT = TLI.getValueType(V->getType(), true).getSimpleVT();
+ if ((VMVT == MVT::i8) || (VMVT == MVT::i16)) {
+ unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ if (!emitIntExt(VMVT, VReg, MVT::i32, TempReg, IsUnsigned))
+ return 0;
+ VReg = TempReg;
}
- return ResultReg;
+ return VReg;
}
namespace llvm {
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 8ba35fa..3014a0d 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -82,9 +82,8 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
-const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM,
- const MipsSubtarget &ST) {
- if (TM.getSubtargetImpl()->inMips16Mode())
+const MipsFrameLowering *MipsFrameLowering::create(const MipsSubtarget &ST) {
+ if (ST.inMips16Mode())
return llvm::createMips16FrameLowering(ST);
return llvm::createMipsSEFrameLowering(ST);
@@ -101,7 +100,7 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
int64_t Offset = 0;
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 8e9196c..90a8d2a 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS_FRAMEINFO_H
-#define MIPS_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSFRAMELOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPSFRAMELOWERING_H
#include "Mips.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -28,8 +28,7 @@ public:
explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment)
: TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {}
- static const MipsFrameLowering *create(MipsTargetMachine &TM,
- const MipsSubtarget &ST);
+ static const MipsFrameLowering *create(const MipsSubtarget &ST);
bool hasFP(const MachineFunction &MF) const override;
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 2a6c875..ff8760d 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSISELDAGTODAG_H
-#define MIPSISELDAGTODAG_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSISELDAGTODAG_H
+#define LLVM_LIB_TARGET_MIPS_MIPSISELDAGTODAG_H
#include "Mips.h"
#include "MipsSubtarget.h"
@@ -32,7 +32,7 @@ namespace llvm {
class MipsDAGToDAGISel : public SelectionDAGISel {
public:
explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
- : SelectionDAGISel(TM), Subtarget(&TM.getSubtarget<MipsSubtarget>()) {}
+ : SelectionDAGISel(TM), Subtarget(nullptr) {}
// Pass Name
const char *getPassName() const override {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b7af2d4..ff2bfb3 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -14,6 +14,7 @@
#include "MipsISelLowering.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsCCState.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
@@ -24,6 +25,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -56,15 +58,6 @@ EnableMipsFastISel("mips-fast-isel", cl::Hidden,
cl::desc("Allow mips-fast-isel to be used"),
cl::init(false));
-static const MCPhysReg O32IntRegs[4] = {
- Mips::A0, Mips::A1, Mips::A2, Mips::A3
-};
-
-static const MCPhysReg Mips64IntRegs[8] = {
- Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
- Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64
-};
-
static const MCPhysReg Mips64DPRegs[8] = {
Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@@ -208,16 +201,16 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
- : TargetLowering(TM, new MipsTargetObjectFile()),
- Subtarget(&TM.getSubtarget<MipsSubtarget>()) {
+MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// The cmp.cond.fmt instruction in MIPS32r6/MIPS64r6 uses 0 and -1 like MSA
// does. Integer booleans still use 0 and 1.
- if (Subtarget->hasMips32r6())
+ if (Subtarget.hasMips32r6())
setBooleanContents(ZeroOrOneBooleanContent,
ZeroOrNegativeOneBooleanContent);
@@ -251,12 +244,11 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- if (Subtarget->isGP64bit()) {
+ if (Subtarget.isGP64bit()) {
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
@@ -268,14 +260,14 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
}
- if (!Subtarget->isGP64bit()) {
+ if (!Subtarget.isGP64bit()) {
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
setOperationAction(ISD::ADD, MVT::i32, Custom);
- if (Subtarget->isGP64bit())
+ if (Subtarget.isGP64bit())
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Expand);
@@ -299,7 +291,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (Subtarget->hasCnMips()) {
+ if (Subtarget.hasCnMips()) {
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
} else {
@@ -317,10 +309,10 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- if (!Subtarget->hasMips32r2())
+ if (!Subtarget.hasMips32r2())
setOperationAction(ISD::ROTR, MVT::i32, Expand);
- if (!Subtarget->hasMips64r2())
+ if (!Subtarget.hasMips64r2())
setOperationAction(ISD::ROTR, MVT::i64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
@@ -343,7 +335,8 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
@@ -358,23 +351,23 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setInsertFencesForAtomic(true);
- if (!Subtarget->hasMips32r2()) {
+ if (!Subtarget.hasMips32r2()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
}
// MIPS16 lacks MIPS32's clz and clo instructions.
- if (!Subtarget->hasMips32() || Subtarget->inMips16Mode())
+ if (!Subtarget.hasMips32() || Subtarget.inMips16Mode())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
- if (!Subtarget->hasMips64())
+ if (!Subtarget.hasMips64())
setOperationAction(ISD::CTLZ, MVT::i64, Expand);
- if (!Subtarget->hasMips32r2())
+ if (!Subtarget.hasMips32r2())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- if (!Subtarget->hasMips64r2())
+ if (!Subtarget.hasMips64r2())
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
- if (Subtarget->isGP64bit()) {
+ if (Subtarget.isGP64bit()) {
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom);
@@ -390,24 +383,30 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
- setMinFunctionAlignment(Subtarget->isGP64bit() ? 3 : 2);
+ setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2);
+
+ // The arguments on the stack are defined in terms of 4-byte slots on O32
+ // and 8-byte slots on N32/N64.
+ setMinStackArgumentAlignment(
+ (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 8 : 4);
- setStackPointerRegisterToSaveRestore(Subtarget->isABI_N64() ? Mips::SP_64
- : Mips::SP);
+ setStackPointerRegisterToSaveRestore(Subtarget.isABI_N64() ? Mips::SP_64
+ : Mips::SP);
- setExceptionPointerRegister(Subtarget->isABI_N64() ? Mips::A0_64 : Mips::A0);
- setExceptionSelectorRegister(Subtarget->isABI_N64() ? Mips::A1_64 : Mips::A1);
+ setExceptionPointerRegister(Subtarget.isABI_N64() ? Mips::A0_64 : Mips::A0);
+ setExceptionSelectorRegister(Subtarget.isABI_N64() ? Mips::A1_64 : Mips::A1);
MaxStoresPerMemcpy = 16;
- isMicroMips = Subtarget->inMicroMipsMode();
+ isMicroMips = Subtarget.inMicroMipsMode();
}
-const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
- if (TM.getSubtargetImpl()->inMips16Mode())
- return llvm::createMips16TargetLowering(TM);
+const MipsTargetLowering *MipsTargetLowering::create(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI) {
+ if (STI.inMips16Mode())
+ return llvm::createMips16TargetLowering(TM, STI);
- return llvm::createMipsSETargetLowering(TM);
+ return llvm::createMipsSETargetLowering(TM, STI);
}
// Create a fast isel object.
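The setMinStackArgumentAlignment call added above encodes the outgoing argument slot size: 4-byte slots on O32, 8-byte slots on N32/N64. A minimal standalone sketch of how slot-rounded stack offsets follow from that rule (illustrative only; the helper name and signature are assumptions, not part of this patch):

#include <cstdint>
#include <vector>

// Assign a stack offset to each argument size, rounding every argument up to
// a whole number of ABI argument slots (4 bytes on O32, 8 on N32/N64).
std::vector<uint64_t> assignStackSlots(const std::vector<uint64_t> &ArgSizes,
                                       bool IsN32OrN64) {
  const uint64_t SlotSize = IsN32OrN64 ? 8 : 4;
  std::vector<uint64_t> Offsets;
  uint64_t Offset = 0;
  for (uint64_t Size : ArgSizes) {
    Offsets.push_back(Offset);
    Offset += (Size + SlotSize - 1) / SlotSize * SlotSize;
  }
  return Offsets;
}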
@@ -427,7 +426,7 @@ EVT MipsTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -537,7 +536,7 @@ static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -616,11 +615,11 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
// Pattern match EXT.
// $dst = and ((sra or srl) $src , pos), (2**size - 1)
// => ext $dst, $src, size, pos
- if (DCI.isBeforeLegalizeOps() || !Subtarget->hasExtractInsert())
+ if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert())
return SDValue();
SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
@@ -656,12 +655,12 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
// Pattern match INS.
// $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
// where mask1 = (2**size - 1) << pos, mask0 = ~mask1
// => ins $dst, $src, size, pos, $src1
- if (DCI.isBeforeLegalizeOps() || !Subtarget->hasExtractInsert())
+ if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert())
return SDValue();
SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
@@ -710,7 +709,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
// (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
if (DCI.isBeforeLegalizeOps())
@@ -791,6 +790,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
case ISD::SETCC: return lowerSETCC(Op, DAG);
case ISD::VASTART: return lowerVASTART(Op, DAG);
+ case ISD::VAARG: return lowerVAARG(Op, DAG);
case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
@@ -933,16 +933,16 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::DIVU:
case Mips::MOD:
case Mips::MODU:
- return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
- false);
+ return insertDivByZeroTrap(
+ MI, *BB, *getTargetMachine().getSubtargetImpl()->getInstrInfo(), false);
case Mips::PseudoDSDIV:
case Mips::PseudoDUDIV:
case Mips::DDIV:
case Mips::DDIVU:
case Mips::DMOD:
case Mips::DMODU:
- return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
- true);
+ return insertDivByZeroTrap(
+ MI, *BB, *getTargetMachine().getSubtargetImpl()->getInstrInfo(), true);
case Mips::SEL_D:
return emitSEL_D(MI, BB);
}
@@ -959,7 +959,8 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, AND, NOR, ZERO, BEQ;
@@ -968,16 +969,16 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
LL = Mips::LL_MM;
SC = Mips::SC_MM;
} else {
- LL = Subtarget->hasMips32r6() ? Mips::LL : Mips::LL_R6;
- SC = Subtarget->hasMips32r6() ? Mips::SC : Mips::SC_R6;
+ LL = Subtarget.hasMips32r6() ? Mips::LL_R6 : Mips::LL;
+ SC = Subtarget.hasMips32r6() ? Mips::SC_R6 : Mips::SC;
}
AND = Mips::AND;
NOR = Mips::NOR;
ZERO = Mips::ZERO;
BEQ = Mips::BEQ;
} else {
- LL = Subtarget->hasMips64r6() ? Mips::LLD : Mips::LLD_R6;
- SC = Subtarget->hasMips64r6() ? Mips::SCD : Mips::SCD_R6;
+ LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
+ SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
AND = Mips::AND64;
NOR = Mips::NOR64;
ZERO = Mips::ZERO_64;
@@ -1042,15 +1043,16 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg(
MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg,
unsigned SrcReg) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- if (Subtarget->hasMips32r2() && Size == 1) {
+ if (Subtarget.hasMips32r2() && Size == 1) {
BuildMI(BB, DL, TII->get(Mips::SEB), DstReg).addReg(SrcReg);
return BB;
}
- if (Subtarget->hasMips32r2() && Size == 2) {
+ if (Subtarget.hasMips32r2() && Size == 2) {
BuildMI(BB, DL, TII->get(Mips::SEH), DstReg).addReg(SrcReg);
return BB;
}
@@ -1078,7 +1080,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
@@ -1140,7 +1143,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
.addReg(Ptr).addReg(MaskLSB2);
BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
- if (Subtarget->isLittle()) {
+ if (Subtarget.isLittle()) {
BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
} else {
unsigned Off = RegInfo.createVirtualRegister(RC);
@@ -1228,7 +1231,8 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, ZERO, BNE, BEQ;
@@ -1310,7 +1314,8 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
@@ -1380,7 +1385,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
.addReg(Ptr).addReg(MaskLSB2);
BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
- if (Subtarget->isLittle()) {
+ if (Subtarget.isLittle()) {
BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
} else {
unsigned Off = RegInfo.createVirtualRegister(RC);
@@ -1445,8 +1450,10 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock::iterator II(MI);
@@ -1487,11 +1494,11 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
MachinePointerInfo::getJumpTable(), MemVT, false, false,
- 0);
+ false, 0);
Chain = Addr.getValue(1);
if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) ||
- Subtarget->isABI_N64()) {
+ Subtarget.isABI_N64()) {
// For PIC, the sequence is:
// BRIND(load(Jumptable + index) + RelocBase)
// RelocBase can be JumpTable, GOT or some sort of global base.
@@ -1509,7 +1516,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(2);
SDLoc DL(Op);
- assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
+ assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6());
SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
// Return if flag is not set by a floating point comparison.
@@ -1529,7 +1536,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue MipsTargetLowering::
lowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
- assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
+ assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6());
SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
// Return if flag is not set by a floating point comparison.
@@ -1555,7 +1562,7 @@ lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
}
SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
+ assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6());
SDValue Cond = createFPCmp(DAG, Op);
assert(Cond.getOpcode() == MipsISD::FPCmp &&
@@ -1569,26 +1576,18 @@ SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
- // FIXME there isn't actually debug info here
- SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = N->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget->isABI_N64()) {
+ !Subtarget.isABI_N64()) {
const MipsTargetObjectFile &TLOF =
(const MipsTargetObjectFile&)getObjFileLowering();
- // %gp_rel relocation
- if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
- SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
- MipsII::MO_GPREL);
- SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL,
- DAG.getVTList(MVT::i32), GA);
- SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
- return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);
- }
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine()))
+ // %gp_rel relocation
+ return getAddrGPRel(N, Ty, DAG);
// %hi/%lo relocation
return getAddrNonPIC(N, Ty, DAG);
@@ -1596,7 +1595,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
return getAddrLocal(N, Ty, DAG,
- Subtarget->isABI_N32() || Subtarget->isABI_N64());
+ Subtarget.isABI_N32() || Subtarget.isABI_N64());
if (LargeGOT)
return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
@@ -1604,7 +1603,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
MachinePointerInfo::getGOT());
return getAddrGlobal(N, Ty, DAG,
- (Subtarget->isABI_N32() || Subtarget->isABI_N64())
+ (Subtarget.isABI_N32() || Subtarget.isABI_N64())
? MipsII::MO_GOT_DISP
: MipsII::MO_GOT16,
DAG.getEntryNode(), MachinePointerInfo::getGOT());
@@ -1616,11 +1615,11 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
EVT Ty = Op.getValueType();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget->isABI_N64())
+ !Subtarget.isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
return getAddrLocal(N, Ty, DAG,
- Subtarget->isABI_N32() || Subtarget->isABI_N64());
+ Subtarget.isABI_N32() || Subtarget.isABI_N64());
}
SDValue MipsTargetLowering::
@@ -1709,34 +1708,33 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
EVT Ty = Op.getValueType();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget->isABI_N64())
+ !Subtarget.isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
return getAddrLocal(N, Ty, DAG,
- Subtarget->isABI_N32() || Subtarget->isABI_N64());
+ Subtarget.isABI_N32() || Subtarget.isABI_N64());
}
SDValue MipsTargetLowering::
lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
- // gp_rel relocation
- // FIXME: we should reference the constant pool using small data sections,
- // but the asm printer currently doesn't support this feature without
- // hacking it. This feature should come soon so we can uncomment the
- // stuff below.
- //if (IsInSmallSection(C->getType())) {
- // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
- // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
- // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
EVT Ty = Op.getValueType();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
- !Subtarget->isABI_N64())
+ !Subtarget.isABI_N64()) {
+ const MipsTargetObjectFile &TLOF =
+ (const MipsTargetObjectFile&)getObjFileLowering();
+
+ if (TLOF.IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
+ // %gp_rel relocation
+ return getAddrGPRel(N, Ty, DAG);
+
return getAddrNonPIC(N, Ty, DAG);
+ }
return getAddrLocal(N, Ty, DAG,
- Subtarget->isABI_N32() || Subtarget->isABI_N64());
+ Subtarget.isABI_N32() || Subtarget.isABI_N64());
}
SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
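The lowerGlobalAddress and lowerConstantPool hunks above route small-section globals and constants through a getAddrGPRel helper instead of the open-coded sequence that was removed. Judging from the removed lines, the addressing it produces is GP-relative, roughly of this DAG shape (a sketch only, assuming the O32 i32 pointer case; this is not the helper's actual body):

  // address = $gp + %gp_rel(sym)
  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0, MipsII::MO_GPREL);
  SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL, DAG.getVTList(MVT::i32), GA);
  SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
  SDValue Addr = DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);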
@@ -1754,6 +1752,65 @@ SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV), false, false, 0);
}
+SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ SDValue Chain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ SDLoc DL(Node);
+ unsigned ArgSlotSizeInBytes =
+ (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 8 : 4;
+
+ SDValue VAListLoad = DAG.getLoad(getPointerTy(), DL, Chain, VAListPtr,
+ MachinePointerInfo(SV), false, false, false,
+ 0);
+ SDValue VAList = VAListLoad;
+
+ // Re-align the pointer if necessary.
+ // It should only ever be necessary for 64-bit types on O32 since the minimum
+ // argument alignment is the same as the maximum type alignment for N32/N64.
+ //
+ // FIXME: We currently align too often. The code generator doesn't notice
+ // when the pointer is still aligned from the last va_arg (or pair of
+ // va_args for the i64 on O32 case).
+ if (Align > getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(Align - 1,
+ VAList.getValueType()));
+
+ VAList = DAG.getNode(ISD::AND, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(-(int64_t)Align,
+ VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg.
+ unsigned ArgSizeInBytes = getDataLayout()->getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext()));
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList,
+ DAG.getConstant(RoundUpToAlignment(ArgSizeInBytes, ArgSlotSizeInBytes),
+ VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer
+ Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr,
+ MachinePointerInfo(SV), false, false, 0);
+
+ // In big-endian mode we must adjust the pointer when the load size is smaller
+ // than the argument slot size. We must also reduce the known alignment to
+ // match. For example in the N64 ABI, we must add 4 bytes to the offset to get
+ // the correct half of the slot, and reduce the alignment from 8 (slot
+ // alignment) down to 4 (type alignment).
+ if (!Subtarget.isLittle() && ArgSizeInBytes < ArgSlotSizeInBytes) {
+ unsigned Adjustment = ArgSlotSizeInBytes - ArgSizeInBytes;
+ VAList = DAG.getNode(ISD::ADD, DL, VAListPtr.getValueType(), VAList,
+ DAG.getIntPtrConstant(Adjustment));
+ }
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo(), false, false,
+ false, 0);
+}
+
static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG,
bool HasExtractInsert) {
EVT TyX = Op.getOperand(0).getValueType();
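The new lowerVAARG above reduces va_arg to plain pointer arithmetic on the va_list pointer: re-align it when the argument's alignment exceeds the slot alignment (only possible for 64-bit types on O32), advance it by the slot-rounded argument size, and on big-endian targets read from the upper part of an oversized slot. A standalone emulation of that arithmetic (illustrative only; the function and parameter names are assumptions):

#include <cstdint>

// Returns the address the generated code would load the argument from and
// advances VAList past it.
uint64_t emulateVaArgAddr(uint64_t &VAList, uint64_t ArgSize, uint64_t ArgAlign,
                          uint64_t SlotSize, // 4 on O32, 8 on N32/N64
                          bool IsLittleEndian) {
  // Re-align only when the type is more aligned than an argument slot.
  if (ArgAlign > SlotSize)
    VAList = (VAList + ArgAlign - 1) & ~(ArgAlign - 1);

  uint64_t Addr = VAList;
  // Advance by a whole number of slots.
  VAList += (ArgSize + SlotSize - 1) / SlotSize * SlotSize;

  // Big-endian: a value narrower than its slot sits in the slot's upper part.
  if (!IsLittleEndian && ArgSize < SlotSize)
    Addr += SlotSize - ArgSize;
  return Addr;
}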
@@ -1851,10 +1908,10 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
SDValue
MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
- if (Subtarget->isGP64bit())
- return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasExtractInsert());
+ if (Subtarget.isGP64bit())
+ return lowerFCOPYSIGN64(Op, DAG, Subtarget.hasExtractInsert());
- return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasExtractInsert());
+ return lowerFCOPYSIGN32(Op, DAG, Subtarget.hasExtractInsert());
}
SDValue MipsTargetLowering::
@@ -1869,7 +1926,7 @@ lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL,
- Subtarget->isABI_N64() ? Mips::FP_64 : Mips::FP, VT);
+ Subtarget.isABI_N64() ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
@@ -1885,7 +1942,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MVT VT = Op.getSimpleValueType();
- unsigned RA = Subtarget->isABI_N64() ? Mips::RA_64 : Mips::RA;
+ unsigned RA = Subtarget.isABI_N64() ? Mips::RA_64 : Mips::RA;
MFI->setReturnAddressIsTaken(true);
// Return RA, which contains the return address. Mark it an implicit live-in.
@@ -1907,12 +1964,12 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc DL(Op);
- EVT Ty = Subtarget->isABI_N64() ? MVT::i64 : MVT::i32;
+ EVT Ty = Subtarget.isABI_N64() ? MVT::i64 : MVT::i32;
// Store stack offset in V1, store jump target in V0. Glue CopyToReg and
// EH_RETURN nodes, so that instructions are emitted back-to-back.
- unsigned OffsetReg = Subtarget->isABI_N64() ? Mips::V1_64 : Mips::V1;
- unsigned AddrReg = Subtarget->isABI_N64() ? Mips::V0_64 : Mips::V0;
+ unsigned OffsetReg = Subtarget.isABI_N64() ? Mips::V1_64 : Mips::V1;
+ unsigned AddrReg = Subtarget.isABI_N64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
@@ -2025,7 +2082,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
- if (Subtarget->systemSupportsUnalignedAccess())
+ if (Subtarget.systemSupportsUnalignedAccess())
return Op;
// Return if load is aligned or if MemVT is neither i32 nor i64.
@@ -2033,7 +2090,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
((MemVT != MVT::i32) && (MemVT != MVT::i64)))
return SDValue();
- bool IsLittle = Subtarget->isLittle();
+ bool IsLittle = Subtarget.isLittle();
EVT VT = Op.getValueType();
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Chain = LD->getChain(), Undef = DAG.getUNDEF(VT);
@@ -2151,10 +2208,10 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = SD->getMemoryVT();
// Lower unaligned integer stores.
- if (!Subtarget->systemSupportsUnalignedAccess() &&
+ if (!Subtarget.systemSupportsUnalignedAccess() &&
(SD->getAlignment() < MemVT.getSizeInBits() / 8) &&
((MemVT == MVT::i32) || (MemVT == MVT::i64)))
- return lowerUnalignedIntStore(SD, DAG, Subtarget->isLittle());
+ return lowerUnalignedIntStore(SD, DAG, Subtarget.isLittle());
return lowerFP_TO_SINT_STORE(SD, DAG);
}
@@ -2201,7 +2258,7 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
// an argument. Otherwise, passed in A1, A2, A3 and stack.
// f64 - Only passed in two aliased f32 registers if no int reg has been used
// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
-// not used, it must be shadowed. If only A3 is avaiable, shadow it and
+// not used, it must be shadowed. If only A3 is available, shadow it and
// go to stack.
//
// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
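The comment block above states the O32 placement rules for floating-point arguments. Just the f64 decision, as described there, can be written out as a small sketch (names and the standalone form are assumptions; the real logic lives in CC_MipsO32 below):

// Where does an O32 f64 argument go, given how many of A0-A3 already hold
// arguments?
enum class F64Loc { TwoAliasedF32Regs, A2A3, Stack };

F64Loc placeO32F64(unsigned IntRegsUsed) {
  if (IntRegsUsed == 0)
    return F64Loc::TwoAliasedF32Regs; // no int reg used yet
  if (IntRegsUsed <= 2)
    return F64Loc::A2A3;              // shadow A1 if it is still unused
  return F64Loc::Stack;               // only A3 (or nothing) left: shadow it
}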
@@ -2299,6 +2356,10 @@ static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT,
return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
}
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) LLVM_ATTRIBUTE_UNUSED;
+
#include "MipsGenCallingConv.inc"
//===----------------------------------------------------------------------===//
@@ -2333,15 +2394,21 @@ void MipsTargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const {
// Insert node "GP copy globalreg" before call to function.
//
// R_MIPS_CALL* operators (emitted when non-internal functions are called
// in PIC mode) allow symbols to be resolved via lazy binding.
// The lazy binding stub requires GP to point to the GOT.
- if (IsPICCall && !InternalLinkage) {
- unsigned GPReg = Subtarget->isABI_N64() ? Mips::GP_64 : Mips::GP;
- EVT Ty = Subtarget->isABI_N64() ? MVT::i64 : MVT::i32;
+ // Note that we don't need GP to point to the GOT for indirect calls
+ // (when R_MIPS_CALL* is not used for the call) because the MIPS linker only
+ // generates a lazy binding stub for a function when R_MIPS_CALL* are the only
+ // relocs used for it (that is, no lazy binding stub is generated for a
+ // function whose address is taken in the program).
+ if (IsPICCall && !InternalLinkage && IsCallReloc) {
+ unsigned GPReg = Subtarget.isABI_N64() ? Mips::GP_64 : Mips::GP;
+ EVT Ty = Subtarget.isABI_N64() ? MVT::i64 : MVT::i32;
RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
}
@@ -2364,10 +2431,11 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
- if (Subtarget->inMips16HardFloat()) {
+ if (Subtarget.inMips16HardFloat()) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
llvm::StringRef Sym = G->getGlobal()->getName();
Function *F = G->getGlobal()->getParent()->getFunction(Sym);
@@ -2400,31 +2468,30 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC::SpecialCallingConvType SpecialCallingConv =
- getSpecialCallingConv(Callee);
- MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
- CCInfo, SpecialCallingConv);
+ MipsCCState CCInfo(
+ CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(),
+ MipsCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget));
- MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
- Subtarget->mipsSEUsesSoftFloat(),
- Callee.getNode(), CLI.getArgs());
+ // Allocate the reserved argument area. It seems strange to do this from the
+ // caller side but removing it breaks the frame size calculation.
+ const MipsABIInfo &ABI = Subtarget.getABI();
+ CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), Callee.getNode());
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
// Check if it's really possible to do a tail call.
if (IsTailCall)
- IsTailCall =
- isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
- *MF.getInfo<MipsFunctionInfo>());
+ IsTailCall = isEligibleForTailCallOptimization(
+ CCInfo, NextStackOffset, *MF.getInfo<MipsFunctionInfo>());
if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
@@ -2444,13 +2511,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL);
SDValue StackPtr = DAG.getCopyFromReg(
- Chain, DL, Subtarget->isABI_N64() ? Mips::SP_64 : Mips::SP,
+ Chain, DL, Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP,
getPointerTy());
// With EABI is it possible to have 16 args on registers.
std::deque< std::pair<unsigned, SDValue> > RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
+
+ CCInfo.rewindByValRegsInfo();
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -2458,23 +2526,30 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCValAssign &VA = ArgLocs[i];
MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ bool UseUpperBits = false;
// ByVal Arg.
if (Flags.isByVal()) {
+ unsigned FirstByValReg, LastByValReg;
+ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
- assert(ByValArg != MipsCCInfo.byval_end());
+ assert(ByValIdx < CCInfo.getInRegsParamsCount());
assert(!IsTailCall &&
"Do not tail-call optimize if there is a byval argument.");
passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
- MipsCCInfo, *ByValArg, Flags, Subtarget->isLittle());
- ++ByValArg;
+ FirstByValReg, LastByValReg, Flags, Subtarget.isLittle(),
+ VA);
+ CCInfo.nextInRegsParam();
continue;
}
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
+ default:
+ llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
if (VA.isRegLoc()) {
if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
@@ -2486,7 +2561,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Arg, DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Arg, DAG.getConstant(1, MVT::i32));
- if (!Subtarget->isLittle())
+ if (!Subtarget.isLittle())
std::swap(Lo, Hi);
unsigned LocRegLo = VA.getLocReg();
unsigned LocRegHigh = getNextIntArgReg(LocRegLo);
@@ -2496,17 +2571,37 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
+ break;
+ case CCValAssign::SExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
break;
+ case CCValAssign::ZExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
break;
+ case CCValAssign::AExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
break;
}
+ if (UseUpperBits) {
+ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+ Arg = DAG.getNode(
+ ISD::SHL, DL, VA.getLocVT(), Arg,
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ }
+
// Arguments that can be passed on register must be kept at
// RegsToPass vector
if (VA.isRegLoc()) {
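The new *ExtUpper cases above pack a narrow value into the upper bits of its location register: extend to the location type, then shift left by the difference in bit widths. In ordinary C++ terms (a standalone illustration assuming an i32 value carried in an i64 location):

#include <cstdint>

// Mirror of the extend-then-shift sequence: place a 32-bit value in the
// upper half of a 64-bit location register.
uint64_t packUpper(uint32_t Val) {
  uint64_t Loc = Val;   // extended to the location type
  return Loc << 32;     // shifted by LocSizeInBits - ValSizeInBits
}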
@@ -2532,9 +2627,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool IsPICCall =
- (Subtarget->isABI_N64() || IsPIC); // true if calls are translated to
+ (Subtarget.isABI_N64() || IsPIC); // true if calls are translated to
// jalr $25
- bool GlobalOrExternal = false, InternalLinkage = false;
+ bool GlobalOrExternal = false, InternalLinkage = false, IsCallReloc = false;
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
@@ -2545,14 +2640,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (InternalLinkage)
Callee = getAddrLocal(G, Ty, DAG,
- Subtarget->isABI_N32() || Subtarget->isABI_N64());
- else if (LargeGOT)
+ Subtarget.isABI_N32() || Subtarget.isABI_N64());
+ else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Val));
- else
+ IsCallReloc = true;
+ } else {
Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Val));
+ IsCallReloc = true;
+ }
} else
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
MipsII::MO_NO_FLAG);
@@ -2561,16 +2659,19 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- if (!Subtarget->isABI_N64() && !IsPIC) // !N64 && static
+ if (!Subtarget.isABI_N64() && !IsPIC) // !N64 && static
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
MipsII::MO_NO_FLAG);
- else if (LargeGOT)
+ else if (LargeGOT) {
Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Sym));
- else // N64 || PIC
+ IsCallReloc = true;
+ } else { // N64 || PIC
Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Sym));
+ IsCallReloc = true;
+ }
GlobalOrExternal = true;
}
@@ -2579,7 +2680,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
- CLI, Callee, Chain);
+ IsCallReloc, CLI, Callee, Chain);
if (IsTailCall)
return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops);
@@ -2594,39 +2695,68 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg,
- Ins, DL, DAG, InVals, CLI.Callee.getNode(), CLI.RetTy);
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ InVals, CLI);
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
-SDValue
-MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const SDNode *CallNode,
- const Type *RetTy) const {
+SDValue MipsTargetLowering::LowerCallResult(
+ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ TargetLowering::CallLoweringInfo &CLI) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
- CCInfo);
-
- MipsCCInfo.analyzeCallResult(Ins, Subtarget->mipsSEUsesSoftFloat(),
- CallNode, RetTy);
+ MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
RVLocs[i].getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
- if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
- Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getValVT(), Val);
+ if (VA.isUpperBitsInLoc()) {
+ unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits();
+ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+ unsigned Shift =
+ VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+ Val = DAG.getNode(
+ Shift, DL, VA.getLocVT(), Val,
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ }
+
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ break;
+ case CCValAssign::AExt:
+ case CCValAssign::AExtUpper:
+ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ case CCValAssign::ZExtUpper:
+ Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+ break;
+ case CCValAssign::SExt:
+ case CCValAssign::SExtUpper:
+ Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+ break;
+ }
InVals.push_back(Val);
}
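LowerCallResult undoes that packing for *ExtUpper results: shift the location register right (logically for ZExtUpper, arithmetically otherwise), assert the extension, and truncate back to the value type. The equivalent in ordinary C++ (illustrative; assumes an i32 result carried in an i64 location and an arithmetic right shift for signed types):

#include <cstdint>

int32_t unpackUpperSExt(int64_t Loc) {
  return static_cast<int32_t>(Loc >> 32);  // arithmetic shift, then truncate
}

uint32_t unpackUpperZExt(uint64_t Loc) {
  return static_cast<uint32_t>(Loc >> 32); // logical shift, then truncate
}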
@@ -2634,6 +2764,60 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
return Chain;
}
+static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA,
+ EVT ArgVT, SDLoc DL, SelectionDAG &DAG) {
+ MVT LocVT = VA.getLocVT();
+ EVT ValVT = VA.getValVT();
+
+ // Shift into the upper bits if necessary.
+ switch (VA.getLocInfo()) {
+ default:
+ break;
+ case CCValAssign::AExtUpper:
+ case CCValAssign::SExtUpper:
+ case CCValAssign::ZExtUpper: {
+ unsigned ValSizeInBits = ArgVT.getSizeInBits();
+ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+ unsigned Opcode =
+ VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+ Val = DAG.getNode(
+ Opcode, DL, VA.getLocVT(), Val,
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ break;
+ }
+ }
+
+ // If this is a value smaller than the argument slot size (32-bit for O32,
+ // 64-bit for N32/N64), it has been promoted in some way to the argument slot
+ // size. Extract the value and insert any appropriate assertions regarding
+ // sign/zero extension.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::AExtUpper:
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::SExtUpper:
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::ZExtUpper:
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT));
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+ break;
+ }
+
+ return Val;
+}
+
//===----------------------------------------------------------------------===//
// Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -2658,20 +2842,19 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
- CCInfo);
+ MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+ const MipsABIInfo &ABI = Subtarget.getABI();
+ CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
- bool UseSoftFloat = Subtarget->mipsSEUsesSoftFloat();
- MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
- MipsCCInfo.hasByValArg());
+ CCInfo.getInRegsParamsCount() > 0);
unsigned CurArgIdx = 0;
- MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
+ CCInfo.rewindByValRegsInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -2682,12 +2865,16 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
bool IsRegLoc = VA.isRegLoc();
if (Flags.isByVal()) {
+ unsigned FirstByValReg, LastByValReg;
+ unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+ CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
- assert(ByValArg != MipsCCInfo.byval_end());
+ assert(ByValIdx < CCInfo.getInRegsParamsCount());
copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
- MipsCCInfo, *ByValArg);
- ++ByValArg;
+ FirstByValReg, LastByValReg, VA, CCInfo);
+ CCInfo.nextInRegsParam();
continue;
}
@@ -2702,20 +2889,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
- // If this is an 8 or 16-bit value, it has been passed promoted
- // to 32 bits. Insert an assert[sz]ext to capture this, then
- // truncate to the right size.
- if (VA.getLocInfo() != CCValAssign::Full) {
- unsigned Opcode = 0;
- if (VA.getLocInfo() == CCValAssign::SExt)
- Opcode = ISD::AssertSext;
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- Opcode = ISD::AssertZext;
- if (Opcode)
- ArgValue = DAG.getNode(Opcode, DL, RegVT, ArgValue,
- DAG.getValueType(ValVT));
- ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
- }
+ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
// Handle floating point arguments passed in integer registers and
// long double arguments passed in floating point registers.
@@ -2723,12 +2897,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
(RegVT == MVT::i64 && ValVT == MVT::f64) ||
(RegVT == MVT::f64 && ValVT == MVT::i64))
ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
- else if (Subtarget->isABI_O32() && RegVT == MVT::i32 &&
+ else if (Subtarget.isABI_O32() && RegVT == MVT::i32 &&
ValVT == MVT::f64) {
unsigned Reg2 = addLiveIn(DAG.getMachineFunction(),
getNextIntArgReg(ArgReg), RC);
SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT);
- if (!Subtarget->isLittle())
+ if (!Subtarget.isLittle())
std::swap(ArgValue, ArgValue2);
ArgValue = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64,
ArgValue, ArgValue2);
@@ -2736,21 +2910,34 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
+ MVT LocVT = VA.getLocVT();
+
+ if (Subtarget.isABI_O32()) {
+ // We ought to be able to use LocVT directly but O32 sets it to i32
+ // when allocating floating point values to integer registers.
+ // This shouldn't influence how we load the value into registers unless
+ // we are targeting softfloat.
+ if (VA.getValVT().isFloatingPoint() && !Subtarget.abiUsesSoftFloat())
+ LocVT = VA.getValVT();
+ }
// sanity check
assert(VA.isMemLoc());
// The stack pointer offset is relative to the caller stack frame.
- int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
+ int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- SDValue Load = DAG.getLoad(ValVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
- InVals.push_back(Load);
- OutChains.push_back(Load.getValue(1));
+ SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ OutChains.push_back(ArgValue.getValue(1));
+
+ ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
+
+ InVals.push_back(ArgValue);
}
}
@@ -2762,7 +2949,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(
- getRegClassFor(Subtarget->isABI_N64() ? MVT::i64 : MVT::i32));
+ getRegClassFor(Subtarget.isABI_N64() ? MVT::i64 : MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
@@ -2772,7 +2959,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
}
if (IsVarArg)
- writeVarArgRegs(OutChains, MipsCCInfo, Chain, DL, DAG);
+ writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo);
// All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens when on varg functions
@@ -2794,8 +2981,7 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(),
- RVLocs, Context);
+ MipsCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
@@ -2811,14 +2997,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
- *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
- CCInfo);
+ MipsCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
// Analyze return values.
- MipsCCInfo.analyzeReturn(Outs, Subtarget->mipsSEUsesSoftFloat(),
- MF.getFunction()->getReturnType());
+ CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -2828,9 +3010,43 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
SDValue Val = OutVals[i];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
+ bool UseUpperBits = false;
- if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
- Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val);
+ break;
+ case CCValAssign::AExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val);
+ break;
+ case CCValAssign::ZExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val);
+ break;
+ case CCValAssign::SExtUpper:
+ UseUpperBits = true;
+ // Fallthrough
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val);
+ break;
+ }
+
+ if (UseUpperBits) {
+ unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+ unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+ Val = DAG.getNode(
+ ISD::SHL, DL, VA.getLocVT(), Val,
+ DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+ }
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
@@ -2850,7 +3066,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
- unsigned V0 = Subtarget->isABI_N64() ? Mips::V0_64 : Mips::V0;
+ unsigned V0 = Subtarget.isABI_N64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
Flag = Chain.getValue(1);
@@ -2928,7 +3144,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
weight = CW_Register;
break;
case 'f': // FPU or MSA register
- if (Subtarget->hasMSA() && type->isVectorTy() &&
+ if (Subtarget.hasMSA() && type->isVectorTy() &&
cast<VectorType>(type)->getBitWidth() == 128)
weight = CW_Register;
else if (type->isFloatTy())
@@ -2962,7 +3178,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
/// that is returned indicates whether parsing was successful. The second flag
/// is true if the numeric part exists.
static std::pair<bool, bool>
-parsePhysicalReg(const StringRef &C, std::string &Prefix,
+parsePhysicalReg(StringRef C, std::string &Prefix,
unsigned long long &Reg) {
if (C.front() != '{' || C.back() != '}')
return std::make_pair(false, false);
@@ -2983,8 +3199,9 @@ parsePhysicalReg(const StringRef &C, std::string &Prefix,
}
std::pair<unsigned, const TargetRegisterClass *> MipsTargetLowering::
-parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const TargetRegisterClass *RC;
std::string Prefix;
unsigned long long Reg;
@@ -3034,7 +3251,7 @@ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
// If the size of FP registers is 64-bit or Reg is an even number, select
// the 64-bit register class. Otherwise, select the 32-bit register class.
if (VT == MVT::Other)
- VT = (Subtarget->isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
+ VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
RC = getRegClassFor(VT);
@@ -3067,13 +3284,13 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
case 'y': // Same as 'r'. Exists for compatibility.
case 'r':
if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
- if (Subtarget->inMips16Mode())
+ if (Subtarget.inMips16Mode())
return std::make_pair(0U, &Mips::CPU16RegsRegClass);
return std::make_pair(0U, &Mips::GPR32RegClass);
}
- if (VT == MVT::i64 && !Subtarget->isGP64bit())
+ if (VT == MVT::i64 && !Subtarget.isGP64bit())
return std::make_pair(0U, &Mips::GPR32RegClass);
- if (VT == MVT::i64 && Subtarget->isGP64bit())
+ if (VT == MVT::i64 && Subtarget.isGP64bit())
return std::make_pair(0U, &Mips::GPR64RegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
@@ -3088,8 +3305,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
return std::make_pair(0U, &Mips::MSA128DRegClass);
else if (VT == MVT::f32)
return std::make_pair(0U, &Mips::FGR32RegClass);
- else if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
- if (Subtarget->isFP64bit())
+ else if ((VT == MVT::f64) && (!Subtarget.isSingleFloat())) {
+ if (Subtarget.isFP64bit())
return std::make_pair(0U, &Mips::FGR64RegClass);
return std::make_pair(0U, &Mips::AFGR64RegClass);
}
@@ -3245,7 +3462,7 @@ EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- if (Subtarget->hasMips64())
+ if (Subtarget.hasMips64())
return MVT::i64;
return MVT::i32;
@@ -3260,291 +3477,33 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
}
unsigned MipsTargetLowering::getJumpTableEncoding() const {
- if (Subtarget->isABI_N64())
+ if (Subtarget.isABI_N64())
return MachineJumpTableInfo::EK_GPRel64BlockAddress;
return TargetLowering::getJumpTableEncoding();
}
-/// This function returns true if CallSym is a long double emulation routine.
-static bool isF128SoftLibCall(const char *CallSym) {
- const char *const LibCalls[] =
- {"__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", "__extendsftf2",
- "__fixtfdi", "__fixtfsi", "__fixtfti", "__fixunstfdi", "__fixunstfsi",
- "__fixunstfti", "__floatditf", "__floatsitf", "__floattitf",
- "__floatunditf", "__floatunsitf", "__floatuntitf", "__getf2", "__gttf2",
- "__letf2", "__lttf2", "__multf3", "__netf2", "__powitf2", "__subtf3",
- "__trunctfdf2", "__trunctfsf2", "__unordtf2",
- "ceill", "copysignl", "cosl", "exp2l", "expl", "floorl", "fmal", "fmodl",
- "log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl",
- "truncl"};
-
- const char *const *End = LibCalls + array_lengthof(LibCalls);
-
- // Check that LibCalls is sorted alphabetically.
- MipsTargetLowering::LTStr Comp;
-
-#ifndef NDEBUG
- for (const char *const *I = LibCalls; I < End - 1; ++I)
- assert(Comp(*I, *(I + 1)));
-#endif
-
- return std::binary_search(LibCalls, End, CallSym, Comp);
-}
-
-/// This function returns true if Ty is fp128 or i128 which was originally a
-/// fp128.
-static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
- if (Ty->isFP128Ty())
- return true;
-
- const ExternalSymbolSDNode *ES =
- dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
-
- // If the Ty is i128 and the function being called is a long double emulation
- // routine, then the original type is f128.
- return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
-}
-
-MipsTargetLowering::MipsCC::SpecialCallingConvType
- MipsTargetLowering::getSpecialCallingConv(SDValue Callee) const {
- MipsCC::SpecialCallingConvType SpecialCallingConv =
- MipsCC::NoSpecialCallingConv;
- if (Subtarget->inMips16HardFloat()) {
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- llvm::StringRef Sym = G->getGlobal()->getName();
- Function *F = G->getGlobal()->getParent()->getFunction(Sym);
- if (F && F->hasFnAttribute("__Mips16RetHelper")) {
- SpecialCallingConv = MipsCC::Mips16RetHelperConv;
- }
- }
- }
- return SpecialCallingConv;
-}
-
-MipsTargetLowering::MipsCC::MipsCC(
- CallingConv::ID CC, bool IsO32_, bool IsFP64_, CCState &Info,
- MipsCC::SpecialCallingConvType SpecialCallingConv_)
- : CCInfo(Info), CallConv(CC), IsO32(IsO32_), IsFP64(IsFP64_),
- SpecialCallingConv(SpecialCallingConv_){
- // Pre-allocate reserved argument area.
- CCInfo.AllocateStack(reservedArgArea(), 1);
-}
-
-
-void MipsTargetLowering::MipsCC::
-analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
- bool IsVarArg, bool IsSoftFloat, const SDNode *CallNode,
- std::vector<ArgListEntry> &FuncArgs) {
- assert((CallConv != CallingConv::Fast || !IsVarArg) &&
- "CallingConv::Fast shouldn't be used for vararg functions.");
-
- unsigned NumOpnds = Args.size();
- llvm::CCAssignFn *FixedFn = fixedArgFn(), *VarFn = varArgFn();
-
- for (unsigned I = 0; I != NumOpnds; ++I) {
- MVT ArgVT = Args[I].VT;
- ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
- bool R;
-
- if (ArgFlags.isByVal()) {
- handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
- continue;
- }
-
- if (IsVarArg && !Args[I].IsFixed)
- R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
- else {
- MVT RegVT = getRegVT(ArgVT, FuncArgs[Args[I].OrigArgIndex].Ty, CallNode,
- IsSoftFloat);
- R = FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo);
- }
-
- if (R) {
-#ifndef NDEBUG
- dbgs() << "Call operand #" << I << " has unhandled type "
- << EVT(ArgVT).getEVTString();
-#endif
- llvm_unreachable(nullptr);
- }
- }
-}
-
-void MipsTargetLowering::MipsCC::
-analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
- bool IsSoftFloat, Function::const_arg_iterator FuncArg) {
- unsigned NumArgs = Args.size();
- llvm::CCAssignFn *FixedFn = fixedArgFn();
- unsigned CurArgIdx = 0;
-
- for (unsigned I = 0; I != NumArgs; ++I) {
- MVT ArgVT = Args[I].VT;
- ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
- std::advance(FuncArg, Args[I].OrigArgIndex - CurArgIdx);
- CurArgIdx = Args[I].OrigArgIndex;
-
- if (ArgFlags.isByVal()) {
- handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
- continue;
- }
-
- MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), nullptr, IsSoftFloat);
-
- if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo))
- continue;
-
-#ifndef NDEBUG
- dbgs() << "Formal Arg #" << I << " has unhandled type "
- << EVT(ArgVT).getEVTString();
-#endif
- llvm_unreachable(nullptr);
- }
-}
-
-template<typename Ty>
-void MipsTargetLowering::MipsCC::
-analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
- const SDNode *CallNode, const Type *RetTy) const {
- CCAssignFn *Fn;
-
- if (IsSoftFloat && originalTypeIsF128(RetTy, CallNode))
- Fn = RetCC_F128Soft;
- else
- Fn = RetCC_Mips;
-
- for (unsigned I = 0, E = RetVals.size(); I < E; ++I) {
- MVT VT = RetVals[I].VT;
- ISD::ArgFlagsTy Flags = RetVals[I].Flags;
- MVT RegVT = this->getRegVT(VT, RetTy, CallNode, IsSoftFloat);
-
- if (Fn(I, VT, RegVT, CCValAssign::Full, Flags, this->CCInfo)) {
-#ifndef NDEBUG
- dbgs() << "Call result #" << I << " has unhandled type "
- << EVT(VT).getEVTString() << '\n';
-#endif
- llvm_unreachable(nullptr);
- }
- }
-}
-
-void MipsTargetLowering::MipsCC::
-analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat,
- const SDNode *CallNode, const Type *RetTy) const {
- analyzeReturn(Ins, IsSoftFloat, CallNode, RetTy);
-}
-
-void MipsTargetLowering::MipsCC::
-analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
- const Type *RetTy) const {
- analyzeReturn(Outs, IsSoftFloat, nullptr, RetTy);
-}
-
-void MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
- MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags) {
- assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0.");
-
- struct ByValArgInfo ByVal;
- unsigned RegSize = regSize();
- unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize);
- unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize),
- RegSize * 2);
-
- if (useRegsForByval())
- allocateRegs(ByVal, ByValSize, Align);
-
- // Allocate space on caller's stack.
- ByVal.Address = CCInfo.AllocateStack(ByValSize - RegSize * ByVal.NumRegs,
- Align);
- CCInfo.addLoc(CCValAssign::getMem(ValNo, ValVT, ByVal.Address, LocVT,
- LocInfo));
- ByValArgs.push_back(ByVal);
-}
-
-unsigned MipsTargetLowering::MipsCC::numIntArgRegs() const {
- return IsO32 ? array_lengthof(O32IntRegs) : array_lengthof(Mips64IntRegs);
-}
-
-unsigned MipsTargetLowering::MipsCC::reservedArgArea() const {
- return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0;
-}
-
-const MCPhysReg *MipsTargetLowering::MipsCC::intArgRegs() const {
- return IsO32 ? O32IntRegs : Mips64IntRegs;
-}
-
-llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const {
- if (CallConv == CallingConv::Fast)
- return CC_Mips_FastCC;
-
- if (SpecialCallingConv == Mips16RetHelperConv)
- return CC_Mips16RetHelper;
- return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN;
-}
-
-llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
- return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN_VarArg;
-}
-
-const MCPhysReg *MipsTargetLowering::MipsCC::shadowRegs() const {
- return IsO32 ? O32IntRegs : Mips64DPRegs;
-}
-
-void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
- unsigned ByValSize,
- unsigned Align) {
- unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs();
- const MCPhysReg *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
- assert(!(ByValSize % RegSize) && !(Align % RegSize) &&
- "Byval argument's size and alignment should be a multiple of"
- "RegSize.");
-
- ByVal.FirstIdx = CCInfo.getFirstUnallocated(IntArgRegs, NumIntArgRegs);
-
- // If Align > RegSize, the first arg register must be even.
- if ((Align > RegSize) && (ByVal.FirstIdx % 2)) {
- CCInfo.AllocateReg(IntArgRegs[ByVal.FirstIdx], ShadowRegs[ByVal.FirstIdx]);
- ++ByVal.FirstIdx;
- }
-
- // Mark the registers allocated.
- for (unsigned I = ByVal.FirstIdx; ByValSize && (I < NumIntArgRegs);
- ByValSize -= RegSize, ++I, ++ByVal.NumRegs)
- CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]);
-}
-
-MVT MipsTargetLowering::MipsCC::getRegVT(MVT VT, const Type *OrigTy,
- const SDNode *CallNode,
- bool IsSoftFloat) const {
- if (IsSoftFloat || IsO32)
- return VT;
-
- // Check if the original type was fp128.
- if (originalTypeIsF128(OrigTy, CallNode)) {
- assert(VT == MVT::i64);
- return MVT::f64;
- }
-
- return VT;
-}
-
-void MipsTargetLowering::
-copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
- SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
- SmallVectorImpl<SDValue> &InVals, const Argument *FuncArg,
- const MipsCC &CC, const ByValArgInfo &ByVal) const {
+void MipsTargetLowering::copyByValRegs(
+ SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains, SelectionDAG &DAG,
+ const ISD::ArgFlagsTy &Flags, SmallVectorImpl<SDValue> &InVals,
+ const Argument *FuncArg, unsigned FirstReg, unsigned LastReg,
+ const CCValAssign &VA, MipsCCState &State) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned RegAreaSize = ByVal.NumRegs * CC.regSize();
+ unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes();
+ unsigned NumRegs = LastReg - FirstReg;
+ unsigned RegAreaSize = NumRegs * GPRSizeInBytes;
unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize);
int FrameObjOffset;
+ const MipsABIInfo &ABI = Subtarget.getABI();
+ ArrayRef<MCPhysReg> ByValArgRegs = ABI.GetByValArgRegs();
if (RegAreaSize)
- FrameObjOffset = (int)CC.reservedArgArea() -
- (int)((CC.numIntArgRegs() - ByVal.FirstIdx) * CC.regSize());
+ FrameObjOffset =
+ (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) -
+ (int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes);
else
- FrameObjOffset = ByVal.Address;
+ FrameObjOffset = VA.getLocMemOffset();
// Create frame object.
EVT PtrTy = getPointerTy();
@@ -3552,17 +3511,17 @@ copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
InVals.push_back(FIN);
- if (!ByVal.NumRegs)
+ if (!NumRegs)
return;
// Copy arg registers.
- MVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
+ MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
- for (unsigned I = 0; I < ByVal.NumRegs; ++I) {
- unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I];
+ for (unsigned I = 0; I < NumRegs; ++I) {
+ unsigned ArgReg = ByValArgRegs[FirstReg + I];
unsigned VReg = addLiveIn(MF, ArgReg, RC);
- unsigned Offset = I * CC.regSize();
+ unsigned Offset = I * GPRSizeInBytes;
SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
DAG.getConstant(Offset, PtrTy));
SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy),
@@ -3573,34 +3532,34 @@ copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
}
// Copy byVal arg to registers and stack.
-void MipsTargetLowering::
-passByValArg(SDValue Chain, SDLoc DL,
- std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
- SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
- MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
- const MipsCC &CC, const ByValArgInfo &ByVal,
- const ISD::ArgFlagsTy &Flags, bool isLittle) const {
+void MipsTargetLowering::passByValArg(
+ SDValue Chain, SDLoc DL,
+ std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
+ SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg,
+ unsigned LastReg, const ISD::ArgFlagsTy &Flags, bool isLittle,
+ const CCValAssign &VA) const {
unsigned ByValSizeInBytes = Flags.getByValSize();
unsigned OffsetInBytes = 0; // From beginning of struct
- unsigned RegSizeInBytes = CC.regSize();
+ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes);
EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
+ unsigned NumRegs = LastReg - FirstReg;
- if (ByVal.NumRegs) {
- const MCPhysReg *ArgRegs = CC.intArgRegs();
- bool LeftoverBytes = (ByVal.NumRegs * RegSizeInBytes > ByValSizeInBytes);
+ if (NumRegs) {
+ const ArrayRef<MCPhysReg> ArgRegs = Subtarget.getABI().GetByValArgRegs();
+ bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes);
unsigned I = 0;
// Copy words to registers.
- for (; I < ByVal.NumRegs - LeftoverBytes;
- ++I, OffsetInBytes += RegSizeInBytes) {
+ for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) {
SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
DAG.getConstant(OffsetInBytes, PtrTy));
SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
MachinePointerInfo(), false, false, false,
Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
- unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
+ unsigned ArgReg = ArgRegs[FirstReg + I];
RegsToPass.push_back(std::make_pair(ArgReg, LoadVal));
}
@@ -3610,9 +3569,6 @@ passByValArg(SDValue Chain, SDLoc DL,
// Copy the remainder of the byval argument with sub-word loads and shifts.
if (LeftoverBytes) {
- assert((ByValSizeInBytes > OffsetInBytes) &&
- (ByValSizeInBytes < OffsetInBytes + RegSizeInBytes) &&
- "Size of the remainder should be smaller than RegSizeInBytes.");
SDValue Val;
for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0;
@@ -3627,7 +3583,8 @@ passByValArg(SDValue Chain, SDLoc DL,
DAG.getConstant(OffsetInBytes, PtrTy));
SDValue LoadVal = DAG.getExtLoad(
ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
- MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, Alignment);
+ MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, false,
+ Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
// Shift the loaded value.
@@ -3651,7 +3608,7 @@ passByValArg(SDValue Chain, SDLoc DL,
Alignment = std::min(Alignment, LoadSizeInBytes);
}
- unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
+ unsigned ArgReg = ArgRegs[FirstReg + I];
RegsToPass.push_back(std::make_pair(ArgReg, Val));
return;
}
@@ -3662,7 +3619,7 @@ passByValArg(SDValue Chain, SDLoc DL,
SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
DAG.getConstant(OffsetInBytes, PtrTy));
SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
- DAG.getIntPtrConstant(ByVal.Address));
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
MachinePointerInfo(), MachinePointerInfo());
@@ -3670,14 +3627,13 @@ passByValArg(SDValue Chain, SDLoc DL,
}
void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
- const MipsCC &CC, SDValue Chain,
- SDLoc DL, SelectionDAG &DAG) const {
- unsigned NumRegs = CC.numIntArgRegs();
- const MCPhysReg *ArgRegs = CC.intArgRegs();
- const CCState &CCInfo = CC.getCCInfo();
- unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs);
- unsigned RegSize = CC.regSize();
- MVT RegTy = MVT::getIntegerVT(RegSize * 8);
+ SDValue Chain, SDLoc DL,
+ SelectionDAG &DAG,
+ CCState &State) const {
+ const ArrayRef<MCPhysReg> ArgRegs = Subtarget.getABI().GetVarArgRegs();
+ unsigned Idx = State.getFirstUnallocated(ArgRegs.data(), ArgRegs.size());
+ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
+ MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -3686,28 +3642,81 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
// Offset of the first variable argument from stack pointer.
int VaArgOffset;
- if (NumRegs == Idx)
- VaArgOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), RegSize);
- else
- VaArgOffset = (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx));
+ if (ArgRegs.size() == Idx)
+ VaArgOffset =
+ RoundUpToAlignment(State.getNextStackOffset(), RegSizeInBytes);
+ else {
+ const MipsABIInfo &ABI = Subtarget.getABI();
+ VaArgOffset =
+ (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) -
+ (int)(RegSizeInBytes * (ArgRegs.size() - Idx));
+ }
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- int FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
+ int FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
MipsFI->setVarArgsFrameIndex(FI);
// Copy the integer registers that have not been used for argument passing
// to the argument register save area. For O32, the save area is allocated
// in the caller's stack frame, while for N32/64, it is allocated in the
// callee's stack frame.
- for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) {
+ for (unsigned I = Idx; I < ArgRegs.size();
+ ++I, VaArgOffset += RegSizeInBytes) {
unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
- FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
+ FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
MachinePointerInfo(), false, false, 0);
- cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue((Value*)nullptr);
+ cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(
+ (Value *)nullptr);
OutChains.push_back(Store);
}
}
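The writeVarArgRegs routine above places the first variable argument either just past the fixed arguments (when every argument register was consumed) or inside the callee-allocated register save area. A minimal standalone sketch of that offset rule, assuming an O32-like configuration; the name varArgAreaOffset is illustrative and not part of the patch:

#include <cstdio>

// Offset (relative to the incoming stack pointer) of the first variable
// argument, mirroring the rule used in writeVarArgRegs above.
int varArgAreaOffset(unsigned NumArgRegs, unsigned FirstUnallocated,
                     unsigned RegSizeInBytes, unsigned CalleeAllocdArgBytes,
                     unsigned NextStackOffset) {
  if (FirstUnallocated == NumArgRegs) {
    // All argument registers were used by fixed arguments: varargs start
    // at the next stack slot, rounded up to the register size.
    unsigned Rounded = (NextStackOffset + RegSizeInBytes - 1) /
                       RegSizeInBytes * RegSizeInBytes;
    return (int)Rounded;
  }
  // Otherwise the unused argument registers are spilled into the reserved
  // argument area, so the first vararg sits below its end.
  return (int)CalleeAllocdArgBytes -
         (int)(RegSizeInBytes * (NumArgRegs - FirstUnallocated));
}

int main() {
  // O32: four argument registers, 4-byte GPRs, 16-byte reserved area.
  // One fixed int argument leaves $a1-$a3 free, so varargs start at 16-12=4.
  std::printf("%d\n", varArgAreaOffset(4, 1, 4, 16, 16));
  return 0;
}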
+
+void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+ unsigned Align) const {
+ MachineFunction &MF = State->getMachineFunction();
+ const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
+
+ assert(Size && "Byval argument's size shouldn't be 0.");
+
+ Align = std::min(Align, TFL->getStackAlignment());
+
+ unsigned FirstReg = 0;
+ unsigned NumRegs = 0;
+
+ if (State->getCallingConv() != CallingConv::Fast) {
+ unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
+ const ArrayRef<MCPhysReg> IntArgRegs = Subtarget.getABI().GetByValArgRegs();
+ // FIXME: The O32 case actually describes no shadow registers.
+ const MCPhysReg *ShadowRegs =
+ Subtarget.isABI_O32() ? IntArgRegs.data() : Mips64DPRegs;
+
+ // We used to check the size as well but we can't do that anymore since
+ // CCState::HandleByVal() rounds up the size after calling this function.
+ assert(!(Align % RegSizeInBytes) &&
+ "Byval argument's alignment should be a multiple of"
+ "RegSizeInBytes.");
+
+ FirstReg = State->getFirstUnallocated(IntArgRegs.data(), IntArgRegs.size());
+
+ // If Align > RegSizeInBytes, the first arg register must be even.
+ // FIXME: This condition happens to do the right thing but it's not the
+ // right way to test it. We want to check that the stack frame offset
+ // of the register is aligned.
+ if ((Align > RegSizeInBytes) && (FirstReg % 2)) {
+ State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]);
+ ++FirstReg;
+ }
+
+ // Mark the registers allocated.
+ Size = RoundUpToAlignment(Size, RegSizeInBytes);
+ for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size());
+ Size -= RegSizeInBytes, ++I, ++NumRegs)
+ State->AllocateReg(IntArgRegs[I], ShadowRegs[I]);
+ }
+
+ State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs);
+}
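The HandleByVal override above re-expresses, in terms of CCState, the register-allocation rule that the removed MipsCC::allocateRegs implemented: round the byval size up to a whole number of GPRs, skip to an even register when the argument is more strictly aligned than a GPR, and consume registers until the argument or the register file is exhausted. A minimal standalone sketch of that rule, assuming an O32-like configuration; the names allocateByValRegs and ByValAlloc are hypothetical and used only for illustration:

#include <cassert>
#include <cstdio>

struct ByValAlloc {
  unsigned FirstReg; // Index of the first argument register used.
  unsigned NumRegs;  // Number of argument registers consumed.
};

ByValAlloc allocateByValRegs(unsigned SizeInBytes, unsigned AlignInBytes,
                             unsigned RegSizeInBytes, unsigned NumArgRegs,
                             unsigned FirstUnallocated) {
  assert(SizeInBytes && "Byval argument's size shouldn't be 0.");
  assert(AlignInBytes % RegSizeInBytes == 0 &&
         "Alignment should be a multiple of the register size.");

  ByValAlloc A{FirstUnallocated, 0};

  // If the argument is more strictly aligned than a register, the first
  // register used must be even so that its stack-frame slot is aligned.
  // (In the real code the skipped register is still marked allocated.)
  if (AlignInBytes > RegSizeInBytes && (A.FirstReg % 2))
    ++A.FirstReg;

  // Round the size up to a whole number of registers, then consume
  // registers until either the argument or the register file runs out.
  unsigned RoundedSize =
      (SizeInBytes + RegSizeInBytes - 1) / RegSizeInBytes * RegSizeInBytes;
  for (unsigned I = A.FirstReg; RoundedSize > 0 && I < NumArgRegs;
       RoundedSize -= RegSizeInBytes, ++I)
    ++A.NumRegs;

  return A;
}

int main() {
  // 12-byte struct, 8-byte aligned, O32 (4-byte GPRs, $a0-$a3), with $a1 the
  // first free register: the pair ($a2, $a3) is used and 4 bytes go on the
  // stack.
  ByValAlloc A = allocateByValRegs(12, 8, 4, 4, 1);
  std::printf("FirstReg=%u NumRegs=%u\n", A.FirstReg, A.NumRegs);
  return 0;
}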
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4701bc4..60e53da 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MipsISELLOWERING_H
-#define MipsISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSISELLOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPSISELLOWERING_H
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h"
@@ -210,13 +210,16 @@ namespace llvm {
//===--------------------------------------------------------------------===//
class MipsFunctionInfo;
class MipsSubtarget;
+ class MipsCCState;
class MipsTargetLowering : public TargetLowering {
bool isMicroMips;
public:
- explicit MipsTargetLowering(MipsTargetMachine &TM);
+ explicit MipsTargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI);
- static const MipsTargetLowering *create(MipsTargetMachine &TM);
+ static const MipsTargetLowering *create(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI);
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
@@ -257,6 +260,8 @@ namespace llvm {
}
};
+ void HandleByVal(CCState *, unsigned &, unsigned) const override;
+
protected:
SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
@@ -327,6 +332,21 @@ namespace llvm {
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
}
+ // This method creates the following nodes, which are necessary for
+ // computing a symbol's address using gp-relative addressing:
+ //
+ // (add $gp, %gp_rel(sym))
+ template<class NodeTy>
+ SDValue getAddrGPRel(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ assert(Ty == MVT::i32);
+ SDValue GPRel = getTargetNode(N, Ty, DAG, MipsII::MO_GPREL);
+ return DAG.getNode(ISD::ADD, DL, Ty,
+ DAG.getRegister(Mips::GP, Ty),
+ DAG.getNode(MipsISD::GPRel, DL, DAG.getVTList(Ty),
+ GPRel));
+ }
+
/// This function fills Ops, which is the list of operands that will later
/// be used when a function call node is created. It also generates
/// copyToReg nodes to set up argument registers.
@@ -334,109 +354,15 @@ namespace llvm {
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
-
- /// ByValArgInfo - Byval argument information.
- struct ByValArgInfo {
- unsigned FirstIdx; // Index of the first register used.
- unsigned NumRegs; // Number of registers used for this argument.
- unsigned Address; // Offset of the stack area used to pass this argument.
-
- ByValArgInfo() : FirstIdx(0), NumRegs(0), Address(0) {}
- };
-
- /// MipsCC - This class provides methods used to analyze formal and call
- /// arguments and inquire about calling convention information.
- class MipsCC {
- public:
- enum SpecialCallingConvType {
- Mips16RetHelperConv, NoSpecialCallingConv
- };
-
- MipsCC(CallingConv::ID CallConv, bool IsO32, bool IsFP64, CCState &Info,
- SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv);
-
-
- void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
- bool IsVarArg, bool IsSoftFloat,
- const SDNode *CallNode,
- std::vector<ArgListEntry> &FuncArgs);
- void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
- bool IsSoftFloat,
- Function::const_arg_iterator FuncArg);
-
- void analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
- bool IsSoftFloat, const SDNode *CallNode,
- const Type *RetTy) const;
-
- void analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
- bool IsSoftFloat, const Type *RetTy) const;
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const;
- const CCState &getCCInfo() const { return CCInfo; }
-
- /// hasByValArg - Returns true if function has byval arguments.
- bool hasByValArg() const { return !ByValArgs.empty(); }
-
- /// regSize - Size (in number of bits) of integer registers.
- unsigned regSize() const { return IsO32 ? 4 : 8; }
-
- /// numIntArgRegs - Number of integer registers available for calls.
- unsigned numIntArgRegs() const;
-
- /// reservedArgArea - The size of the area the caller reserves for
- /// register arguments. This is 16-byte if ABI is O32.
- unsigned reservedArgArea() const;
-
- /// Return pointer to array of integer argument registers.
- const MCPhysReg *intArgRegs() const;
-
- typedef SmallVectorImpl<ByValArgInfo>::const_iterator byval_iterator;
- byval_iterator byval_begin() const { return ByValArgs.begin(); }
- byval_iterator byval_end() const { return ByValArgs.end(); }
-
- private:
- void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags);
-
- /// useRegsForByval - Returns true if the calling convention allows the
- /// use of registers to pass byval arguments.
- bool useRegsForByval() const { return CallConv != CallingConv::Fast; }
-
- /// Return the function that analyzes fixed argument list functions.
- llvm::CCAssignFn *fixedArgFn() const;
-
- /// Return the function that analyzes variable argument list functions.
- llvm::CCAssignFn *varArgFn() const;
-
- const MCPhysReg *shadowRegs() const;
-
- void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
- unsigned Align);
-
- /// Return the type of the register which is used to pass an argument or
- /// return a value. This function returns f64 if the argument is an i64
- /// value which has been generated as a result of softening an f128 value.
- /// Otherwise, it just returns VT.
- MVT getRegVT(MVT VT, const Type *OrigTy, const SDNode *CallNode,
- bool IsSoftFloat) const;
-
- template<typename Ty>
- void analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
- const SDNode *CallNode, const Type *RetTy) const;
-
- CCState &CCInfo;
- CallingConv::ID CallConv;
- bool IsO32, IsFP64;
- SpecialCallingConvType SpecialCallingConv;
- SmallVector<ByValArgInfo, 2> ByValArgs;
- };
protected:
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
// Subtarget Info
- const MipsSubtarget *Subtarget;
+ const MipsSubtarget &Subtarget;
private:
// Create a TargetGlobalAddress node.
@@ -459,14 +385,12 @@ namespace llvm {
SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
- MipsCC::SpecialCallingConvType getSpecialCallingConv(SDValue Callee) const;
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals,
- const SDNode *CallNode, const Type *RetTy) const;
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ TargetLowering::CallLoweringInfo &CLI) const;
// Lower Operand specifics
SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
@@ -480,6 +404,7 @@ namespace llvm {
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
@@ -495,33 +420,34 @@ namespace llvm {
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
virtual bool
- isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ isEligibleForTailCallOptimization(const CCState &CCInfo,
unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const = 0;
+ const MipsFunctionInfo &FI) const = 0;
/// copyByValArg - Copy argument registers which were used to pass a byval
/// argument to the stack. Create a stack frame object for the byval
/// argument.
- void copyByValRegs(SDValue Chain, SDLoc DL,
- std::vector<SDValue> &OutChains, SelectionDAG &DAG,
- const ISD::ArgFlagsTy &Flags,
+ void copyByValRegs(SDValue Chain, SDLoc DL, std::vector<SDValue> &OutChains,
+ SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
SmallVectorImpl<SDValue> &InVals,
- const Argument *FuncArg,
- const MipsCC &CC, const ByValArgInfo &ByVal) const;
+ const Argument *FuncArg, unsigned FirstReg,
+ unsigned LastReg, const CCValAssign &VA,
+ MipsCCState &State) const;
/// passByValArg - Pass a byval argument in registers or on stack.
void passByValArg(SDValue Chain, SDLoc DL,
- std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
- const MipsCC &CC, const ByValArgInfo &ByVal,
- const ISD::ArgFlagsTy &Flags, bool isLittle) const;
+ unsigned FirstReg, unsigned LastReg,
+ const ISD::ArgFlagsTy &Flags, bool isLittle,
+ const CCValAssign &VA) const;
/// writeVarArgRegs - Write variable function arguments passed in registers
/// to the stack. Also create a stack frame object for the first variable
/// argument.
- void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC,
- SDValue Chain, SDLoc DL, SelectionDAG &DAG) const;
+ void writeVarArgRegs(std::vector<SDValue> &OutChains, SDValue Chain,
+ SDLoc DL, SelectionDAG &DAG, CCState &State) const;
SDValue
LowerFormalArguments(SDValue Chain,
@@ -560,7 +486,7 @@ namespace llvm {
/// This function parses registers that appear in inline-asm constraints.
/// It returns pair (0, 0) on failure.
std::pair<unsigned, const TargetRegisterClass *>
- parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const;
+ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
@@ -611,8 +537,12 @@ namespace llvm {
};
/// Create MipsTargetLowering objects.
- const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
- const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
+ const MipsTargetLowering *
+ createMips16TargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI);
+ const MipsTargetLowering *
+ createMipsSETargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI);
namespace Mips {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -620,4 +550,4 @@ namespace llvm {
}
}
-#endif // MipsISELLOWERING_H
+#endif
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 2260d53..2aa8328 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -211,14 +211,14 @@ class SWXC1_FT<string opstr, RegisterOperand DRC,
}
class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin,
- SDPatternOperator Op = null_frag> :
+ SDPatternOperator Op = null_frag, bit DelaySlot = 1> :
InstSE<(outs), (ins FCCRegsOpnd:$fcc, opnd:$offset),
!strconcat(opstr, "\t$fcc, $offset"),
[(MipsFPBrcond Op, FCCRegsOpnd:$fcc, bb:$offset)], Itin,
FrmFI, opstr> {
let isBranch = 1;
let isTerminator = 1;
- let hasDelaySlot = 1;
+ let hasDelaySlot = DelaySlot;
let Defs = [AT];
}
@@ -362,11 +362,15 @@ def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1,
bitconvert>, MFC1_FM<0>;
def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
bitconvert>, MFC1_FM<4>;
-def MFHC1 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>,
- MFC1_FM<3>, ISA_MIPS32R2;
-def MTHC1_D32 : MMRel, MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>,
+def MFHC1_D32 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>,
+ MFC1_FM<3>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+def MFHC1_D64 : MFC1_FT<"mfhc1", GPR32Opnd, FGR64Opnd, II_MFHC1>,
+ MFC1_FM<3>, ISA_MIPS32R2, AdditionalRequires<[IsFP64bit]> {
+ let DecoderNamespace = "Mips64";
+}
+def MTHC1_D32 : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
-def MTHC1_D64 : MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
+def MTHC1_D64 : MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>,
MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[IsFP64bit]> {
let DecoderNamespace = "Mips64";
}
@@ -400,30 +404,6 @@ def LDC1 : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM<0x35>,
def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>,
ISA_MIPS2, FGR_32;
-// Cop2 Memory Instructions
-// FIXME: These aren't really FPU instructions and as such don't belong in this
-// file
-def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
- ISA_MIPS1_NOT_32R6_64R6;
-def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
- ISA_MIPS1_NOT_32R6_64R6;
-def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
- ISA_MIPS2_NOT_32R6_64R6;
-def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
- ISA_MIPS2_NOT_32R6_64R6;
-
-// Cop3 Memory Instructions
-// FIXME: These aren't really FPU instructions and as such don't belong in this
-// file
-let DecoderNamespace = "COP3_" in {
- def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
- def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
- def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
- ISA_MIPS2;
- def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
- ISA_MIPS2;
-}
-
// Indexed loads and stores.
// Base register + offset register addressing mode (indicated by "x" in the
// instruction mnemonic) is disallowed under NaCl.
@@ -526,8 +506,12 @@ def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>,
BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6;
+def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, IIBranch, MIPS_BRANCH_F, 0>,
+ BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6;
def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>,
BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6;
+def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, IIBranch, MIPS_BRANCH_T, 0>,
+ BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -593,8 +577,12 @@ def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>,
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"bc1tl $offset", (BC1TL FCC0, brtarget:$offset)>,
+ ISA_MIPS2_NOT_32R6_64R6;
def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>,
+ ISA_MIPS2_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 6a01ae5..5c91fbc 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -440,7 +440,7 @@ class EXT_FM<bits<6> funct> : StdArch {
let Inst{5-0} = funct;
}
-class RDHWR_FM {
+class RDHWR_FM : StdArch {
bits<5> rt;
bits<5> rd;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index d6da6c6..dcc0e24 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -30,15 +30,15 @@ using namespace llvm;
// Pin the vtable to this file.
void MipsInstrInfo::anchor() {}
-MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr)
- : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
- TM(tm), UncondBrOpc(UncondBr) {}
+MipsInstrInfo::MipsInstrInfo(const MipsSubtarget &STI, unsigned UncondBr)
+ : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+ Subtarget(STI), UncondBrOpc(UncondBr) {}
-const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) {
- if (TM.getSubtargetImpl()->inMips16Mode())
- return llvm::createMips16InstrInfo(TM);
+const MipsInstrInfo *MipsInstrInfo::create(MipsSubtarget &STI) {
+ if (STI.inMips16Mode())
+ return llvm::createMips16InstrInfo(STI);
- return llvm::createMipsSEInstrInfo(TM);
+ return llvm::createMipsSEInstrInfo(STI);
}
bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const {
@@ -94,10 +94,10 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return (BT == BT_None) || (BT == BT_Indirect);
}
-void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB, DebugLoc DL,
- const SmallVectorImpl<MachineOperand>& Cond)
- const {
+void
+MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ DebugLoc DL,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
unsigned Opc = Cond[0].getImm();
const MCInstrDesc &MCID = get(Opc);
MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
@@ -113,11 +113,9 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
MIB.addMBB(TBB);
}
-unsigned MipsInstrInfo::
-InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
+unsigned MipsInstrInfo::InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -145,9 +143,7 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return 1;
}
-unsigned MipsInstrInfo::
-RemoveBranch(MachineBasicBlock &MBB) const
-{
+unsigned MipsInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
MachineBasicBlock::reverse_iterator FirstBr;
unsigned removed;
@@ -160,7 +156,7 @@ RemoveBranch(MachineBasicBlock &MBB) const
// Up to 2 branches are removed.
// Note that indirect branches are not removed.
- for(removed = 0; I != REnd && removed < 2; ++I, ++removed)
+ for (removed = 0; I != REnd && removed < 2; ++I, ++removed)
if (!getAnalyzableBrOpc(I->getOpcode()))
break;
@@ -171,20 +167,18 @@ RemoveBranch(MachineBasicBlock &MBB) const
/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
-bool MipsInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
-{
+bool MipsInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
assert( (Cond.size() && Cond.size() <= 3) &&
"Invalid Mips branch condition!");
Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm()));
return false;
}
-MipsInstrInfo::BranchType MipsInstrInfo::
-AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify,
- SmallVectorImpl<MachineInstr*> &BranchInstrs) const {
+MipsInstrInfo::BranchType MipsInstrInfo::AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify,
+ SmallVectorImpl<MachineInstr *> &BranchInstrs) const {
MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 742193f..db149d4 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -15,8 +15,8 @@
// size in bytes; MipsLongBranch only expects it to be the correct upper bound.
//===----------------------------------------------------------------------===//
-#ifndef MIPSINSTRUCTIONINFO_H
-#define MIPSINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSINSTRINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSINSTRINFO_H
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
@@ -33,7 +33,7 @@ namespace llvm {
class MipsInstrInfo : public MipsGenInstrInfo {
virtual void anchor();
protected:
- MipsTargetMachine &TM;
+ const MipsSubtarget &Subtarget;
unsigned UncondBrOpc;
public:
@@ -46,9 +46,9 @@ public:
BT_Indirect // One indirect branch.
};
- explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc);
+ explicit MipsInstrInfo(const MipsSubtarget &STI, unsigned UncondBrOpc);
- static const MipsInstrInfo *create(MipsTargetMachine &TM);
+ static const MipsInstrInfo *create(MipsSubtarget &STI);
/// Branch Analysis
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -140,8 +140,8 @@ private:
};
/// Create MipsInstrInfo objects.
-const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM);
-const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM);
+const MipsInstrInfo *createMips16InstrInfo(const MipsSubtarget &STI);
+const MipsInstrInfo *createMipsSEInstrInfo(const MipsSubtarget &STI);
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 8e9472c..aebac34 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -331,7 +331,7 @@ include "MipsInstrFormats.td"
def MipsJumpTargetAsmOperand : AsmOperandClass {
let Name = "JumpTarget";
- let ParserMethod = "ParseJumpTarget";
+ let ParserMethod = "parseJumpTarget";
let PredicateMethod = "isImm";
let RenderMethod = "addImmOperands";
}
@@ -672,28 +672,62 @@ class StoreLeftRight<string opstr, SDNode OpNode, RegisterOperand RO,
let DecoderMethod = "DecodeMem";
}
+// COP2 Load/Store
+class LW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem2";
+ let mayLoad = 1;
+}
+
+class SW_FT2<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem2";
+ let mayStore = 1;
+}
+
+// COP3 Load/Store
+class LW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem3";
+ let mayLoad = 1;
+}
+
+class SW_FT3<string opstr, RegisterOperand RC, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> {
+ let DecoderMethod = "DecodeFMem3";
+ let mayStore = 1;
+}
+
// Conditional Branch
class CBranch<string opstr, DAGOperand opnd, PatFrag cond_op,
- RegisterOperand RO> :
+ RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset),
!strconcat(opstr, "\t$rs, $rt, $offset"),
[(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], IIBranch,
FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
- let hasDelaySlot = 1;
+ let hasDelaySlot = DelaySlot;
let Defs = [AT];
}
class CBranchZero<string opstr, DAGOperand opnd, PatFrag cond_op,
- RegisterOperand RO> :
+ RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
!strconcat(opstr, "\t$rs, $offset"),
[(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch,
FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
- let hasDelaySlot = 1;
+ let hasDelaySlot = DelaySlot;
let Defs = [AT];
}
@@ -765,9 +799,12 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in {
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
[], IIBranch, FrmR>;
- class BGEZAL_FT<string opstr, DAGOperand opnd, RegisterOperand RO> :
+ class BGEZAL_FT<string opstr, DAGOperand opnd,
+ RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>;
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr> {
+ let hasDelaySlot = DelaySlot;
+ }
}
@@ -933,7 +970,7 @@ class SubwordSwap<string opstr, RegisterOperand RO>:
// Read Hardware
class ReadHardware<RegisterOperand CPURegOperand, RegisterOperand RO> :
InstSE<(outs CPURegOperand:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [],
- II_RDHWR, FrmR>;
+ II_RDHWR, FrmR, "rdhwr">;
// Ext and Ins
class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
@@ -1059,18 +1096,20 @@ def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
+let AdditionalPredicates = [NotInMicroMips] in {
def ADDiu : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16,
- add>,
- ADDI_FM<0x9>, IsAsCheapAsAMove;
+ add>, ADDI_FM<0x9>, IsAsCheapAsAMove;
+}
def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>,
ISA_MIPS1_NOT_32R6_64R6;
def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>,
SLTI_FM<0xa>;
def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>,
SLTI_FM<0xb>;
+let AdditionalPredicates = [NotInMicroMips] in {
def ANDi : MMRel, ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI, immZExt16,
- and>,
- ADDI_FM<0xc>;
+ and>, ADDI_FM<0xc>;
+}
def ORi : MMRel, ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16,
or>,
ADDI_FM<0xd>;
@@ -1100,10 +1139,12 @@ def XOR : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>,
def NOR : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
+let AdditionalPredicates = [NotInMicroMips] in {
def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, shl,
immZExt5>, SRA_FM<0, 0>;
def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, srl,
immZExt5>, SRA_FM<2, 0>;
+}
def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, sra,
immZExt5>, SRA_FM<3, 0>;
def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, shl>,
@@ -1147,13 +1188,34 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>,
ISA_MIPS1_NOT_32R6_64R6;
}
+// COP2 Memory Instructions
+def LWC2 : LW_FT2<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def SWC2 : SW_FT2<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def LDC2 : LW_FT2<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def SDC2 : SW_FT2<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
+ ISA_MIPS2_NOT_32R6_64R6;
+
+// COP3 Memory Instructions
+let DecoderNamespace = "COP3_" in {
+ def LWC3 : LW_FT3<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
+ def SWC3 : SW_FT3<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
+ def LDC3 : LW_FT3<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
+ ISA_MIPS2;
+ def SDC3 : SW_FT3<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
+ ISA_MIPS2;
+}
+
def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
-def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>;
-def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>;
-def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>;
-def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>;
-def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>;
-def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>;
+
+def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2;
+def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2;
+def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2;
+def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2;
+def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2;
+def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2;
def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>,
ISA_MIPS2_NOT_32R6_64R6;
@@ -1171,7 +1233,7 @@ def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>,
def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>;
def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>;
def TRAP : TrapBase<BREAK>;
-def SDBBP : SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
+def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
@@ -1193,15 +1255,27 @@ def J : MMRel, JumpFJ<jmptarget, "j", br, bb, "j">, FJ<2>,
AdditionalRequires<[RelocStatic]>, IsBranch;
def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>;
def BEQ : MMRel, CBranch<"beq", brtarget, seteq, GPR32Opnd>, BEQ_FM<4>;
+def BEQL : MMRel, CBranch<"beql", brtarget, seteq, GPR32Opnd, 0>,
+ BEQ_FM<20>, ISA_MIPS2_NOT_32R6_64R6;
def BNE : MMRel, CBranch<"bne", brtarget, setne, GPR32Opnd>, BEQ_FM<5>;
+def BNEL : MMRel, CBranch<"bnel", brtarget, setne, GPR32Opnd, 0>,
+ BEQ_FM<21>, ISA_MIPS2_NOT_32R6_64R6;
def BGEZ : MMRel, CBranchZero<"bgez", brtarget, setge, GPR32Opnd>,
BGEZ_FM<1, 1>;
+def BGEZL : MMRel, CBranchZero<"bgezl", brtarget, setge, GPR32Opnd, 0>,
+ BGEZ_FM<1, 3>, ISA_MIPS2_NOT_32R6_64R6;
def BGTZ : MMRel, CBranchZero<"bgtz", brtarget, setgt, GPR32Opnd>,
BGEZ_FM<7, 0>;
+def BGTZL : MMRel, CBranchZero<"bgtzl", brtarget, setgt, GPR32Opnd, 0>,
+ BGEZ_FM<23, 0>, ISA_MIPS2_NOT_32R6_64R6;
def BLEZ : MMRel, CBranchZero<"blez", brtarget, setle, GPR32Opnd>,
BGEZ_FM<6, 0>;
+def BLEZL : MMRel, CBranchZero<"blezl", brtarget, setle, GPR32Opnd, 0>,
+ BGEZ_FM<22, 0>, ISA_MIPS2_NOT_32R6_64R6;
def BLTZ : MMRel, CBranchZero<"bltz", brtarget, setlt, GPR32Opnd>,
BGEZ_FM<1, 0>;
+def BLTZL : MMRel, CBranchZero<"bltzl", brtarget, setlt, GPR32Opnd, 0>,
+ BGEZ_FM<1, 2>, ISA_MIPS2_NOT_32R6_64R6;
def B : UncondBranch<BEQ>;
def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
@@ -1214,8 +1288,12 @@ let AdditionalPredicates = [NotInMicroMips] in {
def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>, ISA_MIPS32_NOT_32R6_64R6;
def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>,
ISA_MIPS1_NOT_32R6_64R6;
+def BGEZALL : MMRel, BGEZAL_FT<"bgezall", brtarget, GPR32Opnd, 0>,
+ BGEZAL_FM<0x13>, ISA_MIPS2_NOT_32R6_64R6;
def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>,
ISA_MIPS1_NOT_32R6_64R6;
+def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd, 0>,
+ BGEZAL_FM<0x12>, ISA_MIPS2_NOT_32R6_64R6;
def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
def TAILCALL : TailCall<J>;
def TAILCALL_R : TailCallReg<GPR32Opnd, JR>;
@@ -1350,7 +1428,7 @@ def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, II_DIV,
def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, II_DIVU,
0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
-def RDHWR : ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
+def RDHWR : MMRel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
@@ -1408,19 +1486,21 @@ def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32;
class TLB<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
- FrmOther>;
-def TLBP : TLB<"tlbp">, COP0_TLB_FM<0x08>;
-def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>;
-def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>;
-def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>;
+ FrmOther, asmstr>;
+def TLBP : MMRel, TLB<"tlbp">, COP0_TLB_FM<0x08>;
+def TLBR : MMRel, TLB<"tlbr">, COP0_TLB_FM<0x01>;
+def TLBWI : MMRel, TLB<"tlbwi">, COP0_TLB_FM<0x02>;
+def TLBWR : MMRel, TLB<"tlbwr">, COP0_TLB_FM<0x06>;
-class CacheOp<string instr_asm, Operand MemOpnd, RegisterOperand GPROpnd> :
+class CacheOp<string instr_asm, Operand MemOpnd> :
InstSE<(outs), (ins MemOpnd:$addr, uimm5:$hint),
- !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther>;
+ !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther> {
+ let DecoderMethod = "DecodeCacheOp";
+}
-def CACHE : CacheOp<"cache", mem, GPR32Opnd>, CACHEOP_FM<0b101111>,
+def CACHE : CacheOp<"cache", mem>, CACHEOP_FM<0b101111>,
INSN_MIPS3_32_NOT_32R6_64R6;
-def PREF : CacheOp<"pref", mem, GPR32Opnd>, CACHEOP_FM<0b110011>,
+def PREF : CacheOp<"pref", mem>, CACHEOP_FM<0b110011>,
INSN_MIPS3_32_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
@@ -1435,8 +1515,14 @@ def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>,
ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"addu $rs, $rt, $imm",
(ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+def : MipsInstAlias<"addu $rs, $imm",
+ (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>;
def : MipsInstAlias<"add $rs, $rt, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+ (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"add $rs, $imm",
+ (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"and $rs, $rt, $imm",
(ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
def : MipsInstAlias<"and $rs, $imm",
@@ -1480,25 +1566,30 @@ def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
-def : MipsInstAlias<"ei", (EI ZERO), 1>;
-def : MipsInstAlias<"di", (DI ZERO), 1>;
-
-def : MipsInstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : MipsInstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : MipsInstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0),
- 1>;
-def : MipsInstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : MipsInstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0),
- 1>;
-def : MipsInstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2;
+def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2;
+
+def : MipsInstAlias<"teq $rs, $rt",
+ (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tge $rs, $rt",
+ (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tgeu $rs, $rt",
+ (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tlt $rs, $rt",
+ (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tltu $rs, $rt",
+ (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+def : MipsInstAlias<"tne $rs, $rt",
+ (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+
def : MipsInstAlias<"sll $rd, $rt, $rs",
(SLLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"sub, $rd, $rs, $imm",
(ADDi GPR32Opnd:$rd, GPR32Opnd:$rs,
- InvertedImOperand:$imm), 0>;
+ InvertedImOperand:$imm), 0>, ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"sub $rs, $imm",
(ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, InvertedImOperand:$imm),
- 0>;
+ 0>, ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"subu, $rd, $rs, $imm",
(ADDiu GPR32Opnd:$rd, GPR32Opnd:$rs,
InvertedImOperand:$imm), 0>;
@@ -1563,6 +1654,12 @@ let AdditionalPredicates = [NotDSP] in {
(ADDiu GPR32:$src, imm:$imm)>;
}
+// Support multiplication for pre-Mips32 targets that don't have
+// the MUL instruction.
+def : MipsPat<(mul GPR32:$lhs, GPR32:$rhs),
+ (PseudoMFLO (PseudoMULT GPR32:$lhs, GPR32:$rhs))>,
+ ISA_MIPS1_NOT_32R6_64R6;
+
// SYNC
def : MipsPat<(MipsSync (i32 immz)),
(SYNC 0)>, ISA_MIPS2;
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
deleted file mode 100644
index 2072488..0000000
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-//===-- MipsJITInfo.cpp - Implement the Mips JIT Interface ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the Mips target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MipsJITInfo.h"
-#include "MipsInstrInfo.h"
-#include "MipsRelocations.h"
-#include "MipsSubtarget.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-
-void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- unsigned NewAddr = (intptr_t)New;
- unsigned OldAddr = (intptr_t)Old;
- const unsigned NopInstr = 0x0;
-
- // If the functions are in the same memory segment, insert PC-region branch.
- if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) {
- unsigned *OldInstruction = (unsigned *)Old;
- *OldInstruction = 0x08000000;
- unsigned JTargetAddr = NewAddr & 0x0FFFFFFC;
-
- JTargetAddr >>= 2;
- *OldInstruction |= JTargetAddr;
-
- // Insert a NOP.
- OldInstruction++;
- *OldInstruction = NopInstr;
-
- sys::Memory::InvalidateInstructionCache(Old, 2 * 4);
- } else {
- // We need to clear hint bits from the instruction, in case it is 'jr ra'.
- const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008;
- unsigned* CurrentInstr = (unsigned*)Old;
- unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask;
- unsigned* NextInstr = CurrentInstr + 1;
- unsigned NextInstrHintClear = (*NextInstr) & HintMask;
-
- // Do absolute jump if there are 2 or more instructions before return from
- // the old function.
- if ((CurrInstrHintClear != ReturnSequence) &&
- (NextInstrHintClear != ReturnSequence)) {
- const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000;
- const unsigned JrT0Instr = 0x01000008;
- // lui t0, high 16 bit of the NewAddr
- (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16);
- // addiu t0, t0, low 16 bit of the NewAddr
- (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff);
- // jr t0
- (*(CurrentInstr++)) = JrT0Instr;
- (*CurrentInstr) = NopInstr;
-
- sys::Memory::InvalidateInstructionCache(Old, 4 * 4);
- } else {
- // Unsupported case
- report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction");
- }
- }
-}
-
-/// JITCompilerFunction - This contains the address of the JIT function used to
-/// compile a function lazily.
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-// Get the ASMPREFIX for the current host. This is often '_'.
-#ifndef __USER_LABEL_PREFIX__
-#define __USER_LABEL_PREFIX__
-#endif
-#define GETASMPREFIX2(X) #X
-#define GETASMPREFIX(X) GETASMPREFIX2(X)
-#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
-
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring. This code saves registers, calls
-// MipsCompilationCallbackC and restores registers.
-extern "C" {
-#if defined (__mips__)
-void MipsCompilationCallback();
-
- asm(
- ".text\n"
- ".align 2\n"
- ".globl " ASMPREFIX "MipsCompilationCallback\n"
- ASMPREFIX "MipsCompilationCallback:\n"
- ".ent " ASMPREFIX "MipsCompilationCallback\n"
- ".frame $sp, 32, $ra\n"
- ".set noreorder\n"
- ".cpload $t9\n"
-
- "addiu $sp, $sp, -64\n"
- ".cprestore 16\n"
-
- // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain
- // stuff for the real target function right now. We have to act as if this
- // whole compilation callback doesn't exist as far as the caller is
- // concerned. We also need to save the ra register since it contains the
- // original return address, and t8 register since it contains the address
- // of the end of function stub.
- "sw $a0, 20($sp)\n"
- "sw $a1, 24($sp)\n"
- "sw $a2, 28($sp)\n"
- "sw $a3, 32($sp)\n"
- "sw $ra, 36($sp)\n"
- "sw $t8, 40($sp)\n"
- "sdc1 $f12, 48($sp)\n"
- "sdc1 $f14, 56($sp)\n"
-
- // t8 points at the end of function stub. Pass the beginning of the stub
- // to the MipsCompilationCallbackC.
- "addiu $a0, $t8, -16\n"
- "jal " ASMPREFIX "MipsCompilationCallbackC\n"
- "nop\n"
-
- // Restore registers.
- "lw $a0, 20($sp)\n"
- "lw $a1, 24($sp)\n"
- "lw $a2, 28($sp)\n"
- "lw $a3, 32($sp)\n"
- "lw $ra, 36($sp)\n"
- "lw $t8, 40($sp)\n"
- "ldc1 $f12, 48($sp)\n"
- "ldc1 $f14, 56($sp)\n"
- "addiu $sp, $sp, 64\n"
-
- // Jump to the (newly modified) stub to invoke the real function.
- "addiu $t8, $t8, -16\n"
- "jr $t8\n"
- "nop\n"
-
- ".set reorder\n"
- ".end " ASMPREFIX "MipsCompilationCallback\n"
- );
-#else // host != Mips
- void MipsCompilationCallback() {
- llvm_unreachable(
- "Cannot call MipsCompilationCallback() on a non-Mips arch!");
- }
-#endif
-}
-
-/// MipsCompilationCallbackC - This is the target-specific function invoked
-/// by the function stub when we did not know the real target of a call.
-/// This function must locate the start of the stub or call site and pass
-/// it into the JIT compiler function.
-extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) {
- // Get the address of the compiled code for this function.
- intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
-
- // Rewrite the function stub so that we don't end up here every time we
- // execute the call. We're replacing the first four instructions of the
- // stub with code that jumps to the compiled function:
- // lui $t9, %hi(NewVal)
- // addiu $t9, $t9, %lo(NewVal)
- // jr $t9
- // nop
-
- int Hi = ((unsigned)NewVal & 0xffff0000) >> 16;
- if ((NewVal & 0x8000) != 0)
- Hi++;
- int Lo = (int)(NewVal & 0xffff);
-
- *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi;
- *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo;
- *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8;
- *(intptr_t *)(StubAddr + 12) = 0;
-
- sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16);
-}
-
-TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction(
- JITCompilerFn F) {
- JITCompilerFunction = F;
- return MipsCompilationCallback;
-}
-
-TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() {
- // The stub contains 4 4-byte instructions, aligned at 4 bytes. See
- // emitFunctionStub for details.
- StubLayout Result = { 4*4, 4 };
- return Result;
-}
-
-void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) {
- JCE.emitAlignment(4);
- void *Addr = (void*) (JCE.getCurrentPCValue());
- if (!sys::Memory::setRangeWritable(Addr, 16))
- llvm_unreachable("ERROR: Unable to mark stub writable.");
-
- intptr_t EmittedAddr;
- if (Fn != (void*)(intptr_t)MipsCompilationCallback)
- EmittedAddr = (intptr_t)Fn;
- else
- EmittedAddr = (intptr_t)MipsCompilationCallback;
-
-
- int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16;
- if ((EmittedAddr & 0x8000) != 0)
- Hi++;
- int Lo = (int)(EmittedAddr & 0xffff);
-
- // lui $t9, %hi(EmittedAddr)
- // addiu $t9, $t9, %lo(EmittedAddr)
- // jalr $t8, $t9
- // nop
- if (IsLittleEndian) {
- JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi);
- JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
- JCE.emitWordLE(25 << 21 | 24 << 11 | 9);
- JCE.emitWordLE(0);
- } else {
- JCE.emitWordBE(0xf << 26 | 25 << 16 | Hi);
- JCE.emitWordBE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
- JCE.emitWordBE(25 << 21 | 24 << 11 | 9);
- JCE.emitWordBE(0);
- }
-
- sys::Memory::InvalidateInstructionCache(Addr, 16);
- if (!sys::Memory::setRangeExecutable(Addr, 16))
- llvm_unreachable("ERROR: Unable to mark stub executable.");
-
- return Addr;
-}
-
-/// relocate - Before the JIT can run a block of code that has been emitted,
-/// it must rewrite the code to contain the actual addresses of any
-/// referenced global symbols.
-void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
-
- void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
- intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
-
- switch ((Mips::RelocationType) MR->getRelocationType()) {
- case Mips::reloc_mips_pc16:
- ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff;
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- break;
-
- case Mips::reloc_mips_26:
- ResultPtr = (ResultPtr & 0x0fffffff) >> 2;
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- break;
-
- case Mips::reloc_mips_hi:
- ResultPtr = ResultPtr >> 16;
- if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) {
- ResultPtr += 1;
- }
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- break;
-
- case Mips::reloc_mips_lo: {
- // Addend is needed for unaligned load/store instructions, where offset
- // for the second load/store in the expanded instruction sequence must
- // be modified by +1 or +3. Otherwise, Addend is 0.
- int Addend = *((unsigned*) RelocPos) & 0xffff;
- ResultPtr = (ResultPtr + Addend) & 0xffff;
- *((unsigned*) RelocPos) &= 0xffff0000;
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- break;
- }
- }
- }
-}
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
deleted file mode 100644
index c9dfd83..0000000
--- a/lib/Target/Mips/MipsJITInfo.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//===- MipsJITInfo.h - Mips Implementation of the JIT Interface -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the MipsJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MIPSJITINFO_H
-#define MIPSJITINFO_H
-
-#include "MipsMachineFunction.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
-class MipsTargetMachine;
-
-class MipsJITInfo : public TargetJITInfo {
-
- bool IsPIC;
- bool IsLittleEndian;
-
- public:
- explicit MipsJITInfo() :
- IsPIC(false), IsLittleEndian(true) {}
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-
- // getStubLayout - Returns the size and alignment of the largest call stub
- // on Mips.
- StubLayout getStubLayout() override;
-
- /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
- /// small native function that simply calls the function at the specified
- /// address.
- void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) override;
-
- /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-
- /// relocate - Before the JIT can run a block of code that has been emitted,
- /// it must rewrite the code to contain the actual addresses of any
- /// referenced global symbols.
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase) override;
-
- /// Initialize - Initialize internal stage for the function being JITted.
- void Initialize(const MachineFunction &MF, bool isPIC,
- bool isLittleEndian) {
- IsPIC = isPIC;
- IsLittleEndian = isLittleEndian;
- }
-
-};
-}
-
-#endif
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index c6838a3..e44d6ee 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -16,6 +16,7 @@
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCNaCl.h"
+#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -64,8 +65,8 @@ namespace {
MipsLongBranch(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm),
IsPIC(TM.getRelocationModel() == Reloc::PIC_),
- ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
- LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 :
+ ABI(TM.getSubtarget<MipsSubtarget>().getABI()),
+ LongBranchSeqSize(!IsPIC ? 2 : (ABI.IsN64() ? 10 :
(!TM.getSubtarget<MipsSubtarget>().isTargetNaCl() ? 9 : 10))) {}
const char *getPassName() const override {
@@ -86,7 +87,7 @@ namespace {
MachineFunction *MF;
SmallVector<MBBInfo, 16> MBBInfos;
bool IsPIC;
- unsigned ABI;
+ MipsABIInfo ABI;
unsigned LongBranchSeqSize;
};
@@ -170,7 +171,7 @@ void MipsLongBranch::initMBBInfo() {
MBBInfos.resize(MF->size());
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
MachineBasicBlock *MBB = MF->getBlockNumbered(I);
@@ -217,7 +218,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) {
void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
DebugLoc DL, MachineBasicBlock *MBBOpnd) {
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
unsigned NewOpc = TII->getOppositeBranchOpc(Br->getOpcode());
const MCInstrDesc &NewDesc = TII->get(NewOpc);
@@ -254,7 +255,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
MachineBasicBlock *LongBrMBB = MF->CreateMachineBasicBlock(BB);
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
MF->insert(FallThroughMBB, LongBrMBB);
MBB->removeSuccessor(TgtMBB);
@@ -273,7 +274,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
unsigned BalOp = Subtarget.hasMips32r6() ? Mips::BAL : Mips::BAL_BR;
- if (ABI != MipsSubtarget::N64) {
+ if (!ABI.IsN64()) {
// $longbr:
// addiu $sp, $sp, -8
// sw $ra, 0($sp)
@@ -447,9 +448,10 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
const MipsInstrInfo *TII =
- static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ static_cast<const MipsInstrInfo *>(TM.getSubtargetImpl()->getInstrInfo());
- if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ if (STI.inMips16Mode() || !STI.enableLongBranchPass())
return false;
if ((TM.getRelocationModel() == Reloc::PIC_) &&
TM.getSubtarget<MipsSubtarget>().isABI_O32() &&
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 269190f..1ce27e4 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMCINSTLOWER_H
-#define MIPSMCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSMCINSTLOWER_H
+#define LLVM_LIB_TARGET_MIPS_MIPSMCINSTLOWER_H
#include "MCTargetDesc/MipsMCExpr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineOperand.h"
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 285bb14..68230e6 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -69,7 +69,7 @@ def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT",
// as the encoded value should be subtracted by one.
def uimm2LSAAsmOperand : AsmOperandClass {
let Name = "LSAImm";
- let ParserMethod = "ParseLSAImm";
+ let ParserMethod = "parseLSAImm";
let RenderMethod = "addImmOperands";
}
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index e30302e..a89718a 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -24,7 +24,7 @@ FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
// class MipsCallEntry.
-MipsCallEntry::MipsCallEntry(const StringRef &N) {
+MipsCallEntry::MipsCallEntry(StringRef N) {
#ifndef NDEBUG
Name = N;
Val = nullptr;
@@ -119,7 +119,7 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
|| FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
}
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(const StringRef &Name) {
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
const MipsCallEntry *&E = ExternalCallEntries[Name];
if (!E)
@@ -137,4 +137,12 @@ MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
return MachinePointerInfo(E);
}
+int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
+ if (MoveF64ViaSpillFI == -1) {
+ MoveF64ViaSpillFI = MF.getFrameInfo()->CreateStackObject(
+ RC->getSize(), RC->getAlignment(), false);
+ }
+ return MoveF64ViaSpillFI;
+}
+
void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 8c16f82..217f307 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPS_MACHINE_FUNCTION_INFO_H
-#define MIPS_MACHINE_FUNCTION_INFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
+#define LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
#include "Mips16HardFloatInfo.h"
#include "llvm/ADT/StringMap.h"
@@ -34,7 +34,7 @@ namespace llvm {
/// resolved by lazy-binding.
class MipsCallEntry : public PseudoSourceValue {
public:
- explicit MipsCallEntry(const StringRef &N);
+ explicit MipsCallEntry(StringRef N);
explicit MipsCallEntry(const GlobalValue *V);
bool isConstant(const MachineFrameInfo *) const override;
bool isAliased(const MachineFrameInfo *) const override;
@@ -54,7 +54,8 @@ class MipsFunctionInfo : public MachineFunctionInfo {
public:
MipsFunctionInfo(MachineFunction &MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
- VarArgsFrameIndex(0), CallsEhReturn(false), SaveS2(false) {}
+ VarArgsFrameIndex(0), CallsEhReturn(false), SaveS2(false),
+ MoveF64ViaSpillFI(-1) {}
~MipsFunctionInfo();
@@ -87,7 +88,7 @@ public:
   /// \brief Create a MachinePointerInfo that has a MipsCallEntry object
/// representing a GOT entry for an external function.
- MachinePointerInfo callPtrInfo(const StringRef &Name);
+ MachinePointerInfo callPtrInfo(StringRef Name);
   /// \brief Create a MachinePointerInfo that has a MipsCallEntry object
/// representing a GOT entry for a global function.
@@ -96,6 +97,8 @@ public:
void setSaveS2() { SaveS2 = true; }
bool hasSaveS2() const { return SaveS2; }
+ int getMoveF64ViaSpillFI(const TargetRegisterClass *RC);
+
std::map<const char *, const llvm::Mips16HardFloatInfo::FuncSignature *>
StubsNeeded;
@@ -136,6 +139,10 @@ private:
// saveS2
bool SaveS2;
+ /// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the
+  /// O32 FPXX ABI is enabled. -1 is used to denote an invalid index.
+ int MoveF64ViaSpillFI;
+
/// MipsCallEntry maps.
StringMap<const MipsCallEntry *> ExternalCallEntries;
ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries;
@@ -143,4 +150,4 @@ private:
} // end of namespace llvm
-#endif // MIPS_MACHINE_FUNCTION_INFO_H
+#endif
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index 03c76ea..b011e8f 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -20,7 +20,7 @@ namespace llvm {
bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n");
- const_cast<MipsSubtarget&>(Subtarget).resetSubtarget(&MF);
+ TM.resetSubtarget(&MF);
return false;
}
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
index a96862a..85bae47 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSMODULEISELDAGTODAG_H
-#define MIPSMODULEISELDAGTODAG_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
+#define LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
#include "Mips.h"
#include "MipsSubtarget.h"
@@ -37,8 +37,7 @@ public:
static char ID;
explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
- : MachineFunctionPass(ID),
- TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+ : MachineFunctionPass(ID), TM(TM_) {}
// Pass Name
const char *getPassName() const override {
@@ -48,10 +47,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
protected:
- /// Keep a pointer to the MipsSubtarget around so that we can make the right
- /// decision when generating code for different targets.
- const TargetMachine &TM;
- const MipsSubtarget &Subtarget;
+ MipsTargetMachine &TM;
};
/// createMipsISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index c234049..22c524e 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -130,7 +130,7 @@ static MVT::SimpleValueType getRegTy(unsigned Reg, MachineFunction &MF) {
static void setCallTargetReg(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I) {
MachineFunction &MF = *MBB->getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned SrcReg = I->getOperand(0).getReg();
unsigned DstReg = getRegTy(SrcReg, MF) == MVT::i32 ? Mips::T9 : Mips::T9_64;
BuildMI(*MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg)
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
new file mode 100644
index 0000000..f82544a
--- /dev/null
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -0,0 +1,78 @@
+//===-- MipsOptionRecord.h - Abstraction for storing information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MipsOptionRecord - Abstraction for storing arbitrary information in
+// ELF files. Arbitrary information (e.g. register usage) can be stored in
+// Mips-specific ELF sections such as .Mips.options. Specific records should
+// subclass MipsOptionRecord and provide an implementation of
+// EmitMipsOptionRecord, which dumps the information into an ELF section. More
+// information about .Mips.options can be found in the SysV ABI and the 64-bit
+// ELF Object specification.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSOPTIONRECORD_H
+#define LLVM_LIB_TARGET_MIPS_MIPSOPTIONRECORD_H
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+class MipsELFStreamer;
+class MCSubtargetInfo;
+
+class MipsOptionRecord {
+public:
+ virtual ~MipsOptionRecord(){};
+ virtual void EmitMipsOptionRecord() = 0;
+};
+
+class MipsRegInfoRecord : public MipsOptionRecord {
+public:
+ MipsRegInfoRecord(MipsELFStreamer *S, MCContext &Context,
+ const MCSubtargetInfo &STI)
+ : Streamer(S), Context(Context), STI(STI) {
+ ri_gprmask = 0;
+ ri_cprmask[0] = ri_cprmask[1] = ri_cprmask[2] = ri_cprmask[3] = 0;
+ ri_gp_value = 0;
+
+ const MCRegisterInfo *TRI = Context.getRegisterInfo();
+ GPR32RegClass = &(TRI->getRegClass(Mips::GPR32RegClassID));
+ GPR64RegClass = &(TRI->getRegClass(Mips::GPR64RegClassID));
+ FGR32RegClass = &(TRI->getRegClass(Mips::FGR32RegClassID));
+ FGR64RegClass = &(TRI->getRegClass(Mips::FGR64RegClassID));
+ AFGR64RegClass = &(TRI->getRegClass(Mips::AFGR64RegClassID));
+ MSA128BRegClass = &(TRI->getRegClass(Mips::MSA128BRegClassID));
+ COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID));
+ COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID));
+ }
+ ~MipsRegInfoRecord() {}
+
+ void EmitMipsOptionRecord() override;
+ void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo);
+
+private:
+ MipsELFStreamer *Streamer;
+ MCContext &Context;
+ const MCSubtargetInfo &STI;
+ const MCRegisterClass *GPR32RegClass;
+ const MCRegisterClass *GPR64RegClass;
+ const MCRegisterClass *FGR32RegClass;
+ const MCRegisterClass *FGR64RegClass;
+ const MCRegisterClass *AFGR64RegClass;
+ const MCRegisterClass *MSA128BRegClass;
+ const MCRegisterClass *COP2RegClass;
+ const MCRegisterClass *COP3RegClass;
+ uint32_t ri_gprmask;
+ uint32_t ri_cprmask[4];
+ int64_t ri_gp_value;
+};
+} // namespace llvm
+#endif
diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h
index 55e5a81..77183ec 100644
--- a/lib/Target/Mips/MipsOs16.h
+++ b/lib/Target/Mips/MipsOs16.h
@@ -11,16 +11,14 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSOS16_H
+#define LLVM_LIB_TARGET_MIPS_MIPSOS16_H
+
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsTargetMachine.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
-
-
-#ifndef MIPSOS16_H
-#define MIPSOS16_H
-
using namespace llvm;
namespace llvm {
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 084449b..20ef3f3 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -62,7 +62,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case Mips::GPR32RegClassID:
case Mips::GPR64RegClassID:
case Mips::DSPRRegClassID: {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return 28 - TFI->hasFP(MF);
}
case Mips::FGR32RegClassID:
@@ -149,6 +149,12 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I)
Reserved.set(ReservedGPR64[I]);
+ // For mno-abicalls, GP is a program invariant!
+ if (!Subtarget.isABICalls()) {
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::GP_64);
+ }
+
if (Subtarget.isFP64bit()) {
// Reserve all registers in AFGR64.
for (RegIter Reg = Mips::AFGR64RegClass.begin(),
@@ -161,7 +167,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*Reg);
}
// Reserve FP if this function should have a dedicated frame pointer register.
- if (MF.getTarget().getFrameLowering()->hasFP(MF)) {
+ if (MF.getSubtarget().getFrameLowering()->hasFP(MF)) {
if (Subtarget.inMips16Mode())
Reserved.set(Mips::S0);
else {
@@ -250,7 +256,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
bool IsN64 = Subtarget.isABI_N64();
if (Subtarget.inMips16Mode())
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index b34496f..9ec4a38 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSREGISTERINFO_H
-#define MIPSREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSREGISTERINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSREGISTERINFO_H
#include "Mips.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 6323da3..42fe76b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -212,8 +212,13 @@ let Namespace = "Mips" in {
// PC register
def PC : Register<"pc">;
- // Hardware register $29
- foreach I = 0-31 in
+ // Hardware registers
+ def HWR0 : MipsReg<0, "hwr_cpunum">;
+ def HWR1 : MipsReg<1, "hwr_synci_step">;
+ def HWR2 : MipsReg<2, "hwr_cc">;
+ def HWR3 : MipsReg<3, "hwr_ccres">;
+
+ foreach I = 4-31 in
def HWR#I : MipsReg<#I, ""#I>;
// Accum registers
@@ -283,6 +288,12 @@ class GPR32Class<list<ValueType> regTypes> :
def GPR32 : GPR32Class<[i32]>;
def DSPR : GPR32Class<[v4i8, v2i16]>;
+def GPRMM16 : RegisterClass<"Mips", [i32], 32, (add
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Callee save
+ S0, S1)>;
+
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
// Reserved
ZERO_64, AT_64,
@@ -341,9 +352,12 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
// Used to reserve odd registers when given -mattr=+nooddspreg
+// FIXME: Remove double precision registers from this set.
def OddSP : RegisterClass<"Mips", [f32], 32,
(add (decimate (sequence "F%u", 1, 31), 2),
- (decimate (sequence "F_HI%u", 1, 31), 2))>,
+ (decimate (sequence "F_HI%u", 1, 31), 2),
+ (decimate (sequence "D%u", 1, 15), 2),
+ (decimate (sequence "D%u_64", 1, 31), 2))>,
Unallocatable;
// FP control registers.
@@ -414,7 +428,7 @@ def OCTEON_P : RegisterClass<"Mips", [i64], 64, (add P0, P1, P2)>,
// Register Operands.
class MipsAsmRegOperand : AsmOperandClass {
- let ParserMethod = "ParseAnyRegister";
+ let ParserMethod = "parseAnyRegister";
}
def GPR64AsmOperand : MipsAsmRegOperand {
@@ -427,6 +441,11 @@ def GPR32AsmOperand : MipsAsmRegOperand {
let PredicateMethod = "isGPRAsmReg";
}
+def GPRMM16AsmOperand : MipsAsmRegOperand {
+ let Name = "GPRMM16AsmReg";
+ let PredicateMethod = "isMM16AsmReg";
+}
+
def ACC64DSPAsmOperand : MipsAsmRegOperand {
let Name = "ACC64DSPAsmReg";
let PredicateMethod = "isACCAsmReg";
@@ -482,6 +501,10 @@ def GPR32Opnd : RegisterOperand<GPR32> {
let ParserMatchClass = GPR32AsmOperand;
}
+def GPRMM16Opnd : RegisterOperand<GPRMM16> {
+ let ParserMatchClass = GPRMM16AsmOperand;
+}
+
def GPR64Opnd : RegisterOperand<GPR64> {
let ParserMatchClass = GPR64AsmOperand;
}
@@ -575,4 +598,3 @@ def MSA128DOpnd : RegisterOperand<MSA128D> {
def MSA128CROpnd : RegisterOperand<MSACtrl> {
let ParserMatchClass = MSACtrlAsmOperand;
}
-
diff --git a/lib/Target/Mips/MipsRelocations.h b/lib/Target/Mips/MipsRelocations.h
deleted file mode 100644
index 0787ed3..0000000
--- a/lib/Target/Mips/MipsRelocations.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Mips target-specific relocation types
-// (for relocation-model=static).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MIPSRELOCATIONS_H_
-#define MIPSRELOCATIONS_H_
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace Mips{
- enum RelocationType {
- // reloc_mips_pc16 - pc relative relocation for branches. The lower 18
- // bits of the difference between the branch target and the branch
- // instruction, shifted right by 2.
- reloc_mips_pc16 = 1,
-
- // reloc_mips_hi - upper 16 bits of the address (modified by +1 if the
- // lower 16 bits of the address is negative).
- reloc_mips_hi = 2,
-
- // reloc_mips_lo - lower 16 bits of the address.
- reloc_mips_lo = 3,
-
- // reloc_mips_26 - lower 28 bits of the address, shifted right by 2.
- reloc_mips_26 = 4
- };
- }
-}
-
-#endif /* MIPSRELOCATIONS_H_ */
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 6573070..97d9edf 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -64,6 +64,10 @@ private:
bool expandCopy(MachineBasicBlock &MBB, Iter I);
bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc,
unsigned MFLoOpc);
+ bool expandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, bool FP64) const;
+ bool expandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, bool FP64) const;
MachineFunction &MF;
MachineRegisterInfo &MRI;
@@ -108,6 +112,22 @@ bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
case Mips::STORE_ACC128:
expandStoreACC(MBB, I, Mips::PseudoMFHI64, Mips::PseudoMFLO64, 8);
break;
+ case Mips::BuildPairF64:
+ if (expandBuildPairF64(MBB, I, false))
+ MBB.erase(I);
+ return false;
+ case Mips::BuildPairF64_64:
+ if (expandBuildPairF64(MBB, I, true))
+ MBB.erase(I);
+ return false;
+ case Mips::ExtractElementF64:
+ if (expandExtractElementF64(MBB, I, false))
+ MBB.erase(I);
+ return false;
+ case Mips::ExtractElementF64_64:
+ if (expandExtractElementF64(MBB, I, true))
+ MBB.erase(I);
+ return false;
case TargetOpcode::COPY:
if (!expandCopy(MBB, I))
return false;
@@ -127,9 +147,9 @@ void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) {
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const TargetRegisterClass *RC = RegInfo.intRegClass(4);
unsigned VR = MRI.createVirtualRegister(RC);
@@ -147,9 +167,9 @@ void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) {
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const TargetRegisterClass *RC = RegInfo.intRegClass(4);
unsigned VR = MRI.createVirtualRegister(RC);
@@ -170,9 +190,9 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I,
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
@@ -200,9 +220,9 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
@@ -235,9 +255,9 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
// copy dst_hi, $vr1
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2;
@@ -258,6 +278,123 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
return true;
}
+/// This method expands the same instruction that MipsSEInstrInfo::
+/// expandBuildPairF64 does, for the case where the ABI is FPXX and mthc1 is
+/// not available, and the case where the ABI is FP64A. It is implemented here
+/// because frame indexes are eliminated before MipsSEInstrInfo::
+/// expandBuildPairF64 is called.
+bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ bool FP64) const {
+ // For fpxx and when mthc1 is not available, use:
+ // spill + reload via ldc1
+ //
+ // The case where dmtc1 is available doesn't need to be handled here
+ // because it never creates a BuildPairF64 node.
+ //
+ // The FP64A ABI (fp64 with nooddspreg) must also use a spill/reload sequence
+  // for odd-numbered double precision values (because the lower 32 bits are
+  // transferred with mtc1, which is redirected to the upper half of the even
+  // register). Unfortunately, we have to make this decision before register
+  // allocation, so for now we use a spill/reload sequence for all
+  // double-precision values, regardless of whether they use an odd or even
+  // register.
+
+ const TargetMachine &TM = MF.getTarget();
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
+ (FP64 && !Subtarget.useOddSPReg())) {
+ const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ const MipsRegisterInfo &TRI = *static_cast<const MipsRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg();
+ unsigned HiReg = I->getOperand(2).getReg();
+
+ // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
+ // the cases where mthc1 is not available). 64-bit architectures and
+ // MIPS32r2 or later can use FGR64 though.
+ assert(Subtarget.isGP64bit() || Subtarget.hasMTHC1() ||
+ !Subtarget.isFP64bit());
+
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ const TargetRegisterClass *RC2 =
+ FP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
+
+ // We re-use the same spill slot each time so that the stack frame doesn't
+ // grow too much in functions with a large number of moves.
+ int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC2);
+ if (!Subtarget.isLittle())
+ std::swap(LoReg, HiReg);
+ TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, &TRI,
+ 0);
+ TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, &TRI,
+ 4);
+ TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, 0);
+ return true;
+ }
+
+ return false;
+}
+
+/// This method expands the same instruction that MipsSEInstrInfo::
+/// expandExtractElementF64 does, for the case where the ABI is FPXX and mfhc1
+/// is not available, and the case where the ABI is FP64A. It is implemented here
+/// because frame indexes are eliminated before MipsSEInstrInfo::
+/// expandExtractElementF64 is called.
+bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ bool FP64) const {
+ // For fpxx and when mfhc1 is not available, use:
+ // spill + reload via ldc1
+ //
+ // The case where dmfc1 is available doesn't need to be handled here
+ // because it never creates a ExtractElementF64 node.
+ //
+ // The FP64A ABI (fp64 with nooddspreg) must also use a spill/reload sequence
+  // for odd-numbered double precision values (because the lower 32 bits are
+  // transferred with mfc1, which is redirected to the upper half of the even
+  // register). Unfortunately, we have to make this decision before register
+  // allocation, so for now we use a spill/reload sequence for all
+  // double-precision values, regardless of whether they use an odd or even
+  // register.
+
+ const TargetMachine &TM = MF.getTarget();
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
+ (FP64 && !Subtarget.useOddSPReg())) {
+ const MipsSEInstrInfo &TII = *static_cast<const MipsSEInstrInfo *>(
+ TM.getSubtargetImpl()->getInstrInfo());
+ const MipsRegisterInfo &TRI = *static_cast<const MipsRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N));
+
+ // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
+ // the cases where mfhc1 is not available). 64-bit architectures and
+ // MIPS32r2 or later can use FGR64 though.
+ assert(Subtarget.isGP64bit() || Subtarget.hasMTHC1() ||
+ !Subtarget.isFP64bit());
+
+ const TargetRegisterClass *RC =
+ FP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
+ const TargetRegisterClass *RC2 = &Mips::GPR32RegClass;
+
+ // We re-use the same spill slot each time so that the stack frame doesn't
+ // grow too much in functions with a large number of moves.
+ int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC);
+ TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC, &TRI,
+ 0);
+ TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, Offset);
+ return true;
+ }
+
+ return false;
+}
+
MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI)
: MipsFrameLowering(STI, STI.stackAlignment()) {}
@@ -278,9 +415,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -343,6 +480,22 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
+ } else if (Mips::FGR64RegClass.contains(Reg)) {
+ unsigned Reg0 = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg1 = MRI->getDwarfRegNum(Reg, true) + 1;
+
+ if (!STI.isLittle())
+ std::swap(Reg0, Reg1);
+
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg0, Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
} else {
// Reg is either in GPR32 or FGR32.
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
@@ -397,9 +550,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
- const MipsRegisterInfo &RegInfo =
- *static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const MipsRegisterInfo &RegInfo = *static_cast<const MipsRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
DebugLoc dl = MBBI->getDebugLoc();
unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
@@ -452,7 +605,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
MachineBasicBlock *EntryBlock = MF->begin();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
@@ -493,7 +646,7 @@ void MipsSEFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const MipsSEInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!hasReservedCallFrame(MF)) {
int64_t Amount = I->getOperand(0).getImm();
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index e832848..0eca1df 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSE_FRAMEINFO_H
-#define MIPSSE_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
#include "MipsFrameLowering.h"
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 6f35947..f759905 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -37,6 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
if (Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
@@ -129,7 +130,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC;
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 57328d2..2e11fa7 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSEISELDAGTODAG_H
-#define MIPSSEISELDAGTODAG_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEISELDAGTODAG_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEISELDAGTODAG_H
#include "MipsISelDAGToDAG.h"
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index be4ca86..4a0ce09 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "MipsSEISelLowering.h"
+#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -34,15 +35,16 @@ static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
"stores to their single precision "
"counterparts"));
-MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
- : MipsTargetLowering(TM) {
+MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI)
+ : MipsTargetLowering(TM, STI) {
// Set up the register classes
addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
- if (Subtarget->isGP64bit())
+ if (Subtarget.isGP64bit())
addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
- if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
+ if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
// Expand all truncating stores and extending loads.
unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
@@ -58,7 +60,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
}
}
- if (Subtarget->hasDSP()) {
+ if (Subtarget.hasDSP()) {
MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
@@ -82,10 +84,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::VSELECT);
}
- if (Subtarget->hasDSPR2())
+ if (Subtarget.hasDSPR2())
setOperationAction(ISD::MUL, MVT::v2i16, Legal);
- if (Subtarget->hasMSA()) {
+ if (Subtarget.hasMSA()) {
addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
@@ -101,12 +103,12 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::XOR);
}
- if (!Subtarget->mipsSEUsesSoftFloat()) {
+ if (!Subtarget.abiUsesSoftFloat()) {
addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
// When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (Subtarget->isFP64bit())
+ if (!Subtarget.isSingleFloat()) {
+ if (Subtarget.isFP64bit())
addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
else
addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
@@ -118,14 +120,16 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::MULHS, MVT::i32, Custom);
setOperationAction(ISD::MULHU, MVT::i32, Custom);
- if (Subtarget->hasCnMips())
+ if (Subtarget.hasCnMips())
setOperationAction(ISD::MUL, MVT::i64, Legal);
- else if (Subtarget->isGP64bit())
+ else if (Subtarget.isGP64bit())
setOperationAction(ISD::MUL, MVT::i64, Custom);
- if (Subtarget->isGP64bit()) {
+ if (Subtarget.isGP64bit()) {
setOperationAction(ISD::MULHS, MVT::i64, Custom);
setOperationAction(ISD::MULHU, MVT::i64, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
@@ -133,8 +137,6 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
- setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
- setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
@@ -152,7 +154,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::STORE, MVT::f64, Custom);
}
- if (Subtarget->hasMips32r6()) {
+ if (Subtarget.hasMips32r6()) {
// MIPS32r6 replaces the accumulator-based multiplies with a three register
// instruction
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
@@ -180,7 +182,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::f32, Legal);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
- assert(Subtarget->isFP64bit() && "FR=1 is required for MIPS32r6");
+ assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
setOperationAction(ISD::SETCC, MVT::f64, Legal);
setOperationAction(ISD::SELECT, MVT::f64, Legal);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
@@ -199,7 +201,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
}
- if (Subtarget->hasMips64r6()) {
+ if (Subtarget.hasMips64r6()) {
// MIPS64r6 replaces the accumulator-based multiplies with a three register
// instruction
setOperationAction(ISD::MUL, MVT::i64, Legal);
@@ -226,14 +228,15 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
}
const MipsTargetLowering *
-llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
- return new MipsSETargetLowering(TM);
+llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI) {
+ return new MipsSETargetLowering(TM, STI);
}
const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
if (VT == MVT::Untyped)
- return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
+ return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
return TargetLowering::getRepRegClassFor(VT);
}
@@ -327,12 +330,13 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
}
bool
-MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
- if (Subtarget->systemSupportsUnalignedAccess()) {
+ if (Subtarget.systemSupportsUnalignedAccess()) {
// MIPS32r6/MIPS64r6 is required to support unaligned access. It's
// implementation defined whether this is handled by hardware, software, or
// a hybrid of the two but it's expected that most implementations will
@@ -523,11 +527,11 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->hasMips32() && !Subtarget->hasMips32r6() &&
+ if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() &&
N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG))
return SDValue(N, 0);
@@ -543,8 +547,8 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
- if (!Subtarget->hasMSA())
+ const MipsSubtarget &Subtarget) {
+ if (!Subtarget.hasMSA())
return SDValue();
SDValue Op0 = N->getOperand(0);
@@ -575,10 +579,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
Log2 == ExtendTySize) {
SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
- DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
- Op0->getVTList(),
- makeArrayRef(Ops, Op0->getNumOperands()));
- return Op0;
+ return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
+ Op0->getVTList(),
+ makeArrayRef(Ops, Op0->getNumOperands()));
}
}
@@ -659,8 +662,8 @@ static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
// vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
- if (!Subtarget->hasMSA())
+ const MipsSubtarget &Subtarget) {
+ if (!Subtarget.hasMSA())
return SDValue();
EVT Ty = N->getValueType(0);
@@ -676,7 +679,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op0Op1 = Op0->getOperand(1);
SDValue Op1Op0 = Op1->getOperand(0);
SDValue Op1Op1 = Op1->getOperand(1);
- bool IsLittleEndian = !Subtarget->isLittle();
+ bool IsLittleEndian = !Subtarget.isLittle();
SDValue IfSet, IfClr, Cond;
bool IsConstantMask = false;
@@ -779,11 +782,11 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 &&
selectMSUB(N, &DAG))
return SDValue(N, 0);
@@ -843,7 +846,7 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
SelectionDAG &DAG,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
// See if this is a vector splat immediate node.
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
@@ -851,12 +854,12 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- if (!Subtarget->hasDSP())
+ if (!Subtarget.hasDSP())
return SDValue();
if (!BV ||
!BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
- EltSize, !Subtarget->isLittle()) ||
+ EltSize, !Subtarget.isLittle()) ||
(SplatBitSize != EltSize) ||
(SplatValue.getZExtValue() >= EltSize))
return SDValue();
@@ -867,7 +870,7 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
EVT Ty = N->getValueType(0);
if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
@@ -890,10 +893,10 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
EVT Ty = N->getValueType(0);
- if (Subtarget->hasMSA()) {
+ if (Subtarget.hasMSA()) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -920,15 +923,14 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
TotalBits <= 32)) {
SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
Op0Op0->getOperand(2) };
- DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT,
- Op0Op0->getVTList(),
- makeArrayRef(Ops, Op0Op0->getNumOperands()));
- return Op0Op0;
+ return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
+ Op0Op0->getVTList(),
+ makeArrayRef(Ops, Op0Op0->getNumOperands()));
}
}
}
- if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
+ if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
return SDValue();
return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
@@ -937,10 +939,10 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
EVT Ty = N->getValueType(0);
- if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8))
+ if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
return SDValue();
return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
@@ -1034,10 +1036,10 @@ static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
}
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
- const MipsSubtarget *Subtarget) {
+ const MipsSubtarget &Subtarget) {
EVT Ty = N->getValueType(0);
- if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
+ if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
// Try the following combines:
// (xor (or $a, $b), (build_vector allones))
// (xor (or $a, $b), (bitcast (build_vector allones)))
@@ -1165,15 +1167,14 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
}
-bool MipsSETargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
+bool MipsSETargetLowering::isEligibleForTailCallOptimization(
+ const CCState &CCInfo, unsigned NextStackOffset,
+ const MipsFunctionInfo &FI) const {
if (!EnableMipsTailCalls)
return false;
// Return false if either the callee or caller has a byval argument.
- if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
return false;
// Return true if the callee's argument area is no larger than the
@@ -1185,10 +1186,12 @@ void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const {
Ops.push_back(Callee);
MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
- InternalLinkage, CLI, Callee, Chain);
+ InternalLinkage, IsCallReloc, CLI, Callee,
+ Chain);
}
SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
@@ -1215,7 +1218,7 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Nd.isNonTemporal(), Nd.isInvariant(),
std::min(Nd.getAlignment(), 4U));
- if (!Subtarget->isLittle())
+ if (!Subtarget.isLittle())
std::swap(Lo, Hi);
SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
@@ -1238,26 +1241,26 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Val, DAG.getConstant(1, MVT::i32));
- if (!Subtarget->isLittle())
+ if (!Subtarget.isLittle())
std::swap(Lo, Hi);
// i32 store to lower address.
Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
- Nd.getTBAAInfo());
+ Nd.getAAInfo());
// i32 store to higher address.
Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
Nd.isVolatile(), Nd.isNonTemporal(),
- std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
+ std::min(Nd.getAlignment(), 4U), Nd.getAAInfo());
}
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
bool HasLo, bool HasHi,
SelectionDAG &DAG) const {
// MIPS32r6/MIPS64r6 removed accumulator based multiplies.
- assert(!Subtarget->hasMips32r6());
+ assert(!Subtarget.hasMips32r6());
EVT Ty = Op.getOperand(0).getValueType();
SDLoc DL(Op);
@@ -1621,7 +1624,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_bnegi_w:
case Intrinsic::mips_bnegi_d:
return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
- !Subtarget->isLittle());
+ !Subtarget.isLittle());
case Intrinsic::mips_bnz_b:
case Intrinsic::mips_bnz_h:
case Intrinsic::mips_bnz_w:
@@ -1657,7 +1660,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_bseti_w:
case Intrinsic::mips_bseti_d:
return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
- !Subtarget->isLittle());
+ !Subtarget.isLittle());
case Intrinsic::mips_bz_b:
case Intrinsic::mips_bz_h:
case Intrinsic::mips_bz_w:
@@ -1732,7 +1735,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_copy_s_w:
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
case Intrinsic::mips_copy_s_d:
- if (Subtarget->hasMips64())
+ if (Subtarget.hasMips64())
// Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
else {
@@ -1747,7 +1750,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_copy_u_w:
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
case Intrinsic::mips_copy_u_d:
- if (Subtarget->hasMips64())
+ if (Subtarget.hasMips64())
// Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
else {
@@ -2324,12 +2327,12 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
+ if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
return SDValue();
if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, 8,
- !Subtarget->isLittle()) && SplatBitSize <= 64) {
+ !Subtarget.isLittle()) && SplatBitSize <= 64) {
// We can only cope with 8, 16, 32, or 64-bit elements
if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
SplatBitSize != 64)
@@ -2744,7 +2747,8 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
// $vr0 = phi($vr2, $fbb, $vr1, $tbb)
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2809,7 +2813,8 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
// $rd = phi($rd1, $fbb, $rd2, $tbb)
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2870,7 +2875,8 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Fd = MI->getOperand(0).getReg();
@@ -2902,9 +2908,10 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
// valid because FR=1 mode which is the only supported mode in MSA.
MachineBasicBlock * MipsSETargetLowering::
emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
- assert(Subtarget->isFP64bit());
+ assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
unsigned Fd = MI->getOperand(0).getReg();
unsigned Ws = MI->getOperand(1).getReg();
@@ -2933,7 +2940,8 @@ emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -2965,9 +2973,10 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
MachineBasicBlock *BB) const {
- assert(Subtarget->isFP64bit());
+ assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3015,7 +3024,8 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned EltSizeInBytes,
bool IsFP) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3025,7 +3035,7 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
const TargetRegisterClass *VecRC = nullptr;
const TargetRegisterClass *GPRRC =
- Subtarget->isGP64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ Subtarget.isGP64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
unsigned EltLog2Size;
unsigned InsertOp = 0;
unsigned InsveOp = 0;
@@ -3125,7 +3135,8 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3154,9 +3165,10 @@ MipsSETargetLowering::emitFILL_FW(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
MachineBasicBlock *BB) const {
- assert(Subtarget->isFP64bit());
+ assert(Subtarget.isFP64bit());
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned Wd = MI->getOperand(0).getReg();
@@ -3184,7 +3196,8 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
unsigned Ws1 = RegInfo.createVirtualRegister(RC);
@@ -3213,7 +3226,8 @@ MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
MachineBasicBlock *
MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
unsigned Ws1 = RegInfo.createVirtualRegister(RC);
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 13ef6fc..d44f8d8 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSEISELLOWERING_H
-#define MIPSSEISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEISELLOWERING_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEISELLOWERING_H
#include "MipsISelLowering.h"
#include "MipsRegisterInfo.h"
@@ -20,7 +20,8 @@
namespace llvm {
class MipsSETargetLowering : public MipsTargetLowering {
public:
- explicit MipsSETargetLowering(MipsTargetMachine &TM);
+ explicit MipsSETargetLowering(const MipsTargetMachine &TM,
+ const MipsSubtarget &STI);
/// \brief Enable MSA support for the given integer type and Register
/// class.
@@ -30,8 +31,9 @@ namespace llvm {
void addMSAFloatType(MVT::SimpleValueType Ty,
const TargetRegisterClass *RC);
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS = 0,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS = 0,
+ unsigned Align = 1,
+ bool *Fast = nullptr) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -49,15 +51,15 @@ namespace llvm {
const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
private:
- bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const override;
+ bool isEligibleForTailCallOptimization(
+ const CCState &CCInfo, unsigned NextStackOffset,
+ const MipsFunctionInfo &FI) const override;
void
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee,
+ bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
SDValue Chain) const override;
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
@@ -112,4 +114,4 @@ namespace llvm {
};
}
-#endif // MipsSEISELLOWERING_H
+#endif
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 32da749..16bea8b 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -24,11 +24,10 @@
using namespace llvm;
-MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
- : MipsInstrInfo(tm,
- tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
- RI(*tm.getSubtargetImpl()),
- IsN64(tm.getSubtarget<MipsSubtarget>().isABI_N64()) {}
+MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI)
+ : MipsInstrInfo(STI, STI.getRelocationModel() == Reloc::PIC_ ? Mips::B
+ : Mips::J),
+ RI(STI), IsN64(STI.isABI_N64()) {}
const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
return RI;
@@ -84,7 +83,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
unsigned Opc = 0, ZeroReg = 0;
- bool isMicroMips = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode();
+ bool isMicroMips = Subtarget.inMicroMipsMode();
if (Mips::GPR32RegClass.contains(DestReg)) { // Copy to CPU Reg.
if (Mips::GPR32RegClass.contains(SrcReg)) {
@@ -265,7 +264,7 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineBasicBlock &MBB = *MI->getParent();
- bool isMicroMips = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode();
+ bool isMicroMips = Subtarget.inMicroMipsMode();
unsigned Opc;
switch(MI->getDesc().getOpcode()) {
@@ -360,7 +359,7 @@ unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const {
void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ const MipsSubtarget &STI = Subtarget;
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
@@ -380,7 +379,7 @@ MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II, DebugLoc DL,
unsigned *NewImm) const {
MipsAnalyzeImmediate AnalyzeImm;
- const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ const MipsSubtarget &STI = Subtarget;
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
unsigned Size = STI.isABI_N64() ? 64 : 32;
unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi;
@@ -429,8 +428,6 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const auto &Subtarget = TM.getSubtarget<MipsSubtarget>();
-
if (Subtarget.isGP64bit())
BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64))
.addReg(Mips::RA_64);
@@ -521,8 +518,17 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo;
unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
- if (SubIdx == Mips::sub_hi && FP64) {
- // FIXME: The .addReg(SrcReg, RegState::Implicit) is a white lie used to
+ // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload
+ // in MipsSEFrameLowering.cpp.
+ assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2()));
+
+ // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload
+ // in MipsSEFrameLowering.cpp.
+ assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg()));
+
+ if (SubIdx == Mips::sub_hi && Subtarget.hasMTHC1()) {
+ // FIXME: Strictly speaking MFHC1 only reads the top 32-bits however, we
+ // claim to read the whole 64-bits as part of a white lie used to
// temporarily work around a widespread bug in the -mfp64 support.
// The problem is that none of the 32-bit fpu ops mention the fact
// that they clobber the upper 32-bits of the 64-bit FPR. Fixing that
@@ -533,8 +539,8 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
// We therefore pretend that it reads the bottom 32-bits to
// artificially create a dependency and prevent the scheduler
// changing the behaviour of the code.
- BuildMI(MBB, I, dl, get(Mips::MFHC1), DstReg).addReg(SubReg).addReg(
- SrcReg, RegState::Implicit);
+ BuildMI(MBB, I, dl, get(FP64 ? Mips::MFHC1_D64 : Mips::MFHC1_D32), DstReg)
+ .addReg(SrcReg);
} else
BuildMI(MBB, I, dl, get(Mips::MFC1), DstReg).addReg(SubReg);
}
@@ -547,29 +553,34 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
const TargetRegisterInfo &TRI = getRegisterInfo();
- bool HasMTHC1 = TM.getSubtarget<MipsSubtarget>().hasMips32r2() ||
- TM.getSubtarget<MipsSubtarget>().hasMips32r6();
// When mthc1 is available, use:
// mtc1 Lo, $fp
// mthc1 Hi, $fp
//
- // Otherwise, for FP64:
+ // Otherwise, for O32 FPXX ABI:
// spill + reload via ldc1
- // This has not been implemented since FP64 on MIPS32 and earlier is not
- // supported.
+ // This case is handled by the frame lowering code.
//
// Otherwise, for FP32:
// mtc1 Lo, $fp
// mtc1 Hi, $fp + 1
+ //
+ // The case where dmtc1 is available doesn't need to be handled here
+ // because it never creates a BuildPairF64 node.
+
+ // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload
+ // in MipsSEFrameLowering.cpp.
+ assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2()));
+
+ // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload
+ // in MipsSEFrameLowering.cpp.
+ assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg()));
BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo))
.addReg(LoReg);
- if (HasMTHC1 || FP64) {
- assert(TM.getSubtarget<MipsSubtarget>().hasMips32r2() &&
- "MTHC1 requires MIPS32r2");
-
+ if (Subtarget.hasMTHC1()) {
// FIXME: The .addReg(DstReg) is a white lie used to temporarily work
// around a widespread bug in the -mfp64 support.
// The problem is that none of the 32-bit fpu ops mention the fact
@@ -584,7 +595,9 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
BuildMI(MBB, I, dl, get(FP64 ? Mips::MTHC1_D64 : Mips::MTHC1_D32), DstReg)
.addReg(DstReg)
.addReg(HiReg);
- } else
+ } else if (Subtarget.isABI_FPXX())
+ llvm_unreachable("BuildPairF64 not expanded in frame lowering code!");
+ else
BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi))
.addReg(HiReg);
}
@@ -594,28 +607,34 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
// This pseudo instruction is generated as part of the lowering of
// ISD::EH_RETURN. We convert it to a stack increment by OffsetReg, and
// indirect jump to TargetReg
- const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
- unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned SP = STI.isGP64bit() ? Mips::SP_64 : Mips::SP;
- unsigned RA = STI.isGP64bit() ? Mips::RA_64 : Mips::RA;
- unsigned T9 = STI.isGP64bit() ? Mips::T9_64 : Mips::T9;
- unsigned ZERO = STI.isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDU = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned SP = Subtarget.isGP64bit() ? Mips::SP_64 : Mips::SP;
+ unsigned RA = Subtarget.isGP64bit() ? Mips::RA_64 : Mips::RA;
+ unsigned T9 = Subtarget.isGP64bit() ? Mips::T9_64 : Mips::T9;
+ unsigned ZERO = Subtarget.isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
unsigned OffsetReg = I->getOperand(0).getReg();
unsigned TargetReg = I->getOperand(1).getReg();
// addu $ra, $v0, $zero
// addu $sp, $sp, $v1
// jr $ra (via RetRA)
+ const TargetMachine &TM = MBB.getParent()->getTarget();
if (TM.getRelocationModel() == Reloc::PIC_)
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), T9)
- .addReg(TargetReg).addReg(ZERO);
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), RA)
- .addReg(TargetReg).addReg(ZERO);
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
- .addReg(SP).addReg(OffsetReg);
+ BuildMI(MBB, I, I->getDebugLoc(),
+ TM.getSubtargetImpl()->getInstrInfo()->get(ADDU), T9)
+ .addReg(TargetReg)
+ .addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(),
+ TM.getSubtargetImpl()->getInstrInfo()->get(ADDU), RA)
+ .addReg(TargetReg)
+ .addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(),
+ TM.getSubtargetImpl()->getInstrInfo()->get(ADDU), SP)
+ .addReg(SP)
+ .addReg(OffsetReg);
expandRetRA(MBB, I);
}
-const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
- return new MipsSEInstrInfo(TM);
+const MipsInstrInfo *llvm::createMipsSEInstrInfo(const MipsSubtarget &STI) {
+ return new MipsSEInstrInfo(STI);
}
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 9ac94ce..b2d2301 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSEINSTRUCTIONINFO_H
-#define MIPSSEINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEINSTRINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEINSTRINFO_H
#include "MipsInstrInfo.h"
#include "MipsSERegisterInfo.h"
@@ -24,7 +24,7 @@ class MipsSEInstrInfo : public MipsInstrInfo {
bool IsN64;
public:
- explicit MipsSEInstrInfo(MipsTargetMachine &TM);
+ explicit MipsSEInstrInfo(const MipsSubtarget &STI);
const MipsRegisterInfo &getRegisterInfo() const override;
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 0af1a6b..55c6638 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -172,7 +172,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned Reg = RegInfo.createVirtualRegister(RC);
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
- MBB.getParent()->getTarget().getInstrInfo());
+ MBB.getParent()->getSubtarget().getInstrInfo());
BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset);
FrameReg = Reg;
@@ -187,7 +187,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned NewImm = 0;
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
- MBB.getParent()->getTarget().getInstrInfo());
+ MBB.getParent()->getSubtarget().getInstrInfo());
unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL,
OffsetBitSize == 16 ? &NewImm : nullptr);
BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index f2f3a7e..6b70d07 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSEREGISTERINFO_H
-#define MIPSSEREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSEREGISTERINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSEREGISTERINFO_H
#include "MipsRegisterInfo.h"
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index 2b3d527..061423f 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSELECTIONDAGINFO_H
-#define MIPSSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 693daa3..8768b12 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -58,6 +58,10 @@ Mips16ConstantIslands(
cl::desc("MIPS: mips16 constant islands enable."),
cl::init(true));
+static cl::opt<bool>
+GPOpt("mgpopt", cl::Hidden,
+ cl::desc("MIPS: Enable gp-relative addressing of small data items"));
+
/// Select the Mips CPU for the given triple and cpu name.
/// FIXME: Merge with the copy in MipsMCTargetDesc.cpp
static StringRef selectMipsCPU(Triple TT, StringRef CPU) {
@@ -104,31 +108,32 @@ static std::string computeDataLayout(const MipsSubtarget &ST) {
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
- Reloc::Model _RM, MipsTargetMachine *_TM)
- : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32),
- MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false),
- IsFPXX(false), IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false),
- IsGP64bit(false), HasVFPU(false), HasCnMips(false), IsLinux(true),
- HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
- HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
- InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
- HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
- HasMSA(false), RM(_RM), OverrideMode(NoOverride), TM(_TM),
- TargetTriple(TT),
+ const MipsTargetMachine *_TM)
+ : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
+ ABI(MipsABIInfo::Unknown()), IsLittle(little), IsSingleFloat(false),
+ IsFPXX(false), NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
+ IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
+ IsLinux(true), HasMips3_32(false), HasMips3_32r2(false),
+ HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
+ InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
+ InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
+ AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+ HasMSA(false), TM(_TM), TargetTriple(TT),
DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))),
- TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*TM)),
- FrameLowering(MipsFrameLowering::create(*TM, *this)),
- TLInfo(MipsTargetLowering::create(*TM)) {
+ TSInfo(DL), InstrInfo(MipsInstrInfo::create(*this)),
+ FrameLowering(MipsFrameLowering::create(*this)),
+ TLInfo(MipsTargetLowering::create(*TM, *this)) {
PreviousInMips16Mode = InMips16Mode;
- // Don't even attempt to generate code for MIPS-I, MIPS-II, MIPS-III, and
- // MIPS-V. They have not been tested and currently exist for the integrated
+ if (MipsArchVersion == MipsDefault)
+ MipsArchVersion = Mips32;
+
+ // Don't even attempt to generate code for MIPS-I, MIPS-III and MIPS-V.
+ // They have not been tested and currently exist for the integrated
// assembler only.
if (MipsArchVersion == Mips1)
report_fatal_error("Code generation for MIPS-I is not implemented", false);
- if (MipsArchVersion == Mips2)
- report_fatal_error("Code generation for MIPS-II is not implemented", false);
if (MipsArchVersion == Mips3)
report_fatal_error("Code generation for MIPS-III is not implemented",
false);
@@ -136,7 +141,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
report_fatal_error("Code generation for MIPS-V is not implemented", false);
// Assert exactly one ABI was chosen.
- assert(MipsABI != UnknownABI);
+ assert(ABI.IsKnown());
assert((((getFeatureBits() & Mips::FeatureO32) != 0) +
((getFeatureBits() & Mips::FeatureEABI) != 0) +
((getFeatureBits() & Mips::FeatureN32) != 0) +
@@ -153,9 +158,10 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
false);
if (!isABI_O32() && !useOddSPReg())
- report_fatal_error("-mattr=+nooddspreg is not currently permitted for a "
- "the O32 ABI.",
- false);
+ report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.", false);
+
+ if (IsFPXX && (isABI_N32() || isABI_N64()))
+ report_fatal_error("FPXX is not permitted for the N32/N64 ABI's.", false);
if (hasMips32r6()) {
StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
@@ -170,21 +176,29 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
if (TT.find("linux") == std::string::npos)
IsLinux = false;
+ if (NoABICalls && TM->getRelocationModel() == Reloc::PIC_)
+ report_fatal_error("position-independent code requires '-mabicalls'");
+
// Set UseSmallSection.
- // TODO: Investigate the IsLinux check. I suspect it's really checking for
- // bare-metal.
- UseSmallSection = !IsLinux && (RM == Reloc::Static);
+ UseSmallSection = GPOpt;
+ if (!NoABICalls && GPOpt) {
+ errs() << "warning: cannot use small-data accesses for '-mabicalls'"
+ << "\n";
+ UseSmallSection = false;
+ }
}
-bool
-MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode &Mode,
- RegClassVector &CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_NONE;
+/// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
+bool MipsSubtarget::enablePostMachineScheduler() const { return true; }
+
+void MipsSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
CriticalPathRCs.clear();
- CriticalPathRCs.push_back(isGP64bit() ? &Mips::GPR64RegClass
- : &Mips::GPR32RegClass);
- return OptLevel >= CodeGenOpt::Aggressive;
+ CriticalPathRCs.push_back(isGP64bit() ?
+ &Mips::GPR64RegClass : &Mips::GPR32RegClass);
+}
+
+CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
+ return CodeGenOpt::Aggressive;
}
MipsSubtarget &
@@ -197,100 +211,13 @@ MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
- if (InMips16Mode && !TM->Options.UseSoftFloat) {
- // Hard float for mips16 means essentially to compile as soft float
- // but to use a runtime library for soft float that is written with
- // native mips32 floating point instructions (those runtime routines
- // run in mips32 hard float mode).
- TM->Options.UseSoftFloat = true;
- TM->Options.FloatABIType = FloatABI::Soft;
+ if (InMips16Mode && !TM->Options.UseSoftFloat)
InMips16HardFloat = true;
- }
return *this;
}
-//FIXME: This logic for reseting the subtarget along with
-// the helper classes can probably be simplified but there are a lot of
-// cases so we will defer rewriting this to later.
-//
-void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
- bool ChangeToMips16 = false, ChangeToNoMips16 = false;
- DEBUG(dbgs() << "resetSubtargetFeatures" << "\n");
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- ChangeToMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- "mips16");
- ChangeToNoMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- "nomips16");
- assert (!(ChangeToMips16 & ChangeToNoMips16) &&
- "mips16 and nomips16 specified on the same function");
- if (ChangeToMips16) {
- if (PreviousInMips16Mode)
- return;
- OverrideMode = Mips16Override;
- PreviousInMips16Mode = true;
- setHelperClassesMips16();
- return;
- } else if (ChangeToNoMips16) {
- if (!PreviousInMips16Mode)
- return;
- OverrideMode = NoMips16Override;
- PreviousInMips16Mode = false;
- setHelperClassesMipsSE();
- return;
- } else {
- if (OverrideMode == NoOverride)
- return;
- OverrideMode = NoOverride;
- DEBUG(dbgs() << "back to default" << "\n");
- if (inMips16Mode() && !PreviousInMips16Mode) {
- setHelperClassesMips16();
- PreviousInMips16Mode = true;
- } else if (!inMips16Mode() && PreviousInMips16Mode) {
- setHelperClassesMipsSE();
- PreviousInMips16Mode = false;
- }
- return;
- }
-}
-
-void MipsSubtarget::setHelperClassesMips16() {
- InstrInfoSE.swap(InstrInfo);
- FrameLoweringSE.swap(FrameLowering);
- TLInfoSE.swap(TLInfo);
- if (!InstrInfo16) {
- InstrInfo.reset(MipsInstrInfo::create(*TM));
- FrameLowering.reset(MipsFrameLowering::create(*TM, *this));
- TLInfo.reset(MipsTargetLowering::create(*TM));
- } else {
- InstrInfo16.swap(InstrInfo);
- FrameLowering16.swap(FrameLowering);
- TLInfo16.swap(TLInfo);
- }
- assert(TLInfo && "null target lowering 16");
- assert(InstrInfo && "null instr info 16");
- assert(FrameLowering && "null frame lowering 16");
-}
-
-void MipsSubtarget::setHelperClassesMipsSE() {
- InstrInfo16.swap(InstrInfo);
- FrameLowering16.swap(FrameLowering);
- TLInfo16.swap(TLInfo);
- if (!InstrInfoSE) {
- InstrInfo.reset(MipsInstrInfo::create(*TM));
- FrameLowering.reset(MipsFrameLowering::create(*TM, *this));
- TLInfo.reset(MipsTargetLowering::create(*TM));
- } else {
- InstrInfoSE.swap(InstrInfo);
- FrameLoweringSE.swap(FrameLowering);
- TLInfoSE.swap(TLInfo);
- }
- assert(TLInfo && "null target lowering in SE");
- assert(InstrInfo && "null instr info SE");
- assert(FrameLowering && "null frame lowering SE");
-}
-
-bool MipsSubtarget::mipsSEUsesSoftFloat() const {
+bool MipsSubtarget::abiUsesSoftFloat() const {
return TM->Options.UseSoftFloat && !InMips16HardFloat;
}
@@ -298,3 +225,7 @@ bool MipsSubtarget::useConstantIslands() {
DEBUG(dbgs() << "use constant islands " << Mips16ConstantIslands << "\n");
return Mips16ConstantIslands;
}
+
+Reloc::Model MipsSubtarget::getRelocationModel() const {
+ return TM->getRelocationModel();
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index a3dcf03..bff9013 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -11,18 +11,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSSUBTARGET_H
-#define MIPSSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSSUBTARGET_H
+#define LLVM_LIB_TARGET_MIPS_MIPSSUBTARGET_H
#include "MipsFrameLowering.h"
#include "MipsISelLowering.h"
#include "MipsInstrInfo.h"
-#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "MipsABIInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -36,14 +36,8 @@ class MipsTargetMachine;
class MipsSubtarget : public MipsGenSubtargetInfo {
virtual void anchor();
-public:
- // NOTE: O64 will not be supported.
- enum MipsABIEnum {
- UnknownABI, O32, N32, N64, EABI
- };
-
-protected:
enum MipsArchEnum {
+ MipsDefault,
Mips1, Mips2, Mips32, Mips32r2, Mips32r6, Mips3, Mips4, Mips5, Mips64,
Mips64r2, Mips64r6
};
@@ -51,8 +45,8 @@ protected:
// Mips architecture version
MipsArchEnum MipsArchVersion;
- // Mips supported ABIs
- MipsABIEnum MipsABI;
+ // Selected ABI
+ MipsABIInfo ABI;
// IsLittle - The target is Little Endian
bool IsLittle;
@@ -65,6 +59,9 @@ protected:
// IsFPXX - MIPS O32 modeless ABI.
bool IsFPXX;
+ // NoABICalls - Disable SVR4-style position-independent code.
+ bool NoABICalls;
+
// IsFP64bit - The target processor has 64-bit floating point registers.
bool IsFP64bit;
@@ -135,48 +132,39 @@ protected:
InstrItineraryData InstrItins;
- // Relocation Model
- Reloc::Model RM;
-
// We can override the determination of whether we are in mips16 mode
// as from the command line
enum {NoOverride, Mips16Override, NoMips16Override} OverrideMode;
- MipsTargetMachine *TM;
+ const MipsTargetMachine *TM;
Triple TargetTriple;
const DataLayout DL; // Calculates type size & alignment
const MipsSelectionDAGInfo TSInfo;
- MipsJITInfo JITInfo;
std::unique_ptr<const MipsInstrInfo> InstrInfo;
std::unique_ptr<const MipsFrameLowering> FrameLowering;
std::unique_ptr<const MipsTargetLowering> TLInfo;
- std::unique_ptr<const MipsInstrInfo> InstrInfo16;
- std::unique_ptr<const MipsFrameLowering> FrameLowering16;
- std::unique_ptr<const MipsTargetLowering> TLInfo16;
- std::unique_ptr<const MipsInstrInfo> InstrInfoSE;
- std::unique_ptr<const MipsFrameLowering> FrameLoweringSE;
- std::unique_ptr<const MipsTargetLowering> TLInfoSE;
public:
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const override;
+ /// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+ bool enablePostMachineScheduler() const override;
+ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+ CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
/// Only O32 and EABI supported right now.
- bool isABI_EABI() const { return MipsABI == EABI; }
- bool isABI_N64() const { return MipsABI == N64; }
- bool isABI_N32() const { return MipsABI == N32; }
- bool isABI_O32() const { return MipsABI == O32; }
- bool isABI_FPXX() const { return false; } // TODO: add check for FPXX
- unsigned getTargetABI() const { return MipsABI; }
+ bool isABI_EABI() const { return ABI.IsEABI(); }
+ bool isABI_N64() const { return ABI.IsN64(); }
+ bool isABI_N32() const { return ABI.IsN32(); }
+ bool isABI_O32() const { return ABI.IsO32(); }
+ bool isABI_FPXX() const { return isABI_O32() && IsFPXX; }
+ const MipsABIInfo &getABI() const { return ABI; }
/// This constructor initializes the data members to match that
/// of the specified triple.
MipsSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool little, Reloc::Model RM,
- MipsTargetMachine *TM);
+ const std::string &FS, bool little,
+ const MipsTargetMachine *TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -209,30 +197,25 @@ public:
bool hasCnMips() const { return HasCnMips; }
bool isLittle() const { return IsLittle; }
+ bool isABICalls() const { return !NoABICalls; }
bool isFPXX() const { return IsFPXX; }
bool isFP64bit() const { return IsFP64bit; }
bool useOddSPReg() const { return UseOddSPReg; }
+ bool noOddSPReg() const { return !UseOddSPReg; }
bool isNaN2008() const { return IsNaN2008bit; }
- bool isNotFP64bit() const { return !IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
bool isGP32bit() const { return !IsGP64bit; }
+ unsigned getGPRSizeInBytes() const { return isGP64bit() ? 8 : 4; }
bool isSingleFloat() const { return IsSingleFloat; }
- bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
- bool inMips16Mode() const {
- switch (OverrideMode) {
- case NoOverride:
- return InMips16Mode;
- case Mips16Override:
- return true;
- case NoMips16Override:
- return false;
- }
- llvm_unreachable("Unexpected mode");
- }
+ bool inMips16Mode() const { return InMips16Mode; }
bool inMips16ModeDefault() const {
return InMips16Mode;
}
+ // Hard float for mips16 means essentially to compile as soft float
+ // but to use a runtime library for soft float that is written with
+ // native mips32 floating point instructions (those runtime routines
+ // run in mips32 hard float mode).
bool inMips16HardFloat() const {
return inMips16Mode() && InMips16HardFloat;
}
@@ -245,7 +228,7 @@ public:
bool hasStandardEncoding() const { return !inMips16Mode(); }
- bool mipsSEUsesSoftFloat() const;
+ bool abiUsesSoftFloat() const;
bool enableLongBranchPass() const {
return hasStandardEncoding() || allowMixed16_32();
@@ -253,15 +236,14 @@ public:
/// Features related to the presence of specific instructions.
bool hasExtractInsert() const { return !inMips16Mode() && hasMips32r2(); }
+ bool hasMTHC1() const { return hasMips32r2(); }
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
bool allowMixed16_32() const { return inMips16ModeDefault() |
AllowMixed16_32;}
bool os16() const { return Os16;};
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
- bool isNotTargetNaCl() const { return !TargetTriple.isOSNaCl(); }
// for now constant islands are on for the whole compilation unit but we only
// really use them if in addition we are in mips16 mode
@@ -270,10 +252,7 @@ public:
unsigned stackAlignment() const { return hasMips64() ? 16 : 8; }
// Grab relocation model
- Reloc::Model getRelocationModel() const {return RM;}
-
- /// \brief Reset the subtarget for the Mips target.
- void resetSubtarget(MachineFunction *MF);
+ Reloc::Model getRelocationModel() const;
MipsSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
const TargetMachine *TM);
@@ -289,17 +268,23 @@ public:
void setHelperClassesMips16();
void setHelperClassesMipsSE();
- MipsJITInfo *getJITInfo() { return &JITInfo; }
- const MipsSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const MipsInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
- const TargetFrameLowering *getFrameLowering() const {
+ const MipsSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const MipsInstrInfo *getInstrInfo() const override { return InstrInfo.get(); }
+ const TargetFrameLowering *getFrameLowering() const override {
return FrameLowering.get();
}
- const MipsRegisterInfo *getRegisterInfo() const {
+ const MipsRegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
- const MipsTargetLowering *getTargetLowering() const { return TLInfo.get(); }
+ const MipsTargetLowering *getTargetLowering() const override {
+ return TLInfo.get();
+ }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
};
} // End llvm namespace
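The MipsSubtarget hunks above move the per-function codegen objects (instruction info, frame lowering, register info, target lowering, itineraries) behind the TargetSubtargetInfo overrides instead of exposing them through MipsTargetMachine. A minimal sketch of the resulting access pattern, written against the r222494-era headers and not part of the patch itself, looks like this:

// Illustrative only: fetch codegen helpers from the MachineFunction's
// subtarget rather than from the TargetMachine, matching the pattern the
// hunks above (e.g. MipsSERegisterInfo::eliminateFI) switch to.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

static const llvm::TargetInstrInfo *instrInfoFor(llvm::MachineFunction &MF) {
  // MipsSubtarget now overrides getInstrInfo()/getRegisterInfo()/etc., so a
  // pass no longer reaches for MF.getTarget().getInstrInfo().
  return MF.getSubtarget().getInstrInfo();
}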
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 425dbf1..33280e3 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -26,6 +26,7 @@
#include "MipsSEISelDAGToDAG.h"
#include "MipsSEISelLowering.h"
#include "MipsSEInstrInfo.h"
+#include "MipsTargetObjectFile.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
@@ -56,10 +57,20 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle, RM, this) {
+ isLittle(isLittle),
+ TLOF(make_unique<MipsTargetObjectFile>()),
+ Subtarget(nullptr),
+ DefaultSubtarget(TT, CPU, FS, isLittle, this),
+ NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
+ isLittle, this),
+ Mips16Subtarget(TT, CPU, FS.empty() ? "+mips16" : FS.str() + ",+mips16",
+ isLittle, this) {
+ Subtarget = &DefaultSubtarget;
initAsmInfo();
}
+MipsTargetMachine::~MipsTargetMachine() {}
+
void MipsebTargetMachine::anchor() { }
MipsebTargetMachine::
@@ -78,6 +89,63 @@ MipselTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+const MipsSubtarget *
+MipsTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+ bool hasMips16Attr =
+ !FnAttrs.getAttribute(AttributeSet::FunctionIndex, "mips16")
+ .hasAttribute(Attribute::None);
+ bool hasNoMips16Attr =
+ !FnAttrs.getAttribute(AttributeSet::FunctionIndex, "nomips16")
+ .hasAttribute(Attribute::None);
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ Attribute SFAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
+ bool softFloat = !SFAttr.hasAttribute(Attribute::None)
+ ? SFAttr.getValueAsString() == "true"
+ : Options.UseSoftFloat;
+
+ if (hasMips16Attr)
+ FS += FS.empty() ? "+mips16" : ",+mips16";
+ else if (hasNoMips16Attr)
+ FS += FS.empty() ? "-mips16" : ",-mips16";
+
+ auto &I = SubtargetMap[CPU + FS + (softFloat ? "use-soft-float=true"
+ : "use-soft-float=false")];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<MipsSubtarget>(TargetTriple, CPU, FS, isLittle, this);
+ }
+ return I.get();
+}
+
+void MipsTargetMachine::resetSubtarget(MachineFunction *MF) {
+ DEBUG(dbgs() << "resetSubtarget\n");
+
+ Subtarget = const_cast<MipsSubtarget *>(getSubtargetImpl(*MF->getFunction()));
+ MF->setSubtarget(Subtarget);
+ return;
+}
+
namespace {
/// Mips Code Generator Pass Configuration Options.
class MipsPassConfig : public TargetPassConfig {
@@ -115,22 +183,18 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
void MipsPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
+ addPass(createAtomicExpandPass(&getMipsTargetMachine()));
if (getMipsSubtarget().os16())
addPass(createMipsOs16(getMipsTargetMachine()));
if (getMipsSubtarget().inMips16HardFloat())
addPass(createMips16HardFloat(getMipsTargetMachine()));
- addPass(createPartiallyInlineLibCallsPass());
}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsPassConfig::addInstSelector() {
- if (getMipsSubtarget().allowMixed16_32()) {
- addPass(createMipsModuleISelDag(getMipsTargetMachine()));
- addPass(createMips16ISelDag(getMipsTargetMachine()));
- addPass(createMipsSEISelDag(getMipsTargetMachine()));
- } else {
- addPass(createMipsISelDag(getMipsTargetMachine()));
- }
+ addPass(createMipsModuleISelDag(getMipsTargetMachine()));
+ addPass(createMips16ISelDag(getMipsTargetMachine()));
+ addPass(createMipsSEISelDag(getMipsTargetMachine()));
return false;
}
@@ -149,7 +213,7 @@ bool MipsPassConfig::addPreRegAlloc() {
}
void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- if (Subtarget.allowMixed16_32()) {
+ if (Subtarget->allowMixed16_32()) {
DEBUG(errs() << "No ");
//FIXME: The Basic Target Transform Info
// pass needs to become a function pass instead of
@@ -166,21 +230,8 @@ void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// print out the code after the passes.
bool MipsPassConfig::addPreEmitPass() {
MipsTargetMachine &TM = getMipsTargetMachine();
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
addPass(createMipsDelaySlotFillerPass(TM));
-
- if (Subtarget.enableLongBranchPass())
- addPass(createMipsLongBranchPass(TM));
- if (Subtarget.inMips16Mode() ||
- Subtarget.allowMixed16_32())
- addPass(createMipsConstantIslandPass(TM));
-
+ addPass(createMipsLongBranchPass(TM));
+ addPass(createMipsConstantIslandPass(TM));
return true;
}
-
-bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Machine code emitter pass for Mips.
- PM.add(createMipsJITCodeEmitterPass(*this, JCE));
- return false;
-}
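The new MipsTargetMachine::getSubtargetImpl(const Function &) above keys its cached MipsSubtarget on the function's "target-cpu", "target-features" and "use-soft-float" attributes together with the "mips16"/"nomips16" markers. As a hypothetical illustration (the helper name below is invented; only Function::addFnAttr and the attribute strings read by the code above are real), a front end could request MIPS16 encoding for a single function like this:

#include "llvm/IR/Function.h"

// Hypothetical helper, not from the patch: tag one function so that
// getSubtargetImpl(F) selects a MIPS16 subtarget for it while the rest of
// the module keeps the default feature string.
static void requestMips16(llvm::Function &F) {
  F.addFnAttr("mips16"); // use "nomips16" instead to force standard encoding
}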
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index a0e7d43..1349f82 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSTARGETMACHINE_H
-#define MIPSTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETMACHINE_H
+#define LLVM_LIB_TARGET_MIPS_MIPSTARGETMACHINE_H
#include "MipsSubtarget.h"
#include "llvm/CodeGen/Passes.h"
@@ -25,48 +25,40 @@ class formatted_raw_ostream;
class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
- MipsSubtarget Subtarget;
+ bool isLittle;
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ MipsSubtarget *Subtarget;
+ MipsSubtarget DefaultSubtarget;
+ MipsSubtarget NoMips16Subtarget;
+ MipsSubtarget Mips16Subtarget;
+
+ mutable StringMap<std::unique_ptr<MipsSubtarget>> SubtargetMap;
public:
MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
-
- virtual ~MipsTargetMachine() {}
+ ~MipsTargetMachine() override;
void addAnalysisPasses(PassManagerBase &PM) override;
- const MipsInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const MipsSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const InstrItineraryData *getInstrItineraryData() const override {
- return Subtarget.inMips16Mode()
- ? nullptr
- : &getSubtargetImpl()->getInstrItineraryData();
- }
- MipsJITInfo *getJITInfo() override {
- return Subtarget.getJITInfo();
- }
- const MipsRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const MipsTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const MipsSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
+ const MipsSubtarget *getSubtargetImpl() const override {
+ if (Subtarget)
+ return Subtarget;
+ return &DefaultSubtarget;
}
+ const MipsSubtarget *getSubtargetImpl(const Function &F) const override;
+
+ /// \brief Reset the subtarget for the Mips target.
+ void resetSubtarget(MachineFunction *MF);
+
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
/// MipsebTargetMachine - Mips32/64 big endian target machine.
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 13f9408..b56c39b 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -24,6 +24,17 @@ SSThreshold("mips-ssection-threshold", cl::Hidden,
cl::desc("Small data and bss section threshold size (default=8)"),
cl::init(8));
+static cl::opt<bool>
+LocalSData("mlocal-sdata", cl::Hidden,
+ cl::desc("MIPS: Use gp_rel for object-local data."),
+ cl::init(true));
+
+static cl::opt<bool>
+ExternSData("mextern-sdata", cl::Hidden,
+ cl::desc("MIPS: Use gp_rel for data that is not defined by the "
+ "current object."),
+ cl::init(true));
+
void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
@@ -37,29 +48,46 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
+ this->TM = &TM;
}
// A address must be loaded from a small section if its size is less than the
// small section size threshold. Data in this section must be addressed using
// gp_rel operator.
static bool IsInSmallSection(uint64_t Size) {
+ // gcc has traditionally not treated zero-sized objects as small data, so this
+ // is effectively part of the ABI.
return Size > 0 && Size <= SSThreshold;
}
-bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
- const TargetMachine &TM) const {
+/// Return true if this global address should be placed into small data/bss
+/// section.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+ // We first check the case where global is a declaration, because finding
+ // section kind using getKindForGlobal() is only allowed for global
+ // definitions.
if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
- return false;
+ return IsGlobalInSmallSectionImpl(GV, TM);
return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
}
-/// IsGlobalInSmallSection - Return true if this global address should be
-/// placed into small data/bss section.
+/// Return true if this global address should be placed into small data/bss
+/// section.
bool MipsTargetObjectFile::
IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
SectionKind Kind) const {
+ return (IsGlobalInSmallSectionImpl(GV, TM) &&
+ (Kind.isDataRel() || Kind.isBSS() || Kind.isCommon()));
+}
+/// Return true if this global address should be placed into small data/bss
+/// section. This method does all the work, except for checking the section
+/// kind.
+bool MipsTargetObjectFile::
+IsGlobalInSmallSectionImpl(const GlobalValue *GV,
+ const TargetMachine &TM) const {
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
// Return if small section is not available.
@@ -71,21 +99,20 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (!GVA)
return false;
- // We can only do this for datarel or BSS objects for now.
- if (!Kind.isBSS() && !Kind.isDataRel())
+ // Enforce -mlocal-sdata.
+ if (!LocalSData && GV->hasLocalLinkage())
return false;
- // If this is a internal constant string, there is a special
- // section for it, but not in small data/bss.
- if (Kind.isMergeable1ByteCString())
+ // Enforce -mextern-sdata.
+ if (!ExternSData && ((GV->hasExternalLinkage() && GV->isDeclaration()) ||
+ GV->hasCommonLinkage()))
return false;
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty));
}
-
-
const MCSection *MipsTargetObjectFile::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM) const {
@@ -95,9 +122,27 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// Handle Small Section classification here.
if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallBSSSection;
- if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isDataRel() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallDataSection;
// Otherwise, we work the same as ELF.
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
}
+
+/// Return true if this constant should be placed into small data section.
+bool MipsTargetObjectFile::
+IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const {
+ return (TM.getSubtarget<MipsSubtarget>().useSmallSection() &&
+ LocalSData &&
+ IsInSmallSection(TM.getSubtargetImpl()->getDataLayout()
+ ->getTypeAllocSize(CN->getType())));
+}
+
+const MCSection *MipsTargetObjectFile::
+getSectionForConstant(SectionKind Kind, const Constant *C) const {
+ if (IsConstantInSmallSection(C, *TM))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::getSectionForConstant(Kind, C);
+}
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index 2bf5a75..3a2b298 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
-#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_MIPS_MIPSTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -17,21 +17,30 @@ namespace llvm {
class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
+ const TargetMachine *TM;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-
- /// IsGlobalInSmallSection - Return true if this global address should be
- /// placed into small data/bss section.
- bool IsGlobalInSmallSection(const GlobalValue *GV,
- const TargetMachine &TM, SectionKind Kind)const;
+ /// Return true if this global address should be placed into small data/bss
+ /// section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const;
bool IsGlobalInSmallSection(const GlobalValue *GV,
const TargetMachine &TM) const;
+ bool IsGlobalInSmallSectionImpl(const GlobalValue *GV,
+ const TargetMachine &TM) const;
const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const override;
+
+ /// Return true if this constant should be placed into small data section.
+ bool IsConstantInSmallSection(const Constant *CN,
+ const TargetMachine &TM) const;
+
+ const MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override;
};
} // end namespace llvm
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 99f7d4c..c1f17933 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MIPSTARGETSTREAMER_H
-#define MIPSTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MIPSTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_MIPS_MIPSTARGETSTREAMER_H
#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "MCTargetDesc/MipsABIFlagsSection.h"
@@ -30,6 +31,8 @@ public:
virtual void emitDirectiveSetNoReorder();
virtual void emitDirectiveSetMacro();
virtual void emitDirectiveSetNoMacro();
+ virtual void emitDirectiveSetMsa();
+ virtual void emitDirectiveSetNoMsa();
virtual void emitDirectiveSetAt();
virtual void emitDirectiveSetNoAt();
virtual void emitDirectiveEnd(StringRef Name);
@@ -45,13 +48,26 @@ public:
virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff);
+ virtual void emitDirectiveSetArch(StringRef Arch);
+ virtual void emitDirectiveSetMips0();
+ virtual void emitDirectiveSetMips1();
+ virtual void emitDirectiveSetMips2();
+ virtual void emitDirectiveSetMips3();
+ virtual void emitDirectiveSetMips4();
+ virtual void emitDirectiveSetMips5();
+ virtual void emitDirectiveSetMips32();
virtual void emitDirectiveSetMips32R2();
+ virtual void emitDirectiveSetMips32R6();
virtual void emitDirectiveSetMips64();
virtual void emitDirectiveSetMips64R2();
+ virtual void emitDirectiveSetMips64R6();
virtual void emitDirectiveSetDsp();
+ virtual void emitDirectiveSetNoDsp();
+ virtual void emitDirectiveSetPop();
+ virtual void emitDirectiveSetPush();
// PIC support
- virtual void emitDirectiveCpload(unsigned RegNo);
+ virtual void emitDirectiveCpLoad(unsigned RegNo);
virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg);
@@ -72,8 +88,8 @@ public:
virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){};
virtual void emitMipsAbiFlags(){};
- void setCanHaveModuleDir(bool Can) { canHaveModuleDirective = Can; }
- bool getCanHaveModuleDir() { return canHaveModuleDirective; }
+ void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
+ bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
// This method enables template classes to set internal abi flags
// structure values.
@@ -87,8 +103,21 @@ public:
protected:
MipsABIFlagsSection ABIFlagsSection;
+ bool GPRInfoSet;
+ unsigned GPRBitMask;
+ int GPROffset;
+
+ bool FPRInfoSet;
+ unsigned FPRBitMask;
+ int FPROffset;
+
+ bool FrameInfoSet;
+ int FrameOffset;
+ unsigned FrameReg;
+ unsigned ReturnReg;
+
private:
- bool canHaveModuleDirective;
+ bool ModuleDirectiveAllowed;
};
// This part is for ascii assembly output
@@ -106,6 +135,8 @@ public:
void emitDirectiveSetNoReorder() override;
void emitDirectiveSetMacro() override;
void emitDirectiveSetNoMacro() override;
+ void emitDirectiveSetMsa() override;
+ void emitDirectiveSetNoMsa() override;
void emitDirectiveSetAt() override;
void emitDirectiveSetNoAt() override;
void emitDirectiveEnd(StringRef Name) override;
@@ -121,13 +152,26 @@ public:
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override;
+ void emitDirectiveSetArch(StringRef Arch) override;
+ void emitDirectiveSetMips0() override;
+ void emitDirectiveSetMips1() override;
+ void emitDirectiveSetMips2() override;
+ void emitDirectiveSetMips3() override;
+ void emitDirectiveSetMips4() override;
+ void emitDirectiveSetMips5() override;
+ void emitDirectiveSetMips32() override;
void emitDirectiveSetMips32R2() override;
+ void emitDirectiveSetMips32R6() override;
void emitDirectiveSetMips64() override;
void emitDirectiveSetMips64R2() override;
+ void emitDirectiveSetMips64R6() override;
void emitDirectiveSetDsp() override;
+ void emitDirectiveSetNoDsp() override;
+ void emitDirectiveSetPop() override;
+ void emitDirectiveSetPush() override;
// PIC support
- virtual void emitDirectiveCpload(unsigned RegNo);
+ void emitDirectiveCpLoad(unsigned RegNo) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
@@ -157,14 +201,8 @@ public:
void emitDirectiveSetMicroMips() override;
void emitDirectiveSetNoMicroMips() override;
void emitDirectiveSetMips16() override;
- void emitDirectiveSetNoMips16() override;
- void emitDirectiveSetReorder() override;
void emitDirectiveSetNoReorder() override;
- void emitDirectiveSetMacro() override;
- void emitDirectiveSetNoMacro() override;
- void emitDirectiveSetAt() override;
- void emitDirectiveSetNoAt() override;
void emitDirectiveEnd(StringRef Name) override;
void emitDirectiveEnt(const MCSymbol &Symbol) override;
@@ -178,13 +216,8 @@ public:
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override;
- void emitDirectiveSetMips32R2() override;
- void emitDirectiveSetMips64() override;
- void emitDirectiveSetMips64R2() override;
- void emitDirectiveSetDsp() override;
-
// PIC support
- virtual void emitDirectiveCpload(unsigned RegNo);
+ void emitDirectiveCpLoad(unsigned RegNo) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
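Note on the MipsTargetStreamer changes above: many new ".set" hooks are added and the module-directive gate is renamed from setCanHaveModuleDir/getCanHaveModuleDir to forbidModuleDirective/isModuleDirectiveAllowed, which reads as: once something incompatible has been emitted, a later ".module" directive is no longer allowed. A small standalone sketch of that gating pattern; the method names mirror the header, but the bodies and the fp=... directive text are illustrative.

    #include <iostream>
    #include <string>

    class DirectiveStreamer {
      bool ModuleDirectiveAllowed = true;   // same default idea as the streamer
    public:
      void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
      bool isModuleDirectiveAllowed() const { return ModuleDirectiveAllowed; }

      void emitDirectiveSetMips32R6() {
        std::cout << "\t.set\tmips32r6\n";
        forbidModuleDirective();          // any ".set" blocks a later ".module"
      }

      void emitDirectiveModuleFP(const std::string &Value) {
        if (!isModuleDirectiveAllowed())
          return;                         // illustrative; real code would diagnose
        std::cout << "\t.module\tfp=" << Value << "\n";
      }
    };

    int main() {
      DirectiveStreamer S;
      S.emitDirectiveModuleFP("64");      // emitted: still at the top of the file
      S.emitDirectiveSetMips32R6();
      S.emitDirectiveModuleFP("32");      // dropped in this sketch
    }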
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 4e35b18..3a4a19d 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -9,26 +9,28 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(NVPTXCommonTableGen)
set(NVPTXCodeGen_sources
+ NVPTXAllocaHoisting.cpp
+ NVPTXAsmPrinter.cpp
+ NVPTXAssignValidGlobalNames.cpp
NVPTXFavorNonGenericAddrSpaces.cpp
NVPTXFrameLowering.cpp
- NVPTXInstrInfo.cpp
+ NVPTXGenericToNVVM.cpp
NVPTXISelDAGToDAG.cpp
NVPTXISelLowering.cpp
+ NVPTXImageOptimizer.cpp
+ NVPTXInstrInfo.cpp
+ NVPTXLowerAggrCopies.cpp
+ NVPTXLowerStructArgs.cpp
+ NVPTXMCExpr.cpp
+ NVPTXPrologEpilogPass.cpp
NVPTXRegisterInfo.cpp
+ NVPTXReplaceImageHandles.cpp
NVPTXSubtarget.cpp
NVPTXTargetMachine.cpp
- NVPTXLowerAggrCopies.cpp
- NVPTXutil.cpp
- NVPTXAllocaHoisting.cpp
- NVPTXAsmPrinter.cpp
+ NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp
+ NVPTXutil.cpp
NVVMReflect.cpp
- NVPTXGenericToNVVM.cpp
- NVPTXAssignValidGlobalNames.cpp
- NVPTXPrologEpilogPass.cpp
- NVPTXMCExpr.cpp
- NVPTXReplaceImageHandles.cpp
- NVPTXImageOptimizer.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 9618896..80b2f62 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -57,13 +57,13 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << "%r";
break;
case 4:
- OS << "%rl";
+ OS << "%rd";
break;
case 5:
OS << "%f";
break;
case 6:
- OS << "%fl";
+ OS << "%fd";
break;
}
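Note on the printer hunk above: the 64-bit integer and double register prefixes change from %rl/%fl to %rd/%fd, matching the virtual-register declaration comments updated later in this patch. A tiny illustrative mapping from register-class name to the new PTX prefix (the class names follow NVPTX, everything else is a sketch):

    #include <cstdio>
    #include <string>

    // Illustrative: only the 64-bit integer and double classes are renamed;
    // the other prefixes stay as they were.
    static std::string ptxRegName(const std::string &RC, unsigned Idx) {
      if (RC == "Int64Regs")   return "%rd" + std::to_string(Idx);  // was %rl
      if (RC == "Float64Regs") return "%fd" + std::to_string(Idx);  // was %fl
      if (RC == "Float32Regs") return "%f"  + std::to_string(Idx);
      if (RC == "Int32Regs")   return "%r"  + std::to_string(Idx);
      return "%?" + std::to_string(Idx);
    }

    int main() {
      std::printf("%s %s\n", ptxRegName("Int64Regs", 3).c_str(),
                  ptxRegName("Float64Regs", 1).c_str());   // %rd3 %fd1
    }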
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 1fb3c57..0496964 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_INST_PRINTER_H
-#define NVPTX_INST_PRINTER_H
+#ifndef LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
+#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/NVPTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt
index e805aba..bc8d82e 100644
--- a/lib/Target/NVPTX/LLVMBuild.txt
+++ b/lib/Target/NVPTX/LLVMBuild.txt
@@ -28,5 +28,5 @@ has_asmprinter = 1
type = Library
name = NVPTXCodeGen
parent = NVPTX
-required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target
add_to_library_groups = NVPTX
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index ddb122f..a72ae2e 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXBASEINFO_H
-#define NVPTXBASEINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H
namespace llvm {
@@ -84,6 +84,17 @@ __attribute__((unused))
#endif
static const char *NamedMDForAnnotations = "nvvm.annotations";
+namespace NVPTXII {
+enum {
+ // These must be kept in sync with TSFlags in NVPTXInstrFormats.td
+ IsTexFlag = 0x80,
+ IsSuldMask = 0x300,
+ IsSuldShift = 8,
+ IsSustFlag = 0x400,
+ IsSurfTexQueryFlag = 0x800,
+ IsTexModeUnifiedFlag = 0x1000
+};
+}
}
#endif
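Note on the NVPTXBaseInfo.h hunk above: the new NVPTXII enum lets the asm printer classify an instruction with bit tests on MCInstrDesc::TSFlags instead of enumerating every texture/surface opcode (that rewrite appears later in this patch, in NVPTXAsmPrinter.cpp). A standalone sketch of how such flag fields are packed and queried; the constants match the enum above, the helper names are illustrative.

    #include <cassert>
    #include <cstdint>

    // Same layout as the NVPTXII enum: single-bit flags plus a 2-bit "suld" field.
    enum : uint64_t {
      IsTexFlag            = 0x80,
      IsSuldMask           = 0x300,
      IsSuldShift          = 8,
      IsSustFlag           = 0x400,
      IsSurfTexQueryFlag   = 0x800,
      IsTexModeUnifiedFlag = 0x1000
    };

    // Illustrative helpers over a raw TSFlags word.
    static bool isTex(uint64_t TSFlags)  { return TSFlags & IsTexFlag; }
    static bool isSust(uint64_t TSFlags) { return TSFlags & IsSustFlag; }

    // For surface loads the 2-bit field encodes log2(vector size) + 1,
    // so the vector size is recovered as 1 << (field - 1).
    static unsigned suldVectorSize(uint64_t TSFlags) {
      unsigned Field = (TSFlags & IsSuldMask) >> IsSuldShift;
      return Field ? 1u << (Field - 1) : 0;
    }

    int main() {
      assert(!isTex(0x300) && !isSust(0x300));
      assert(suldVectorSize(0x300) == 4);   // field == 3 -> v4 load
      assert(suldVectorSize(0x100) == 1);   // field == 1 -> scalar load
      return 0;
    }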
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 366341a..4fd5bdd 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -25,7 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile",
void NVPTXMCAsmInfo::anchor() {}
-NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.getArch() == Triple::nvptx64) {
PointerSize = CalleeSaveStackSlotSize = 8;
@@ -33,8 +33,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
CommentString = "//";
- HasSetDirective = false;
-
HasSingleParameterDotFile = false;
InlineAsmStart = " inline asm";
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 7d1633f..c324286 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_MCASM_INFO_H
-#define NVPTX_MCASM_INFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H
#include "llvm/MC/MCAsmInfo.h"
@@ -23,8 +23,8 @@ class StringRef;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
public:
- explicit NVPTXMCAsmInfo(const StringRef &TT);
+ explicit NVPTXMCAsmInfo(StringRef TT);
};
} // namespace llvm
-#endif // NVPTX_MCASM_INFO_H
+#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index af95c76..98821d2 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXMCTARGETDESC_H
-#define NVPTXMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
namespace llvm {
class Target;
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index f9fb059..a2d670f 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_MANAGED_STRING_H
-#define LLVM_SUPPORT_MANAGED_STRING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H
+#define LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H
#include "llvm/ADT/SmallVector.h"
#include <string>
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index e74c808..13ba57e 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_H
-#define LLVM_TARGET_NVPTX_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/ADT/StringMap.h"
@@ -59,6 +59,7 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
llvm_unreachable("Unknown condition code");
}
+ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM);
FunctionPass *
createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
@@ -69,6 +70,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
+FunctionPass *createNVPTXLowerStructArgsPass();
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 5b61068..69fc86e 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_ALLOCA_HOISTING_H_
-#define NVPTX_ALLOCA_HOISTING_H_
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/IR/DataLayout.h"
@@ -47,4 +47,4 @@ extern FunctionPass *createAllocaHoisting();
} // end namespace llvm
-#endif // NVPTX_ALLOCA_HOISTING_H_
+#endif
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index decf02a..35ba4f1 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -88,12 +88,9 @@ void VisitGlobalVariableForEmission(
return;
// Do we have a circular dependency?
- if (Visiting.count(GV))
+ if (!Visiting.insert(GV).second)
report_fatal_error("Circular dependency found in global variable set");
- // Start visiting this global
- Visiting.insert(GV);
-
// Make sure we visit all dependents first
DenseSet<const GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
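Note on the hunk above: VisitGlobalVariableForEmission folds the separate count()/insert() pair into a single insert(), using the returned bool to detect a revisit. That saves a second hash lookup and removes the window where a node is checked but not yet marked. A standalone sketch of the same idiom, here as cycle detection over a toy dependency graph (names and the erase-on-exit are illustrative):

    #include <cstdio>
    #include <unordered_set>
    #include <vector>

    struct Node { std::vector<Node *> Deps; };

    // insert().second is false when N was already in Visiting, i.e. we
    // re-entered a node that is still on the current visitation stack.
    static bool visit(Node *N, std::unordered_set<Node *> &Visiting) {
      if (!Visiting.insert(N).second)
        return false;                    // circular dependency found
      for (Node *D : N->Deps)
        if (!visit(D, Visiting))
          return false;
      Visiting.erase(N);                 // done with this node
      return true;
    }

    int main() {
      Node A, B;
      A.Deps = {&B};
      B.Deps = {&A};                     // A -> B -> A
      std::unordered_set<Node *> Visiting;
      std::puts(visit(&A, Visiting) ? "acyclic" : "cycle");
    }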
@@ -140,7 +137,8 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(
+ CE, AP.TM.getSubtargetImpl()->getDataLayout()))
if (C != CE)
return LowerConstant(C, AP);
@@ -169,7 +167,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const DataLayout &TD = *AP.TM.getDataLayout();
+ const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
// Generate a symbolic expression for the byte address
APInt OffsetAI(TD.getPointerSizeInBits(), 0);
cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
@@ -193,7 +191,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
return LowerConstant(CE->getOperand(0), AP);
case Instruction::IntToPtr: {
- const DataLayout &TD = *AP.TM.getDataLayout();
+ const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
@@ -203,7 +201,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
}
case Instruction::PtrToInt: {
- const DataLayout &TD = *AP.TM.getDataLayout();
+ const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
@@ -330,253 +328,51 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
unsigned OpNo, MCOperand &MCOp) {
const MachineOperand &MO = MI->getOperand(OpNo);
+ const MCInstrDesc &MCID = MI->getDesc();
- switch (MI->getOpcode()) {
- default: return false;
- case NVPTX::TEX_1D_F32_I32:
- case NVPTX::TEX_1D_F32_F32:
- case NVPTX::TEX_1D_F32_F32_LEVEL:
- case NVPTX::TEX_1D_F32_F32_GRAD:
- case NVPTX::TEX_1D_I32_I32:
- case NVPTX::TEX_1D_I32_F32:
- case NVPTX::TEX_1D_I32_F32_LEVEL:
- case NVPTX::TEX_1D_I32_F32_GRAD:
- case NVPTX::TEX_1D_ARRAY_F32_I32:
- case NVPTX::TEX_1D_ARRAY_F32_F32:
- case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
- case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
- case NVPTX::TEX_1D_ARRAY_I32_I32:
- case NVPTX::TEX_1D_ARRAY_I32_F32:
- case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
- case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
- case NVPTX::TEX_2D_F32_I32:
- case NVPTX::TEX_2D_F32_F32:
- case NVPTX::TEX_2D_F32_F32_LEVEL:
- case NVPTX::TEX_2D_F32_F32_GRAD:
- case NVPTX::TEX_2D_I32_I32:
- case NVPTX::TEX_2D_I32_F32:
- case NVPTX::TEX_2D_I32_F32_LEVEL:
- case NVPTX::TEX_2D_I32_F32_GRAD:
- case NVPTX::TEX_2D_ARRAY_F32_I32:
- case NVPTX::TEX_2D_ARRAY_F32_F32:
- case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
- case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
- case NVPTX::TEX_2D_ARRAY_I32_I32:
- case NVPTX::TEX_2D_ARRAY_I32_F32:
- case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
- case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
- case NVPTX::TEX_3D_F32_I32:
- case NVPTX::TEX_3D_F32_F32:
- case NVPTX::TEX_3D_F32_F32_LEVEL:
- case NVPTX::TEX_3D_F32_F32_GRAD:
- case NVPTX::TEX_3D_I32_I32:
- case NVPTX::TEX_3D_I32_F32:
- case NVPTX::TEX_3D_I32_F32_LEVEL:
- case NVPTX::TEX_3D_I32_F32_GRAD:
- {
+ if (MCID.TSFlags & NVPTXII::IsTexFlag) {
// This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref
- if (OpNo == 4) {
+ if (OpNo == 4 && MO.isImm()) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
- if (OpNo == 5) {
+ if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
- }
- case NVPTX::SULD_1D_I8_TRAP:
- case NVPTX::SULD_1D_I16_TRAP:
- case NVPTX::SULD_1D_I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_I32_TRAP:
- case NVPTX::SULD_2D_I8_TRAP:
- case NVPTX::SULD_2D_I16_TRAP:
- case NVPTX::SULD_2D_I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_I32_TRAP:
- case NVPTX::SULD_3D_I8_TRAP:
- case NVPTX::SULD_3D_I16_TRAP:
- case NVPTX::SULD_3D_I32_TRAP: {
- // This is a V1 surface load, so operand 1 is a surfref
- if (OpNo == 1) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
+ } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
+ unsigned VecSize =
+ 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
- return false;
- }
- case NVPTX::SULD_1D_V2I8_TRAP:
- case NVPTX::SULD_1D_V2I16_TRAP:
- case NVPTX::SULD_1D_V2I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
- case NVPTX::SULD_2D_V2I8_TRAP:
- case NVPTX::SULD_2D_V2I16_TRAP:
- case NVPTX::SULD_2D_V2I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
- case NVPTX::SULD_3D_V2I8_TRAP:
- case NVPTX::SULD_3D_V2I16_TRAP:
- case NVPTX::SULD_3D_V2I32_TRAP: {
- // This is a V2 surface load, so operand 2 is a surfref
- if (OpNo == 2) {
+ // For a surface load of vector size N, the Nth operand will be the surfref
+ if (OpNo == VecSize && MO.isImm()) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
- }
- case NVPTX::SULD_1D_V4I8_TRAP:
- case NVPTX::SULD_1D_V4I16_TRAP:
- case NVPTX::SULD_1D_V4I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
- case NVPTX::SULD_2D_V4I8_TRAP:
- case NVPTX::SULD_2D_V4I16_TRAP:
- case NVPTX::SULD_2D_V4I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
- case NVPTX::SULD_3D_V4I8_TRAP:
- case NVPTX::SULD_3D_V4I16_TRAP:
- case NVPTX::SULD_3D_V4I32_TRAP: {
- // This is a V4 surface load, so operand 4 is a surfref
- if (OpNo == 4) {
- lowerImageHandleSymbol(MO.getImm(), MCOp);
- return true;
- }
-
- return false;
- }
- case NVPTX::SUST_B_1D_B8_TRAP:
- case NVPTX::SUST_B_1D_B16_TRAP:
- case NVPTX::SUST_B_1D_B32_TRAP:
- case NVPTX::SUST_B_1D_V2B8_TRAP:
- case NVPTX::SUST_B_1D_V2B16_TRAP:
- case NVPTX::SUST_B_1D_V2B32_TRAP:
- case NVPTX::SUST_B_1D_V4B8_TRAP:
- case NVPTX::SUST_B_1D_V4B16_TRAP:
- case NVPTX::SUST_B_1D_V4B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_B_2D_B8_TRAP:
- case NVPTX::SUST_B_2D_B16_TRAP:
- case NVPTX::SUST_B_2D_B32_TRAP:
- case NVPTX::SUST_B_2D_V2B8_TRAP:
- case NVPTX::SUST_B_2D_V2B16_TRAP:
- case NVPTX::SUST_B_2D_V2B32_TRAP:
- case NVPTX::SUST_B_2D_V4B8_TRAP:
- case NVPTX::SUST_B_2D_V4B16_TRAP:
- case NVPTX::SUST_B_2D_V4B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_B_3D_B8_TRAP:
- case NVPTX::SUST_B_3D_B16_TRAP:
- case NVPTX::SUST_B_3D_B32_TRAP:
- case NVPTX::SUST_B_3D_V2B8_TRAP:
- case NVPTX::SUST_B_3D_V2B16_TRAP:
- case NVPTX::SUST_B_3D_V2B32_TRAP:
- case NVPTX::SUST_B_3D_V4B8_TRAP:
- case NVPTX::SUST_B_3D_V4B16_TRAP:
- case NVPTX::SUST_B_3D_V4B32_TRAP:
- case NVPTX::SUST_P_1D_B8_TRAP:
- case NVPTX::SUST_P_1D_B16_TRAP:
- case NVPTX::SUST_P_1D_B32_TRAP:
- case NVPTX::SUST_P_1D_V2B8_TRAP:
- case NVPTX::SUST_P_1D_V2B16_TRAP:
- case NVPTX::SUST_P_1D_V2B32_TRAP:
- case NVPTX::SUST_P_1D_V4B8_TRAP:
- case NVPTX::SUST_P_1D_V4B16_TRAP:
- case NVPTX::SUST_P_1D_V4B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_P_2D_B8_TRAP:
- case NVPTX::SUST_P_2D_B16_TRAP:
- case NVPTX::SUST_P_2D_B32_TRAP:
- case NVPTX::SUST_P_2D_V2B8_TRAP:
- case NVPTX::SUST_P_2D_V2B16_TRAP:
- case NVPTX::SUST_P_2D_V2B32_TRAP:
- case NVPTX::SUST_P_2D_V4B8_TRAP:
- case NVPTX::SUST_P_2D_V4B16_TRAP:
- case NVPTX::SUST_P_2D_V4B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_P_3D_B8_TRAP:
- case NVPTX::SUST_P_3D_B16_TRAP:
- case NVPTX::SUST_P_3D_B32_TRAP:
- case NVPTX::SUST_P_3D_V2B8_TRAP:
- case NVPTX::SUST_P_3D_V2B16_TRAP:
- case NVPTX::SUST_P_3D_V2B32_TRAP:
- case NVPTX::SUST_P_3D_V4B8_TRAP:
- case NVPTX::SUST_P_3D_V4B16_TRAP:
- case NVPTX::SUST_P_3D_V4B32_TRAP: {
+ } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
// This is a surface store, so operand 0 is a surfref
- if (OpNo == 0) {
+ if (OpNo == 0 && MO.isImm()) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
- }
- case NVPTX::TXQ_CHANNEL_ORDER:
- case NVPTX::TXQ_CHANNEL_DATA_TYPE:
- case NVPTX::TXQ_WIDTH:
- case NVPTX::TXQ_HEIGHT:
- case NVPTX::TXQ_DEPTH:
- case NVPTX::TXQ_ARRAY_SIZE:
- case NVPTX::TXQ_NUM_SAMPLES:
- case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
- case NVPTX::SUQ_CHANNEL_ORDER:
- case NVPTX::SUQ_CHANNEL_DATA_TYPE:
- case NVPTX::SUQ_WIDTH:
- case NVPTX::SUQ_HEIGHT:
- case NVPTX::SUQ_DEPTH:
- case NVPTX::SUQ_ARRAY_SIZE: {
+ } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
// This is a query, so operand 1 is a surfref/texref
- if (OpNo == 1) {
+ if (OpNo == 1 && MO.isImm()) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
- }
+
+ return false;
}
void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
@@ -704,8 +500,8 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
}
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
Type *Ty = F->getReturnType();
@@ -828,13 +624,14 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
if (TRI->isVirtualRegister(RegNo)) {
OutStreamer.AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
- OutStreamer.AddComment(Twine("implicit-def: ") +
- TM.getRegisterInfo()->getName(RegNo));
+ OutStreamer.AddComment(
+ Twine("implicit-def: ") +
+ TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
}
OutStreamer.AddBlankLine();
}
@@ -1155,7 +952,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(TM.getDataLayout());
+ Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1);
@@ -1356,7 +1153,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1659,7 +1456,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1780,9 +1577,9 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
}
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
@@ -1973,7 +1770,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
// Map the global virtual register number to a register class specific
// virtual register number starting from 1 with that class.
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
//unsigned numRegClasses = TRI->getNumRegClasses();
// Emit the Fake Stack Object
@@ -2010,9 +1807,9 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
// O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
// O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
// O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
- // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
// O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
- // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
+ // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
// Emit declaration of the virtual registers or 'physical' registers for
// each register class
@@ -2113,7 +1910,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
@@ -2237,7 +2034,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
int Bytes;
// Old constants
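Note on the NVPTXAsmPrinter.cpp changes as a whole: most hunks replace TM.getDataLayout(), TM.getTargetLowering() and TM.getRegisterInfo() with lookups through getSubtargetImpl() or MF.getSubtarget(), i.e. target-wide accessors give way to per-subtarget ones so the answers can eventually vary per function. A toy sketch of that layering, with every name and the layout string purely illustrative:

    #include <cstdio>
    #include <string>

    // Toy model: the "target machine" no longer answers detail queries itself;
    // it hands out a subtarget object that owns the per-configuration tables.
    struct Subtarget {
      std::string DataLayout;
      const std::string &getDataLayout() const { return DataLayout; }
    };

    struct TargetMachine {
      Subtarget ST;
      const Subtarget *getSubtargetImpl() const { return &ST; }
      // No getDataLayout() here: callers must go through the subtarget.
    };

    int main() {
      TargetMachine TM{{"e-p:64:64-i64:64"}};   // illustrative layout string
      std::printf("%s\n", TM.getSubtargetImpl()->getDataLayout().c_str());
      return 0;
    }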
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index a9f9bdd..83fa5d3 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXASMPRINTER_H
-#define NVPTXASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
@@ -86,13 +86,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
// Once we have this AggBuffer setup, we can choose how to print
// it out.
public:
- unsigned size; // size of the buffer in bytes
- unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
- SmallVector<unsigned, 4> symbolPosInBuffer;
- SmallVector<const Value *, 4> Symbols;
private:
+ const unsigned size; // size of the buffer in bytes
+ std::vector<unsigned char> buffer; // the buffer
+ SmallVector<unsigned, 4> symbolPosInBuffer;
+ SmallVector<const Value *, 4> Symbols;
unsigned curpos;
raw_ostream &O;
NVPTXAsmPrinter &AP;
@@ -100,14 +100,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
public:
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- : O(_O), AP(_AP) {
- buffer = new unsigned char[_size];
- size = _size;
+ : size(_size), buffer(_size), O(_O), AP(_AP) {
curpos = 0;
numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
}
- ~AggBuffer() { delete[] buffer; }
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
assert((curpos + Num) <= size);
assert((curpos + Bytes) <= size);
@@ -179,9 +176,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
else
nextSymbolPos = symbolPosInBuffer[nSym];
} else if (nBytes == 4)
- O << *(unsigned int *)(buffer + pos);
+ O << *(unsigned int *)(&buffer[pos]);
else
- O << *(unsigned long long *)(buffer + pos);
+ O << *(unsigned long long *)(&buffer[pos]);
}
}
}
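Note on the AggBuffer change above: the buffer moves from a raw new[]/delete[] pair to a std::vector<unsigned char> (and size becomes const), so the user-written destructor disappears and storage management is handled by the container. A minimal sketch of the same switch; the field names follow the header, the rest of the class is illustrative.

    #include <cassert>
    #include <cstring>
    #include <vector>

    class AggBuffer {
      const unsigned size;                 // size of the buffer in bytes
      std::vector<unsigned char> buffer;   // replaces new[]/delete[]
      unsigned curpos = 0;
    public:
      explicit AggBuffer(unsigned Size) : size(Size), buffer(Size) {}
      // No destructor needed: the vector releases the storage.

      // Copy Num bytes, but advance the cursor by Bytes (padding allowed).
      unsigned addBytes(const unsigned char *Ptr, unsigned Num, unsigned Bytes) {
        assert(curpos + Num <= size && curpos + Bytes <= size && "overflow");
        std::memcpy(&buffer[curpos], Ptr, Num);
        curpos += Bytes;
        return curpos;
      }
      unsigned char byteAt(unsigned Pos) const { return buffer[Pos]; }
    };

    int main() {
      AggBuffer B(8);
      unsigned char V[4] = {1, 2, 3, 4};
      B.addBytes(V, 4, 4);
      assert(B.byteAt(2) == 3);
      return 0;
    }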
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 8b088412..314df38 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -48,20 +48,20 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
if (is64bit) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
MachineInstr *MI =
- BuildMI(MBB, MBBI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
+ NVPTX::cvta_local_yes_64),
NVPTX::VRFrame).addReg(LocalReg);
BuildMI(MBB, MI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
LocalReg).addImm(MF.getFunctionNumber());
} else {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
MachineInstr *MI =
BuildMI(MBB, MBBI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
NVPTX::VRFrame).addReg(LocalReg);
BuildMI(MBB, MI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
LocalReg).addImm(MF.getFunctionNumber());
}
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 56fb673..0846b78 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_FRAMELOWERING_H
-#define NVPTX_FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index faa9fdb..58fa95b 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -140,20 +140,23 @@ bool GenericToNVVM::runOnModule(Module &M) {
for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
GlobalVariable *GV = I->first;
GlobalVariable *NewGV = I->second;
- ++I;
+
+ // Remove GV from the map so that it can be RAUWed. Note that
+ // DenseMap::erase() won't invalidate any iterators but this one.
+ auto Next = std::next(I);
+ GVMap.erase(I);
+ I = Next;
+
Constant *BitCastNewGV = ConstantExpr::getPointerCast(NewGV, GV->getType());
// At this point, the remaining uses of GV should be found only in global
// variable initializers, as other uses have already been removed
// while walking through the instructions in function definitions.
- for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end();
- UI != UE;)
- (UI++)->set(BitCastNewGV);
+ GV->replaceAllUsesWith(BitCastNewGV);
std::string Name = GV->getName();
- GV->removeDeadConstantUsers();
GV->eraseFromParent();
NewGV->setName(Name);
}
- GVMap.clear();
+ assert(GVMap.empty() && "Expected it to be empty by now");
return true;
}
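Note on the GenericToNVVM hunk above: each map entry is now erased before the old global is RAUW'd, relying on the property stated in the new comment that DenseMap::erase invalidates only the erased iterator; grabbing std::next(I) first keeps the walk valid. The same pattern with a standard container looks like the following sketch (std::unordered_map::erase likewise invalidates only the erased element; names are illustrative):

    #include <cstdio>
    #include <iterator>
    #include <string>
    #include <unordered_map>

    int main() {
      std::unordered_map<int, std::string> Map = {{1, "a"}, {2, "b"}, {3, "c"}};

      // Erase entries while walking the map: take the next iterator first,
      // because erase() invalidates only the iterator being erased.
      for (auto I = Map.begin(), E = Map.end(); I != E;) {
        auto Next = std::next(I);
        if (I->first % 2 == 1)       // erase odd keys, keep the rest
          Map.erase(I);
        I = Next;
      }

      for (const auto &KV : Map)
        std::printf("%d -> %s\n", KV.first, KV.second.c_str());
      return 0;
    }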
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 0dfbf10..cd0422d 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -24,19 +24,10 @@ using namespace llvm;
#define DEBUG_TYPE "nvptx-isel"
-unsigned FMAContractLevel = 0;
-
-static cl::opt<unsigned, true>
-FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
- " 1: do it 2: do it aggressively"),
- cl::location(FMAContractLevel),
- cl::init(2));
-
static cl::opt<int> UsePrecDivF32(
"nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
- " IEEE Compliant F32 div.rnd if avaiable."),
+ " IEEE Compliant F32 div.rnd if available."),
cl::init(2));
static cl::opt<bool>
@@ -61,16 +52,6 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel),
Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
-
- doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
- doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
- doFMAF32AGG =
- (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
- doFMAF64AGG =
- (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
-
- allowFMA = (FMAContractLevel >= 1);
-
doMulWide = (OptLevel > 0);
}
@@ -116,6 +97,11 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const {
}
}
+bool NVPTXDAGToDAGISel::allowFMA() const {
+ const NVPTXTargetLowering *TL = Subtarget.getTargetLowering();
+ return TL->allowFMA(*MF, OptLevel);
+}
+
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
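Note on the hunks above: the per-ISel FMA booleans and the local contraction flag are removed, and the decision is now a single query that defers to NVPTXTargetLowering::allowFMA(MF, OptLevel). The sketch below only illustrates the shape of such a layered check; the 0/1/2 levels echo the description in the removed option, and everything else (names, the exact policy) is illustrative rather than the real implementation.

    #include <cstdio>

    // 0: never contract, 1: contract when allowed, 2: contract aggressively.
    static int FMAContractLevel = 2;

    struct FunctionOpts { bool UnsafeFPMath; };

    // Illustrative stand-in for a target-lowering-level allowFMA query.
    static bool allowFMA(const FunctionOpts &F, int OptLevel) {
      if (OptLevel == 0)
        return false;                     // no contraction at -O0
      if (FMAContractLevel == 2)
        return true;                      // aggressive: always contract
      if (FMAContractLevel == 1)
        return F.UnsafeFPMath;            // only when fast math permits it
      return false;
    }

    int main() {
      FunctionOpts F{false};
      std::printf("%d\n", allowFMA(F, 2));  // 1 with the default aggressive level
      FMAContractLevel = 1;
      std::printf("%d\n", allowFMA(F, 2));  // 0: strict math at level 1
      return 0;
    }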
@@ -170,93 +156,341 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::INTRINSIC_W_CHAIN:
ResNode = SelectIntrinsicChain(N);
break;
- case NVPTXISD::Tex1DFloatI32:
+ case NVPTXISD::Tex1DFloatS32:
case NVPTXISD::Tex1DFloatFloat:
case NVPTXISD::Tex1DFloatFloatLevel:
case NVPTXISD::Tex1DFloatFloatGrad:
- case NVPTXISD::Tex1DI32I32:
- case NVPTXISD::Tex1DI32Float:
- case NVPTXISD::Tex1DI32FloatLevel:
- case NVPTXISD::Tex1DI32FloatGrad:
- case NVPTXISD::Tex1DArrayFloatI32:
+ case NVPTXISD::Tex1DS32S32:
+ case NVPTXISD::Tex1DS32Float:
+ case NVPTXISD::Tex1DS32FloatLevel:
+ case NVPTXISD::Tex1DS32FloatGrad:
+ case NVPTXISD::Tex1DU32S32:
+ case NVPTXISD::Tex1DU32Float:
+ case NVPTXISD::Tex1DU32FloatLevel:
+ case NVPTXISD::Tex1DU32FloatGrad:
+ case NVPTXISD::Tex1DArrayFloatS32:
case NVPTXISD::Tex1DArrayFloatFloat:
case NVPTXISD::Tex1DArrayFloatFloatLevel:
case NVPTXISD::Tex1DArrayFloatFloatGrad:
- case NVPTXISD::Tex1DArrayI32I32:
- case NVPTXISD::Tex1DArrayI32Float:
- case NVPTXISD::Tex1DArrayI32FloatLevel:
- case NVPTXISD::Tex1DArrayI32FloatGrad:
- case NVPTXISD::Tex2DFloatI32:
+ case NVPTXISD::Tex1DArrayS32S32:
+ case NVPTXISD::Tex1DArrayS32Float:
+ case NVPTXISD::Tex1DArrayS32FloatLevel:
+ case NVPTXISD::Tex1DArrayS32FloatGrad:
+ case NVPTXISD::Tex1DArrayU32S32:
+ case NVPTXISD::Tex1DArrayU32Float:
+ case NVPTXISD::Tex1DArrayU32FloatLevel:
+ case NVPTXISD::Tex1DArrayU32FloatGrad:
+ case NVPTXISD::Tex2DFloatS32:
case NVPTXISD::Tex2DFloatFloat:
case NVPTXISD::Tex2DFloatFloatLevel:
case NVPTXISD::Tex2DFloatFloatGrad:
- case NVPTXISD::Tex2DI32I32:
- case NVPTXISD::Tex2DI32Float:
- case NVPTXISD::Tex2DI32FloatLevel:
- case NVPTXISD::Tex2DI32FloatGrad:
- case NVPTXISD::Tex2DArrayFloatI32:
+ case NVPTXISD::Tex2DS32S32:
+ case NVPTXISD::Tex2DS32Float:
+ case NVPTXISD::Tex2DS32FloatLevel:
+ case NVPTXISD::Tex2DS32FloatGrad:
+ case NVPTXISD::Tex2DU32S32:
+ case NVPTXISD::Tex2DU32Float:
+ case NVPTXISD::Tex2DU32FloatLevel:
+ case NVPTXISD::Tex2DU32FloatGrad:
+ case NVPTXISD::Tex2DArrayFloatS32:
case NVPTXISD::Tex2DArrayFloatFloat:
case NVPTXISD::Tex2DArrayFloatFloatLevel:
case NVPTXISD::Tex2DArrayFloatFloatGrad:
- case NVPTXISD::Tex2DArrayI32I32:
- case NVPTXISD::Tex2DArrayI32Float:
- case NVPTXISD::Tex2DArrayI32FloatLevel:
- case NVPTXISD::Tex2DArrayI32FloatGrad:
- case NVPTXISD::Tex3DFloatI32:
+ case NVPTXISD::Tex2DArrayS32S32:
+ case NVPTXISD::Tex2DArrayS32Float:
+ case NVPTXISD::Tex2DArrayS32FloatLevel:
+ case NVPTXISD::Tex2DArrayS32FloatGrad:
+ case NVPTXISD::Tex2DArrayU32S32:
+ case NVPTXISD::Tex2DArrayU32Float:
+ case NVPTXISD::Tex2DArrayU32FloatLevel:
+ case NVPTXISD::Tex2DArrayU32FloatGrad:
+ case NVPTXISD::Tex3DFloatS32:
case NVPTXISD::Tex3DFloatFloat:
case NVPTXISD::Tex3DFloatFloatLevel:
case NVPTXISD::Tex3DFloatFloatGrad:
- case NVPTXISD::Tex3DI32I32:
- case NVPTXISD::Tex3DI32Float:
- case NVPTXISD::Tex3DI32FloatLevel:
- case NVPTXISD::Tex3DI32FloatGrad:
+ case NVPTXISD::Tex3DS32S32:
+ case NVPTXISD::Tex3DS32Float:
+ case NVPTXISD::Tex3DS32FloatLevel:
+ case NVPTXISD::Tex3DS32FloatGrad:
+ case NVPTXISD::Tex3DU32S32:
+ case NVPTXISD::Tex3DU32Float:
+ case NVPTXISD::Tex3DU32FloatLevel:
+ case NVPTXISD::Tex3DU32FloatGrad:
+ case NVPTXISD::TexCubeFloatFloat:
+ case NVPTXISD::TexCubeFloatFloatLevel:
+ case NVPTXISD::TexCubeS32Float:
+ case NVPTXISD::TexCubeS32FloatLevel:
+ case NVPTXISD::TexCubeU32Float:
+ case NVPTXISD::TexCubeU32FloatLevel:
+ case NVPTXISD::TexCubeArrayFloatFloat:
+ case NVPTXISD::TexCubeArrayFloatFloatLevel:
+ case NVPTXISD::TexCubeArrayS32Float:
+ case NVPTXISD::TexCubeArrayS32FloatLevel:
+ case NVPTXISD::TexCubeArrayU32Float:
+ case NVPTXISD::TexCubeArrayU32FloatLevel:
+ case NVPTXISD::Tld4R2DFloatFloat:
+ case NVPTXISD::Tld4G2DFloatFloat:
+ case NVPTXISD::Tld4B2DFloatFloat:
+ case NVPTXISD::Tld4A2DFloatFloat:
+ case NVPTXISD::Tld4R2DS64Float:
+ case NVPTXISD::Tld4G2DS64Float:
+ case NVPTXISD::Tld4B2DS64Float:
+ case NVPTXISD::Tld4A2DS64Float:
+ case NVPTXISD::Tld4R2DU64Float:
+ case NVPTXISD::Tld4G2DU64Float:
+ case NVPTXISD::Tld4B2DU64Float:
+ case NVPTXISD::Tld4A2DU64Float:
+ case NVPTXISD::TexUnified1DFloatS32:
+ case NVPTXISD::TexUnified1DFloatFloat:
+ case NVPTXISD::TexUnified1DFloatFloatLevel:
+ case NVPTXISD::TexUnified1DFloatFloatGrad:
+ case NVPTXISD::TexUnified1DS32S32:
+ case NVPTXISD::TexUnified1DS32Float:
+ case NVPTXISD::TexUnified1DS32FloatLevel:
+ case NVPTXISD::TexUnified1DS32FloatGrad:
+ case NVPTXISD::TexUnified1DU32S32:
+ case NVPTXISD::TexUnified1DU32Float:
+ case NVPTXISD::TexUnified1DU32FloatLevel:
+ case NVPTXISD::TexUnified1DU32FloatGrad:
+ case NVPTXISD::TexUnified1DArrayFloatS32:
+ case NVPTXISD::TexUnified1DArrayFloatFloat:
+ case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
+ case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
+ case NVPTXISD::TexUnified1DArrayS32S32:
+ case NVPTXISD::TexUnified1DArrayS32Float:
+ case NVPTXISD::TexUnified1DArrayS32FloatLevel:
+ case NVPTXISD::TexUnified1DArrayS32FloatGrad:
+ case NVPTXISD::TexUnified1DArrayU32S32:
+ case NVPTXISD::TexUnified1DArrayU32Float:
+ case NVPTXISD::TexUnified1DArrayU32FloatLevel:
+ case NVPTXISD::TexUnified1DArrayU32FloatGrad:
+ case NVPTXISD::TexUnified2DFloatS32:
+ case NVPTXISD::TexUnified2DFloatFloat:
+ case NVPTXISD::TexUnified2DFloatFloatLevel:
+ case NVPTXISD::TexUnified2DFloatFloatGrad:
+ case NVPTXISD::TexUnified2DS32S32:
+ case NVPTXISD::TexUnified2DS32Float:
+ case NVPTXISD::TexUnified2DS32FloatLevel:
+ case NVPTXISD::TexUnified2DS32FloatGrad:
+ case NVPTXISD::TexUnified2DU32S32:
+ case NVPTXISD::TexUnified2DU32Float:
+ case NVPTXISD::TexUnified2DU32FloatLevel:
+ case NVPTXISD::TexUnified2DU32FloatGrad:
+ case NVPTXISD::TexUnified2DArrayFloatS32:
+ case NVPTXISD::TexUnified2DArrayFloatFloat:
+ case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
+ case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
+ case NVPTXISD::TexUnified2DArrayS32S32:
+ case NVPTXISD::TexUnified2DArrayS32Float:
+ case NVPTXISD::TexUnified2DArrayS32FloatLevel:
+ case NVPTXISD::TexUnified2DArrayS32FloatGrad:
+ case NVPTXISD::TexUnified2DArrayU32S32:
+ case NVPTXISD::TexUnified2DArrayU32Float:
+ case NVPTXISD::TexUnified2DArrayU32FloatLevel:
+ case NVPTXISD::TexUnified2DArrayU32FloatGrad:
+ case NVPTXISD::TexUnified3DFloatS32:
+ case NVPTXISD::TexUnified3DFloatFloat:
+ case NVPTXISD::TexUnified3DFloatFloatLevel:
+ case NVPTXISD::TexUnified3DFloatFloatGrad:
+ case NVPTXISD::TexUnified3DS32S32:
+ case NVPTXISD::TexUnified3DS32Float:
+ case NVPTXISD::TexUnified3DS32FloatLevel:
+ case NVPTXISD::TexUnified3DS32FloatGrad:
+ case NVPTXISD::TexUnified3DU32S32:
+ case NVPTXISD::TexUnified3DU32Float:
+ case NVPTXISD::TexUnified3DU32FloatLevel:
+ case NVPTXISD::TexUnified3DU32FloatGrad:
+ case NVPTXISD::TexUnifiedCubeFloatFloat:
+ case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
+ case NVPTXISD::TexUnifiedCubeS32Float:
+ case NVPTXISD::TexUnifiedCubeS32FloatLevel:
+ case NVPTXISD::TexUnifiedCubeU32Float:
+ case NVPTXISD::TexUnifiedCubeU32FloatLevel:
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
+ case NVPTXISD::TexUnifiedCubeArrayS32Float:
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
+ case NVPTXISD::TexUnifiedCubeArrayU32Float:
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
+ case NVPTXISD::Tld4UnifiedR2DFloatFloat:
+ case NVPTXISD::Tld4UnifiedG2DFloatFloat:
+ case NVPTXISD::Tld4UnifiedB2DFloatFloat:
+ case NVPTXISD::Tld4UnifiedA2DFloatFloat:
+ case NVPTXISD::Tld4UnifiedR2DS64Float:
+ case NVPTXISD::Tld4UnifiedG2DS64Float:
+ case NVPTXISD::Tld4UnifiedB2DS64Float:
+ case NVPTXISD::Tld4UnifiedA2DS64Float:
+ case NVPTXISD::Tld4UnifiedR2DU64Float:
+ case NVPTXISD::Tld4UnifiedG2DU64Float:
+ case NVPTXISD::Tld4UnifiedB2DU64Float:
+ case NVPTXISD::Tld4UnifiedA2DU64Float:
ResNode = SelectTextureIntrinsic(N);
break;
+ case NVPTXISD::Suld1DI8Clamp:
+ case NVPTXISD::Suld1DI16Clamp:
+ case NVPTXISD::Suld1DI32Clamp:
+ case NVPTXISD::Suld1DI64Clamp:
+ case NVPTXISD::Suld1DV2I8Clamp:
+ case NVPTXISD::Suld1DV2I16Clamp:
+ case NVPTXISD::Suld1DV2I32Clamp:
+ case NVPTXISD::Suld1DV2I64Clamp:
+ case NVPTXISD::Suld1DV4I8Clamp:
+ case NVPTXISD::Suld1DV4I16Clamp:
+ case NVPTXISD::Suld1DV4I32Clamp:
+ case NVPTXISD::Suld1DArrayI8Clamp:
+ case NVPTXISD::Suld1DArrayI16Clamp:
+ case NVPTXISD::Suld1DArrayI32Clamp:
+ case NVPTXISD::Suld1DArrayI64Clamp:
+ case NVPTXISD::Suld1DArrayV2I8Clamp:
+ case NVPTXISD::Suld1DArrayV2I16Clamp:
+ case NVPTXISD::Suld1DArrayV2I32Clamp:
+ case NVPTXISD::Suld1DArrayV2I64Clamp:
+ case NVPTXISD::Suld1DArrayV4I8Clamp:
+ case NVPTXISD::Suld1DArrayV4I16Clamp:
+ case NVPTXISD::Suld1DArrayV4I32Clamp:
+ case NVPTXISD::Suld2DI8Clamp:
+ case NVPTXISD::Suld2DI16Clamp:
+ case NVPTXISD::Suld2DI32Clamp:
+ case NVPTXISD::Suld2DI64Clamp:
+ case NVPTXISD::Suld2DV2I8Clamp:
+ case NVPTXISD::Suld2DV2I16Clamp:
+ case NVPTXISD::Suld2DV2I32Clamp:
+ case NVPTXISD::Suld2DV2I64Clamp:
+ case NVPTXISD::Suld2DV4I8Clamp:
+ case NVPTXISD::Suld2DV4I16Clamp:
+ case NVPTXISD::Suld2DV4I32Clamp:
+ case NVPTXISD::Suld2DArrayI8Clamp:
+ case NVPTXISD::Suld2DArrayI16Clamp:
+ case NVPTXISD::Suld2DArrayI32Clamp:
+ case NVPTXISD::Suld2DArrayI64Clamp:
+ case NVPTXISD::Suld2DArrayV2I8Clamp:
+ case NVPTXISD::Suld2DArrayV2I16Clamp:
+ case NVPTXISD::Suld2DArrayV2I32Clamp:
+ case NVPTXISD::Suld2DArrayV2I64Clamp:
+ case NVPTXISD::Suld2DArrayV4I8Clamp:
+ case NVPTXISD::Suld2DArrayV4I16Clamp:
+ case NVPTXISD::Suld2DArrayV4I32Clamp:
+ case NVPTXISD::Suld3DI8Clamp:
+ case NVPTXISD::Suld3DI16Clamp:
+ case NVPTXISD::Suld3DI32Clamp:
+ case NVPTXISD::Suld3DI64Clamp:
+ case NVPTXISD::Suld3DV2I8Clamp:
+ case NVPTXISD::Suld3DV2I16Clamp:
+ case NVPTXISD::Suld3DV2I32Clamp:
+ case NVPTXISD::Suld3DV2I64Clamp:
+ case NVPTXISD::Suld3DV4I8Clamp:
+ case NVPTXISD::Suld3DV4I16Clamp:
+ case NVPTXISD::Suld3DV4I32Clamp:
case NVPTXISD::Suld1DI8Trap:
case NVPTXISD::Suld1DI16Trap:
case NVPTXISD::Suld1DI32Trap:
+ case NVPTXISD::Suld1DI64Trap:
case NVPTXISD::Suld1DV2I8Trap:
case NVPTXISD::Suld1DV2I16Trap:
case NVPTXISD::Suld1DV2I32Trap:
+ case NVPTXISD::Suld1DV2I64Trap:
case NVPTXISD::Suld1DV4I8Trap:
case NVPTXISD::Suld1DV4I16Trap:
case NVPTXISD::Suld1DV4I32Trap:
case NVPTXISD::Suld1DArrayI8Trap:
case NVPTXISD::Suld1DArrayI16Trap:
case NVPTXISD::Suld1DArrayI32Trap:
+ case NVPTXISD::Suld1DArrayI64Trap:
case NVPTXISD::Suld1DArrayV2I8Trap:
case NVPTXISD::Suld1DArrayV2I16Trap:
case NVPTXISD::Suld1DArrayV2I32Trap:
+ case NVPTXISD::Suld1DArrayV2I64Trap:
case NVPTXISD::Suld1DArrayV4I8Trap:
case NVPTXISD::Suld1DArrayV4I16Trap:
case NVPTXISD::Suld1DArrayV4I32Trap:
case NVPTXISD::Suld2DI8Trap:
case NVPTXISD::Suld2DI16Trap:
case NVPTXISD::Suld2DI32Trap:
+ case NVPTXISD::Suld2DI64Trap:
case NVPTXISD::Suld2DV2I8Trap:
case NVPTXISD::Suld2DV2I16Trap:
case NVPTXISD::Suld2DV2I32Trap:
+ case NVPTXISD::Suld2DV2I64Trap:
case NVPTXISD::Suld2DV4I8Trap:
case NVPTXISD::Suld2DV4I16Trap:
case NVPTXISD::Suld2DV4I32Trap:
case NVPTXISD::Suld2DArrayI8Trap:
case NVPTXISD::Suld2DArrayI16Trap:
case NVPTXISD::Suld2DArrayI32Trap:
+ case NVPTXISD::Suld2DArrayI64Trap:
case NVPTXISD::Suld2DArrayV2I8Trap:
case NVPTXISD::Suld2DArrayV2I16Trap:
case NVPTXISD::Suld2DArrayV2I32Trap:
+ case NVPTXISD::Suld2DArrayV2I64Trap:
case NVPTXISD::Suld2DArrayV4I8Trap:
case NVPTXISD::Suld2DArrayV4I16Trap:
case NVPTXISD::Suld2DArrayV4I32Trap:
case NVPTXISD::Suld3DI8Trap:
case NVPTXISD::Suld3DI16Trap:
case NVPTXISD::Suld3DI32Trap:
+ case NVPTXISD::Suld3DI64Trap:
case NVPTXISD::Suld3DV2I8Trap:
case NVPTXISD::Suld3DV2I16Trap:
case NVPTXISD::Suld3DV2I32Trap:
+ case NVPTXISD::Suld3DV2I64Trap:
case NVPTXISD::Suld3DV4I8Trap:
case NVPTXISD::Suld3DV4I16Trap:
case NVPTXISD::Suld3DV4I32Trap:
+ case NVPTXISD::Suld1DI8Zero:
+ case NVPTXISD::Suld1DI16Zero:
+ case NVPTXISD::Suld1DI32Zero:
+ case NVPTXISD::Suld1DI64Zero:
+ case NVPTXISD::Suld1DV2I8Zero:
+ case NVPTXISD::Suld1DV2I16Zero:
+ case NVPTXISD::Suld1DV2I32Zero:
+ case NVPTXISD::Suld1DV2I64Zero:
+ case NVPTXISD::Suld1DV4I8Zero:
+ case NVPTXISD::Suld1DV4I16Zero:
+ case NVPTXISD::Suld1DV4I32Zero:
+ case NVPTXISD::Suld1DArrayI8Zero:
+ case NVPTXISD::Suld1DArrayI16Zero:
+ case NVPTXISD::Suld1DArrayI32Zero:
+ case NVPTXISD::Suld1DArrayI64Zero:
+ case NVPTXISD::Suld1DArrayV2I8Zero:
+ case NVPTXISD::Suld1DArrayV2I16Zero:
+ case NVPTXISD::Suld1DArrayV2I32Zero:
+ case NVPTXISD::Suld1DArrayV2I64Zero:
+ case NVPTXISD::Suld1DArrayV4I8Zero:
+ case NVPTXISD::Suld1DArrayV4I16Zero:
+ case NVPTXISD::Suld1DArrayV4I32Zero:
+ case NVPTXISD::Suld2DI8Zero:
+ case NVPTXISD::Suld2DI16Zero:
+ case NVPTXISD::Suld2DI32Zero:
+ case NVPTXISD::Suld2DI64Zero:
+ case NVPTXISD::Suld2DV2I8Zero:
+ case NVPTXISD::Suld2DV2I16Zero:
+ case NVPTXISD::Suld2DV2I32Zero:
+ case NVPTXISD::Suld2DV2I64Zero:
+ case NVPTXISD::Suld2DV4I8Zero:
+ case NVPTXISD::Suld2DV4I16Zero:
+ case NVPTXISD::Suld2DV4I32Zero:
+ case NVPTXISD::Suld2DArrayI8Zero:
+ case NVPTXISD::Suld2DArrayI16Zero:
+ case NVPTXISD::Suld2DArrayI32Zero:
+ case NVPTXISD::Suld2DArrayI64Zero:
+ case NVPTXISD::Suld2DArrayV2I8Zero:
+ case NVPTXISD::Suld2DArrayV2I16Zero:
+ case NVPTXISD::Suld2DArrayV2I32Zero:
+ case NVPTXISD::Suld2DArrayV2I64Zero:
+ case NVPTXISD::Suld2DArrayV4I8Zero:
+ case NVPTXISD::Suld2DArrayV4I16Zero:
+ case NVPTXISD::Suld2DArrayV4I32Zero:
+ case NVPTXISD::Suld3DI8Zero:
+ case NVPTXISD::Suld3DI16Zero:
+ case NVPTXISD::Suld3DI32Zero:
+ case NVPTXISD::Suld3DI64Zero:
+ case NVPTXISD::Suld3DV2I8Zero:
+ case NVPTXISD::Suld3DV2I16Zero:
+ case NVPTXISD::Suld3DV2I32Zero:
+ case NVPTXISD::Suld3DV2I64Zero:
+ case NVPTXISD::Suld3DV4I8Zero:
+ case NVPTXISD::Suld3DV4I16Zero:
+ case NVPTXISD::Suld3DV4I32Zero:
ResNode = SelectSurfaceIntrinsic(N);
break;
case ISD::AND:
@@ -2781,16 +3015,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
SDValue Chain = N->getOperand(0);
- SDValue TexRef = N->getOperand(1);
- SDValue SampRef = N->getOperand(2);
SDNode *Ret = nullptr;
unsigned Opc = 0;
SmallVector<SDValue, 8> Ops;
switch (N->getOpcode()) {
default: return nullptr;
- case NVPTXISD::Tex1DFloatI32:
- Opc = NVPTX::TEX_1D_F32_I32;
+ case NVPTXISD::Tex1DFloatS32:
+ Opc = NVPTX::TEX_1D_F32_S32;
break;
case NVPTXISD::Tex1DFloatFloat:
Opc = NVPTX::TEX_1D_F32_F32;
@@ -2801,20 +3033,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
case NVPTXISD::Tex1DFloatFloatGrad:
Opc = NVPTX::TEX_1D_F32_F32_GRAD;
break;
- case NVPTXISD::Tex1DI32I32:
- Opc = NVPTX::TEX_1D_I32_I32;
+ case NVPTXISD::Tex1DS32S32:
+ Opc = NVPTX::TEX_1D_S32_S32;
+ break;
+ case NVPTXISD::Tex1DS32Float:
+ Opc = NVPTX::TEX_1D_S32_F32;
+ break;
+ case NVPTXISD::Tex1DS32FloatLevel:
+ Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
break;
- case NVPTXISD::Tex1DI32Float:
- Opc = NVPTX::TEX_1D_I32_F32;
+ case NVPTXISD::Tex1DS32FloatGrad:
+ Opc = NVPTX::TEX_1D_S32_F32_GRAD;
break;
- case NVPTXISD::Tex1DI32FloatLevel:
- Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
+ case NVPTXISD::Tex1DU32S32:
+ Opc = NVPTX::TEX_1D_U32_S32;
break;
- case NVPTXISD::Tex1DI32FloatGrad:
- Opc = NVPTX::TEX_1D_I32_F32_GRAD;
+ case NVPTXISD::Tex1DU32Float:
+ Opc = NVPTX::TEX_1D_U32_F32;
break;
- case NVPTXISD::Tex1DArrayFloatI32:
- Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
+ case NVPTXISD::Tex1DU32FloatLevel:
+ Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DU32FloatGrad:
+ Opc = NVPTX::TEX_1D_U32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex1DArrayFloatS32:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
break;
case NVPTXISD::Tex1DArrayFloatFloat:
Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
@@ -2825,20 +3069,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
case NVPTXISD::Tex1DArrayFloatFloatGrad:
Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
break;
- case NVPTXISD::Tex1DArrayI32I32:
- Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
+ case NVPTXISD::Tex1DArrayS32S32:
+ Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
+ break;
+ case NVPTXISD::Tex1DArrayS32Float:
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
break;
- case NVPTXISD::Tex1DArrayI32Float:
- Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
+ case NVPTXISD::Tex1DArrayS32FloatLevel:
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
break;
- case NVPTXISD::Tex1DArrayI32FloatLevel:
- Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
+ case NVPTXISD::Tex1DArrayS32FloatGrad:
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
break;
- case NVPTXISD::Tex1DArrayI32FloatGrad:
- Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
+ case NVPTXISD::Tex1DArrayU32S32:
+ Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
break;
- case NVPTXISD::Tex2DFloatI32:
- Opc = NVPTX::TEX_2D_F32_I32;
+ case NVPTXISD::Tex1DArrayU32Float:
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
+ break;
+ case NVPTXISD::Tex1DArrayU32FloatLevel:
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DArrayU32FloatGrad:
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DFloatS32:
+ Opc = NVPTX::TEX_2D_F32_S32;
break;
case NVPTXISD::Tex2DFloatFloat:
Opc = NVPTX::TEX_2D_F32_F32;
@@ -2849,20 +3105,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
case NVPTXISD::Tex2DFloatFloatGrad:
Opc = NVPTX::TEX_2D_F32_F32_GRAD;
break;
- case NVPTXISD::Tex2DI32I32:
- Opc = NVPTX::TEX_2D_I32_I32;
+ case NVPTXISD::Tex2DS32S32:
+ Opc = NVPTX::TEX_2D_S32_S32;
+ break;
+ case NVPTXISD::Tex2DS32Float:
+ Opc = NVPTX::TEX_2D_S32_F32;
+ break;
+ case NVPTXISD::Tex2DS32FloatLevel:
+ Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DS32FloatGrad:
+ Opc = NVPTX::TEX_2D_S32_F32_GRAD;
break;
- case NVPTXISD::Tex2DI32Float:
- Opc = NVPTX::TEX_2D_I32_F32;
+ case NVPTXISD::Tex2DU32S32:
+ Opc = NVPTX::TEX_2D_U32_S32;
break;
- case NVPTXISD::Tex2DI32FloatLevel:
- Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
+ case NVPTXISD::Tex2DU32Float:
+ Opc = NVPTX::TEX_2D_U32_F32;
break;
- case NVPTXISD::Tex2DI32FloatGrad:
- Opc = NVPTX::TEX_2D_I32_F32_GRAD;
+ case NVPTXISD::Tex2DU32FloatLevel:
+ Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
break;
- case NVPTXISD::Tex2DArrayFloatI32:
- Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
+ case NVPTXISD::Tex2DU32FloatGrad:
+ Opc = NVPTX::TEX_2D_U32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DArrayFloatS32:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
break;
case NVPTXISD::Tex2DArrayFloatFloat:
Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
@@ -2873,20 +3141,32 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
case NVPTXISD::Tex2DArrayFloatFloatGrad:
Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
break;
- case NVPTXISD::Tex2DArrayI32I32:
- Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
+ case NVPTXISD::Tex2DArrayS32S32:
+ Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
+ break;
+ case NVPTXISD::Tex2DArrayS32Float:
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
+ break;
+ case NVPTXISD::Tex2DArrayS32FloatLevel:
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
break;
- case NVPTXISD::Tex2DArrayI32Float:
- Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
+ case NVPTXISD::Tex2DArrayS32FloatGrad:
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
break;
- case NVPTXISD::Tex2DArrayI32FloatLevel:
- Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
+ case NVPTXISD::Tex2DArrayU32S32:
+ Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
break;
- case NVPTXISD::Tex2DArrayI32FloatGrad:
- Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
+ case NVPTXISD::Tex2DArrayU32Float:
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
break;
- case NVPTXISD::Tex3DFloatI32:
- Opc = NVPTX::TEX_3D_F32_I32;
+ case NVPTXISD::Tex2DArrayU32FloatLevel:
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DArrayU32FloatGrad:
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex3DFloatS32:
+ Opc = NVPTX::TEX_3D_F32_S32;
break;
case NVPTXISD::Tex3DFloatFloat:
Opc = NVPTX::TEX_3D_F32_F32;
@@ -2897,25 +3177,358 @@ SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
case NVPTXISD::Tex3DFloatFloatGrad:
Opc = NVPTX::TEX_3D_F32_F32_GRAD;
break;
- case NVPTXISD::Tex3DI32I32:
- Opc = NVPTX::TEX_3D_I32_I32;
+ case NVPTXISD::Tex3DS32S32:
+ Opc = NVPTX::TEX_3D_S32_S32;
+ break;
+ case NVPTXISD::Tex3DS32Float:
+ Opc = NVPTX::TEX_3D_S32_F32;
+ break;
+ case NVPTXISD::Tex3DS32FloatLevel:
+ Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex3DS32FloatGrad:
+ Opc = NVPTX::TEX_3D_S32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex3DU32S32:
+ Opc = NVPTX::TEX_3D_U32_S32;
+ break;
+ case NVPTXISD::Tex3DU32Float:
+ Opc = NVPTX::TEX_3D_U32_F32;
+ break;
+ case NVPTXISD::Tex3DU32FloatLevel:
+ Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex3DU32FloatGrad:
+ Opc = NVPTX::TEX_3D_U32_F32_GRAD;
+ break;
+ case NVPTXISD::TexCubeFloatFloat:
+ Opc = NVPTX::TEX_CUBE_F32_F32;
+ break;
+ case NVPTXISD::TexCubeFloatFloatLevel:
+ Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexCubeS32Float:
+ Opc = NVPTX::TEX_CUBE_S32_F32;
+ break;
+ case NVPTXISD::TexCubeS32FloatLevel:
+ Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexCubeU32Float:
+ Opc = NVPTX::TEX_CUBE_U32_F32;
+ break;
+ case NVPTXISD::TexCubeU32FloatLevel:
+ Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexCubeArrayFloatFloat:
+ Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
+ break;
+ case NVPTXISD::TexCubeArrayFloatFloatLevel:
+ Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexCubeArrayS32Float:
+ Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
+ break;
+ case NVPTXISD::TexCubeArrayS32FloatLevel:
+ Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexCubeArrayU32Float:
+ Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
+ break;
+ case NVPTXISD::TexCubeArrayU32FloatLevel:
+ Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tld4R2DFloatFloat:
+ Opc = NVPTX::TLD4_R_2D_F32_F32;
+ break;
+ case NVPTXISD::Tld4G2DFloatFloat:
+ Opc = NVPTX::TLD4_G_2D_F32_F32;
+ break;
+ case NVPTXISD::Tld4B2DFloatFloat:
+ Opc = NVPTX::TLD4_B_2D_F32_F32;
+ break;
+ case NVPTXISD::Tld4A2DFloatFloat:
+ Opc = NVPTX::TLD4_A_2D_F32_F32;
+ break;
+ case NVPTXISD::Tld4R2DS64Float:
+ Opc = NVPTX::TLD4_R_2D_S32_F32;
+ break;
+ case NVPTXISD::Tld4G2DS64Float:
+ Opc = NVPTX::TLD4_G_2D_S32_F32;
+ break;
+ case NVPTXISD::Tld4B2DS64Float:
+ Opc = NVPTX::TLD4_B_2D_S32_F32;
+ break;
+ case NVPTXISD::Tld4A2DS64Float:
+ Opc = NVPTX::TLD4_A_2D_S32_F32;
+ break;
+ case NVPTXISD::Tld4R2DU64Float:
+ Opc = NVPTX::TLD4_R_2D_U32_F32;
+ break;
+ case NVPTXISD::Tld4G2DU64Float:
+ Opc = NVPTX::TLD4_G_2D_U32_F32;
+ break;
+ case NVPTXISD::Tld4B2DU64Float:
+ Opc = NVPTX::TLD4_B_2D_U32_F32;
+ break;
+ case NVPTXISD::Tld4A2DU64Float:
+ Opc = NVPTX::TLD4_A_2D_U32_F32;
+ break;
+ case NVPTXISD::TexUnified1DFloatS32:
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
+ break;
+ case NVPTXISD::TexUnified1DFloatFloat:
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
+ break;
+ case NVPTXISD::TexUnified1DFloatFloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified1DFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified1DS32S32:
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
+ break;
+ case NVPTXISD::TexUnified1DS32Float:
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
+ break;
+ case NVPTXISD::TexUnified1DS32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified1DS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified1DU32S32:
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
+ break;
+ case NVPTXISD::TexUnified1DU32Float:
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
+ break;
+ case NVPTXISD::TexUnified1DU32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified1DU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified1DArrayFloatS32:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
+ break;
+ case NVPTXISD::TexUnified1DArrayFloatFloat:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
+ break;
+ case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified1DArrayS32S32:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
+ break;
+ case NVPTXISD::TexUnified1DArrayS32Float:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
+ break;
+ case NVPTXISD::TexUnified1DArrayS32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified1DArrayS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified1DArrayU32S32:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
+ break;
+ case NVPTXISD::TexUnified1DArrayU32Float:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
+ break;
+ case NVPTXISD::TexUnified1DArrayU32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified1DArrayU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified2DFloatS32:
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
+ break;
+ case NVPTXISD::TexUnified2DFloatFloat:
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
+ break;
+ case NVPTXISD::TexUnified2DFloatFloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified2DFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified2DS32S32:
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
+ break;
+ case NVPTXISD::TexUnified2DS32Float:
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
+ break;
+ case NVPTXISD::TexUnified2DS32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified2DS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified2DU32S32:
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
+ break;
+ case NVPTXISD::TexUnified2DU32Float:
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
+ break;
+ case NVPTXISD::TexUnified2DU32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified2DU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified2DArrayFloatS32:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
+ break;
+ case NVPTXISD::TexUnified2DArrayFloatFloat:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
+ break;
+ case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified2DArrayS32S32:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
+ break;
+ case NVPTXISD::TexUnified2DArrayS32Float:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
break;
- case NVPTXISD::Tex3DI32Float:
- Opc = NVPTX::TEX_3D_I32_F32;
+ case NVPTXISD::TexUnified2DArrayS32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
break;
- case NVPTXISD::Tex3DI32FloatLevel:
- Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
+ case NVPTXISD::TexUnified2DArrayS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
break;
- case NVPTXISD::Tex3DI32FloatGrad:
- Opc = NVPTX::TEX_3D_I32_F32_GRAD;
+ case NVPTXISD::TexUnified2DArrayU32S32:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
+ break;
+ case NVPTXISD::TexUnified2DArrayU32Float:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
+ break;
+ case NVPTXISD::TexUnified2DArrayU32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified2DArrayU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified3DFloatS32:
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
+ break;
+ case NVPTXISD::TexUnified3DFloatFloat:
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
+ break;
+ case NVPTXISD::TexUnified3DFloatFloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified3DFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified3DS32S32:
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
+ break;
+ case NVPTXISD::TexUnified3DS32Float:
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
+ break;
+ case NVPTXISD::TexUnified3DS32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified3DS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnified3DU32S32:
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
+ break;
+ case NVPTXISD::TexUnified3DU32Float:
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
+ break;
+ case NVPTXISD::TexUnified3DU32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnified3DU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
+ break;
+ case NVPTXISD::TexUnifiedCubeFloatFloat:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
+ break;
+ case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnifiedCubeS32Float:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
+ break;
+ case NVPTXISD::TexUnifiedCubeS32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnifiedCubeU32Float:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
+ break;
+ case NVPTXISD::TexUnifiedCubeU32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayS32Float:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayU32Float:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tld4UnifiedR2DFloatFloat:
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedG2DFloatFloat:
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedB2DFloatFloat:
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedA2DFloatFloat:
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedR2DS64Float:
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedG2DS64Float:
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedB2DS64Float:
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedA2DS64Float:
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedR2DU64Float:
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedG2DU64Float:
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedB2DU64Float:
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
+ break;
+ case NVPTXISD::Tld4UnifiedA2DU64Float:
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
break;
}
- Ops.push_back(TexRef);
- Ops.push_back(SampRef);
-
- // Copy over indices
- for (unsigned i = 3; i < N->getNumOperands(); ++i) {
+ // Copy over operands
+ for (unsigned i = 1; i < N->getNumOperands(); ++i) {
Ops.push_back(N->getOperand(i));
}
@@ -2932,6 +3545,402 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
SmallVector<SDValue, 8> Ops;
switch (N->getOpcode()) {
default: return nullptr;
+ case NVPTXISD::Suld1DI8Clamp:
+ Opc = NVPTX::SULD_1D_I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI16Clamp:
+ Opc = NVPTX::SULD_1D_I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI32Clamp:
+ Opc = NVPTX::SULD_1D_I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI64Clamp:
+ Opc = NVPTX::SULD_1D_I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I8Clamp:
+ Opc = NVPTX::SULD_1D_V2I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I16Clamp:
+ Opc = NVPTX::SULD_1D_V2I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I32Clamp:
+ Opc = NVPTX::SULD_1D_V2I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I64Clamp:
+ Opc = NVPTX::SULD_1D_V2I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I8Clamp:
+ Opc = NVPTX::SULD_1D_V4I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I16Clamp:
+ Opc = NVPTX::SULD_1D_V4I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I32Clamp:
+ Opc = NVPTX::SULD_1D_V4I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI8Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI16Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI32Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI64Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I8Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I16Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I32Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I64Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I8Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I16Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I32Clamp:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI8Clamp:
+ Opc = NVPTX::SULD_2D_I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI16Clamp:
+ Opc = NVPTX::SULD_2D_I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI32Clamp:
+ Opc = NVPTX::SULD_2D_I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI64Clamp:
+ Opc = NVPTX::SULD_2D_I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I8Clamp:
+ Opc = NVPTX::SULD_2D_V2I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I16Clamp:
+ Opc = NVPTX::SULD_2D_V2I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I32Clamp:
+ Opc = NVPTX::SULD_2D_V2I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I64Clamp:
+ Opc = NVPTX::SULD_2D_V2I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I8Clamp:
+ Opc = NVPTX::SULD_2D_V4I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I16Clamp:
+ Opc = NVPTX::SULD_2D_V4I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I32Clamp:
+ Opc = NVPTX::SULD_2D_V4I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI8Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI16Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI32Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI64Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I8Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I16Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I32Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I64Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I8Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I16Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I32Clamp:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI8Clamp:
+ Opc = NVPTX::SULD_3D_I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI16Clamp:
+ Opc = NVPTX::SULD_3D_I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI32Clamp:
+ Opc = NVPTX::SULD_3D_I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI64Clamp:
+ Opc = NVPTX::SULD_3D_I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I8Clamp:
+ Opc = NVPTX::SULD_3D_V2I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I16Clamp:
+ Opc = NVPTX::SULD_3D_V2I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I32Clamp:
+ Opc = NVPTX::SULD_3D_V2I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I64Clamp:
+ Opc = NVPTX::SULD_3D_V2I64_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I8Clamp:
+ Opc = NVPTX::SULD_3D_V4I8_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I16Clamp:
+ Opc = NVPTX::SULD_3D_V4I16_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I32Clamp:
+ Opc = NVPTX::SULD_3D_V4I32_CLAMP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld1DI8Trap:
Opc = NVPTX::SULD_1D_I8_TRAP;
Ops.push_back(TexHandle);
@@ -2950,6 +3959,12 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld1DI64Trap:
+ Opc = NVPTX::SULD_1D_I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld1DV2I8Trap:
Opc = NVPTX::SULD_1D_V2I8_TRAP;
Ops.push_back(TexHandle);
@@ -2968,6 +3983,12 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld1DV2I64Trap:
+ Opc = NVPTX::SULD_1D_V2I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld1DV4I8Trap:
Opc = NVPTX::SULD_1D_V4I8_TRAP;
Ops.push_back(TexHandle);
@@ -3007,6 +4028,13 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(3));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld1DArrayI64Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld1DArrayV2I8Trap:
Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
Ops.push_back(TexHandle);
@@ -3028,6 +4056,13 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(3));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld1DArrayV2I64Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld1DArrayV4I8Trap:
Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
Ops.push_back(TexHandle);
@@ -3070,6 +4105,13 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(3));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld2DI64Trap:
+ Opc = NVPTX::SULD_2D_I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld2DV2I8Trap:
Opc = NVPTX::SULD_2D_V2I8_TRAP;
Ops.push_back(TexHandle);
@@ -3091,6 +4133,13 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(3));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld2DV2I64Trap:
+ Opc = NVPTX::SULD_2D_V2I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld2DV4I8Trap:
Opc = NVPTX::SULD_2D_V4I8_TRAP;
Ops.push_back(TexHandle);
@@ -3136,6 +4185,14 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(4));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld2DArrayI64Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld2DArrayV2I8Trap:
Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
Ops.push_back(TexHandle);
@@ -3160,6 +4217,14 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(4));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld2DArrayV2I64Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld2DArrayV4I8Trap:
Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
Ops.push_back(TexHandle);
@@ -3208,6 +4273,14 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(4));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld3DI64Trap:
+ Opc = NVPTX::SULD_3D_I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld3DV2I8Trap:
Opc = NVPTX::SULD_3D_V2I8_TRAP;
Ops.push_back(TexHandle);
@@ -3232,6 +4305,14 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(4));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld3DV2I64Trap:
+ Opc = NVPTX::SULD_3D_V2I64_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
case NVPTXISD::Suld3DV4I8Trap:
Opc = NVPTX::SULD_3D_V4I8_TRAP;
Ops.push_back(TexHandle);
@@ -3256,11 +4337,408 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
Ops.push_back(N->getOperand(4));
Ops.push_back(Chain);
break;
+ case NVPTXISD::Suld1DI8Zero:
+ Opc = NVPTX::SULD_1D_I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI16Zero:
+ Opc = NVPTX::SULD_1D_I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI32Zero:
+ Opc = NVPTX::SULD_1D_I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI64Zero:
+ Opc = NVPTX::SULD_1D_I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I8Zero:
+ Opc = NVPTX::SULD_1D_V2I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I16Zero:
+ Opc = NVPTX::SULD_1D_V2I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I32Zero:
+ Opc = NVPTX::SULD_1D_V2I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I64Zero:
+ Opc = NVPTX::SULD_1D_V2I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I8Zero:
+ Opc = NVPTX::SULD_1D_V4I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I16Zero:
+ Opc = NVPTX::SULD_1D_V4I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I32Zero:
+ Opc = NVPTX::SULD_1D_V4I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI8Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI16Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI32Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI64Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I8Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I16Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I32Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I64Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I8Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I16Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I32Zero:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI8Zero:
+ Opc = NVPTX::SULD_2D_I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI16Zero:
+ Opc = NVPTX::SULD_2D_I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI32Zero:
+ Opc = NVPTX::SULD_2D_I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI64Zero:
+ Opc = NVPTX::SULD_2D_I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I8Zero:
+ Opc = NVPTX::SULD_2D_V2I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I16Zero:
+ Opc = NVPTX::SULD_2D_V2I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I32Zero:
+ Opc = NVPTX::SULD_2D_V2I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I64Zero:
+ Opc = NVPTX::SULD_2D_V2I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I8Zero:
+ Opc = NVPTX::SULD_2D_V4I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I16Zero:
+ Opc = NVPTX::SULD_2D_V4I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I32Zero:
+ Opc = NVPTX::SULD_2D_V4I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI8Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI16Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI32Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI64Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I8Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I16Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I32Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I64Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I8Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I16Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I32Zero:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI8Zero:
+ Opc = NVPTX::SULD_3D_I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI16Zero:
+ Opc = NVPTX::SULD_3D_I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI32Zero:
+ Opc = NVPTX::SULD_3D_I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI64Zero:
+ Opc = NVPTX::SULD_3D_I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I8Zero:
+ Opc = NVPTX::SULD_3D_V2I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I16Zero:
+ Opc = NVPTX::SULD_3D_V2I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I32Zero:
+ Opc = NVPTX::SULD_3D_V2I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I64Zero:
+ Opc = NVPTX::SULD_3D_V2I64_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I8Zero:
+ Opc = NVPTX::SULD_3D_V4I8_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I16Zero:
+ Opc = NVPTX::SULD_3D_V4I16_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I32Zero:
+ Opc = NVPTX::SULD_3D_V4I32_ZERO;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
}
Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
return Ret;
}
+
/// SelectBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction
SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
@@ -3563,17 +5041,10 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
unsigned int spN) const {
const Value *Src = nullptr;
- // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
- // the classof() for MemSDNode does not include MemIntrinsicSDNode
- // (See SelectionDAGNodes.h). So we need to check for both.
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
if (spN == 0 && mN->getMemOperand()->getPseudoValue())
return true;
Src = mN->getMemOperand()->getValue();
- } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
- if (spN == 0 && mN->getMemOperand()->getPseudoValue())
- return true;
- Src = mN->getMemOperand()->getValue();
}
if (!Src)
return false;
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index c44ccb2..69afcd7 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H
+
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
@@ -24,20 +27,13 @@ namespace {
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
- // If true, generate corresponding FPCONTRACT. This is
- // language dependent (i.e. CUDA and OpenCL works differently).
- bool doFMAF64;
- bool doFMAF32;
- bool doFMAF64AGG;
- bool doFMAF32AGG;
- bool allowFMA;
-
// If true, generate mul.wide from sext and mul
bool doMulWide;
int getDivF32Level() const;
bool usePrecSqrtF32() const;
bool useF32FTZ() const;
+ bool allowFMA() const;
public:
explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
@@ -99,3 +95,5 @@ private:
};
}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index cb452ff..0b0b536 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -48,6 +48,12 @@ static cl::opt<bool> sched4reg(
"nvptx-sched4reg",
cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
+static cl::opt<unsigned>
+FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+ " 1: do it 2: do it aggressively"),
+ cl::init(2));
+
static bool IsPTXVectorType(MVT VT) {
switch (VT.SimpleTy) {
default:
@@ -100,8 +106,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
}
// NVPTXTargetLowering Constructor.
-NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
- : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
+ : TargetLowering(TM), nvTM(&TM),
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
// always lower memset, memcpy, and memmove intrinsics to load/store
@@ -197,8 +203,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
// Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
// Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PTX does not support load / store predicate registers
@@ -360,73 +369,379 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::MUL_WIDE_SIGNED";
case NVPTXISD::MUL_WIDE_UNSIGNED:
return "NVPTXISD::MUL_WIDE_UNSIGNED";
- case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
+ case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
case NVPTXISD::Tex1DFloatFloatLevel:
return "NVPTXISD::Tex1DFloatFloatLevel";
case NVPTXISD::Tex1DFloatFloatGrad:
return "NVPTXISD::Tex1DFloatFloatGrad";
- case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32";
- case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float";
- case NVPTXISD::Tex1DI32FloatLevel:
- return "NVPTXISD::Tex1DI32FloatLevel";
- case NVPTXISD::Tex1DI32FloatGrad:
- return "NVPTXISD::Tex1DI32FloatGrad";
- case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32";
- case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
+ case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
+ case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
+ case NVPTXISD::Tex1DS32FloatLevel:
+ return "NVPTXISD::Tex1DS32FloatLevel";
+ case NVPTXISD::Tex1DS32FloatGrad:
+ return "NVPTXISD::Tex1DS32FloatGrad";
+ case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
+ case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
+ case NVPTXISD::Tex1DU32FloatLevel:
+ return "NVPTXISD::Tex1DU32FloatLevel";
+ case NVPTXISD::Tex1DU32FloatGrad:
+ return "NVPTXISD::Tex1DU32FloatGrad";
+ case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
+ case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
case NVPTXISD::Tex1DArrayFloatFloatLevel:
- return "NVPTXISD::Tex2DArrayFloatFloatLevel";
+ return "NVPTXISD::Tex1DArrayFloatFloatLevel";
case NVPTXISD::Tex1DArrayFloatFloatGrad:
- return "NVPTXISD::Tex2DArrayFloatFloatGrad";
- case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32";
- case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float";
- case NVPTXISD::Tex1DArrayI32FloatLevel:
- return "NVPTXISD::Tex2DArrayI32FloatLevel";
- case NVPTXISD::Tex1DArrayI32FloatGrad:
- return "NVPTXISD::Tex2DArrayI32FloatGrad";
- case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32";
+ return "NVPTXISD::Tex1DArrayFloatFloatGrad";
+ case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
+ case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
+ case NVPTXISD::Tex1DArrayS32FloatLevel:
+ return "NVPTXISD::Tex1DArrayS32FloatLevel";
+ case NVPTXISD::Tex1DArrayS32FloatGrad:
+ return "NVPTXISD::Tex1DArrayS32FloatGrad";
+ case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
+ case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
+ case NVPTXISD::Tex1DArrayU32FloatLevel:
+ return "NVPTXISD::Tex1DArrayU32FloatLevel";
+ case NVPTXISD::Tex1DArrayU32FloatGrad:
+ return "NVPTXISD::Tex1DArrayU32FloatGrad";
+ case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
case NVPTXISD::Tex2DFloatFloatLevel:
return "NVPTXISD::Tex2DFloatFloatLevel";
case NVPTXISD::Tex2DFloatFloatGrad:
return "NVPTXISD::Tex2DFloatFloatGrad";
- case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32";
- case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float";
- case NVPTXISD::Tex2DI32FloatLevel:
- return "NVPTXISD::Tex2DI32FloatLevel";
- case NVPTXISD::Tex2DI32FloatGrad:
- return "NVPTXISD::Tex2DI32FloatGrad";
- case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32";
+ case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
+ case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
+ case NVPTXISD::Tex2DS32FloatLevel:
+ return "NVPTXISD::Tex2DS32FloatLevel";
+ case NVPTXISD::Tex2DS32FloatGrad:
+ return "NVPTXISD::Tex2DS32FloatGrad";
+ case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
+ case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
+ case NVPTXISD::Tex2DU32FloatLevel:
+ return "NVPTXISD::Tex2DU32FloatLevel";
+ case NVPTXISD::Tex2DU32FloatGrad:
+ return "NVPTXISD::Tex2DU32FloatGrad";
+ case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
case NVPTXISD::Tex2DArrayFloatFloatLevel:
return "NVPTXISD::Tex2DArrayFloatFloatLevel";
case NVPTXISD::Tex2DArrayFloatFloatGrad:
return "NVPTXISD::Tex2DArrayFloatFloatGrad";
- case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32";
- case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float";
- case NVPTXISD::Tex2DArrayI32FloatLevel:
- return "NVPTXISD::Tex2DArrayI32FloatLevel";
- case NVPTXISD::Tex2DArrayI32FloatGrad:
- return "NVPTXISD::Tex2DArrayI32FloatGrad";
- case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32";
+ case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
+ case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
+ case NVPTXISD::Tex2DArrayS32FloatLevel:
+ return "NVPTXISD::Tex2DArrayS32FloatLevel";
+ case NVPTXISD::Tex2DArrayS32FloatGrad:
+ return "NVPTXISD::Tex2DArrayS32FloatGrad";
+ case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
+ case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
+ case NVPTXISD::Tex2DArrayU32FloatLevel:
+ return "NVPTXISD::Tex2DArrayU32FloatLevel";
+ case NVPTXISD::Tex2DArrayU32FloatGrad:
+ return "NVPTXISD::Tex2DArrayU32FloatGrad";
+ case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
case NVPTXISD::Tex3DFloatFloatLevel:
return "NVPTXISD::Tex3DFloatFloatLevel";
case NVPTXISD::Tex3DFloatFloatGrad:
return "NVPTXISD::Tex3DFloatFloatGrad";
- case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32";
- case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float";
- case NVPTXISD::Tex3DI32FloatLevel:
- return "NVPTXISD::Tex3DI32FloatLevel";
- case NVPTXISD::Tex3DI32FloatGrad:
- return "NVPTXISD::Tex3DI32FloatGrad";
+ case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
+ case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
+ case NVPTXISD::Tex3DS32FloatLevel:
+ return "NVPTXISD::Tex3DS32FloatLevel";
+ case NVPTXISD::Tex3DS32FloatGrad:
+ return "NVPTXISD::Tex3DS32FloatGrad";
+ case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
+ case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
+ case NVPTXISD::Tex3DU32FloatLevel:
+ return "NVPTXISD::Tex3DU32FloatLevel";
+ case NVPTXISD::Tex3DU32FloatGrad:
+ return "NVPTXISD::Tex3DU32FloatGrad";
+ case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
+ case NVPTXISD::TexCubeFloatFloatLevel:
+ return "NVPTXISD::TexCubeFloatFloatLevel";
+ case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
+ case NVPTXISD::TexCubeS32FloatLevel:
+ return "NVPTXISD::TexCubeS32FloatLevel";
+ case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
+ case NVPTXISD::TexCubeU32FloatLevel:
+ return "NVPTXISD::TexCubeU32FloatLevel";
+ case NVPTXISD::TexCubeArrayFloatFloat:
+ return "NVPTXISD::TexCubeArrayFloatFloat";
+ case NVPTXISD::TexCubeArrayFloatFloatLevel:
+ return "NVPTXISD::TexCubeArrayFloatFloatLevel";
+ case NVPTXISD::TexCubeArrayS32Float:
+ return "NVPTXISD::TexCubeArrayS32Float";
+ case NVPTXISD::TexCubeArrayS32FloatLevel:
+ return "NVPTXISD::TexCubeArrayS32FloatLevel";
+ case NVPTXISD::TexCubeArrayU32Float:
+ return "NVPTXISD::TexCubeArrayU32Float";
+ case NVPTXISD::TexCubeArrayU32FloatLevel:
+ return "NVPTXISD::TexCubeArrayU32FloatLevel";
+ case NVPTXISD::Tld4R2DFloatFloat:
+ return "NVPTXISD::Tld4R2DFloatFloat";
+ case NVPTXISD::Tld4G2DFloatFloat:
+ return "NVPTXISD::Tld4G2DFloatFloat";
+ case NVPTXISD::Tld4B2DFloatFloat:
+ return "NVPTXISD::Tld4B2DFloatFloat";
+ case NVPTXISD::Tld4A2DFloatFloat:
+ return "NVPTXISD::Tld4A2DFloatFloat";
+ case NVPTXISD::Tld4R2DS64Float:
+ return "NVPTXISD::Tld4R2DS64Float";
+ case NVPTXISD::Tld4G2DS64Float:
+ return "NVPTXISD::Tld4G2DS64Float";
+ case NVPTXISD::Tld4B2DS64Float:
+ return "NVPTXISD::Tld4B2DS64Float";
+ case NVPTXISD::Tld4A2DS64Float:
+ return "NVPTXISD::Tld4A2DS64Float";
+ case NVPTXISD::Tld4R2DU64Float:
+ return "NVPTXISD::Tld4R2DU64Float";
+ case NVPTXISD::Tld4G2DU64Float:
+ return "NVPTXISD::Tld4G2DU64Float";
+ case NVPTXISD::Tld4B2DU64Float:
+ return "NVPTXISD::Tld4B2DU64Float";
+ case NVPTXISD::Tld4A2DU64Float:
+ return "NVPTXISD::Tld4A2DU64Float";
+
+ case NVPTXISD::TexUnified1DFloatS32:
+ return "NVPTXISD::TexUnified1DFloatS32";
+ case NVPTXISD::TexUnified1DFloatFloat:
+ return "NVPTXISD::TexUnified1DFloatFloat";
+ case NVPTXISD::TexUnified1DFloatFloatLevel:
+ return "NVPTXISD::TexUnified1DFloatFloatLevel";
+ case NVPTXISD::TexUnified1DFloatFloatGrad:
+ return "NVPTXISD::TexUnified1DFloatFloatGrad";
+ case NVPTXISD::TexUnified1DS32S32:
+ return "NVPTXISD::TexUnified1DS32S32";
+ case NVPTXISD::TexUnified1DS32Float:
+ return "NVPTXISD::TexUnified1DS32Float";
+ case NVPTXISD::TexUnified1DS32FloatLevel:
+ return "NVPTXISD::TexUnified1DS32FloatLevel";
+ case NVPTXISD::TexUnified1DS32FloatGrad:
+ return "NVPTXISD::TexUnified1DS32FloatGrad";
+ case NVPTXISD::TexUnified1DU32S32:
+ return "NVPTXISD::TexUnified1DU32S32";
+ case NVPTXISD::TexUnified1DU32Float:
+ return "NVPTXISD::TexUnified1DU32Float";
+ case NVPTXISD::TexUnified1DU32FloatLevel:
+ return "NVPTXISD::TexUnified1DU32FloatLevel";
+ case NVPTXISD::TexUnified1DU32FloatGrad:
+ return "NVPTXISD::TexUnified1DU32FloatGrad";
+ case NVPTXISD::TexUnified1DArrayFloatS32:
+ return "NVPTXISD::TexUnified1DArrayFloatS32";
+ case NVPTXISD::TexUnified1DArrayFloatFloat:
+ return "NVPTXISD::TexUnified1DArrayFloatFloat";
+ case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
+ return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
+ case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
+ return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
+ case NVPTXISD::TexUnified1DArrayS32S32:
+ return "NVPTXISD::TexUnified1DArrayS32S32";
+ case NVPTXISD::TexUnified1DArrayS32Float:
+ return "NVPTXISD::TexUnified1DArrayS32Float";
+ case NVPTXISD::TexUnified1DArrayS32FloatLevel:
+ return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
+ case NVPTXISD::TexUnified1DArrayS32FloatGrad:
+ return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
+ case NVPTXISD::TexUnified1DArrayU32S32:
+ return "NVPTXISD::TexUnified1DArrayU32S32";
+ case NVPTXISD::TexUnified1DArrayU32Float:
+ return "NVPTXISD::TexUnified1DArrayU32Float";
+ case NVPTXISD::TexUnified1DArrayU32FloatLevel:
+ return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
+ case NVPTXISD::TexUnified1DArrayU32FloatGrad:
+ return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
+ case NVPTXISD::TexUnified2DFloatS32:
+ return "NVPTXISD::TexUnified2DFloatS32";
+ case NVPTXISD::TexUnified2DFloatFloat:
+ return "NVPTXISD::TexUnified2DFloatFloat";
+ case NVPTXISD::TexUnified2DFloatFloatLevel:
+ return "NVPTXISD::TexUnified2DFloatFloatLevel";
+ case NVPTXISD::TexUnified2DFloatFloatGrad:
+ return "NVPTXISD::TexUnified2DFloatFloatGrad";
+ case NVPTXISD::TexUnified2DS32S32:
+ return "NVPTXISD::TexUnified2DS32S32";
+ case NVPTXISD::TexUnified2DS32Float:
+ return "NVPTXISD::TexUnified2DS32Float";
+ case NVPTXISD::TexUnified2DS32FloatLevel:
+ return "NVPTXISD::TexUnified2DS32FloatLevel";
+ case NVPTXISD::TexUnified2DS32FloatGrad:
+ return "NVPTXISD::TexUnified2DS32FloatGrad";
+ case NVPTXISD::TexUnified2DU32S32:
+ return "NVPTXISD::TexUnified2DU32S32";
+ case NVPTXISD::TexUnified2DU32Float:
+ return "NVPTXISD::TexUnified2DU32Float";
+ case NVPTXISD::TexUnified2DU32FloatLevel:
+ return "NVPTXISD::TexUnified2DU32FloatLevel";
+ case NVPTXISD::TexUnified2DU32FloatGrad:
+ return "NVPTXISD::TexUnified2DU32FloatGrad";
+ case NVPTXISD::TexUnified2DArrayFloatS32:
+ return "NVPTXISD::TexUnified2DArrayFloatS32";
+ case NVPTXISD::TexUnified2DArrayFloatFloat:
+ return "NVPTXISD::TexUnified2DArrayFloatFloat";
+ case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
+ return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
+ case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
+ return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
+ case NVPTXISD::TexUnified2DArrayS32S32:
+ return "NVPTXISD::TexUnified2DArrayS32S32";
+ case NVPTXISD::TexUnified2DArrayS32Float:
+ return "NVPTXISD::TexUnified2DArrayS32Float";
+ case NVPTXISD::TexUnified2DArrayS32FloatLevel:
+ return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
+ case NVPTXISD::TexUnified2DArrayS32FloatGrad:
+ return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
+ case NVPTXISD::TexUnified2DArrayU32S32:
+ return "NVPTXISD::TexUnified2DArrayU32S32";
+ case NVPTXISD::TexUnified2DArrayU32Float:
+ return "NVPTXISD::TexUnified2DArrayU32Float";
+ case NVPTXISD::TexUnified2DArrayU32FloatLevel:
+ return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
+ case NVPTXISD::TexUnified2DArrayU32FloatGrad:
+ return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
+ case NVPTXISD::TexUnified3DFloatS32:
+ return "NVPTXISD::TexUnified3DFloatS32";
+ case NVPTXISD::TexUnified3DFloatFloat:
+ return "NVPTXISD::TexUnified3DFloatFloat";
+ case NVPTXISD::TexUnified3DFloatFloatLevel:
+ return "NVPTXISD::TexUnified3DFloatFloatLevel";
+ case NVPTXISD::TexUnified3DFloatFloatGrad:
+ return "NVPTXISD::TexUnified3DFloatFloatGrad";
+ case NVPTXISD::TexUnified3DS32S32:
+ return "NVPTXISD::TexUnified3DS32S32";
+ case NVPTXISD::TexUnified3DS32Float:
+ return "NVPTXISD::TexUnified3DS32Float";
+ case NVPTXISD::TexUnified3DS32FloatLevel:
+ return "NVPTXISD::TexUnified3DS32FloatLevel";
+ case NVPTXISD::TexUnified3DS32FloatGrad:
+ return "NVPTXISD::TexUnified3DS32FloatGrad";
+ case NVPTXISD::TexUnified3DU32S32:
+ return "NVPTXISD::TexUnified3DU32S32";
+ case NVPTXISD::TexUnified3DU32Float:
+ return "NVPTXISD::TexUnified3DU32Float";
+ case NVPTXISD::TexUnified3DU32FloatLevel:
+ return "NVPTXISD::TexUnified3DU32FloatLevel";
+ case NVPTXISD::TexUnified3DU32FloatGrad:
+ return "NVPTXISD::TexUnified3DU32FloatGrad";
+ case NVPTXISD::TexUnifiedCubeFloatFloat:
+ return "NVPTXISD::TexUnifiedCubeFloatFloat";
+ case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
+ return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
+ case NVPTXISD::TexUnifiedCubeS32Float:
+ return "NVPTXISD::TexUnifiedCubeS32Float";
+ case NVPTXISD::TexUnifiedCubeS32FloatLevel:
+ return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
+ case NVPTXISD::TexUnifiedCubeU32Float:
+ return "NVPTXISD::TexUnifiedCubeU32Float";
+ case NVPTXISD::TexUnifiedCubeU32FloatLevel:
+ return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
+ return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
+ return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
+ case NVPTXISD::TexUnifiedCubeArrayS32Float:
+ return "NVPTXISD::TexUnifiedCubeArrayS32Float";
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
+ return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
+ case NVPTXISD::TexUnifiedCubeArrayU32Float:
+ return "NVPTXISD::TexUnifiedCubeArrayU32Float";
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
+ return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
+ case NVPTXISD::Tld4UnifiedR2DFloatFloat:
+ return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
+ case NVPTXISD::Tld4UnifiedG2DFloatFloat:
+ return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
+ case NVPTXISD::Tld4UnifiedB2DFloatFloat:
+ return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
+ case NVPTXISD::Tld4UnifiedA2DFloatFloat:
+ return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
+ case NVPTXISD::Tld4UnifiedR2DS64Float:
+ return "NVPTXISD::Tld4UnifiedR2DS64Float";
+ case NVPTXISD::Tld4UnifiedG2DS64Float:
+ return "NVPTXISD::Tld4UnifiedG2DS64Float";
+ case NVPTXISD::Tld4UnifiedB2DS64Float:
+ return "NVPTXISD::Tld4UnifiedB2DS64Float";
+ case NVPTXISD::Tld4UnifiedA2DS64Float:
+ return "NVPTXISD::Tld4UnifiedA2DS64Float";
+ case NVPTXISD::Tld4UnifiedR2DU64Float:
+ return "NVPTXISD::Tld4UnifiedR2DU64Float";
+ case NVPTXISD::Tld4UnifiedG2DU64Float:
+ return "NVPTXISD::Tld4UnifiedG2DU64Float";
+ case NVPTXISD::Tld4UnifiedB2DU64Float:
+ return "NVPTXISD::Tld4UnifiedB2DU64Float";
+ case NVPTXISD::Tld4UnifiedA2DU64Float:
+ return "NVPTXISD::Tld4UnifiedA2DU64Float";
+
+ case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
+ case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
+ case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
+ case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
+ case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
+ case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
+ case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
+ case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
+ case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
+ case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
+ case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";
+
+ case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
+ case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
+ case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
+ case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
+ case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
+ case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
+ case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
+ case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
+ case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
+ case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
+ case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";
+
+ case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
+ case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
+ case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
+ case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
+ case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
+ case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
+ case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
+ case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
+ case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
+ case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
+ case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";
+
+ case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
+ case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
+ case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
+ case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
+ case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
+ case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
+ case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
+ case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
+ case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
+ case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
+ case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";
+
+ case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
+ case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
+ case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
+ case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
+ case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
+ case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
+ case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
+ case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
+ case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
+ case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
+ case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";
case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
+ case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
+ case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";
@@ -434,9 +749,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
+ case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
+ case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";
@@ -444,9 +761,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
+ case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
+ case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";
@@ -454,9 +773,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
+ case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
+ case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";
@@ -464,12 +785,74 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
+ case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
+ case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";
+
+ case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
+ case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
+ case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
+ case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
+ case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
+ case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
+ case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
+ case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
+ case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
+ case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
+ case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";
+
+ case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
+ case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
+ case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
+ case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
+ case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
+ case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
+ case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
+ case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
+ case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
+ case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
+ case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";
+
+ case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
+ case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
+ case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
+ case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
+ case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
+ case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
+ case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
+ case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
+ case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
+ case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
+ case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";
+
+ case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
+ case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
+ case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
+ case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
+ case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
+ case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
+ case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
+ case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
+ case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
+ case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
+ case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";
+
+ case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
+ case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
+ case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
+ case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
+ case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
+ case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
+ case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
+ case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
+ case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
+ case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
+ case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
}
}
@@ -972,7 +1355,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
- if (retTy->isSingleValueType()) {
+ // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
+ // these three types to match the logic in
+ // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
+ // Plus, this behavior is consistent with nvcc's.
+ if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
+ retTy->isPointerTy()) {
// Scalar needs to be at least 32bit wide
if (resultsz < 32)
resultsz = 32;
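
The hunk above narrows the scalar-return path: only integer, floating-point, and pointer return types get the ".param .b<size-in-bits> retval0" form, while vectors and aggregates keep the ".param .align 16 .b8 retval0[<size-in-bytes>]" byte array. A minimal standalone sketch of that decision, assuming resultsz already holds the allocated size in bits; the helper name is illustrative and not part of the patch:

    #include "llvm/IR/Type.h"
    #include <algorithm>
    #include <string>

    // Hypothetical helper mirroring the check above: pick the PTX declaration
    // used for retval0.
    static std::string buildRetvalDecl(const llvm::Type *RetTy,
                                       unsigned ResultSzInBits) {
      if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() ||
          RetTy->isPointerTy()) {
        // Scalar returns are widened to at least 32 bits.
        unsigned Bits = std::max(ResultSzInBits, 32u);
        return ".param .b" + std::to_string(Bits) + " retval0";
      }
      // Everything else stays a byte array of the allocated size.
      return ".param .align 16 .b8 retval0[" +
             std::to_string(ResultSzInBits / 8) + "]";
    }
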
@@ -1068,8 +1456,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
- assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(),
- ObjectVT) == NumElts &&
+ assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
+ F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
bool needTruncate = sz < 8 ? true : false;
@@ -1494,6 +1882,21 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
break;
}
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ const DataLayout *TD = getDataLayout();
+
+ unsigned Align = MemSD->getAlignment();
+ unsigned PrefAlign =
+ TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
+ if (Align < PrefAlign) {
+ // This store is not sufficiently aligned, so bail out and let this vector
+ // store be scalarized. Note that we may still be able to emit smaller
+ // vector stores. For example, if we are storing a <4 x float> with an
+ // alignment of 8, this check will fail but the legalizer will try again
+ // with 2 x <2 x float>, which will succeed with an alignment of 8.
+ return SDValue();
+ }
+
unsigned Opcode = 0;
EVT EltVT = ValVT.getVectorElementType();
unsigned NumElts = ValVT.getVectorNumElements();
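
The early exit added above keeps under-aligned vector stores out of the custom vector-store lowering and leaves them to type legalization, which may still recover smaller vector stores (the <4 x float>, align 8 case described in the comment). A minimal sketch of the same predicate, assuming the MemSDNode, DataLayout, and stored value type are already in hand; the function name is illustrative:

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/LLVMContext.h"

    // Illustrative predicate: true when the custom vector-store lowering should
    // bail out and let the legalizer retry with narrower vectors.
    static bool storeIsUnderAligned(const llvm::MemSDNode *MemSD,
                                    const llvm::DataLayout *TD,
                                    llvm::EVT ValVT, llvm::LLVMContext &Ctx) {
      unsigned Align = MemSD->getAlignment();
      unsigned PrefAlign = TD->getPrefTypeAlignment(ValVT.getTypeForEVT(Ctx));
      return Align < PrefAlign;
    }
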
@@ -1536,8 +1939,6 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
Ops.push_back(N->getOperand(i));
}
- MemSDNode *MemSD = cast<MemSDNode>(N);
-
SDValue NewSt = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other), Ops,
MemSD->getMemoryVT(), MemSD->getMemOperand());
@@ -1632,7 +2033,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = DAG.getTarget().getTargetLowering();
+ const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
@@ -1746,7 +2147,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ISD::SEXTLOAD : ISD::ZEXTLOAD;
p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
MachinePointerInfo(srcValue), partVT, false,
- false, partAlign);
+ false, false, partAlign);
} else {
p = DAG.getLoad(partVT, dl, Root, srcAddr,
MachinePointerInfo(srcValue), false, false, false,
@@ -1767,7 +2168,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
unsigned NumElts = ObjectVT.getVectorNumElements();
assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
- unsigned Ofst = 0;
EVT EltVT = ObjectVT.getVectorElementType();
// V1 load
@@ -1776,10 +2176,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// We only have one element, so just directly load it
Value *SrcValue = Constant::getNullValue(PointerType::get(
EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
SDValue P = DAG.getLoad(
- EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
+ EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
false, true,
TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
if (P.getNode())
@@ -1788,7 +2186,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
InVals.push_back(P);
- Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext()));
++InsIdx;
} else if (NumElts == 2) {
// V2 load
@@ -1796,10 +2193,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
Value *SrcValue = Constant::getNullValue(PointerType::get(
VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
SDValue P = DAG.getLoad(
- VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
+ VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
false, true,
TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
@@ -1817,7 +2212,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
InVals.push_back(Elt0);
InVals.push_back(Elt1);
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
InsIdx += 2;
} else {
// V4 loads
@@ -1835,6 +2229,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
VecSize = 2;
}
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
+ unsigned Ofst = 0;
for (unsigned i = 0; i < NumElts; i += VecSize) {
Value *SrcValue = Constant::getNullValue(
PointerType::get(VecVT.getTypeForEVT(F->getContext()),
@@ -1879,6 +2274,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ISD::SEXTLOAD : ISD::ZEXTLOAD;
p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
MachinePointerInfo(srcValue), ObjectVT, false, false,
+ false,
TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
} else {
p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
@@ -2132,90 +2528,357 @@ static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
default:
return 0;
- case Intrinsic::nvvm_tex_1d_v4f32_i32:
- return NVPTXISD::Tex1DFloatI32;
+ case Intrinsic::nvvm_tex_1d_v4f32_s32:
+ return NVPTXISD::Tex1DFloatS32;
case Intrinsic::nvvm_tex_1d_v4f32_f32:
return NVPTXISD::Tex1DFloatFloat;
case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
return NVPTXISD::Tex1DFloatFloatLevel;
case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
return NVPTXISD::Tex1DFloatFloatGrad;
- case Intrinsic::nvvm_tex_1d_v4i32_i32:
- return NVPTXISD::Tex1DI32I32;
- case Intrinsic::nvvm_tex_1d_v4i32_f32:
- return NVPTXISD::Tex1DI32Float;
- case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
- return NVPTXISD::Tex1DI32FloatLevel;
- case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
- return NVPTXISD::Tex1DI32FloatGrad;
-
- case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
- return NVPTXISD::Tex1DArrayFloatI32;
+ case Intrinsic::nvvm_tex_1d_v4s32_s32:
+ return NVPTXISD::Tex1DS32S32;
+ case Intrinsic::nvvm_tex_1d_v4s32_f32:
+ return NVPTXISD::Tex1DS32Float;
+ case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
+ return NVPTXISD::Tex1DS32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
+ return NVPTXISD::Tex1DS32FloatGrad;
+ case Intrinsic::nvvm_tex_1d_v4u32_s32:
+ return NVPTXISD::Tex1DU32S32;
+ case Intrinsic::nvvm_tex_1d_v4u32_f32:
+ return NVPTXISD::Tex1DU32Float;
+ case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
+ return NVPTXISD::Tex1DU32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
+ return NVPTXISD::Tex1DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
+ return NVPTXISD::Tex1DArrayFloatS32;
case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloat;
case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloatLevel;
case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
return NVPTXISD::Tex1DArrayFloatFloatGrad;
- case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
- return NVPTXISD::Tex1DArrayI32I32;
- case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
- return NVPTXISD::Tex1DArrayI32Float;
- case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
- return NVPTXISD::Tex1DArrayI32FloatLevel;
- case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
- return NVPTXISD::Tex1DArrayI32FloatGrad;
-
- case Intrinsic::nvvm_tex_2d_v4f32_i32:
- return NVPTXISD::Tex2DFloatI32;
+ case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
+ return NVPTXISD::Tex1DArrayS32S32;
+ case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
+ return NVPTXISD::Tex1DArrayS32Float;
+ case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
+ return NVPTXISD::Tex1DArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
+ return NVPTXISD::Tex1DArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
+ return NVPTXISD::Tex1DArrayU32S32;
+ case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
+ return NVPTXISD::Tex1DArrayU32Float;
+ case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
+ return NVPTXISD::Tex1DArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
+ return NVPTXISD::Tex1DArrayU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_2d_v4f32_s32:
+ return NVPTXISD::Tex2DFloatS32;
case Intrinsic::nvvm_tex_2d_v4f32_f32:
return NVPTXISD::Tex2DFloatFloat;
case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
return NVPTXISD::Tex2DFloatFloatLevel;
case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
return NVPTXISD::Tex2DFloatFloatGrad;
- case Intrinsic::nvvm_tex_2d_v4i32_i32:
- return NVPTXISD::Tex2DI32I32;
- case Intrinsic::nvvm_tex_2d_v4i32_f32:
- return NVPTXISD::Tex2DI32Float;
- case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
- return NVPTXISD::Tex2DI32FloatLevel;
- case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
- return NVPTXISD::Tex2DI32FloatGrad;
-
- case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
- return NVPTXISD::Tex2DArrayFloatI32;
+ case Intrinsic::nvvm_tex_2d_v4s32_s32:
+ return NVPTXISD::Tex2DS32S32;
+ case Intrinsic::nvvm_tex_2d_v4s32_f32:
+ return NVPTXISD::Tex2DS32Float;
+ case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
+ return NVPTXISD::Tex2DS32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
+ return NVPTXISD::Tex2DS32FloatGrad;
+ case Intrinsic::nvvm_tex_2d_v4u32_s32:
+ return NVPTXISD::Tex2DU32S32;
+ case Intrinsic::nvvm_tex_2d_v4u32_f32:
+ return NVPTXISD::Tex2DU32Float;
+ case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
+ return NVPTXISD::Tex2DU32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
+ return NVPTXISD::Tex2DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
+ return NVPTXISD::Tex2DArrayFloatS32;
case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloat;
case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloatLevel;
case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
return NVPTXISD::Tex2DArrayFloatFloatGrad;
- case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
- return NVPTXISD::Tex2DArrayI32I32;
- case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
- return NVPTXISD::Tex2DArrayI32Float;
- case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
- return NVPTXISD::Tex2DArrayI32FloatLevel;
- case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
- return NVPTXISD::Tex2DArrayI32FloatGrad;
-
- case Intrinsic::nvvm_tex_3d_v4f32_i32:
- return NVPTXISD::Tex3DFloatI32;
+ case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
+ return NVPTXISD::Tex2DArrayS32S32;
+ case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
+ return NVPTXISD::Tex2DArrayS32Float;
+ case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
+ return NVPTXISD::Tex2DArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
+ return NVPTXISD::Tex2DArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
+ return NVPTXISD::Tex2DArrayU32S32;
+ case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
+ return NVPTXISD::Tex2DArrayU32Float;
+ case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
+ return NVPTXISD::Tex2DArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
+ return NVPTXISD::Tex2DArrayU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_3d_v4f32_s32:
+ return NVPTXISD::Tex3DFloatS32;
case Intrinsic::nvvm_tex_3d_v4f32_f32:
return NVPTXISD::Tex3DFloatFloat;
case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
return NVPTXISD::Tex3DFloatFloatLevel;
case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
return NVPTXISD::Tex3DFloatFloatGrad;
- case Intrinsic::nvvm_tex_3d_v4i32_i32:
- return NVPTXISD::Tex3DI32I32;
- case Intrinsic::nvvm_tex_3d_v4i32_f32:
- return NVPTXISD::Tex3DI32Float;
- case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
- return NVPTXISD::Tex3DI32FloatLevel;
- case Intrinsic::nvvm_tex_3d_grad_v4i32_f32:
- return NVPTXISD::Tex3DI32FloatGrad;
+ case Intrinsic::nvvm_tex_3d_v4s32_s32:
+ return NVPTXISD::Tex3DS32S32;
+ case Intrinsic::nvvm_tex_3d_v4s32_f32:
+ return NVPTXISD::Tex3DS32Float;
+ case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
+ return NVPTXISD::Tex3DS32FloatLevel;
+ case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
+ return NVPTXISD::Tex3DS32FloatGrad;
+ case Intrinsic::nvvm_tex_3d_v4u32_s32:
+ return NVPTXISD::Tex3DU32S32;
+ case Intrinsic::nvvm_tex_3d_v4u32_f32:
+ return NVPTXISD::Tex3DU32Float;
+ case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
+ return NVPTXISD::Tex3DU32FloatLevel;
+ case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
+ return NVPTXISD::Tex3DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_cube_v4f32_f32:
+ return NVPTXISD::TexCubeFloatFloat;
+ case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
+ return NVPTXISD::TexCubeFloatFloatLevel;
+ case Intrinsic::nvvm_tex_cube_v4s32_f32:
+ return NVPTXISD::TexCubeS32Float;
+ case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
+ return NVPTXISD::TexCubeS32FloatLevel;
+ case Intrinsic::nvvm_tex_cube_v4u32_f32:
+ return NVPTXISD::TexCubeU32Float;
+ case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
+ return NVPTXISD::TexCubeU32FloatLevel;
+
+ case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
+ return NVPTXISD::TexCubeArrayFloatFloat;
+ case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
+ return NVPTXISD::TexCubeArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
+ return NVPTXISD::TexCubeArrayS32Float;
+ case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
+ return NVPTXISD::TexCubeArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
+ return NVPTXISD::TexCubeArrayU32Float;
+ case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
+ return NVPTXISD::TexCubeArrayU32FloatLevel;
+
+ case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
+ return NVPTXISD::Tld4R2DFloatFloat;
+ case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
+ return NVPTXISD::Tld4G2DFloatFloat;
+ case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
+ return NVPTXISD::Tld4B2DFloatFloat;
+ case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
+ return NVPTXISD::Tld4A2DFloatFloat;
+ case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
+ return NVPTXISD::Tld4R2DS64Float;
+ case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
+ return NVPTXISD::Tld4G2DS64Float;
+ case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
+ return NVPTXISD::Tld4B2DS64Float;
+ case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
+ return NVPTXISD::Tld4A2DS64Float;
+ case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
+ return NVPTXISD::Tld4R2DU64Float;
+ case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
+ return NVPTXISD::Tld4G2DU64Float;
+ case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
+ return NVPTXISD::Tld4B2DU64Float;
+ case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
+ return NVPTXISD::Tld4A2DU64Float;
+
+ case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
+ return NVPTXISD::TexUnified1DFloatS32;
+ case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
+ return NVPTXISD::TexUnified1DFloatFloat;
+ case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
+ return NVPTXISD::TexUnified1DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
+ return NVPTXISD::TexUnified1DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
+ return NVPTXISD::TexUnified1DS32S32;
+ case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
+ return NVPTXISD::TexUnified1DS32Float;
+ case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
+ return NVPTXISD::TexUnified1DS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
+ return NVPTXISD::TexUnified1DS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
+ return NVPTXISD::TexUnified1DU32S32;
+ case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
+ return NVPTXISD::TexUnified1DU32Float;
+ case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
+ return NVPTXISD::TexUnified1DU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
+ return NVPTXISD::TexUnified1DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
+ return NVPTXISD::TexUnified1DArrayFloatS32;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
+ return NVPTXISD::TexUnified1DArrayFloatFloat;
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
+ return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
+ return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
+ return NVPTXISD::TexUnified1DArrayS32S32;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
+ return NVPTXISD::TexUnified1DArrayS32Float;
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
+ return NVPTXISD::TexUnified1DArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
+ return NVPTXISD::TexUnified1DArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
+ return NVPTXISD::TexUnified1DArrayU32S32;
+ case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
+ return NVPTXISD::TexUnified1DArrayU32Float;
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
+ return NVPTXISD::TexUnified1DArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
+ return NVPTXISD::TexUnified1DArrayU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
+ return NVPTXISD::TexUnified2DFloatS32;
+ case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
+ return NVPTXISD::TexUnified2DFloatFloat;
+ case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
+ return NVPTXISD::TexUnified2DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
+ return NVPTXISD::TexUnified2DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
+ return NVPTXISD::TexUnified2DS32S32;
+ case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
+ return NVPTXISD::TexUnified2DS32Float;
+ case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
+ return NVPTXISD::TexUnified2DS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
+ return NVPTXISD::TexUnified2DS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
+ return NVPTXISD::TexUnified2DU32S32;
+ case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
+ return NVPTXISD::TexUnified2DU32Float;
+ case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
+ return NVPTXISD::TexUnified2DU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
+ return NVPTXISD::TexUnified2DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
+ return NVPTXISD::TexUnified2DArrayFloatS32;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
+ return NVPTXISD::TexUnified2DArrayFloatFloat;
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
+ return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
+ return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
+ return NVPTXISD::TexUnified2DArrayS32S32;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
+ return NVPTXISD::TexUnified2DArrayS32Float;
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
+ return NVPTXISD::TexUnified2DArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
+ return NVPTXISD::TexUnified2DArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
+ return NVPTXISD::TexUnified2DArrayU32S32;
+ case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
+ return NVPTXISD::TexUnified2DArrayU32Float;
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
+ return NVPTXISD::TexUnified2DArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
+ return NVPTXISD::TexUnified2DArrayU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
+ return NVPTXISD::TexUnified3DFloatS32;
+ case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
+ return NVPTXISD::TexUnified3DFloatFloat;
+ case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
+ return NVPTXISD::TexUnified3DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
+ return NVPTXISD::TexUnified3DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
+ return NVPTXISD::TexUnified3DS32S32;
+ case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
+ return NVPTXISD::TexUnified3DS32Float;
+ case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
+ return NVPTXISD::TexUnified3DS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
+ return NVPTXISD::TexUnified3DS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
+ return NVPTXISD::TexUnified3DU32S32;
+ case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
+ return NVPTXISD::TexUnified3DU32Float;
+ case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
+ return NVPTXISD::TexUnified3DU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
+ return NVPTXISD::TexUnified3DU32FloatGrad;
+
+ case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeFloatFloat;
+ case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeS32Float;
+ case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeU32Float;
+ case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeU32FloatLevel;
+
+ case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayS32Float;
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayU32Float;
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
+
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
+ return NVPTXISD::Tld4UnifiedR2DFloatFloat;
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
+ return NVPTXISD::Tld4UnifiedG2DFloatFloat;
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
+ return NVPTXISD::Tld4UnifiedB2DFloatFloat;
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
+ return NVPTXISD::Tld4UnifiedA2DFloatFloat;
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
+ return NVPTXISD::Tld4UnifiedR2DS64Float;
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
+ return NVPTXISD::Tld4UnifiedG2DS64Float;
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
+ return NVPTXISD::Tld4UnifiedB2DS64Float;
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
+ return NVPTXISD::Tld4UnifiedA2DS64Float;
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
+ return NVPTXISD::Tld4UnifiedR2DU64Float;
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
+ return NVPTXISD::Tld4UnifiedG2DU64Float;
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
+ return NVPTXISD::Tld4UnifiedB2DU64Float;
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
+ return NVPTXISD::Tld4UnifiedA2DU64Float;
}
}
@@ -2223,18 +2886,132 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
switch (Intrinsic) {
default:
return 0;
+ case Intrinsic::nvvm_suld_1d_i8_clamp:
+ return NVPTXISD::Suld1DI8Clamp;
+ case Intrinsic::nvvm_suld_1d_i16_clamp:
+ return NVPTXISD::Suld1DI16Clamp;
+ case Intrinsic::nvvm_suld_1d_i32_clamp:
+ return NVPTXISD::Suld1DI32Clamp;
+ case Intrinsic::nvvm_suld_1d_i64_clamp:
+ return NVPTXISD::Suld1DI64Clamp;
+ case Intrinsic::nvvm_suld_1d_v2i8_clamp:
+ return NVPTXISD::Suld1DV2I8Clamp;
+ case Intrinsic::nvvm_suld_1d_v2i16_clamp:
+ return NVPTXISD::Suld1DV2I16Clamp;
+ case Intrinsic::nvvm_suld_1d_v2i32_clamp:
+ return NVPTXISD::Suld1DV2I32Clamp;
+ case Intrinsic::nvvm_suld_1d_v2i64_clamp:
+ return NVPTXISD::Suld1DV2I64Clamp;
+ case Intrinsic::nvvm_suld_1d_v4i8_clamp:
+ return NVPTXISD::Suld1DV4I8Clamp;
+ case Intrinsic::nvvm_suld_1d_v4i16_clamp:
+ return NVPTXISD::Suld1DV4I16Clamp;
+ case Intrinsic::nvvm_suld_1d_v4i32_clamp:
+ return NVPTXISD::Suld1DV4I32Clamp;
+ case Intrinsic::nvvm_suld_1d_array_i8_clamp:
+ return NVPTXISD::Suld1DArrayI8Clamp;
+ case Intrinsic::nvvm_suld_1d_array_i16_clamp:
+ return NVPTXISD::Suld1DArrayI16Clamp;
+ case Intrinsic::nvvm_suld_1d_array_i32_clamp:
+ return NVPTXISD::Suld1DArrayI32Clamp;
+ case Intrinsic::nvvm_suld_1d_array_i64_clamp:
+ return NVPTXISD::Suld1DArrayI64Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
+ return NVPTXISD::Suld1DArrayV2I8Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
+ return NVPTXISD::Suld1DArrayV2I16Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
+ return NVPTXISD::Suld1DArrayV2I32Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
+ return NVPTXISD::Suld1DArrayV2I64Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
+ return NVPTXISD::Suld1DArrayV4I8Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
+ return NVPTXISD::Suld1DArrayV4I16Clamp;
+ case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
+ return NVPTXISD::Suld1DArrayV4I32Clamp;
+ case Intrinsic::nvvm_suld_2d_i8_clamp:
+ return NVPTXISD::Suld2DI8Clamp;
+ case Intrinsic::nvvm_suld_2d_i16_clamp:
+ return NVPTXISD::Suld2DI16Clamp;
+ case Intrinsic::nvvm_suld_2d_i32_clamp:
+ return NVPTXISD::Suld2DI32Clamp;
+ case Intrinsic::nvvm_suld_2d_i64_clamp:
+ return NVPTXISD::Suld2DI64Clamp;
+ case Intrinsic::nvvm_suld_2d_v2i8_clamp:
+ return NVPTXISD::Suld2DV2I8Clamp;
+ case Intrinsic::nvvm_suld_2d_v2i16_clamp:
+ return NVPTXISD::Suld2DV2I16Clamp;
+ case Intrinsic::nvvm_suld_2d_v2i32_clamp:
+ return NVPTXISD::Suld2DV2I32Clamp;
+ case Intrinsic::nvvm_suld_2d_v2i64_clamp:
+ return NVPTXISD::Suld2DV2I64Clamp;
+ case Intrinsic::nvvm_suld_2d_v4i8_clamp:
+ return NVPTXISD::Suld2DV4I8Clamp;
+ case Intrinsic::nvvm_suld_2d_v4i16_clamp:
+ return NVPTXISD::Suld2DV4I16Clamp;
+ case Intrinsic::nvvm_suld_2d_v4i32_clamp:
+ return NVPTXISD::Suld2DV4I32Clamp;
+ case Intrinsic::nvvm_suld_2d_array_i8_clamp:
+ return NVPTXISD::Suld2DArrayI8Clamp;
+ case Intrinsic::nvvm_suld_2d_array_i16_clamp:
+ return NVPTXISD::Suld2DArrayI16Clamp;
+ case Intrinsic::nvvm_suld_2d_array_i32_clamp:
+ return NVPTXISD::Suld2DArrayI32Clamp;
+ case Intrinsic::nvvm_suld_2d_array_i64_clamp:
+ return NVPTXISD::Suld2DArrayI64Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
+ return NVPTXISD::Suld2DArrayV2I8Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
+ return NVPTXISD::Suld2DArrayV2I16Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
+ return NVPTXISD::Suld2DArrayV2I32Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
+ return NVPTXISD::Suld2DArrayV2I64Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
+ return NVPTXISD::Suld2DArrayV4I8Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
+ return NVPTXISD::Suld2DArrayV4I16Clamp;
+ case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
+ return NVPTXISD::Suld2DArrayV4I32Clamp;
+ case Intrinsic::nvvm_suld_3d_i8_clamp:
+ return NVPTXISD::Suld3DI8Clamp;
+ case Intrinsic::nvvm_suld_3d_i16_clamp:
+ return NVPTXISD::Suld3DI16Clamp;
+ case Intrinsic::nvvm_suld_3d_i32_clamp:
+ return NVPTXISD::Suld3DI32Clamp;
+ case Intrinsic::nvvm_suld_3d_i64_clamp:
+ return NVPTXISD::Suld3DI64Clamp;
+ case Intrinsic::nvvm_suld_3d_v2i8_clamp:
+ return NVPTXISD::Suld3DV2I8Clamp;
+ case Intrinsic::nvvm_suld_3d_v2i16_clamp:
+ return NVPTXISD::Suld3DV2I16Clamp;
+ case Intrinsic::nvvm_suld_3d_v2i32_clamp:
+ return NVPTXISD::Suld3DV2I32Clamp;
+ case Intrinsic::nvvm_suld_3d_v2i64_clamp:
+ return NVPTXISD::Suld3DV2I64Clamp;
+ case Intrinsic::nvvm_suld_3d_v4i8_clamp:
+ return NVPTXISD::Suld3DV4I8Clamp;
+ case Intrinsic::nvvm_suld_3d_v4i16_clamp:
+ return NVPTXISD::Suld3DV4I16Clamp;
+ case Intrinsic::nvvm_suld_3d_v4i32_clamp:
+ return NVPTXISD::Suld3DV4I32Clamp;
case Intrinsic::nvvm_suld_1d_i8_trap:
return NVPTXISD::Suld1DI8Trap;
case Intrinsic::nvvm_suld_1d_i16_trap:
return NVPTXISD::Suld1DI16Trap;
case Intrinsic::nvvm_suld_1d_i32_trap:
return NVPTXISD::Suld1DI32Trap;
+ case Intrinsic::nvvm_suld_1d_i64_trap:
+ return NVPTXISD::Suld1DI64Trap;
case Intrinsic::nvvm_suld_1d_v2i8_trap:
return NVPTXISD::Suld1DV2I8Trap;
case Intrinsic::nvvm_suld_1d_v2i16_trap:
return NVPTXISD::Suld1DV2I16Trap;
case Intrinsic::nvvm_suld_1d_v2i32_trap:
return NVPTXISD::Suld1DV2I32Trap;
+ case Intrinsic::nvvm_suld_1d_v2i64_trap:
+ return NVPTXISD::Suld1DV2I64Trap;
case Intrinsic::nvvm_suld_1d_v4i8_trap:
return NVPTXISD::Suld1DV4I8Trap;
case Intrinsic::nvvm_suld_1d_v4i16_trap:
@@ -2247,12 +3024,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
return NVPTXISD::Suld1DArrayI16Trap;
case Intrinsic::nvvm_suld_1d_array_i32_trap:
return NVPTXISD::Suld1DArrayI32Trap;
+ case Intrinsic::nvvm_suld_1d_array_i64_trap:
+ return NVPTXISD::Suld1DArrayI64Trap;
case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
return NVPTXISD::Suld1DArrayV2I8Trap;
case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
return NVPTXISD::Suld1DArrayV2I16Trap;
case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
return NVPTXISD::Suld1DArrayV2I32Trap;
+ case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
+ return NVPTXISD::Suld1DArrayV2I64Trap;
case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
return NVPTXISD::Suld1DArrayV4I8Trap;
case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
@@ -2265,12 +3046,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
return NVPTXISD::Suld2DI16Trap;
case Intrinsic::nvvm_suld_2d_i32_trap:
return NVPTXISD::Suld2DI32Trap;
+ case Intrinsic::nvvm_suld_2d_i64_trap:
+ return NVPTXISD::Suld2DI64Trap;
case Intrinsic::nvvm_suld_2d_v2i8_trap:
return NVPTXISD::Suld2DV2I8Trap;
case Intrinsic::nvvm_suld_2d_v2i16_trap:
return NVPTXISD::Suld2DV2I16Trap;
case Intrinsic::nvvm_suld_2d_v2i32_trap:
return NVPTXISD::Suld2DV2I32Trap;
+ case Intrinsic::nvvm_suld_2d_v2i64_trap:
+ return NVPTXISD::Suld2DV2I64Trap;
case Intrinsic::nvvm_suld_2d_v4i8_trap:
return NVPTXISD::Suld2DV4I8Trap;
case Intrinsic::nvvm_suld_2d_v4i16_trap:
@@ -2283,12 +3068,16 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
return NVPTXISD::Suld2DArrayI16Trap;
case Intrinsic::nvvm_suld_2d_array_i32_trap:
return NVPTXISD::Suld2DArrayI32Trap;
+ case Intrinsic::nvvm_suld_2d_array_i64_trap:
+ return NVPTXISD::Suld2DArrayI64Trap;
case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
return NVPTXISD::Suld2DArrayV2I8Trap;
case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
return NVPTXISD::Suld2DArrayV2I16Trap;
case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
return NVPTXISD::Suld2DArrayV2I32Trap;
+ case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
+ return NVPTXISD::Suld2DArrayV2I64Trap;
case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
return NVPTXISD::Suld2DArrayV4I8Trap;
case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
@@ -2301,18 +3090,132 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
return NVPTXISD::Suld3DI16Trap;
case Intrinsic::nvvm_suld_3d_i32_trap:
return NVPTXISD::Suld3DI32Trap;
+ case Intrinsic::nvvm_suld_3d_i64_trap:
+ return NVPTXISD::Suld3DI64Trap;
case Intrinsic::nvvm_suld_3d_v2i8_trap:
return NVPTXISD::Suld3DV2I8Trap;
case Intrinsic::nvvm_suld_3d_v2i16_trap:
return NVPTXISD::Suld3DV2I16Trap;
case Intrinsic::nvvm_suld_3d_v2i32_trap:
return NVPTXISD::Suld3DV2I32Trap;
+ case Intrinsic::nvvm_suld_3d_v2i64_trap:
+ return NVPTXISD::Suld3DV2I64Trap;
case Intrinsic::nvvm_suld_3d_v4i8_trap:
return NVPTXISD::Suld3DV4I8Trap;
case Intrinsic::nvvm_suld_3d_v4i16_trap:
return NVPTXISD::Suld3DV4I16Trap;
case Intrinsic::nvvm_suld_3d_v4i32_trap:
return NVPTXISD::Suld3DV4I32Trap;
+ case Intrinsic::nvvm_suld_1d_i8_zero:
+ return NVPTXISD::Suld1DI8Zero;
+ case Intrinsic::nvvm_suld_1d_i16_zero:
+ return NVPTXISD::Suld1DI16Zero;
+ case Intrinsic::nvvm_suld_1d_i32_zero:
+ return NVPTXISD::Suld1DI32Zero;
+ case Intrinsic::nvvm_suld_1d_i64_zero:
+ return NVPTXISD::Suld1DI64Zero;
+ case Intrinsic::nvvm_suld_1d_v2i8_zero:
+ return NVPTXISD::Suld1DV2I8Zero;
+ case Intrinsic::nvvm_suld_1d_v2i16_zero:
+ return NVPTXISD::Suld1DV2I16Zero;
+ case Intrinsic::nvvm_suld_1d_v2i32_zero:
+ return NVPTXISD::Suld1DV2I32Zero;
+ case Intrinsic::nvvm_suld_1d_v2i64_zero:
+ return NVPTXISD::Suld1DV2I64Zero;
+ case Intrinsic::nvvm_suld_1d_v4i8_zero:
+ return NVPTXISD::Suld1DV4I8Zero;
+ case Intrinsic::nvvm_suld_1d_v4i16_zero:
+ return NVPTXISD::Suld1DV4I16Zero;
+ case Intrinsic::nvvm_suld_1d_v4i32_zero:
+ return NVPTXISD::Suld1DV4I32Zero;
+ case Intrinsic::nvvm_suld_1d_array_i8_zero:
+ return NVPTXISD::Suld1DArrayI8Zero;
+ case Intrinsic::nvvm_suld_1d_array_i16_zero:
+ return NVPTXISD::Suld1DArrayI16Zero;
+ case Intrinsic::nvvm_suld_1d_array_i32_zero:
+ return NVPTXISD::Suld1DArrayI32Zero;
+ case Intrinsic::nvvm_suld_1d_array_i64_zero:
+ return NVPTXISD::Suld1DArrayI64Zero;
+ case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
+ return NVPTXISD::Suld1DArrayV2I8Zero;
+ case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
+ return NVPTXISD::Suld1DArrayV2I16Zero;
+ case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
+ return NVPTXISD::Suld1DArrayV2I32Zero;
+ case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
+ return NVPTXISD::Suld1DArrayV2I64Zero;
+ case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
+ return NVPTXISD::Suld1DArrayV4I8Zero;
+ case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
+ return NVPTXISD::Suld1DArrayV4I16Zero;
+ case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
+ return NVPTXISD::Suld1DArrayV4I32Zero;
+ case Intrinsic::nvvm_suld_2d_i8_zero:
+ return NVPTXISD::Suld2DI8Zero;
+ case Intrinsic::nvvm_suld_2d_i16_zero:
+ return NVPTXISD::Suld2DI16Zero;
+ case Intrinsic::nvvm_suld_2d_i32_zero:
+ return NVPTXISD::Suld2DI32Zero;
+ case Intrinsic::nvvm_suld_2d_i64_zero:
+ return NVPTXISD::Suld2DI64Zero;
+ case Intrinsic::nvvm_suld_2d_v2i8_zero:
+ return NVPTXISD::Suld2DV2I8Zero;
+ case Intrinsic::nvvm_suld_2d_v2i16_zero:
+ return NVPTXISD::Suld2DV2I16Zero;
+ case Intrinsic::nvvm_suld_2d_v2i32_zero:
+ return NVPTXISD::Suld2DV2I32Zero;
+ case Intrinsic::nvvm_suld_2d_v2i64_zero:
+ return NVPTXISD::Suld2DV2I64Zero;
+ case Intrinsic::nvvm_suld_2d_v4i8_zero:
+ return NVPTXISD::Suld2DV4I8Zero;
+ case Intrinsic::nvvm_suld_2d_v4i16_zero:
+ return NVPTXISD::Suld2DV4I16Zero;
+ case Intrinsic::nvvm_suld_2d_v4i32_zero:
+ return NVPTXISD::Suld2DV4I32Zero;
+ case Intrinsic::nvvm_suld_2d_array_i8_zero:
+ return NVPTXISD::Suld2DArrayI8Zero;
+ case Intrinsic::nvvm_suld_2d_array_i16_zero:
+ return NVPTXISD::Suld2DArrayI16Zero;
+ case Intrinsic::nvvm_suld_2d_array_i32_zero:
+ return NVPTXISD::Suld2DArrayI32Zero;
+ case Intrinsic::nvvm_suld_2d_array_i64_zero:
+ return NVPTXISD::Suld2DArrayI64Zero;
+ case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
+ return NVPTXISD::Suld2DArrayV2I8Zero;
+ case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
+ return NVPTXISD::Suld2DArrayV2I16Zero;
+ case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
+ return NVPTXISD::Suld2DArrayV2I32Zero;
+ case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
+ return NVPTXISD::Suld2DArrayV2I64Zero;
+ case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
+ return NVPTXISD::Suld2DArrayV4I8Zero;
+ case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
+ return NVPTXISD::Suld2DArrayV4I16Zero;
+ case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
+ return NVPTXISD::Suld2DArrayV4I32Zero;
+ case Intrinsic::nvvm_suld_3d_i8_zero:
+ return NVPTXISD::Suld3DI8Zero;
+ case Intrinsic::nvvm_suld_3d_i16_zero:
+ return NVPTXISD::Suld3DI16Zero;
+ case Intrinsic::nvvm_suld_3d_i32_zero:
+ return NVPTXISD::Suld3DI32Zero;
+ case Intrinsic::nvvm_suld_3d_i64_zero:
+ return NVPTXISD::Suld3DI64Zero;
+ case Intrinsic::nvvm_suld_3d_v2i8_zero:
+ return NVPTXISD::Suld3DV2I8Zero;
+ case Intrinsic::nvvm_suld_3d_v2i16_zero:
+ return NVPTXISD::Suld3DV2I16Zero;
+ case Intrinsic::nvvm_suld_3d_v2i32_zero:
+ return NVPTXISD::Suld3DV2I32Zero;
+ case Intrinsic::nvvm_suld_3d_v2i64_zero:
+ return NVPTXISD::Suld3DV2I64Zero;
+ case Intrinsic::nvvm_suld_3d_v4i8_zero:
+ return NVPTXISD::Suld3DV4I8Zero;
+ case Intrinsic::nvvm_suld_3d_v4i16_zero:
+ return NVPTXISD::Suld3DV4I16Zero;
+ case Intrinsic::nvvm_suld_3d_v4i32_zero:
+ return NVPTXISD::Suld3DV4I32Zero;
}
}
@@ -2366,16 +3269,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
-
- // alignment is available as metadata.
- // Grab it and set the alignment.
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
- MDNode *AlignMD = I.getMetadata("align");
- assert(AlignMD && "Must have a non-null MDNode");
- assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
- Value *Align = AlignMD->getOperand(0);
- int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
- Info.align = Alignment;
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
return true;
}
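
The rewrite above (repeated for the second case in the following hunk) stops reading the alignment from "align" metadata; the intrinsic now passes it as its second call argument, so a plain operand read replaces the metadata walk and its asserts. A minimal sketch of the new extraction, assuming operand 1 is always a ConstantInt as the visible code expects; the helper name is illustrative:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Casting.h"

    // Illustrative helper: read the alignment immediate from call operand 1
    // instead of consulting "align" metadata.
    static unsigned getAlignmentOperand(const llvm::CallInst &I) {
      return static_cast<unsigned>(
          llvm::cast<llvm::ConstantInt>(I.getArgOperand(1))->getZExtValue());
    }
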
@@ -2395,42 +3289,69 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
-
- // alignment is available as metadata.
- // Grab it and set the alignment.
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
- MDNode *AlignMD = I.getMetadata("align");
- assert(AlignMD && "Must have a non-null MDNode");
- assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
- Value *Align = AlignMD->getOperand(0);
- int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
- Info.align = Alignment;
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
return true;
}
- case Intrinsic::nvvm_tex_1d_v4f32_i32:
+ case Intrinsic::nvvm_tex_1d_v4f32_s32:
case Intrinsic::nvvm_tex_1d_v4f32_f32:
case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
- case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
+ case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
- case Intrinsic::nvvm_tex_2d_v4f32_i32:
+ case Intrinsic::nvvm_tex_2d_v4f32_s32:
case Intrinsic::nvvm_tex_2d_v4f32_f32:
case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
- case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
+ case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
- case Intrinsic::nvvm_tex_3d_v4f32_i32:
+ case Intrinsic::nvvm_tex_3d_v4f32_s32:
case Intrinsic::nvvm_tex_3d_v4f32_f32:
case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
- case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: {
+ case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_cube_v4f32_f32:
+ case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
+ case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
Info.opc = getOpcForTextureInstr(Intrinsic);
- Info.memVT = MVT::f32;
+ Info.memVT = MVT::v4f32;
Info.ptrVal = nullptr;
Info.offset = 0;
Info.vol = 0;
@@ -2439,28 +3360,120 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 16;
return true;
}
- case Intrinsic::nvvm_tex_1d_v4i32_i32:
- case Intrinsic::nvvm_tex_1d_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
- case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
- case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_v4i32_i32:
- case Intrinsic::nvvm_tex_2d_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
- case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
- case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
- case Intrinsic::nvvm_tex_3d_v4i32_i32:
- case Intrinsic::nvvm_tex_3d_v4i32_f32:
- case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
- case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: {
+ case Intrinsic::nvvm_tex_1d_v4s32_s32:
+ case Intrinsic::nvvm_tex_1d_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
+ case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_v4s32_s32:
+ case Intrinsic::nvvm_tex_2d_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
+ case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_3d_v4s32_s32:
+ case Intrinsic::nvvm_tex_3d_v4s32_f32:
+ case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_cube_v4u32_f32:
+ case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_v4u32_s32:
+ case Intrinsic::nvvm_tex_1d_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
+ case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_v4u32_s32:
+ case Intrinsic::nvvm_tex_2d_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
+ case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_3d_v4u32_s32:
+ case Intrinsic::nvvm_tex_3d_v4u32_f32:
+ case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
+ case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
+ case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
+ case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
+ case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
Info.opc = getOpcForTextureInstr(Intrinsic);
- Info.memVT = MVT::i32;
+ Info.memVT = MVT::v4i32;
Info.ptrVal = nullptr;
Info.offset = 0;
Info.vol = 0;
@@ -2469,6 +3482,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 16;
return true;
}
+ case Intrinsic::nvvm_suld_1d_i8_clamp:
+ case Intrinsic::nvvm_suld_1d_v2i8_clamp:
+ case Intrinsic::nvvm_suld_1d_v4i8_clamp:
+ case Intrinsic::nvvm_suld_1d_array_i8_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
+ case Intrinsic::nvvm_suld_2d_i8_clamp:
+ case Intrinsic::nvvm_suld_2d_v2i8_clamp:
+ case Intrinsic::nvvm_suld_2d_v4i8_clamp:
+ case Intrinsic::nvvm_suld_2d_array_i8_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
+ case Intrinsic::nvvm_suld_3d_i8_clamp:
+ case Intrinsic::nvvm_suld_3d_v2i8_clamp:
+ case Intrinsic::nvvm_suld_3d_v4i8_clamp:
case Intrinsic::nvvm_suld_1d_i8_trap:
case Intrinsic::nvvm_suld_1d_v2i8_trap:
case Intrinsic::nvvm_suld_1d_v4i8_trap:
@@ -2483,7 +3511,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
case Intrinsic::nvvm_suld_3d_i8_trap:
case Intrinsic::nvvm_suld_3d_v2i8_trap:
- case Intrinsic::nvvm_suld_3d_v4i8_trap: {
+ case Intrinsic::nvvm_suld_3d_v4i8_trap:
+ case Intrinsic::nvvm_suld_1d_i8_zero:
+ case Intrinsic::nvvm_suld_1d_v2i8_zero:
+ case Intrinsic::nvvm_suld_1d_v4i8_zero:
+ case Intrinsic::nvvm_suld_1d_array_i8_zero:
+ case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
+ case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
+ case Intrinsic::nvvm_suld_2d_i8_zero:
+ case Intrinsic::nvvm_suld_2d_v2i8_zero:
+ case Intrinsic::nvvm_suld_2d_v4i8_zero:
+ case Intrinsic::nvvm_suld_2d_array_i8_zero:
+ case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
+ case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
+ case Intrinsic::nvvm_suld_3d_i8_zero:
+ case Intrinsic::nvvm_suld_3d_v2i8_zero:
+ case Intrinsic::nvvm_suld_3d_v4i8_zero: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i8;
Info.ptrVal = nullptr;
@@ -2494,6 +3537,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 16;
return true;
}
+ case Intrinsic::nvvm_suld_1d_i16_clamp:
+ case Intrinsic::nvvm_suld_1d_v2i16_clamp:
+ case Intrinsic::nvvm_suld_1d_v4i16_clamp:
+ case Intrinsic::nvvm_suld_1d_array_i16_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
+ case Intrinsic::nvvm_suld_2d_i16_clamp:
+ case Intrinsic::nvvm_suld_2d_v2i16_clamp:
+ case Intrinsic::nvvm_suld_2d_v4i16_clamp:
+ case Intrinsic::nvvm_suld_2d_array_i16_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
+ case Intrinsic::nvvm_suld_3d_i16_clamp:
+ case Intrinsic::nvvm_suld_3d_v2i16_clamp:
+ case Intrinsic::nvvm_suld_3d_v4i16_clamp:
case Intrinsic::nvvm_suld_1d_i16_trap:
case Intrinsic::nvvm_suld_1d_v2i16_trap:
case Intrinsic::nvvm_suld_1d_v4i16_trap:
@@ -2508,7 +3566,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
case Intrinsic::nvvm_suld_3d_i16_trap:
case Intrinsic::nvvm_suld_3d_v2i16_trap:
- case Intrinsic::nvvm_suld_3d_v4i16_trap: {
+ case Intrinsic::nvvm_suld_3d_v4i16_trap:
+ case Intrinsic::nvvm_suld_1d_i16_zero:
+ case Intrinsic::nvvm_suld_1d_v2i16_zero:
+ case Intrinsic::nvvm_suld_1d_v4i16_zero:
+ case Intrinsic::nvvm_suld_1d_array_i16_zero:
+ case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
+ case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
+ case Intrinsic::nvvm_suld_2d_i16_zero:
+ case Intrinsic::nvvm_suld_2d_v2i16_zero:
+ case Intrinsic::nvvm_suld_2d_v4i16_zero:
+ case Intrinsic::nvvm_suld_2d_array_i16_zero:
+ case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
+ case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
+ case Intrinsic::nvvm_suld_3d_i16_zero:
+ case Intrinsic::nvvm_suld_3d_v2i16_zero:
+ case Intrinsic::nvvm_suld_3d_v4i16_zero: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i16;
Info.ptrVal = nullptr;
@@ -2519,6 +3592,21 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 16;
return true;
}
+ case Intrinsic::nvvm_suld_1d_i32_clamp:
+ case Intrinsic::nvvm_suld_1d_v2i32_clamp:
+ case Intrinsic::nvvm_suld_1d_v4i32_clamp:
+ case Intrinsic::nvvm_suld_1d_array_i32_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
+ case Intrinsic::nvvm_suld_2d_i32_clamp:
+ case Intrinsic::nvvm_suld_2d_v2i32_clamp:
+ case Intrinsic::nvvm_suld_2d_v4i32_clamp:
+ case Intrinsic::nvvm_suld_2d_array_i32_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
+ case Intrinsic::nvvm_suld_3d_i32_clamp:
+ case Intrinsic::nvvm_suld_3d_v2i32_clamp:
+ case Intrinsic::nvvm_suld_3d_v4i32_clamp:
case Intrinsic::nvvm_suld_1d_i32_trap:
case Intrinsic::nvvm_suld_1d_v2i32_trap:
case Intrinsic::nvvm_suld_1d_v4i32_trap:
@@ -2533,7 +3621,22 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
case Intrinsic::nvvm_suld_3d_i32_trap:
case Intrinsic::nvvm_suld_3d_v2i32_trap:
- case Intrinsic::nvvm_suld_3d_v4i32_trap: {
+ case Intrinsic::nvvm_suld_3d_v4i32_trap:
+ case Intrinsic::nvvm_suld_1d_i32_zero:
+ case Intrinsic::nvvm_suld_1d_v2i32_zero:
+ case Intrinsic::nvvm_suld_1d_v4i32_zero:
+ case Intrinsic::nvvm_suld_1d_array_i32_zero:
+ case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
+ case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
+ case Intrinsic::nvvm_suld_2d_i32_zero:
+ case Intrinsic::nvvm_suld_2d_v2i32_zero:
+ case Intrinsic::nvvm_suld_2d_v4i32_zero:
+ case Intrinsic::nvvm_suld_2d_array_i32_zero:
+ case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
+ case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
+ case Intrinsic::nvvm_suld_3d_i32_zero:
+ case Intrinsic::nvvm_suld_3d_v2i32_zero:
+ case Intrinsic::nvvm_suld_3d_v4i32_zero: {
Info.opc = getOpcForSurfaceInstr(Intrinsic);
Info.memVT = MVT::i32;
Info.ptrVal = nullptr;
@@ -2544,7 +3647,46 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 16;
return true;
}
-
+ case Intrinsic::nvvm_suld_1d_i64_clamp:
+ case Intrinsic::nvvm_suld_1d_v2i64_clamp:
+ case Intrinsic::nvvm_suld_1d_array_i64_clamp:
+ case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
+ case Intrinsic::nvvm_suld_2d_i64_clamp:
+ case Intrinsic::nvvm_suld_2d_v2i64_clamp:
+ case Intrinsic::nvvm_suld_2d_array_i64_clamp:
+ case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
+ case Intrinsic::nvvm_suld_3d_i64_clamp:
+ case Intrinsic::nvvm_suld_3d_v2i64_clamp:
+ case Intrinsic::nvvm_suld_1d_i64_trap:
+ case Intrinsic::nvvm_suld_1d_v2i64_trap:
+ case Intrinsic::nvvm_suld_1d_array_i64_trap:
+ case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
+ case Intrinsic::nvvm_suld_2d_i64_trap:
+ case Intrinsic::nvvm_suld_2d_v2i64_trap:
+ case Intrinsic::nvvm_suld_2d_array_i64_trap:
+ case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
+ case Intrinsic::nvvm_suld_3d_i64_trap:
+ case Intrinsic::nvvm_suld_3d_v2i64_trap:
+ case Intrinsic::nvvm_suld_1d_i64_zero:
+ case Intrinsic::nvvm_suld_1d_v2i64_zero:
+ case Intrinsic::nvvm_suld_1d_array_i64_zero:
+ case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
+ case Intrinsic::nvvm_suld_2d_i64_zero:
+ case Intrinsic::nvvm_suld_2d_v2i64_zero:
+ case Intrinsic::nvvm_suld_2d_array_i64_zero:
+ case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
+ case Intrinsic::nvvm_suld_3d_i64_zero:
+ case Intrinsic::nvvm_suld_3d_v2i64_zero: {
+ Info.opc = getOpcForSurfaceInstr(Intrinsic);
+ Info.memVT = MVT::i64;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
}
return false;
}
@@ -2648,7 +3790,31 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
// NVPTX DAG Combining
//===----------------------------------------------------------------------===//
-extern unsigned FMAContractLevel;
+bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
+ CodeGenOpt::Level OptLevel) const {
+ const Function *F = MF.getFunction();
+ const TargetOptions &TO = MF.getTarget().Options;
+
+ // Always honor command-line argument
+ if (FMAContractLevelOpt.getNumOccurrences() > 0) {
+ return FMAContractLevelOpt > 0;
+ } else if (OptLevel == 0) {
+ // Do not contract if we're not optimizing the code
+ return false;
+ } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
+ // Honor TargetOptions flags that explicitly say fusion is okay
+ return true;
+ } else if (F->hasFnAttribute("unsafe-fp-math")) {
+ // Check for unsafe-fp-math=true coming from Clang
+ Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+ StringRef Val = Attr.getValueAsString();
+ if (Val == "true")
+ return true;
+ }
+
+ // We did not have a clear indication that fusion is allowed, so assume not
+ return false;
+}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
@@ -2682,7 +3848,9 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
}
else if (N0.getOpcode() == ISD::FMUL) {
if (VT == MVT::f32 || VT == MVT::f64) {
- if (FMAContractLevel == 0)
+ const auto *TLI = static_cast<const NVPTXTargetLowering *>(
+ &DAG.getTargetLoweringInfo());
+ if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
return SDValue();
// For floating point:
@@ -2867,13 +4035,13 @@ static bool IsMulWideOperandDemotable(SDValue Op,
if (Op.getOpcode() == ISD::SIGN_EXTEND ||
Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Signed;
return true;
}
} else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Unsigned;
return true;
}
@@ -3027,8 +4195,7 @@ static SDValue PerformSHLCombine(SDNode *N,
SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- // FIXME: Get this from the DAG somehow
- CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+ CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
@@ -3046,6 +4213,7 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+ const DataLayout *TD,
SmallVectorImpl<SDValue> &Results) {
EVT ResVT = N->getValueType(0);
SDLoc DL(N);
@@ -3073,6 +4241,20 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
break;
}
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ unsigned Align = LD->getAlignment();
+ unsigned PrefAlign =
+ TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
+ if (Align < PrefAlign) {
+ // This load is not sufficiently aligned, so bail out and let this vector
+ // load be scalarized. Note that we may still be able to emit smaller
+ // vector loads. For example, if we are loading a <4 x float> with an
+ // alignment of 8, this check will fail but the legalizer will try again
+ // with 2 x <2 x float>, which will succeed with an alignment of 8.
+ return;
+ }
+
EVT EltVT = ResVT.getVectorElementType();
unsigned NumElts = ResVT.getVectorNumElements();
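A worked instance of the alignment bail-out in the hunk above (a sketch only; the concrete preferred alignments are an assumption based on the usual NVPTX data layout, where 128-bit vectors prefer 16 bytes and 64-bit vectors prefer 8):

  // Load of <4 x float> with Align = 8:
  //   PrefAlign(<4 x float>) = 16  ->  8 < 16, so ReplaceLoadVector returns early.
  // The legalizer then retries as two <2 x float> loads:
  //   PrefAlign(<2 x float>) = 8   ->  8 < 8 is false, so the smaller vector
  //   loads are emitted with the available alignment.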
@@ -3109,8 +4291,6 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
OtherOps.push_back(N->getOperand(i));
- LoadSDNode *LD = cast<LoadSDNode>(N);
-
// The select routine does not have access to the LoadSDNode instance, so
// pass along the extension information
OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
@@ -3283,7 +4463,7 @@ void NVPTXTargetLowering::ReplaceNodeResults(
default:
report_fatal_error("Unhandled custom legalization");
case ISD::LOAD:
- ReplaceLoadVector(N, DAG, Results);
+ ReplaceLoadVector(N, DAG, getDataLayout(), Results);
return;
case ISD::INTRINSIC_W_CHAIN:
ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
@@ -3316,3 +4496,10 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete DwarfRangesSection;
delete DwarfMacroInfoSection;
}
+
+const MCSection *
+NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ return getDataSection();
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 7b4026d..d66d81a 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXISELLOWERING_H
-#define NVPTXISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
#include "NVPTX.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -77,54 +77,244 @@ enum NodeType {
StoreRetvalV4,
// Texture intrinsics
- Tex1DFloatI32,
+ Tex1DFloatS32,
Tex1DFloatFloat,
Tex1DFloatFloatLevel,
Tex1DFloatFloatGrad,
- Tex1DI32I32,
- Tex1DI32Float,
- Tex1DI32FloatLevel,
- Tex1DI32FloatGrad,
- Tex1DArrayFloatI32,
+ Tex1DS32S32,
+ Tex1DS32Float,
+ Tex1DS32FloatLevel,
+ Tex1DS32FloatGrad,
+ Tex1DU32S32,
+ Tex1DU32Float,
+ Tex1DU32FloatLevel,
+ Tex1DU32FloatGrad,
+ Tex1DArrayFloatS32,
Tex1DArrayFloatFloat,
Tex1DArrayFloatFloatLevel,
Tex1DArrayFloatFloatGrad,
- Tex1DArrayI32I32,
- Tex1DArrayI32Float,
- Tex1DArrayI32FloatLevel,
- Tex1DArrayI32FloatGrad,
- Tex2DFloatI32,
+ Tex1DArrayS32S32,
+ Tex1DArrayS32Float,
+ Tex1DArrayS32FloatLevel,
+ Tex1DArrayS32FloatGrad,
+ Tex1DArrayU32S32,
+ Tex1DArrayU32Float,
+ Tex1DArrayU32FloatLevel,
+ Tex1DArrayU32FloatGrad,
+ Tex2DFloatS32,
Tex2DFloatFloat,
Tex2DFloatFloatLevel,
Tex2DFloatFloatGrad,
- Tex2DI32I32,
- Tex2DI32Float,
- Tex2DI32FloatLevel,
- Tex2DI32FloatGrad,
- Tex2DArrayFloatI32,
+ Tex2DS32S32,
+ Tex2DS32Float,
+ Tex2DS32FloatLevel,
+ Tex2DS32FloatGrad,
+ Tex2DU32S32,
+ Tex2DU32Float,
+ Tex2DU32FloatLevel,
+ Tex2DU32FloatGrad,
+ Tex2DArrayFloatS32,
Tex2DArrayFloatFloat,
Tex2DArrayFloatFloatLevel,
Tex2DArrayFloatFloatGrad,
- Tex2DArrayI32I32,
- Tex2DArrayI32Float,
- Tex2DArrayI32FloatLevel,
- Tex2DArrayI32FloatGrad,
- Tex3DFloatI32,
+ Tex2DArrayS32S32,
+ Tex2DArrayS32Float,
+ Tex2DArrayS32FloatLevel,
+ Tex2DArrayS32FloatGrad,
+ Tex2DArrayU32S32,
+ Tex2DArrayU32Float,
+ Tex2DArrayU32FloatLevel,
+ Tex2DArrayU32FloatGrad,
+ Tex3DFloatS32,
Tex3DFloatFloat,
Tex3DFloatFloatLevel,
Tex3DFloatFloatGrad,
- Tex3DI32I32,
- Tex3DI32Float,
- Tex3DI32FloatLevel,
- Tex3DI32FloatGrad,
+ Tex3DS32S32,
+ Tex3DS32Float,
+ Tex3DS32FloatLevel,
+ Tex3DS32FloatGrad,
+ Tex3DU32S32,
+ Tex3DU32Float,
+ Tex3DU32FloatLevel,
+ Tex3DU32FloatGrad,
+ TexCubeFloatFloat,
+ TexCubeFloatFloatLevel,
+ TexCubeS32Float,
+ TexCubeS32FloatLevel,
+ TexCubeU32Float,
+ TexCubeU32FloatLevel,
+ TexCubeArrayFloatFloat,
+ TexCubeArrayFloatFloatLevel,
+ TexCubeArrayS32Float,
+ TexCubeArrayS32FloatLevel,
+ TexCubeArrayU32Float,
+ TexCubeArrayU32FloatLevel,
+ Tld4R2DFloatFloat,
+ Tld4G2DFloatFloat,
+ Tld4B2DFloatFloat,
+ Tld4A2DFloatFloat,
+ Tld4R2DS64Float,
+ Tld4G2DS64Float,
+ Tld4B2DS64Float,
+ Tld4A2DS64Float,
+ Tld4R2DU64Float,
+ Tld4G2DU64Float,
+ Tld4B2DU64Float,
+ Tld4A2DU64Float,
+ TexUnified1DFloatS32,
+ TexUnified1DFloatFloat,
+ TexUnified1DFloatFloatLevel,
+ TexUnified1DFloatFloatGrad,
+ TexUnified1DS32S32,
+ TexUnified1DS32Float,
+ TexUnified1DS32FloatLevel,
+ TexUnified1DS32FloatGrad,
+ TexUnified1DU32S32,
+ TexUnified1DU32Float,
+ TexUnified1DU32FloatLevel,
+ TexUnified1DU32FloatGrad,
+ TexUnified1DArrayFloatS32,
+ TexUnified1DArrayFloatFloat,
+ TexUnified1DArrayFloatFloatLevel,
+ TexUnified1DArrayFloatFloatGrad,
+ TexUnified1DArrayS32S32,
+ TexUnified1DArrayS32Float,
+ TexUnified1DArrayS32FloatLevel,
+ TexUnified1DArrayS32FloatGrad,
+ TexUnified1DArrayU32S32,
+ TexUnified1DArrayU32Float,
+ TexUnified1DArrayU32FloatLevel,
+ TexUnified1DArrayU32FloatGrad,
+ TexUnified2DFloatS32,
+ TexUnified2DFloatFloat,
+ TexUnified2DFloatFloatLevel,
+ TexUnified2DFloatFloatGrad,
+ TexUnified2DS32S32,
+ TexUnified2DS32Float,
+ TexUnified2DS32FloatLevel,
+ TexUnified2DS32FloatGrad,
+ TexUnified2DU32S32,
+ TexUnified2DU32Float,
+ TexUnified2DU32FloatLevel,
+ TexUnified2DU32FloatGrad,
+ TexUnified2DArrayFloatS32,
+ TexUnified2DArrayFloatFloat,
+ TexUnified2DArrayFloatFloatLevel,
+ TexUnified2DArrayFloatFloatGrad,
+ TexUnified2DArrayS32S32,
+ TexUnified2DArrayS32Float,
+ TexUnified2DArrayS32FloatLevel,
+ TexUnified2DArrayS32FloatGrad,
+ TexUnified2DArrayU32S32,
+ TexUnified2DArrayU32Float,
+ TexUnified2DArrayU32FloatLevel,
+ TexUnified2DArrayU32FloatGrad,
+ TexUnified3DFloatS32,
+ TexUnified3DFloatFloat,
+ TexUnified3DFloatFloatLevel,
+ TexUnified3DFloatFloatGrad,
+ TexUnified3DS32S32,
+ TexUnified3DS32Float,
+ TexUnified3DS32FloatLevel,
+ TexUnified3DS32FloatGrad,
+ TexUnified3DU32S32,
+ TexUnified3DU32Float,
+ TexUnified3DU32FloatLevel,
+ TexUnified3DU32FloatGrad,
+ TexUnifiedCubeFloatFloat,
+ TexUnifiedCubeFloatFloatLevel,
+ TexUnifiedCubeS32Float,
+ TexUnifiedCubeS32FloatLevel,
+ TexUnifiedCubeU32Float,
+ TexUnifiedCubeU32FloatLevel,
+ TexUnifiedCubeArrayFloatFloat,
+ TexUnifiedCubeArrayFloatFloatLevel,
+ TexUnifiedCubeArrayS32Float,
+ TexUnifiedCubeArrayS32FloatLevel,
+ TexUnifiedCubeArrayU32Float,
+ TexUnifiedCubeArrayU32FloatLevel,
+ Tld4UnifiedR2DFloatFloat,
+ Tld4UnifiedG2DFloatFloat,
+ Tld4UnifiedB2DFloatFloat,
+ Tld4UnifiedA2DFloatFloat,
+ Tld4UnifiedR2DS64Float,
+ Tld4UnifiedG2DS64Float,
+ Tld4UnifiedB2DS64Float,
+ Tld4UnifiedA2DS64Float,
+ Tld4UnifiedR2DU64Float,
+ Tld4UnifiedG2DU64Float,
+ Tld4UnifiedB2DU64Float,
+ Tld4UnifiedA2DU64Float,
// Surface intrinsics
+ Suld1DI8Clamp,
+ Suld1DI16Clamp,
+ Suld1DI32Clamp,
+ Suld1DI64Clamp,
+ Suld1DV2I8Clamp,
+ Suld1DV2I16Clamp,
+ Suld1DV2I32Clamp,
+ Suld1DV2I64Clamp,
+ Suld1DV4I8Clamp,
+ Suld1DV4I16Clamp,
+ Suld1DV4I32Clamp,
+
+ Suld1DArrayI8Clamp,
+ Suld1DArrayI16Clamp,
+ Suld1DArrayI32Clamp,
+ Suld1DArrayI64Clamp,
+ Suld1DArrayV2I8Clamp,
+ Suld1DArrayV2I16Clamp,
+ Suld1DArrayV2I32Clamp,
+ Suld1DArrayV2I64Clamp,
+ Suld1DArrayV4I8Clamp,
+ Suld1DArrayV4I16Clamp,
+ Suld1DArrayV4I32Clamp,
+
+ Suld2DI8Clamp,
+ Suld2DI16Clamp,
+ Suld2DI32Clamp,
+ Suld2DI64Clamp,
+ Suld2DV2I8Clamp,
+ Suld2DV2I16Clamp,
+ Suld2DV2I32Clamp,
+ Suld2DV2I64Clamp,
+ Suld2DV4I8Clamp,
+ Suld2DV4I16Clamp,
+ Suld2DV4I32Clamp,
+
+ Suld2DArrayI8Clamp,
+ Suld2DArrayI16Clamp,
+ Suld2DArrayI32Clamp,
+ Suld2DArrayI64Clamp,
+ Suld2DArrayV2I8Clamp,
+ Suld2DArrayV2I16Clamp,
+ Suld2DArrayV2I32Clamp,
+ Suld2DArrayV2I64Clamp,
+ Suld2DArrayV4I8Clamp,
+ Suld2DArrayV4I16Clamp,
+ Suld2DArrayV4I32Clamp,
+
+ Suld3DI8Clamp,
+ Suld3DI16Clamp,
+ Suld3DI32Clamp,
+ Suld3DI64Clamp,
+ Suld3DV2I8Clamp,
+ Suld3DV2I16Clamp,
+ Suld3DV2I32Clamp,
+ Suld3DV2I64Clamp,
+ Suld3DV4I8Clamp,
+ Suld3DV4I16Clamp,
+ Suld3DV4I32Clamp,
+
Suld1DI8Trap,
Suld1DI16Trap,
Suld1DI32Trap,
+ Suld1DI64Trap,
Suld1DV2I8Trap,
Suld1DV2I16Trap,
Suld1DV2I32Trap,
+ Suld1DV2I64Trap,
Suld1DV4I8Trap,
Suld1DV4I16Trap,
Suld1DV4I32Trap,
@@ -132,9 +322,11 @@ enum NodeType {
Suld1DArrayI8Trap,
Suld1DArrayI16Trap,
Suld1DArrayI32Trap,
+ Suld1DArrayI64Trap,
Suld1DArrayV2I8Trap,
Suld1DArrayV2I16Trap,
Suld1DArrayV2I32Trap,
+ Suld1DArrayV2I64Trap,
Suld1DArrayV4I8Trap,
Suld1DArrayV4I16Trap,
Suld1DArrayV4I32Trap,
@@ -142,9 +334,11 @@ enum NodeType {
Suld2DI8Trap,
Suld2DI16Trap,
Suld2DI32Trap,
+ Suld2DI64Trap,
Suld2DV2I8Trap,
Suld2DV2I16Trap,
Suld2DV2I32Trap,
+ Suld2DV2I64Trap,
Suld2DV4I8Trap,
Suld2DV4I16Trap,
Suld2DV4I32Trap,
@@ -152,9 +346,11 @@ enum NodeType {
Suld2DArrayI8Trap,
Suld2DArrayI16Trap,
Suld2DArrayI32Trap,
+ Suld2DArrayI64Trap,
Suld2DArrayV2I8Trap,
Suld2DArrayV2I16Trap,
Suld2DArrayV2I32Trap,
+ Suld2DArrayV2I64Trap,
Suld2DArrayV4I8Trap,
Suld2DArrayV4I16Trap,
Suld2DArrayV4I32Trap,
@@ -162,12 +358,74 @@ enum NodeType {
Suld3DI8Trap,
Suld3DI16Trap,
Suld3DI32Trap,
+ Suld3DI64Trap,
Suld3DV2I8Trap,
Suld3DV2I16Trap,
Suld3DV2I32Trap,
+ Suld3DV2I64Trap,
Suld3DV4I8Trap,
Suld3DV4I16Trap,
- Suld3DV4I32Trap
+ Suld3DV4I32Trap,
+
+ Suld1DI8Zero,
+ Suld1DI16Zero,
+ Suld1DI32Zero,
+ Suld1DI64Zero,
+ Suld1DV2I8Zero,
+ Suld1DV2I16Zero,
+ Suld1DV2I32Zero,
+ Suld1DV2I64Zero,
+ Suld1DV4I8Zero,
+ Suld1DV4I16Zero,
+ Suld1DV4I32Zero,
+
+ Suld1DArrayI8Zero,
+ Suld1DArrayI16Zero,
+ Suld1DArrayI32Zero,
+ Suld1DArrayI64Zero,
+ Suld1DArrayV2I8Zero,
+ Suld1DArrayV2I16Zero,
+ Suld1DArrayV2I32Zero,
+ Suld1DArrayV2I64Zero,
+ Suld1DArrayV4I8Zero,
+ Suld1DArrayV4I16Zero,
+ Suld1DArrayV4I32Zero,
+
+ Suld2DI8Zero,
+ Suld2DI16Zero,
+ Suld2DI32Zero,
+ Suld2DI64Zero,
+ Suld2DV2I8Zero,
+ Suld2DV2I16Zero,
+ Suld2DV2I32Zero,
+ Suld2DV2I64Zero,
+ Suld2DV4I8Zero,
+ Suld2DV4I16Zero,
+ Suld2DV4I32Zero,
+
+ Suld2DArrayI8Zero,
+ Suld2DArrayI16Zero,
+ Suld2DArrayI32Zero,
+ Suld2DArrayI64Zero,
+ Suld2DArrayV2I8Zero,
+ Suld2DArrayV2I16Zero,
+ Suld2DArrayV2I32Zero,
+ Suld2DArrayV2I64Zero,
+ Suld2DArrayV4I8Zero,
+ Suld2DArrayV4I16Zero,
+ Suld2DArrayV4I32Zero,
+
+ Suld3DI8Zero,
+ Suld3DI16Zero,
+ Suld3DI32Zero,
+ Suld3DI64Zero,
+ Suld3DV2I8Zero,
+ Suld3DV2I16Zero,
+ Suld3DV2I32Zero,
+ Suld3DV2I64Zero,
+ Suld3DV4I8Zero,
+ Suld3DV4I16Zero,
+ Suld3DV4I32Zero
};
}
@@ -178,7 +436,7 @@ class NVPTXSubtarget;
//===--------------------------------------------------------------------===//
class NVPTXTargetLowering : public TargetLowering {
public:
- explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
+ explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -237,7 +495,7 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- NVPTXTargetMachine *nvTM;
+ const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
@@ -245,6 +503,10 @@ public:
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
+ bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
+
+ bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
+
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
@@ -274,4 +536,4 @@ private:
};
} // namespace llvm
-#endif // NVPTXISELLOWERING_H
+#endif
diff --git a/lib/Target/NVPTX/NVPTXInstrFormats.td b/lib/Target/NVPTX/NVPTXInstrFormats.td
index f11f1b8..ffcb5d5 100644
--- a/lib/Target/NVPTX/NVPTXInstrFormats.td
+++ b/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -36,8 +36,24 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
bit IsLoad = 0;
bit IsStore = 0;
- let TSFlags{3-0} = VecInstType;
- let TSFlags{4-4} = IsSimpleMove;
- let TSFlags{5-5} = IsLoad;
- let TSFlags{6-6} = IsStore;
+ bit IsTex = 0;
+ bit IsSust = 0;
+ bit IsSurfTexQuery = 0;
+ bit IsTexModeUnified = 0;
+
+  // The following field is encoded as log2 of the vector size plus one,
+ // with 0 meaning the operation is not a surface instruction. For example,
+ // if IsSuld == 2, then the instruction is a suld instruction with vector size
+ // 2**(2-1) = 2.
+ bits<2> IsSuld = 0;
+
+ let TSFlags{3-0} = VecInstType;
+ let TSFlags{4-4} = IsSimpleMove;
+ let TSFlags{5-5} = IsLoad;
+ let TSFlags{6-6} = IsStore;
+ let TSFlags{7} = IsTex;
+ let TSFlags{9-8} = IsSuld;
+ let TSFlags{10} = IsSust;
+ let TSFlags{11} = IsSurfTexQuery;
+ let TSFlags{12} = IsTexModeUnified;
}
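Given the TSFlags layout above, a consumer could recover the suld vector size along these lines (a minimal C++ sketch, not part of the patch; the shift and mask simply mirror the let-block, and the surrounding context is assumed):

  // IsSuld lives in TSFlags bits 9-8 (see TSFlags{9-8} above).
  unsigned IsSuld = (TSFlags >> 8) & 0x3;
  if (IsSuld != 0) {
    // Non-zero means a surface load; the vector size is 2**(IsSuld - 1).
    unsigned VecSize = 1u << (IsSuld - 1); // 1, 2, or 4 elements
  }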
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 2ac2974..6de7536 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXINSTRUCTIONINFO_H
-#define NVPTXINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H
#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index d2c0373..9900b8c 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -139,17 +139,10 @@ def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
-def doFMAF32 : Predicate<"doFMAF32">;
-def doFMAF32_ftz : Predicate<"(doFMAF32 && useF32FTZ())">;
-def doFMAF32AGG : Predicate<"doFMAF32AGG">;
-def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">;
-def doFMAF64 : Predicate<"doFMAF64">;
-def doFMAF64AGG : Predicate<"doFMAF64AGG">;
-
def doMulWide : Predicate<"doMulWide">;
-def allowFMA : Predicate<"allowFMA">;
-def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">;
+def allowFMA : Predicate<"allowFMA()">;
+def noFMA : Predicate<"!allowFMA()">;
def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
@@ -222,13 +215,13 @@ multiclass F3<string OpcStr, SDNode OpNode> {
!strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst,
(OpNode Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[allowFMA_ftz]>;
+ Requires<[allowFMA, doF32FTZ]>;
def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst,
(OpNode Float32Regs:$a, fpimm:$b))]>,
- Requires<[allowFMA_ftz]>;
+ Requires<[allowFMA, doF32FTZ]>;
def f32rr : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
@@ -248,34 +241,38 @@ multiclass F3_rn<string OpcStr, SDNode OpNode> {
(ins Float64Regs:$a, Float64Regs:$b),
!strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst,
- (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ (OpNode Float64Regs:$a, Float64Regs:$b))]>,
+ Requires<[noFMA]>;
def f64ri : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b),
!strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst,
- (OpNode Float64Regs:$a, fpimm:$b))]>;
+ (OpNode Float64Regs:$a, fpimm:$b))]>,
+ Requires<[noFMA]>;
def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst,
(OpNode Float32Regs:$a, Float32Regs:$b))]>,
- Requires<[doF32FTZ]>;
+ Requires<[noFMA, doF32FTZ]>;
def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst,
(OpNode Float32Regs:$a, fpimm:$b))]>,
- Requires<[doF32FTZ]>;
+ Requires<[noFMA, doF32FTZ]>;
def f32rr : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[noFMA]>;
def f32ri : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst,
- (OpNode Float32Regs:$a, fpimm:$b))]>;
+ (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[noFMA]>;
}
multiclass F2<string OpcStr, SDNode OpNode> {
@@ -919,8 +916,8 @@ multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
}
defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
-defm FMA32 : FPCONTRACT32<"fma.rn.f32", doNoF32FTZ>;
-defm FMA64 : FPCONTRACT64<"fma.rn.f64", doNoF32FTZ>;
+defm FMA32 : FPCONTRACT32<"fma.rn.f32", true>;
+defm FMA64 : FPCONTRACT64<"fma.rn.f64", true>;
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"sin.approx.f32 \t$dst, $src;",
@@ -1917,7 +1914,7 @@ def StoreParamV2I8 : StoreParamV2Inst<Int16Regs, ".b8">;
def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
Int32Regs:$val3, Int32Regs:$val4,
i32imm:$a, i32imm:$b),
- "st.param.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
+ "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
[]>;
def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 0ad3dfa..14e51aa 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -792,13 +792,18 @@ def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
"}}")))),
Float32Regs, Int16Regs, int_nvvm_h2f>;
-def : Pat<(f32 (f16_to_f32 Int16Regs:$a)),
+def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
(CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
-def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
+def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
+def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;
+def : Pat<(f64 (f16_to_fp Int16Regs:$a)),
+ (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
+def : Pat<(i16 (fp_to_f16 Float64Regs:$a)),
+ (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
+
//
// Bitcast
//
@@ -1936,9 +1941,10 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
// also be defined in NVPTXReplaceImageHandles.cpp
-
+// texmode_independent
+let IsTex = 1, IsTexModeUnified = 0 in {
// Texture fetch instructions using handles
-def TEX_1D_F32_I32
+def TEX_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
@@ -1965,19 +1971,19 @@ def TEX_1D_F32_F32_GRAD
"tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
-def TEX_1D_I32_I32
+def TEX_1D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
"tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
-def TEX_1D_I32_F32
+def TEX_1D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
"tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
-def TEX_1D_I32_F32_LEVEL
+def TEX_1D_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
@@ -1985,7 +1991,7 @@ def TEX_1D_I32_F32_LEVEL
"tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], $lod;",
[]>;
-def TEX_1D_I32_F32_GRAD
+def TEX_1D_S32_F32_GRAD
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
@@ -1993,8 +1999,36 @@ def TEX_1D_I32_F32_GRAD
"tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
+def TEX_1D_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
+ "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
+ "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], $lod;",
+ []>;
+def TEX_1D_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
-def TEX_1D_ARRAY_F32_I32
+def TEX_1D_ARRAY_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
@@ -2024,21 +2058,21 @@ def TEX_1D_ARRAY_F32_F32_GRAD
"tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
-def TEX_1D_ARRAY_I32_I32
+def TEX_1D_ARRAY_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
"tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
-def TEX_1D_ARRAY_I32_F32
+def TEX_1D_ARRAY_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
"tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
-def TEX_1D_ARRAY_I32_F32_LEVEL
+def TEX_1D_ARRAY_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
@@ -2046,7 +2080,7 @@ def TEX_1D_ARRAY_I32_F32_LEVEL
"tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], $lod;",
[]>;
-def TEX_1D_ARRAY_I32_F32_GRAD
+def TEX_1D_ARRAY_S32_F32_GRAD
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
@@ -2054,8 +2088,38 @@ def TEX_1D_ARRAY_I32_F32_GRAD
"tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
+def TEX_1D_ARRAY_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
+ "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], $lod;",
+ []>;
+def TEX_1D_ARRAY_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
-def TEX_2D_F32_I32
+def TEX_2D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
@@ -2087,21 +2151,21 @@ def TEX_2D_F32_F32_GRAD
"[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
-def TEX_2D_I32_I32
+def TEX_2D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
"tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
-def TEX_2D_I32_F32
+def TEX_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
"tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
-def TEX_2D_I32_F32_LEVEL
+def TEX_2D_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
@@ -2109,7 +2173,7 @@ def TEX_2D_I32_F32_LEVEL
"tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], $lod;",
[]>;
-def TEX_2D_I32_F32_GRAD
+def TEX_2D_S32_F32_GRAD
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
@@ -2119,8 +2183,40 @@ def TEX_2D_I32_F32_GRAD
"[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
+def TEX_2D_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$lod),
+ "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], $lod;",
+ []>;
+def TEX_2D_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
-def TEX_2D_ARRAY_F32_I32
+def TEX_2D_ARRAY_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
@@ -2154,7 +2250,7 @@ def TEX_2D_ARRAY_F32_F32_GRAD
"[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
-def TEX_2D_ARRAY_I32_I32
+def TEX_2D_ARRAY_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
@@ -2162,7 +2258,7 @@ def TEX_2D_ARRAY_I32_I32
"tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
-def TEX_2D_ARRAY_I32_F32
+def TEX_2D_ARRAY_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
@@ -2170,7 +2266,7 @@ def TEX_2D_ARRAY_I32_F32
"tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
-def TEX_2D_ARRAY_I32_F32_LEVEL
+def TEX_2D_ARRAY_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
@@ -2178,7 +2274,7 @@ def TEX_2D_ARRAY_I32_F32_LEVEL
"tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
-def TEX_2D_ARRAY_I32_F32_GRAD
+def TEX_2D_ARRAY_S32_F32_GRAD
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
@@ -2189,8 +2285,43 @@ def TEX_2D_ARRAY_I32_F32_GRAD
"[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
+def TEX_2D_ARRAY_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$y),
+ "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y),
+ "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$lod),
+ "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+def TEX_2D_ARRAY_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
-def TEX_3D_F32_I32
+def TEX_3D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
@@ -2227,7 +2358,7 @@ def TEX_3D_F32_F32_GRAD
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
[]>;
-def TEX_3D_I32_I32
+def TEX_3D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
@@ -2235,7 +2366,7 @@ def TEX_3D_I32_I32
"tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
-def TEX_3D_I32_F32
+def TEX_3D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
@@ -2243,7 +2374,7 @@ def TEX_3D_I32_F32
"tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
-def TEX_3D_I32_F32_LEVEL
+def TEX_3D_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
@@ -2251,7 +2382,7 @@ def TEX_3D_I32_F32_LEVEL
"tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
-def TEX_3D_I32_F32_GRAD
+def TEX_3D_S32_F32_GRAD
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
@@ -2264,104 +2395,1276 @@ def TEX_3D_I32_F32_GRAD
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
[]>;
+def TEX_3D_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$z),
+ "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z),
+ "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z, Float32Regs:$lod),
+ "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_3D_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$gradx2, Float32Regs:$grady0,
+ Float32Regs:$grady1, Float32Regs:$grady2),
+ "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], "
+ "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
+ "\\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+def TEX_CUBE_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_CUBE_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_CUBE_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_CUBE_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_CUBE_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_CUBE_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
-// Surface load instructions
-def SULD_1D_I8_TRAP
+def TEX_CUBE_ARRAY_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $z\\}];",
+ []>;
+def TEX_CUBE_ARRAY_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+def TEX_CUBE_ARRAY_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $z\\}];",
+ []>;
+def TEX_CUBE_ARRAY_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+def TEX_CUBE_ARRAY_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $z\\}];",
+ []>;
+def TEX_CUBE_ARRAY_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+
+def TLD4_R_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
+ Float32Regs:$v2, Float32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_G_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
+ Float32Regs:$v2, Float32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_B_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
+ Float32Regs:$v2, Float32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_A_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
+ Float32Regs:$v2, Float32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_R_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_G_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_B_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_A_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_R_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_G_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_B_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TLD4_A_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+}
+
+
+// texmode_unified
+let IsTex = 1, IsTexModeUnified = 1 in {
+// Texture fetch instructions using handles
+def TEX_UNIFIED_1D_F32_S32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x),
+ "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+def TEX_UNIFIED_1D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x),
+ "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+def TEX_UNIFIED_1D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
+ "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x\\}], $lod;",
+ []>;
+def TEX_UNIFIED_1D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+def TEX_UNIFIED_1D_S32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x),
+ "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+def TEX_UNIFIED_1D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x),
+ "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+def TEX_UNIFIED_1D_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x\\}], $lod;",
+ []>;
+def TEX_UNIFIED_1D_S32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+def TEX_UNIFIED_1D_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x),
+ "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+def TEX_UNIFIED_1D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x),
+ "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+def TEX_UNIFIED_1D_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x\\}], $lod;",
+ []>;
+def TEX_UNIFIED_1D_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+def TEX_UNIFIED_1D_ARRAY_F32_S32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
+ "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}];",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
+ "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}];",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}], $lod;",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_S32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
+ "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}];",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
+ "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}];",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}], $lod;",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
+ "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}];",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
+ "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}];",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}], $lod;",
+ []>;
+def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+def TEX_UNIFIED_2D_F32_S32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
+ "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$lod),
+ "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}], $lod;",
+ []>;
+def TEX_UNIFIED_2D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+def TEX_UNIFIED_2D_S32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
+ "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$lod),
+ "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}], $lod;",
+ []>;
+def TEX_UNIFIED_2D_S32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+def TEX_UNIFIED_2D_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
+ "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$lod),
+ "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}], $lod;",
+ []>;
+def TEX_UNIFIED_2D_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+
+def TEX_UNIFIED_2D_ARRAY_F32_S32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$y),
+ "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y),
+ "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$lod),
+ "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_S32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$y),
+ "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y),
+ "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$lod),
+ "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$y),
+ "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y),
+ "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$lod),
+ "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+
+def TEX_UNIFIED_3D_F32_S32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$z),
+ "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_3D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z),
+ "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_3D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z, Float32Regs:$lod),
+ "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_UNIFIED_3D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$gradx2, Float32Regs:$grady0,
+ Float32Regs:$grady1, Float32Regs:$grady2),
+ "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], "
+ "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
+ "\\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+def TEX_UNIFIED_3D_S32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$z),
+ "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_3D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z),
+ "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_3D_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z, Float32Regs:$lod),
+ "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_UNIFIED_3D_S32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$gradx2, Float32Regs:$grady0,
+ Float32Regs:$grady1, Float32Regs:$grady2),
+ "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], "
+ "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
+ "\\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+def TEX_UNIFIED_3D_U32_S32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$z),
+ "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_3D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z),
+ "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_3D_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z, Float32Regs:$lod),
+ "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_UNIFIED_3D_U32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$gradx2, Float32Regs:$grady0,
+ Float32Regs:$grady1, Float32Regs:$grady2),
+ "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], "
+ "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
+ "\\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+
+def TEX_UNIFIED_CUBE_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_CUBE_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_UNIFIED_CUBE_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_CUBE_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_UNIFIED_CUBE_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_UNIFIED_CUBE_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+
+def TEX_UNIFIED_CUBE_ARRAY_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $z\\}];",
+ []>;
+def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+def TEX_UNIFIED_CUBE_ARRAY_S32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $z\\}];",
+ []>;
+def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+def TEX_UNIFIED_CUBE_ARRAY_U32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
+ "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $z\\}];",
+ []>;
+def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int32Regs:$l,
+ Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
+ Float32Regs:$lod),
+ "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+
+def TLD4_UNIFIED_R_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
+ Float32Regs:$v2, Float32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_G_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
+ Float32Regs:$v2, Float32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_B_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
+ Float32Regs:$v2, Float32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_A_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
+ Float32Regs:$v2, Float32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_R_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_G_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_B_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_A_2D_S32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_R_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_G_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_B_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+def TLD4_UNIFIED_A_2D_U32_F32
+ : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
+ Int32Regs:$v2, Int32Regs:$v3),
+ (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
+ "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "[$t, \\{$x, $y\\}];",
+ []>;
+}
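For reference, every def in this block carries an empty DAG pattern list ("[]"), so each record only fixes the operand register classes and the PTX asm template; selection is handled elsewhere. Substituting concrete registers into the template of, say, TEX_UNIFIED_1D_F32_F32 would print roughly the following (the register names are placeholders for illustration, not taken from this patch):

    // Hypothetical expansion of TEX_UNIFIED_1D_F32_F32 with placeholder registers
    tex.1d.v4.f32.f32 {%f1, %f2, %f3, %f4}, [%rd1, {%f5}];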
+
+
+
+//=== Surface load instructions
+// .clamp variant
+let IsSuld = 1 in {
+def SULD_1D_I8_CLAMP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
+ "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
[]>;
-def SULD_1D_I16_TRAP
+def SULD_1D_I16_CLAMP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
+ "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
[]>;
-def SULD_1D_I32_TRAP
+def SULD_1D_I32_CLAMP
: NVPTXInst<(outs Int32Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
+ "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
[]>;
-def SULD_1D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+def SULD_1D_I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
[]>;
-def SULD_1D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+
+def SULD_1D_ARRAY_I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
[]>;
-def SULD_1D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+def SULD_1D_ARRAY_I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+
+def SULD_2D_I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
+def SULD_2D_ARRAY_I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+def SULD_3D_I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+}
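The surface loads read the same way: the escaped "\\{" and "\\}" sequences become literal braces in the emitted PTX, so a scalar clamp load such as SULD_1D_I32_CLAMP would come out roughly as (registers are placeholders):

    // Hypothetical expansion of SULD_1D_I32_CLAMP with placeholder registers
    suld.b.1d.b32.clamp {%r1}, [%rd1, {%r2}];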
+
+let IsSuld = 2 in {
+def SULD_1D_V2I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
(ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
[]>;
-def SULD_1D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+def SULD_1D_V2I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
(ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
[]>;
-def SULD_1D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+def SULD_1D_V2I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
(ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
[]>;
-def SULD_1D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+def SULD_1D_V2I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
(ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
[]>;
-def SULD_1D_ARRAY_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
+def SULD_1D_ARRAY_V2I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
[]>;
-def SULD_1D_ARRAY_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
+def SULD_1D_ARRAY_V2I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
[]>;
-def SULD_1D_ARRAY_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
+def SULD_1D_ARRAY_V2I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
[]>;
-def SULD_1D_ARRAY_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+def SULD_1D_ARRAY_V2I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
[]>;
-def SULD_1D_ARRAY_V2I16_TRAP
+
+def SULD_2D_V2I8_CLAMP
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
[]>;
-def SULD_1D_ARRAY_V2I32_TRAP
+def SULD_2D_V2I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I32_CLAMP
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
[]>;
-def SULD_1D_ARRAY_V4I8_TRAP
+def SULD_2D_V2I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
+def SULD_2D_ARRAY_V2I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+def SULD_3D_V2I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I64_CLAMP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+}
+
+let IsSuld = 3 in {
+def SULD_1D_V4I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_V4I8_CLAMP
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
"[$s, \\{$l, $x\\}];",
[]>;
-def SULD_1D_ARRAY_V4I16_TRAP
+def SULD_1D_ARRAY_V4I16_CLAMP
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
"[$s, \\{$l, $x\\}];",
[]>;
-def SULD_1D_ARRAY_V4I32_TRAP
+def SULD_1D_ARRAY_V4I32_CLAMP
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
"[$s, \\{$l, $x\\}];",
[]>;
+def SULD_2D_V4I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
+def SULD_2D_ARRAY_V4I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+
+def SULD_3D_V4I8_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I16_CLAMP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I32_CLAMP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+}
+
+
+// .trap variant
+let IsSuld = 1 in {
+def SULD_1D_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+
def SULD_2D_I8_TRAP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
@@ -2377,35 +3680,10 @@ def SULD_2D_I32_TRAP
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
"suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
[]>;
-def SULD_2D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+def SULD_2D_I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
[]>;
def SULD_2D_ARRAY_I8_TRAP
@@ -2423,6 +3701,98 @@ def SULD_2D_ARRAY_I32_TRAP
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
"suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
[]>;
+def SULD_2D_ARRAY_I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+def SULD_3D_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+}
+
+let IsSuld = 2 in {
+def SULD_1D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+
+def SULD_2D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
def SULD_2D_ARRAY_V2I8_TRAP
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
@@ -2441,6 +3811,87 @@ def SULD_2D_ARRAY_V2I32_TRAP
"suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
"[$s, \\{$l, $x, $y, $y\\}];",
[]>;
+def SULD_2D_ARRAY_V2I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+def SULD_3D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I64_TRAP
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+}
+
+let IsSuld = 3 in {
+def SULD_1D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+
+def SULD_2D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
def SULD_2D_ARRAY_V4I8_TRAP
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
@@ -2460,59 +3911,343 @@ def SULD_2D_ARRAY_V4I32_TRAP
"[$s, \\{$l, $x, $y, $y\\}];",
[]>;
-def SULD_3D_I8_TRAP
+
+def SULD_3D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+}
+
+// .zero variant
+let IsSuld = 1 in {
+def SULD_1D_I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+
+def SULD_2D_I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
+def SULD_2D_ARRAY_I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+def SULD_3D_I8_ZERO
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
[]>;
-def SULD_3D_I16_TRAP
+def SULD_3D_I16_ZERO
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
[]>;
-def SULD_3D_I32_TRAP
+def SULD_3D_I32_ZERO
: NVPTXInst<(outs Int32Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
[]>;
-def SULD_3D_V2I8_TRAP
+def SULD_3D_I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+}
+
+let IsSuld = 2 in {
+def SULD_1D_V2I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_V2I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+
+def SULD_2D_V2I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
+def SULD_2D_ARRAY_V2I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+def SULD_3D_V2I8_ZERO
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
[]>;
-def SULD_3D_V2I16_TRAP
+def SULD_3D_V2I16_ZERO
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
[]>;
-def SULD_3D_V2I32_TRAP
+def SULD_3D_V2I32_ZERO
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
[]>;
-def SULD_3D_V4I8_TRAP
+def SULD_3D_V2I64_ZERO
+ : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+}
+
+let IsSuld = 3 in {
+def SULD_1D_V4I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_V4I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+
+def SULD_2D_V4I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
+def SULD_2D_ARRAY_V4I8_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I16_ZERO
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I32_ZERO
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+
+def SULD_3D_V4I8_ZERO
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
"[$s, \\{$x, $y, $z, $z\\}];",
[]>;
-def SULD_3D_V4I16_TRAP
+def SULD_3D_V4I16_ZERO
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
"[$s, \\{$x, $y, $z, $z\\}];",
[]>;
-def SULD_3D_V4I32_TRAP
+def SULD_3D_V4I32_ZERO
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
"[$s, \\{$x, $y, $z, $z\\}];",
[]>;
-
+}
//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------
+
+let IsSurfTexQuery = 1 in {
def TXQ_CHANNEL_ORDER
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.channel_order.b32 \t$d, [$a];",
@@ -2545,6 +4280,7 @@ def TXQ_NUM_MIPMAP_LEVELS
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.num_mipmap_levels.b32 \t$d, [$a];",
[]>;
+}
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
(TXQ_CHANNEL_ORDER Int64Regs:$a)>;
@@ -2567,6 +4303,8 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------
+
+let IsSurfTexQuery = 1 in {
def SUQ_CHANNEL_ORDER
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.channel_order.b32 \t$d, [$a];",
@@ -2591,6 +4329,7 @@ def SUQ_ARRAY_SIZE
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.array_size.b32 \t$d, [$a];",
[]>;
+}
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
(SUQ_CHANNEL_ORDER Int64Regs:$a)>;
@@ -2624,8 +4363,354 @@ def ISTYPEP_TEXTURE
//===- Surface Stores -----------------------------------------------------===//
+let IsSust = 1 in {
// Unformatted
+// .clamp variant
+def SUST_B_1D_B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
+ "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_V2B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V4B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_V4B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_V4B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_1D_ARRAY_B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
+ "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
+ "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
+ Int64Regs:$g),
+ "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_2D_B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_V2B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
+ Int64Regs:$g),
+ "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V4B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_V4B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_V4B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+def SUST_B_2D_ARRAY_B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r),
+ "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r),
+ "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r, Int64Regs:$g),
+ "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_3D_B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r),
+ "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_V2B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B64_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g),
+ "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V4B8_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_3D_V4B16_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_3D_V4B32_CLAMP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+// .trap variant
def SUST_B_1D_B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
@@ -2641,6 +4726,11 @@ def SUST_B_1D_B32_TRAP
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
"sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
[]>;
+def SUST_B_1D_B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
+ "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
def SUST_B_1D_V2B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
@@ -2656,6 +4746,11 @@ def SUST_B_1D_V2B32_TRAP
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
"sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
[]>;
+def SUST_B_1D_V2B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
def SUST_B_1D_V4B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
@@ -2691,6 +4786,11 @@ def SUST_B_1D_ARRAY_B32_TRAP
(ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
"sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
[]>;
+def SUST_B_1D_ARRAY_B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
+ "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
def SUST_B_1D_ARRAY_V2B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
@@ -2709,6 +4809,12 @@ def SUST_B_1D_ARRAY_V2B32_TRAP
Int32Regs:$g),
"sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
[]>;
+def SUST_B_1D_ARRAY_V2B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
+ Int64Regs:$g),
+ "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
def SUST_B_1D_ARRAY_V4B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
@@ -2747,6 +4853,11 @@ def SUST_B_2D_B32_TRAP
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
"sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
[]>;
+def SUST_B_2D_B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
def SUST_B_2D_V2B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
@@ -2765,6 +4876,12 @@ def SUST_B_2D_V2B32_TRAP
Int32Regs:$g),
"sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
[]>;
+def SUST_B_2D_V2B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
+ Int64Regs:$g),
+ "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
def SUST_B_2D_V4B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
@@ -2806,6 +4923,12 @@ def SUST_B_2D_ARRAY_B32_TRAP
Int32Regs:$r),
"sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
[]>;
+def SUST_B_2D_ARRAY_B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r),
+ "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
def SUST_B_2D_ARRAY_V2B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
@@ -2827,6 +4950,13 @@ def SUST_B_2D_ARRAY_V2B32_TRAP
"sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
"\\{$r, $g\\};",
[]>;
+def SUST_B_2D_ARRAY_V2B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r, Int64Regs:$g),
+ "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
def SUST_B_2D_ARRAY_V4B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
@@ -2868,6 +4998,12 @@ def SUST_B_3D_B32_TRAP
Int32Regs:$r),
"sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
[]>;
+def SUST_B_3D_B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r),
+ "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
def SUST_B_3D_V2B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
@@ -2889,6 +5025,13 @@ def SUST_B_3D_V2B32_TRAP
"sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
"\\{$r, $g\\};",
[]>;
+def SUST_B_3D_V2B64_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g),
+ "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
def SUST_B_3D_V4B8_TRAP
: NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
@@ -2911,6 +5054,353 @@ def SUST_B_3D_V4B32_TRAP
"\\{$r, $g, $b, $a\\};",
[]>;
+
+// .zero variant
+def SUST_B_1D_B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
+ "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_V2B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V4B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_V4B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_V4B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_1D_ARRAY_B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
+ "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
+ "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
+ Int64Regs:$g),
+ "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_2D_B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_V2B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
+ Int64Regs:$g),
+ "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V4B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_V4B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_V4B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_2D_ARRAY_B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r),
+ "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r),
+ "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r, Int64Regs:$g),
+ "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_3D_B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r),
+ "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_V2B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B64_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g),
+ "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V4B8_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_3D_V4B16_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_3D_V4B32_ZERO
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+
// Formatted
def SUST_P_1D_B8_TRAP
@@ -3197,12 +5687,341 @@ def SUST_P_3D_V4B32_TRAP
"sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
"\\{$r, $g, $b, $a\\};",
[]>;
-
+}
// Surface store instruction patterns
// I'm not sure why we can't just include these in the instruction definitions,
// but TableGen complains of type errors :(
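// --- Illustrative sketch (an assumption for orientation, not part of this patch) ---
// A CUDA-level surface write using the "zero" boundary mode is the kind of source
// that typically reaches the int_nvvm_sust_b_*_zero intrinsics matched further
// below. The kernel name, element type, and 1D geometry here are assumptions
// chosen for brevity; which SUST_B_* instruction is actually selected depends on
// the surface geometry, element width, and the boundary mode passed to
// surf1Dwrite.
//
//   #include <cuda_runtime.h>
//
//   __global__ void store_byte(cudaSurfaceObject_t surf, int x, unsigned char v) {
//     // x is a byte offset; cudaBoundaryModeZero selects the ".zero" addressing
//     // variant, so an out-of-range write is dropped rather than trapping.
//     // Conceptually this lowers through llvm.nvvm.sust.b.1d.i8.zero and would
//     // be matched by the SUST_B_1D_B8_ZERO pattern below.
//     surf1Dwrite(v, surf, x, cudaBoundaryModeZero);
//   }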
+// .clamp variant
+def : Pat<(int_nvvm_sust_b_1d_i8_clamp
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i16_clamp
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i32_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i64_clamp
+ Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
+ (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
+ Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
+ Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
+ Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
+ (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
+ (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_2d_i8_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i16_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i32_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i64_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
+ Int64Regs:$g),
+ (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_3d_i8_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_B_3D_B8_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i16_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_B_3D_B16_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i32_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ (SUST_B_3D_B32_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i64_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r),
+ (SUST_B_3D_B64_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+// .trap variant
def : Pat<(int_nvvm_sust_b_1d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
(SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
@@ -3215,6 +6034,10 @@ def : Pat<(int_nvvm_sust_b_1d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
(SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+def : Pat<(int_nvvm_sust_b_1d_i64_trap
+ Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
+ (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
(SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
@@ -3230,6 +6053,11 @@ def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
(SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
+def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
+ Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int64Regs:$r, Int64Regs:$g)>;
+
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
@@ -3265,6 +6093,11 @@ def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
(SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
+def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
+ (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int64Regs:$r)>;
+
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
(SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
@@ -3280,6 +6113,11 @@ def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
(SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
+def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int64Regs:$r, Int64Regs:$g)>;
+
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
@@ -3315,6 +6153,11 @@ def : Pat<(int_nvvm_sust_b_2d_i32_trap
(SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
+def : Pat<(int_nvvm_sust_b_2d_i64_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r)>;
+
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
(SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
@@ -3330,6 +6173,11 @@ def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
(SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
+def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r, Int64Regs:$g)>;
+
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
@@ -3368,6 +6216,12 @@ def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
+def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r)>;
+
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
@@ -3388,6 +6242,12 @@ def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
(SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
+def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
+ Int64Regs:$g),
+ (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
+
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
@@ -3432,6 +6292,13 @@ def : Pat<(int_nvvm_sust_b_3d_i32_trap
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
+def : Pat<(int_nvvm_sust_b_3d_i64_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r),
+ (SUST_B_3D_B64_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r)>;
+
def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
@@ -3453,6 +6320,13 @@ def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
+def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g)>;
+
def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
@@ -3475,6 +6349,334 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+// .zero variant
+def : Pat<(int_nvvm_sust_b_1d_i8_zero
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i16_zero
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i32_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i64_zero
+ Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
+ (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
+ Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
+ Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
+ Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
+ (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
+ (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_2d_i8_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i16_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i32_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i64_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
+ (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
+ Int64Regs:$g),
+ (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_3d_i8_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_B_3D_B8_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i16_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_B_3D_B16_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i32_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ (SUST_B_3D_B32_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i64_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r),
+ (SUST_B_3D_B64_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g),
+ (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int64Regs:$r, Int64Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
def : Pat<(int_nvvm_sust_p_1d_i8_trap
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index 5ec1fc9..8759406 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_LOWER_AGGR_COPIES_H
-#define NVPTX_LOWER_AGGR_COPIES_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/IR/DataLayout.h"
diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
new file mode 100644
index 0000000..3149399
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
@@ -0,0 +1,134 @@
+//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Copy struct args to local memory. This is needed for kernel functions only.
+// This is a preparation for handling cases like
+//
+// kernel void foo(struct A arg, ...)
+// {
+// struct A *p = &arg;
+// ...
+// ... = p->field1 ... (there is no generic address for .param)
+// p->field2 = ... (there is no write access to .param)
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);
+}
+
+class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+
+ void handleStructPtrArgs(Function &);
+ void handleParam(Argument *);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ NVPTXLowerStructArgs() : FunctionPass(ID) {}
+ const char *getPassName() const override {
+ return "Copy structure (byval *) arguments to stack";
+ }
+};
+
+char NVPTXLowerStructArgs::ID = 1;
+
+INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",
+ "Lower structure arguments (NVPTX)", false, false)
+
+void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
+ Function *Func = Arg->getParent();
+ Instruction *FirstInst = &(Func->getEntryBlock().front());
+ PointerType *PType = dyn_cast<PointerType>(Arg->getType());
+
+ assert(PType && "Expecting pointer type in handleParam");
+
+ Type *StructType = PType->getElementType();
+ AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
+
+ /* Set the alignment to the alignment of the byval parameter. This is
+ * because later loads/stores assume that alignment, and we are going to
+ * replace the use of the byval parameter with this alloca instruction.
+ */
+ AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
+
+ Arg->replaceAllUsesWith(AllocA);
+
+ // Get the cvt.gen.to.param intrinsic
+ Type *CvtTypes[] = {
+ Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),
+ Type::getInt8PtrTy(Func->getParent()->getContext(),
+ ADDRESS_SPACE_GENERIC)};
+ Function *CvtFunc = Intrinsic::getDeclaration(
+ Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes);
+
+ Value *BitcastArgs[] = {
+ new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),
+ ADDRESS_SPACE_GENERIC),
+ Arg->getName(), FirstInst)};
+ CallInst *CallCVT =
+ CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst);
+
+ BitCastInst *BitCast = new BitCastInst(
+ CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM),
+ Arg->getName(), FirstInst);
+ LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
+ new StoreInst(LI, AllocA, FirstInst);
+}
+
+// =============================================================================
+// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then
+// add the following instructions to the first basic block :
+//
+// %temp = alloca %struct.x, align 8
+// %tt1 = bitcast %struct.x * %d to i8 *
+// %tt2 = llvm.nvvm.cvt.gen.to.param %tt1
+// %tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) *
+// %tv = load %struct.x addrspace(101) * %tempd
+// store %struct.x %tv, %struct.x * %temp, align 8
+//
+// The above code allocates some space on the stack and copies the incoming
+// struct from param space to local space.
+// Then all occurrences of %d are replaced by %temp.
+// =============================================================================
+void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {
+ for (Argument &Arg : F.args()) {
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
+ handleParam(&Arg);
+ }
+ }
+}
+
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerStructArgs::runOnFunction(Function &F) {
+ // Skip non-kernels. See the comments at the top of this file.
+ if (!isKernelFunction(F))
+ return false;
+
+ handleStructPtrArgs(F);
+ return true;
+}
+
+FunctionPass *llvm::createNVPTXLowerStructArgsPass() {
+ return new NVPTXLowerStructArgs();
+}
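The pass is registered with the PassRegistry and given a factory above, but nothing in these hunks schedules it in the NVPTX codegen pipeline yet. A minimal sketch of driving it by hand over a module, assuming the factory is declared in NVPTX.h like the other NVPTX passes and using the legacy pass manager of this LLVM vintage:

#include "NVPTX.h"            // assumed to declare createNVPTXLowerStructArgsPass()
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h" // legacy pass manager

// Run only the new lowering over a module, e.g. from a test driver.
static void runLowerStructArgs(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(llvm::createNVPTXLowerStructArgsPass());
  PM.run(M); // kernel byval struct args are now copied into a local alloca
}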
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 5547649..d39a394 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -9,8 +9,8 @@
// Modeled after ARMMCExpr
-#ifndef NVPTXMCEXPR_H
-#define NVPTXMCEXPR_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H
#include "llvm/ADT/APFloat.h"
#include "llvm/MC/MCExpr.h"
@@ -63,7 +63,8 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override {
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 67fb390..10f1135 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
+
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -44,3 +47,5 @@ public:
}
};
}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 348ab0c..a1e1b9e 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -48,8 +49,8 @@ char NVPTXPrologEpilogPass::ID = 0;
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getFrameLowering();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
+ const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
bool Modified = false;
calculateFrameObjectOffsets(MF);
@@ -108,8 +109,8 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
void
NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 62f288b..358ccce 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -53,9 +53,9 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
return "%f";
}
if (RC == &NVPTX::Float64RegsRegClass) {
- return "%fl";
+ return "%fd";
} else if (RC == &NVPTX::Int64RegsRegClass) {
- return "%rl";
+ return "%rd";
} else if (RC == &NVPTX::Int32RegsRegClass) {
return "%r";
} else if (RC == &NVPTX::Int16RegsRegClass) {
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index a7594be..d2e6733 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXREGISTERINFO_H
-#define NVPTXREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 3482248..efcee6b 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -35,9 +35,9 @@ foreach i = 0-4 in {
def P#i : NVPTXReg<"%p"#i>; // Predicate
def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
def R#i : NVPTXReg<"%r"#i>; // 32-bit
- def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
+ def RL#i : NVPTXReg<"%rd"#i>; // 64-bit
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
- def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
+ def FL#i : NVPTXReg<"%fd"#i>; // 64-bit float
// Arguments
def ia#i : NVPTXReg<"%ia"#i>;
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index afd53a6..324420d 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -15,6 +15,7 @@
#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
+#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,9 +34,15 @@ public:
NVPTXReplaceImageHandles();
bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "NVPTX Replace Image Handles";
+ }
private:
bool processInstr(MachineInstr &MI);
void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
+ bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
+ unsigned &Idx);
};
}
@@ -65,242 +72,43 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
E = InstrsToRemove.end(); I != E; ++I) {
(*I)->eraseFromParent();
}
-
return Changed;
}
bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
MachineFunction &MF = *MI.getParent()->getParent();
- // Check if we have a surface/texture instruction
- switch (MI.getOpcode()) {
- default: return false;
- case NVPTX::TEX_1D_F32_I32:
- case NVPTX::TEX_1D_F32_F32:
- case NVPTX::TEX_1D_F32_F32_LEVEL:
- case NVPTX::TEX_1D_F32_F32_GRAD:
- case NVPTX::TEX_1D_I32_I32:
- case NVPTX::TEX_1D_I32_F32:
- case NVPTX::TEX_1D_I32_F32_LEVEL:
- case NVPTX::TEX_1D_I32_F32_GRAD:
- case NVPTX::TEX_1D_ARRAY_F32_I32:
- case NVPTX::TEX_1D_ARRAY_F32_F32:
- case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
- case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
- case NVPTX::TEX_1D_ARRAY_I32_I32:
- case NVPTX::TEX_1D_ARRAY_I32_F32:
- case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
- case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
- case NVPTX::TEX_2D_F32_I32:
- case NVPTX::TEX_2D_F32_F32:
- case NVPTX::TEX_2D_F32_F32_LEVEL:
- case NVPTX::TEX_2D_F32_F32_GRAD:
- case NVPTX::TEX_2D_I32_I32:
- case NVPTX::TEX_2D_I32_F32:
- case NVPTX::TEX_2D_I32_F32_LEVEL:
- case NVPTX::TEX_2D_I32_F32_GRAD:
- case NVPTX::TEX_2D_ARRAY_F32_I32:
- case NVPTX::TEX_2D_ARRAY_F32_F32:
- case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
- case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
- case NVPTX::TEX_2D_ARRAY_I32_I32:
- case NVPTX::TEX_2D_ARRAY_I32_F32:
- case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
- case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
- case NVPTX::TEX_3D_F32_I32:
- case NVPTX::TEX_3D_F32_F32:
- case NVPTX::TEX_3D_F32_F32_LEVEL:
- case NVPTX::TEX_3D_F32_F32_GRAD:
- case NVPTX::TEX_3D_I32_I32:
- case NVPTX::TEX_3D_I32_F32:
- case NVPTX::TEX_3D_I32_F32_LEVEL:
- case NVPTX::TEX_3D_I32_F32_GRAD: {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ if (MCID.TSFlags & NVPTXII::IsTexFlag) {
// This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref
MachineOperand &TexHandle = MI.getOperand(4);
- MachineOperand &SampHandle = MI.getOperand(5);
-
replaceImageHandle(TexHandle, MF);
- replaceImageHandle(SampHandle, MF);
-
- return true;
- }
- case NVPTX::SULD_1D_I8_TRAP:
- case NVPTX::SULD_1D_I16_TRAP:
- case NVPTX::SULD_1D_I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_I32_TRAP:
- case NVPTX::SULD_2D_I8_TRAP:
- case NVPTX::SULD_2D_I16_TRAP:
- case NVPTX::SULD_2D_I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_I32_TRAP:
- case NVPTX::SULD_3D_I8_TRAP:
- case NVPTX::SULD_3D_I16_TRAP:
- case NVPTX::SULD_3D_I32_TRAP: {
- // This is a V1 surface load, so operand 1 is a surfref
- MachineOperand &SurfHandle = MI.getOperand(1);
- replaceImageHandle(SurfHandle, MF);
+ if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
+ MachineOperand &SampHandle = MI.getOperand(5);
+ replaceImageHandle(SampHandle, MF);
+ }
return true;
- }
- case NVPTX::SULD_1D_V2I8_TRAP:
- case NVPTX::SULD_1D_V2I16_TRAP:
- case NVPTX::SULD_1D_V2I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
- case NVPTX::SULD_2D_V2I8_TRAP:
- case NVPTX::SULD_2D_V2I16_TRAP:
- case NVPTX::SULD_2D_V2I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
- case NVPTX::SULD_3D_V2I8_TRAP:
- case NVPTX::SULD_3D_V2I16_TRAP:
- case NVPTX::SULD_3D_V2I32_TRAP: {
- // This is a V2 surface load, so operand 2 is a surfref
- MachineOperand &SurfHandle = MI.getOperand(2);
-
- replaceImageHandle(SurfHandle, MF);
+ } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
+ unsigned VecSize =
+ 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
- return true;
- }
- case NVPTX::SULD_1D_V4I8_TRAP:
- case NVPTX::SULD_1D_V4I16_TRAP:
- case NVPTX::SULD_1D_V4I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
- case NVPTX::SULD_2D_V4I8_TRAP:
- case NVPTX::SULD_2D_V4I16_TRAP:
- case NVPTX::SULD_2D_V4I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
- case NVPTX::SULD_3D_V4I8_TRAP:
- case NVPTX::SULD_3D_V4I16_TRAP:
- case NVPTX::SULD_3D_V4I32_TRAP: {
- // This is a V4 surface load, so operand 4 is a surfref
- MachineOperand &SurfHandle = MI.getOperand(4);
+ // For a surface load of vector size N, the Nth operand will be the surfref
+ MachineOperand &SurfHandle = MI.getOperand(VecSize);
replaceImageHandle(SurfHandle, MF);
return true;
- }
- case NVPTX::SUST_B_1D_B8_TRAP:
- case NVPTX::SUST_B_1D_B16_TRAP:
- case NVPTX::SUST_B_1D_B32_TRAP:
- case NVPTX::SUST_B_1D_V2B8_TRAP:
- case NVPTX::SUST_B_1D_V2B16_TRAP:
- case NVPTX::SUST_B_1D_V2B32_TRAP:
- case NVPTX::SUST_B_1D_V4B8_TRAP:
- case NVPTX::SUST_B_1D_V4B16_TRAP:
- case NVPTX::SUST_B_1D_V4B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_B_2D_B8_TRAP:
- case NVPTX::SUST_B_2D_B16_TRAP:
- case NVPTX::SUST_B_2D_B32_TRAP:
- case NVPTX::SUST_B_2D_V2B8_TRAP:
- case NVPTX::SUST_B_2D_V2B16_TRAP:
- case NVPTX::SUST_B_2D_V2B32_TRAP:
- case NVPTX::SUST_B_2D_V4B8_TRAP:
- case NVPTX::SUST_B_2D_V4B16_TRAP:
- case NVPTX::SUST_B_2D_V4B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_B_3D_B8_TRAP:
- case NVPTX::SUST_B_3D_B16_TRAP:
- case NVPTX::SUST_B_3D_B32_TRAP:
- case NVPTX::SUST_B_3D_V2B8_TRAP:
- case NVPTX::SUST_B_3D_V2B16_TRAP:
- case NVPTX::SUST_B_3D_V2B32_TRAP:
- case NVPTX::SUST_B_3D_V4B8_TRAP:
- case NVPTX::SUST_B_3D_V4B16_TRAP:
- case NVPTX::SUST_B_3D_V4B32_TRAP:
- case NVPTX::SUST_P_1D_B8_TRAP:
- case NVPTX::SUST_P_1D_B16_TRAP:
- case NVPTX::SUST_P_1D_B32_TRAP:
- case NVPTX::SUST_P_1D_V2B8_TRAP:
- case NVPTX::SUST_P_1D_V2B16_TRAP:
- case NVPTX::SUST_P_1D_V2B32_TRAP:
- case NVPTX::SUST_P_1D_V4B8_TRAP:
- case NVPTX::SUST_P_1D_V4B16_TRAP:
- case NVPTX::SUST_P_1D_V4B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_P_2D_B8_TRAP:
- case NVPTX::SUST_P_2D_B16_TRAP:
- case NVPTX::SUST_P_2D_B32_TRAP:
- case NVPTX::SUST_P_2D_V2B8_TRAP:
- case NVPTX::SUST_P_2D_V2B16_TRAP:
- case NVPTX::SUST_P_2D_V2B32_TRAP:
- case NVPTX::SUST_P_2D_V4B8_TRAP:
- case NVPTX::SUST_P_2D_V4B16_TRAP:
- case NVPTX::SUST_P_2D_V4B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_P_3D_B8_TRAP:
- case NVPTX::SUST_P_3D_B16_TRAP:
- case NVPTX::SUST_P_3D_B32_TRAP:
- case NVPTX::SUST_P_3D_V2B8_TRAP:
- case NVPTX::SUST_P_3D_V2B16_TRAP:
- case NVPTX::SUST_P_3D_V2B32_TRAP:
- case NVPTX::SUST_P_3D_V4B8_TRAP:
- case NVPTX::SUST_P_3D_V4B16_TRAP:
- case NVPTX::SUST_P_3D_V4B32_TRAP: {
+ } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
// This is a surface store, so operand 0 is a surfref
MachineOperand &SurfHandle = MI.getOperand(0);
replaceImageHandle(SurfHandle, MF);
return true;
- }
- case NVPTX::TXQ_CHANNEL_ORDER:
- case NVPTX::TXQ_CHANNEL_DATA_TYPE:
- case NVPTX::TXQ_WIDTH:
- case NVPTX::TXQ_HEIGHT:
- case NVPTX::TXQ_DEPTH:
- case NVPTX::TXQ_ARRAY_SIZE:
- case NVPTX::TXQ_NUM_SAMPLES:
- case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
- case NVPTX::SUQ_CHANNEL_ORDER:
- case NVPTX::SUQ_CHANNEL_DATA_TYPE:
- case NVPTX::SUQ_WIDTH:
- case NVPTX::SUQ_HEIGHT:
- case NVPTX::SUQ_DEPTH:
- case NVPTX::SUQ_ARRAY_SIZE: {
+ } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
// This is a query, so operand 1 is a surfref/texref
MachineOperand &Handle = MI.getOperand(1);
@@ -308,22 +116,38 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
return true;
}
- }
+
+ return false;
}
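The rewrite above drops the huge per-opcode switch in favor of TSFlags bit tests, and for surface loads it recomputes the surfref operand index from a two-bit vector-size field. A standalone sketch of that decoding; the mask/shift constants and the field values (1 = scalar, 2 = v2, 3 = v4) are assumptions standing in for the real NVPTXII::IsSuldMask/IsSuldShift definitions:

#include <cassert>
#include <cstdint>

static const unsigned IsSuldShift = 4;                     // assumed bit position
static const uint64_t IsSuldMask  = 0x3ULL << IsSuldShift; // assumed 2-bit field

// Mirrors: VecSize = 1 << (((TSFlags & IsSuldMask) >> IsSuldShift) - 1)
static unsigned suldVecSize(uint64_t TSFlags) {
  return 1u << (((TSFlags & IsSuldMask) >> IsSuldShift) - 1);
}

int main() {
  assert(suldVecSize(1ULL << IsSuldShift) == 1); // scalar SULD: surfref is operand 1
  assert(suldVecSize(2ULL << IsSuldShift) == 2); // v2 SULD:     surfref is operand 2
  assert(suldVecSize(3ULL << IsSuldShift) == 4); // v4 SULD:     surfref is operand 4
  return 0;
}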
void NVPTXReplaceImageHandles::
replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
+ unsigned Idx;
+ if (findIndexForHandle(Op, MF, Idx)) {
+ Op.ChangeToImmediate(Idx);
+ }
+}
+
+bool NVPTXReplaceImageHandles::
+findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
+
+ assert(Op.isReg() && "Handle is not in a reg?");
+
// Which instruction defines the handle?
- MachineInstr *MI = MRI.getVRegDef(Op.getReg());
- assert(MI && "No def for image handle vreg?");
- MachineInstr &TexHandleDef = *MI;
+ MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
switch (TexHandleDef.getOpcode()) {
case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
+ const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
+ if (ST.getDrvInterface() == NVPTX::CUDA) {
+ // For CUDA, we preserve the param loads coming from function arguments
+ return false;
+ }
+
assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
std::string ParamBaseName = MF.getName();
@@ -333,19 +157,27 @@ replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
std::string NewSym;
raw_string_ostream NewSymStr(NewSym);
NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
- Op.ChangeToImmediate(
- MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
+
InstrsToRemove.insert(&TexHandleDef);
- break;
+ Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
+ return true;
}
case NVPTX::texsurf_handles: {
// The handle is a global variable, replace with the global variable name
assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
assert(GV->hasName() && "Global sampler must be named!");
- Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
InstrsToRemove.insert(&TexHandleDef);
- break;
+ Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
+ return true;
+ }
+ case NVPTX::nvvm_move_i64:
+ case TargetOpcode::COPY: {
+ bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx);
+ if (Res) {
+ InstrsToRemove.insert(&TexHandleDef);
+ }
+ return Res;
}
default:
llvm_unreachable("Unknown instruction operating on handle");
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index aa0436b..f1d3cb4 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_NVPTXSECTION_H
-#define LLVM_NVPTXSECTION_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index d5cded2..3d52532 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -59,7 +59,8 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
: NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
SmVersion(20), DL(computeDataLayout(is64Bit)),
InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo((NVPTXTargetMachine &)TM), TSInfo(&DL), FrameLowering(*this) {
+ TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL),
+ FrameLowering(*this) {
Triple T(TT);
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 3ed5747..fb2d404 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXSUBTARGET_H
-#define NVPTXSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
#include "NVPTX.h"
#include "NVPTXFrameLowering.h"
@@ -57,14 +57,20 @@ public:
NVPTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM, bool is64Bit);
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const NVPTXRegisterInfo *getRegisterInfo() const {
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const NVPTXRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const NVPTXTargetLowering *getTargetLowering() const { return &TLInfo; }
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const NVPTXTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
bool hasBrkPt() const { return SmVersion >= 11; }
bool hasAtomRedG32() const { return SmVersion >= 11; }
@@ -91,7 +97,12 @@ public:
inline bool hasROT64() const { return SmVersion >= 20; }
bool hasImageHandles() const {
- // Currently disabled
+ // Enable handles for Kepler+, where CUDA supports indirect surfaces and
+ // textures
+ if (getDrvInterface() == NVPTX::CUDA)
+ return (SmVersion >= 30);
+
+ // Otherwise, disabled
return false;
}
bool is64Bit() const { return Is64Bit; }
@@ -108,4 +119,4 @@ public:
} // End llvm namespace
-#endif // NVPTXSUBTARGET_H
+#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 069a1b9..d87693f 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
+#include "NVPTXTargetObjectFile.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -50,6 +51,7 @@ void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -64,6 +66,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
+ initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
}
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
@@ -72,10 +75,13 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
+NVPTXTargetMachine::~NVPTXTargetMachine() {}
+
void NVPTXTargetMachine32::anchor() {}
NVPTXTargetMachine32::NVPTXTargetMachine32(
@@ -119,6 +125,14 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
+void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our NVPTX pass. This
+ // allows the NVPTX pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createNVPTXTargetTransformInfoPass(this));
+}
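A hedged sketch of how a driver uses this hook; it mirrors what llc and opt already do for other targets, and everything except addAnalysisPasses() and createNVPTXTargetTransformInfoPass() is an assumption about the surrounding driver code:

#include "NVPTXTargetMachine.h"
#include "llvm/PassManager.h"

// TM is an already-constructed NVPTXTargetMachine for the chosen triple.
static void populatePasses(llvm::PassManagerBase &PM,
                           llvm::NVPTXTargetMachine &TM) {
  TM.addAnalysisPasses(PM); // BasicTTI first, then the NVPTX TTI (see above)
  // ... IR passes that query TargetTransformInfo can be added after this ...
}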
+
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index a7a1c8f..a726bd1 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_TARGETMACHINE_H
-#define NVPTX_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
#include "NVPTXSubtarget.h"
#include "ManagedStringPool.h"
@@ -25,6 +25,7 @@ namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
NVPTXSubtarget Subtarget;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
@@ -35,27 +36,9 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const NVPTXInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const NVPTXRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
-
- const NVPTXTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
+ ~NVPTXTargetMachine() override;
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
+ const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
@@ -63,17 +46,17 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- // Emission of machine code through JITCodeEmitter is not supported.
- bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
- bool = true) override {
- return true;
- }
-
// Emission of machine code through MCJIT is not supported.
bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
bool = true) override {
return true;
}
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+ /// \brief Register NVPTX analysis passes with a pass manager.
+ void addAnalysisPasses(PassManagerBase &PM) override;
}; // NVPTXTargetMachine.
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 0b438c5..00ceca5 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
-#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H
#include "NVPTXSection.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -87,7 +87,8 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
- const MCSection *getSectionForConstant(SectionKind Kind) const override {
+ const MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override {
return ReadOnlySection;
}
@@ -97,6 +98,9 @@ public:
return DataSection;
}
+ const MCSection *
+ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
new file mode 100644
index 0000000..b09d0d4
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -0,0 +1,115 @@
+//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// NVPTX target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "NVPTXtti"
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeNVPTXTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo {
+ const NVPTXTargetLowering *TLI;
+public:
+ NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ NVPTXTTI(const NVPTXTargetMachine *TM)
+ : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) {
+ initializeNVPTXTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void initializePass() override { pushTTIStack(this); }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ void *getAdjustedAnalysisPointer(const void *ID) override {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo *)this;
+ return this;
+ }
+
+ bool hasBranchDivergence() const override;
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+ OperandValueKind Opd2Info = OK_AnyValue,
+ OperandValueProperties Opd1PropInfo = OP_None,
+ OperandValueProperties Opd2PropInfo = OP_None) const override;
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti",
+ "NVPTX Target Transform Info", true, true, false)
+char NVPTXTTI::ID = 0;
+
+ImmutablePass *
+llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) {
+ return new NVPTXTTI(TM);
+}
+
+bool NVPTXTTI::hasBranchDivergence() const { return true; }
+
+unsigned NVPTXTTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+
+ switch (ISD) {
+ default:
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::XOR:
+ case ISD::OR:
+ case ISD::AND:
+ // The machine code (SASS) simulates an i64 with two i32. Therefore, we
+ // estimate that arithmetic operations on i64 are twice as expensive as
+ // those on types that can fit into one machine register.
+ if (LT.second.SimpleTy == MVT::i64)
+ return 2 * LT.first;
+ // Delegate other cases to the basic TTI.
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ }
+}
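The cost model above reduces to one rule: integer arithmetic on i64 costs twice the legalized cost, and everything else falls through to the generic implementation. A standalone sketch of that rule, where numParts stands in for LT.first and nothing else is real NVPTX API:

#include <cassert>

// numParts stands in for LT.first, the legalization cost of the type.
static unsigned nvptxIntArithCost(bool TypeIsI64, unsigned numParts) {
  return TypeIsI64 ? 2 * numParts : numParts;
}

int main() {
  assert(nvptxIntArithCost(false, 1) == 1); // i32 add: one machine op
  assert(nvptxIntArithCost(true, 1) == 2);  // i64 add: modeled as a pair of i32 ops
  return 0;
}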
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index a9fd190b..5caa8bd 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -90,11 +90,11 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
return;
if ((*annotationCache).find(m) != (*annotationCache).end())
- (*annotationCache)[m][gv] = tmp;
+ (*annotationCache)[m][gv] = std::move(tmp);
else {
global_val_annot_t tmp1;
- tmp1[gv] = tmp;
- (*annotationCache)[m] = tmp1;
+ tmp1[gv] = std::move(tmp);
+ (*annotationCache)[m] = std::move(tmp1);
}
}
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 446bfa1..7e2ce73 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXUTILITIES_H
-#define NVPTXUTILITIES_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h
index d1d1171..1915dac 100644
--- a/lib/Target/NVPTX/NVPTXutil.h
+++ b/lib/Target/NVPTX/NVPTXutil.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_UTIL_H
-#define LLVM_TARGET_NVPTX_UTIL_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 2f562ca..06bb968 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -213,16 +214,12 @@ struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
const MCInstrInfo &MII;
bool IsPPC64;
bool IsDarwin;
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
- bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
bool isPPC64() const { return IsPPC64; }
bool isDarwin() const { return IsDarwin; }
@@ -244,10 +241,12 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseDirectiveTC(unsigned Size, SMLoc L);
bool ParseDirectiveMachine(SMLoc L);
bool ParseDarwinDirectiveMachine(SMLoc L);
+ bool ParseDirectiveAbiVersion(SMLoc L);
+ bool ParseDirectiveLocalEntry(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
@@ -263,9 +262,8 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
+ const MCInstrInfo &_MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(_STI), MII(_MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -294,6 +292,7 @@ struct PPCOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
Immediate,
+ ContextImmediate,
Expression,
TLSRegister
} Kind;
@@ -338,6 +337,7 @@ public:
Tok = o.Tok;
break;
case Immediate:
+ case ContextImmediate:
Imm = o.Imm;
break;
case Expression:
@@ -362,6 +362,16 @@ public:
assert(Kind == Immediate && "Invalid access!");
return Imm.Val;
}
+ int64_t getImmS16Context() const {
+ assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+ if (Kind == Immediate)
+ return Imm.Val;
+ return static_cast<int16_t>(Imm.Val);
+ }
+ int64_t getImmU16Context() const {
+ assert((Kind == Immediate || Kind == ContextImmediate) && "Invalid access!");
+ return Imm.Val;
+ }
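The new ContextImmediate kind covers target expressions (for example a folded @l or @ha) that evaluate to a constant: in a signed 16-bit context only the low half is meaningful, so getImmS16Context() reinterprets the value through int16_t instead of rejecting it. A tiny standalone illustration; the example value is hypothetical:

#include <cassert>
#include <cstdint>

int main() {
  int64_t Folded = 0x8000; // e.g. sym@l folded to a constant by PPCMCExpr
  // As a plain Immediate this fails isInt<16>(), but reinterpreted in a
  // signed 16-bit context it is the valid immediate -32768.
  assert(static_cast<int16_t>(Folded) == -32768);
  return 0;
}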
const MCExpr *getExpr() const {
assert(Kind == Expression && "Invalid access!");
@@ -406,22 +416,73 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
- bool isU16Imm() const { return Kind == Expression ||
- (Kind == Immediate && isUInt<16>(getImm())); }
- bool isS16Imm() const { return Kind == Expression ||
- (Kind == Immediate && isInt<16>(getImm())); }
+ bool isU6ImmX2() const { return Kind == Immediate &&
+ isUInt<6>(getImm()) &&
+ (getImm() & 1) == 0; }
+ bool isU7ImmX4() const { return Kind == Immediate &&
+ isUInt<7>(getImm()) &&
+ (getImm() & 3) == 0; }
+ bool isU8ImmX8() const { return Kind == Immediate &&
+ isUInt<8>(getImm()) &&
+ (getImm() & 7) == 0; }
+ bool isU16Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isUInt<16>(getImmU16Context());
+ default:
+ return false;
+ }
+ }
+ bool isS16Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isInt<16>(getImmS16Context());
+ default:
+ return false;
+ }
+ }
bool isS16ImmX4() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
- bool isS17Imm() const { return Kind == Expression ||
- (Kind == Immediate && isInt<17>(getImm())); }
+ bool isS17Imm() const {
+ switch (Kind) {
+ case Expression:
+ return true;
+ case Immediate:
+ case ContextImmediate:
+ return isInt<17>(getImmS16Context());
+ default:
+ return false;
+ }
+ }
bool isTLSReg() const { return Kind == TLSRegister; }
- bool isDirectBr() const { return Kind == Expression ||
- (Kind == Immediate && isInt<26>(getImm()) &&
- (getImm() & 3) == 0); }
+ bool isDirectBr() const {
+ if (Kind == Expression)
+ return true;
+ if (Kind != Immediate)
+ return false;
+ // Operand must be a 4-byte aligned, signed 26-bit immediate.
+ if ((getImm() & 3) != 0)
+ return false;
+ if (isInt<26>(getImm()))
+ return true;
+ if (!IsPPC64) {
+ // In 32-bit mode, large 32-bit quantities wrap around.
+ if (isUInt<32>(getImm()) && isInt<26>(static_cast<int32_t>(getImm())))
+ return true;
+ }
+ return false;
+ }
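A standalone sketch of the relaxed check above: in 32-bit mode an absolute target such as 0xFFFFFF00, which does not fit a signed 26-bit offset as written, is accepted because reinterpreting it as a signed 32-bit value gives the small offset -256. Only the logic mirrors isDirectBr(); the helper names are made up here:

#include <cassert>
#include <cstdint>

static bool fitsInt26(int64_t V) { return V >= -(1LL << 25) && V < (1LL << 25); }

static bool isDirectBrImm(int64_t Imm, bool IsPPC64) {
  if ((Imm & 3) != 0)
    return false;                              // must be 4-byte aligned
  if (fitsInt26(Imm))
    return true;
  if (!IsPPC64 && Imm >= 0 && Imm <= 0xFFFFFFFFLL &&
      fitsInt26(static_cast<int32_t>(Imm)))
    return true;                               // 32-bit wraparound case
  return false;
}

int main() {
  assert(isDirectBrImm(0x100, true));        // small offset: always accepted
  assert(!isDirectBrImm(0xFFFFFF00u, true)); // out of range in 64-bit mode
  assert(isDirectBrImm(0xFFFFFF00u, false)); // wraps around to -256 in 32-bit mode
  return 0;
}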
bool isCondBr() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
@@ -526,6 +587,36 @@ public:
Inst.addOperand(MCOperand::CreateExpr(getExpr()));
}
+ void addS16ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ switch (Kind) {
+ case Immediate:
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ break;
+ case ContextImmediate:
+ Inst.addOperand(MCOperand::CreateImm(getImmS16Context()));
+ break;
+ default:
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ break;
+ }
+ }
+
+ void addU16ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ switch (Kind) {
+ case Immediate:
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ break;
+ case ContextImmediate:
+ Inst.addOperand(MCOperand::CreateImm(getImmU16Context()));
+ break;
+ default:
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ break;
+ }
+ }
+
void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Immediate)
@@ -566,9 +657,9 @@ public:
// explicitly.
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
- Op->Tok.Data = (const char *)(Op.get() + 1);
+ Op->Tok.Data = reinterpret_cast<const char *>(Op.get() + 1);
Op->Tok.Length = Str.size();
- std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
+ std::memcpy(const_cast<char *>(Op->Tok.Data), Str.data(), Str.size());
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsPPC64 = IsPPC64;
@@ -607,6 +698,16 @@ public:
}
static std::unique_ptr<PPCOperand>
+ CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(ContextImmediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
return CreateImm(CE->getValue(), S, E, IsPPC64);
@@ -615,6 +716,12 @@ public:
if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
return CreateTLSReg(SRE, S, E, IsPPC64);
+ if (const PPCMCExpr *TE = dyn_cast<PPCMCExpr>(Val)) {
+ int64_t Res;
+ if (TE->EvaluateAsConstant(Res))
+ return CreateContextImm(Res, S, E, IsPPC64);
+ }
+
return CreateExpr(Val, S, E, IsPPC64);
}
};
@@ -627,6 +734,7 @@ void PPCOperand::print(raw_ostream &OS) const {
OS << "'" << getToken() << "'";
break;
case Immediate:
+ case ContextImmediate:
OS << getImm();
break;
case Expression:
@@ -638,6 +746,29 @@ void PPCOperand::print(raw_ostream &OS) const {
}
}
+static void
+addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
+ if (Op.isImm()) {
+ Inst.addOperand(MCOperand::CreateImm(-Op.getImm()));
+ return;
+ }
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCUnaryExpr *UnExpr = dyn_cast<MCUnaryExpr>(Expr)) {
+ if (UnExpr->getOpcode() == MCUnaryExpr::Minus) {
+ Inst.addOperand(MCOperand::CreateExpr(UnExpr->getSubExpr()));
+ return;
+ }
+ } else if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+ if (BinExpr->getOpcode() == MCBinaryExpr::Sub) {
+ const MCExpr *NE = MCBinaryExpr::CreateSub(BinExpr->getRHS(),
+ BinExpr->getLHS(), Ctx);
+ Inst.addOperand(MCOperand::CreateExpr(NE));
+ return;
+ }
+ }
+ Inst.addOperand(MCOperand::CreateExpr(MCUnaryExpr::CreateMinus(Expr, Ctx)));
+}
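addNegOperand() is what lets the subtract-style extended mnemonics below (SUBI, SUBIS, SUBIC, SUBICo) be rewritten into their add* forms even when the third operand is a symbolic expression rather than a literal. A toy, self-contained sketch of its rewrite rules; the Node type is only a stand-in for MCOperand/MCExpr:

#include <cassert>
#include <string>

struct Node {
  long Imm;        // used when Sym is empty
  std::string Sym; // "x", "-x" or "a-b" for symbolic operands
};

static Node negate(const Node &N) {
  if (N.Sym.empty())
    return {-N.Imm, ""};                          // negate the immediate
  if (N.Sym[0] == '-')
    return {0, N.Sym.substr(1)};                  // -(-x)  ==>  x
  std::string::size_type Minus = N.Sym.find('-');
  if (Minus != std::string::npos)                 // -(a-b) ==>  b-a
    return {0, N.Sym.substr(Minus + 1) + "-" + N.Sym.substr(0, Minus)};
  return {0, "-" + N.Sym};                        // otherwise wrap in unary minus
}

int main() {
  assert(negate({8, ""}).Imm == -8);                   // subi 3,3,8         -> addi 3,3,-8
  assert(negate({0, "end-start"}).Sym == "start-end"); // subi 3,3,end-start -> addi 3,3,start-end
  assert(negate({0, "sym"}).Sym == "-sym");            // subi 3,3,sym       -> addi 3,3,-sym
  return 0;
}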
+
void PPCAsmParser::ProcessInstruction(MCInst &Inst,
const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
@@ -653,41 +784,37 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
}
case PPC::SUBI: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDI);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBIS: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDIS);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBIC: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDIC);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
case PPC::SUBICo: {
MCInst TmpInst;
- int64_t N = Inst.getOperand(2).getImm();
TmpInst.setOpcode(PPC::ADDICo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
- TmpInst.addOperand(MCOperand::CreateImm(-N));
+ addNegOperand(TmpInst, Inst.getOperand(2), getContext());
Inst = TmpInst;
break;
}
@@ -921,7 +1048,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
@@ -939,7 +1066,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "unrecognized instruction mnemonic");
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -995,6 +1122,7 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
bool PPCAsmParser::
ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
@@ -1176,6 +1304,7 @@ ParseExpression(const MCExpr *&EVal) {
/// for this to be done at a higher level.
bool PPCAsmParser::
ParseDarwinExpression(const MCExpr *&EVal) {
+ MCAsmParser &Parser = getParser();
PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
switch (getLexer().getKind()) {
default:
@@ -1218,6 +1347,7 @@ ParseDarwinExpression(const MCExpr *&EVal) {
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
@@ -1412,6 +1542,10 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
if (IDVal == ".machine")
return ParseDirectiveMachine(DirectiveID.getLoc());
+ if (IDVal == ".abiversion")
+ return ParseDirectiveAbiVersion(DirectiveID.getLoc());
+ if (IDVal == ".localentry")
+ return ParseDirectiveLocalEntry(DirectiveID.getLoc());
} else {
if (IDVal == ".machine")
return ParseDarwinDirectiveMachine(DirectiveID.getLoc());
@@ -1422,6 +1556,7 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -1446,6 +1581,7 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
/// ParseDirectiveTC
/// ::= .tc [ symbol (, expression)* ]
bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
// Skip TC symbol, which is only used with XCOFF.
while (getLexer().isNot(AsmToken::EndOfStatement)
&& getLexer().isNot(AsmToken::Comma))
@@ -1466,6 +1602,7 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
/// ParseDirectiveMachine (ELF platforms)
/// ::= .machine [ cpu | "push" | "pop" ]
bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier) &&
getLexer().isNot(AsmToken::String)) {
Error(L, "unexpected token in directive");
@@ -1500,6 +1637,7 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
/// ParseDarwinDirectiveMachine (Mach-o platforms)
/// ::= .machine cpu-identifier
bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier) &&
getLexer().isNot(AsmToken::String)) {
Error(L, "unexpected token in directive");
@@ -1534,6 +1672,64 @@ bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
return false;
}
+/// ParseDirectiveAbiVersion
+/// ::= .abiversion constant-expression
+bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
+ int64_t AbiVersion;
+ if (getParser().parseAbsoluteExpression(AbiVersion)){
+ Error(L, "expected constant expression");
+ return false;
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitAbiVersion(AbiVersion);
+
+ return false;
+}
+
+/// ParseDirectiveLocalEntry
+/// ::= .localentry symbol, expression
+bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name)) {
+ Error(L, "expected identifier in directive");
+ return false;
+ }
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(L, "expected expression");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitLocalEntry(Sym, Expr);
+
+ return false;
+}
+
+
+
/// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmParser() {
RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
@@ -1558,6 +1754,10 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
case MCK_1: ImmVal = 1; break;
case MCK_2: ImmVal = 2; break;
case MCK_3: ImmVal = 3; break;
+ case MCK_4: ImmVal = 4; break;
+ case MCK_5: ImmVal = 5; break;
+ case MCK_6: ImmVal = 6; break;
+ case MCK_7: ImmVal = 7; break;
default: return Match_InvalidOperand;
}
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index ea4de63..47a9474 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -2,9 +2,8 @@ set(LLVM_TARGET_DEFINITIONS PPC.td)
tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
-tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)
@@ -16,7 +15,6 @@ add_public_tablegen_target(PowerPCCommonTableGen)
add_llvm_target(PowerPCCodeGen
PPCAsmPrinter.cpp
PPCBranchSelector.cpp
- PPCCodeEmitter.cpp
PPCCTRLoops.cpp
PPCHazardRecognizers.cpp
PPCInstrInfo.cpp
@@ -24,7 +22,6 @@ add_llvm_target(PowerPCCodeGen
PPCISelLowering.cpp
PPCFastISel.cpp
PPCFrameLowering.cpp
- PPCJITInfo.cpp
PPCMCInstLower.cpp
PPCMachineFunctionInfo.cpp
PPCRegisterInfo.cpp
diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
index c1011ff..ea3e7ea 100644
--- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
+++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = PowerPCDisassembler
parent = PowerPC
-required_libraries = MC PowerPCDesc PowerPCInfo Support
+required_libraries = MCDisassembler PowerPCInfo Support
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index a2305a9..5251b60 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -12,7 +12,6 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -28,13 +27,10 @@ public:
: MCDisassembler(STI, Ctx) {}
virtual ~PPCDisassembler() {}
- // Override MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -325,23 +321,19 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
#include "PPCGenDisassemblerTables.inc"
DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS,
+ raw_ostream &CS) const {
// Get the four bytes of the instruction.
- uint8_t Bytes[4];
Size = 4;
- if (Region.readBytes(Address, Size, Bytes) == -1) {
+ if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
// The instruction is big-endian encoded.
- uint32_t Inst = (Bytes[0] << 24) |
- (Bytes[1] << 16) |
- (Bytes[2] << 8) |
- (Bytes[3] << 0);
+ uint32_t Inst =
+ (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}
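
Editor's note: the rewritten getInstruction() reads the four instruction bytes straight from the ArrayRef instead of going through MemoryObject, then assembles them into a big-endian 32-bit word. A minimal standalone sketch of that byte-to-word step, not taken from the patch (readBE32 and Blr are illustrative names):

#include <cstdint>
#include <cstdio>

// Assemble a big-endian encoded PPC instruction word from its four bytes,
// mirroring the (Bytes[0] << 24) | ... expression in the hunk above.
static uint32_t readBE32(const uint8_t Bytes[4]) {
  return (uint32_t(Bytes[0]) << 24) | (uint32_t(Bytes[1]) << 16) |
         (uint32_t(Bytes[2]) << 8)  |  uint32_t(Bytes[3]);
}

int main() {
  const uint8_t Blr[4] = {0x4e, 0x80, 0x00, 0x20}; // "blr", big-endian byte order
  std::printf("0x%08x\n", readBE32(Blr));          // prints 0x4e800020
  return 0;
}
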
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 7279b09..670c40a 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
@@ -207,6 +208,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 15 && "Invalid u4imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
@@ -260,7 +268,7 @@ void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
if (!MI->getOperand(OpNo).isImm())
return printOperand(MI, OpNo, O);
- O << (int)MI->getOperand(OpNo).getImm()*4;
+ O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
}
@@ -308,10 +316,16 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- printBranchOperand(MI, OpNo, O);
+ // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
+ // come at the _end_ of the expression.
+ const MCOperand &Op = MI->getOperand(OpNo);
+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
+ O << refExp.getSymbol().getName();
O << '(';
printOperand(MI, OpNo+1, O);
O << ')';
+ if (refExp.getKind() != MCSymbolRefExpr::VK_None)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
}
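
Editor's note: the rewritten printTLSCall() prints the symbol name, then the parenthesized second operand, and appends the variant suffix only when the kind is not VK_None, since on PPC32 the @plt modifier must follow the whole expression. A small standalone sketch of that output shape, not from the patch (formatTLSCall is an illustrative helper):

#include <cstdio>
#include <string>

// Mimic the output shape of the rewritten printTLSCall(): the variant suffix
// ("plt" on PPC32) follows the closing parenthesis, e.g.
//   __tls_get_addr(x@tlsgd)@plt
static std::string formatTLSCall(const std::string &Callee,
                                 const std::string &Operand,
                                 const std::string &Variant) {
  std::string S = Callee + "(" + Operand + ")";
  if (!Variant.empty())
    S += "@" + Variant;
  return S;
}

int main() {
  std::puts(formatTLSCall("__tls_get_addr", "x@tlsgd", "plt").c_str());
  return 0;
}
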
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 211a628..b21aa22 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCINSTPRINTER_H
-#define PPCINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
@@ -44,6 +44,7 @@ public:
raw_ostream &O, const char *Modifier = nullptr);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 12584be..c54d5e7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -9,7 +9,9 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
@@ -128,6 +130,30 @@ public:
}
}
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override {
+ switch ((PPC::Fixups)Fixup.getKind()) {
+ default: break;
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ // If the target symbol has a local entry point we must not attempt
+ // to resolve the fixup directly. Emit a relocation and leave
+ // resolution of the final target address to the linker.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol());
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(Data) << 2;
+ if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
+ IsResolved = false;
+ }
+ break;
+ }
+ }
+
bool mayNeedRelaxation(const MCInst &Inst) const override {
// FIXME.
return false;
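
Editor's note: the processFixupValue() override added above refuses to resolve a br24/br24abs fixup whenever the callee carries ELFv2 local-entry bits in st_other, so the linker can redirect the call. A standalone sketch of that bit test, assuming STO_PPC64_LOCAL_MASK covers bits 5–7 of st_other as in the ELFv2 ABI; the MCELF "other" accessor stores only the top six bits, hence the shift by 2 (constants below are stand-ins, not the LLVM definitions):

#include <cstdint>

constexpr unsigned STO_PPC64_LOCAL_BIT  = 5;
constexpr unsigned STO_PPC64_LOCAL_MASK = 7u << STO_PPC64_LOCAL_BIT; // 0xe0

// Given the packed "other" value kept by MCELF (st_other >> 2), decide whether
// the symbol has a non-zero local entry point, i.e. the fixup must stay a reloc.
static bool hasLocalEntry(unsigned PackedOther) {
  unsigned Other = PackedOther << 2;               // restore st_other layout
  return (Other & STO_PPC64_LOCAL_MASK) != 0;
}

int main() {
  // 0x60 in st_other encodes a non-zero local entry offset; packed form is >> 2.
  return hasLocalEntry(0x60u >> 2) ? 0 : 1;
}
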
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index cd3b4f4..b817394 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
@@ -23,13 +24,12 @@ namespace {
public:
PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
- virtual ~PPCELFObjectWriter();
protected:
- virtual unsigned getRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
+
+ bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const override;
};
}
@@ -38,9 +38,6 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
/*HasRelocationAddend*/ true) {}
-PPCELFObjectWriter::~PPCELFObjectWriter() {
-}
-
static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
const MCFixup &Fixup) {
const MCExpr *Expr = Fixup.getValue();
@@ -69,10 +66,9 @@ static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
llvm_unreachable("unknown PPCMCExpr kind");
}
-unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const
-{
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
// determine the type of the relocation
@@ -83,7 +79,18 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
- Type = ELF::R_PPC_REL24;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_PPC_PLTREL24;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LOCAL:
+ Type = ELF::R_PPC_LOCAL24PC;
+ break;
+ }
break;
case PPC::fixup_ppc_brcond14:
case PPC::fixup_ppc_brcond14abs:
@@ -224,7 +231,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_DTPREL16_HIGHESTA;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
- Type = ELF::R_PPC64_GOT_TLSGD16;
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSGD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSGD16;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
Type = ELF::R_PPC64_GOT_TLSGD16_LO;
@@ -236,7 +246,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_GOT_TLSGD16_HA;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
- Type = ELF::R_PPC64_GOT_TLSLD16;
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSLD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSLD16;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
Type = ELF::R_PPC64_GOT_TLSLD16_LO;
@@ -332,13 +345,22 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_PPC_TLSGD:
- Type = ELF::R_PPC64_TLSGD;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSGD;
+ else
+ Type = ELF::R_PPC_TLSGD;
break;
case MCSymbolRefExpr::VK_PPC_TLSLD:
- Type = ELF::R_PPC64_TLSLD;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSLD;
+ else
+ Type = ELF::R_PPC_TLSLD;
break;
case MCSymbolRefExpr::VK_PPC_TLS:
- Type = ELF::R_PPC64_TLS;
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLS;
+ else
+ Type = ELF::R_PPC_TLS;
break;
}
break;
@@ -373,10 +395,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
return Type;
}
-unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- return getRelocTypeInner(Target, Fixup, IsPCRel);
+bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const {
+ switch (Type) {
+ default:
+ return false;
+
+ case ELF::R_PPC_REL24:
+ // If the target symbol has a local entry point, we must keep the
+ // target symbol to preserve that information for the linker.
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(SD) << 2;
+ return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0;
+ }
}
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 68de8c1..ae43e59 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PPC_PPCFIXUPKINDS_H
-#define LLVM_PPC_PPCFIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index b95a2ac..893aae3 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -42,9 +42,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
UseIntegratedAssembler = true;
}
-void PPCLinuxMCAsmInfo::anchor() { }
+void PPCELFMCAsmInfo::anchor() { }
-PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
+PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
@@ -64,7 +64,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
DollarIsPC = true;
// Set up DWARF directives
- HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
MinInstAlignment = 4;
// Exceptions handling
@@ -73,6 +72,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
+ LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
if (T.getOS() == llvm::Triple::FreeBSD ||
(T.getOS() == llvm::Triple::NetBSD && !is64Bit) ||
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 754330b..9f0294d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCTARGETASMINFO_H
-#define PPCTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
@@ -26,10 +26,10 @@ class Triple;
explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
};
- class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
+ class PPCELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
public:
- explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&);
+ explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&);
};
} // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 435a93f..786b7fe 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -66,6 +66,15 @@ public:
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -260,6 +269,54 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
}
+unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe8dis, which has the low 5-bits of (imm / 8)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 3;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe4dis, which has the low 5-bits of (imm / 4)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 2;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
+unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)
+ const {
+ // Encode (imm, reg) as a spe2dis, which has the low 5-bits of (imm / 2)
+ // as the displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm());
+ uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 1;
+ return reverseBits(Imm | RegBits) >> 22;
+}
+
+
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
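
Editor's note: the three new getSPE*DisEncoding() helpers pack the scaled displacement into the low 5 bits and the base register into the next 5, then emit the 10-bit field bit-reversed — that is what reverseBits(...) >> 22 does to a 32-bit value. A standalone sketch of the spe8dis case, with illustrative names not taken from the patch:

#include <cstdint>
#include <cstdio>

// Reverse the order of the low 10 bits of a word; this matches the effect of
// llvm::reverseBits(X) >> 22 used in the encoders above.
static uint32_t reverseLow10(uint32_t X) {
  uint32_t R = 0;
  for (int I = 0; I < 10; ++I)
    if (X & (1u << I))
      R |= 1u << (9 - I);
  return R;
}

// spe8dis: displacement is Imm/8 in the low 5 bits, register number in the
// next 5 bits, and the combined field is stored bit-reversed.
static uint32_t encodeSPE8Dis(uint32_t Imm, uint32_t RegNo) {
  return reverseLow10((Imm >> 3) | (RegNo << 5));
}

int main() {
  std::printf("%u\n", encodeSPE8Dis(32, 5)); // e.g. an evldd-style operand
  return 0;
}
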
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 3ac0aca..7204bef 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "PPCFixupKinds.h"
#include "PPCMCExpr.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -52,40 +53,56 @@ void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
}
bool
+PPCMCExpr::EvaluateAsConstant(int64_t &Res) const {
+ MCValue Value;
+
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, nullptr, nullptr))
+ return false;
+
+ if (!Value.isAbsolute())
+ return false;
+
+ Res = EvaluateAsInt64(Value.getConstant());
+ return true;
+}
+
+int64_t
+PPCMCExpr::EvaluateAsInt64(int64_t Value) const {
+ switch (Kind) {
+ case VK_PPC_LO:
+ return Value & 0xffff;
+ case VK_PPC_HI:
+ return (Value >> 16) & 0xffff;
+ case VK_PPC_HA:
+ return ((Value + 0x8000) >> 16) & 0xffff;
+ case VK_PPC_HIGHER:
+ return (Value >> 32) & 0xffff;
+ case VK_PPC_HIGHERA:
+ return ((Value + 0x8000) >> 32) & 0xffff;
+ case VK_PPC_HIGHEST:
+ return (Value >> 48) & 0xffff;
+ case VK_PPC_HIGHESTA:
+ return ((Value + 0x8000) >> 48) & 0xffff;
+ case VK_PPC_None:
+ break;
+ }
+ llvm_unreachable("Invalid kind!");
+}
+
+bool
PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
MCValue Value;
- if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout))
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout, Fixup))
return false;
if (Value.isAbsolute()) {
- int64_t Result = Value.getConstant();
- switch (Kind) {
- default:
- llvm_unreachable("Invalid kind!");
- case VK_PPC_LO:
- Result = Result & 0xffff;
- break;
- case VK_PPC_HI:
- Result = (Result >> 16) & 0xffff;
- break;
- case VK_PPC_HA:
- Result = ((Result + 0x8000) >> 16) & 0xffff;
- break;
- case VK_PPC_HIGHER:
- Result = (Result >> 32) & 0xffff;
- break;
- case VK_PPC_HIGHERA:
- Result = ((Result + 0x8000) >> 32) & 0xffff;
- break;
- case VK_PPC_HIGHEST:
- Result = (Result >> 48) & 0xffff;
- break;
- case VK_PPC_HIGHESTA:
- Result = ((Result + 0x8000) >> 48) & 0xffff;
- break;
- }
+ int64_t Result = EvaluateAsInt64(Value.getConstant());
+ if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) &&
+ (Result >= 0x8000))
+ return false;
Res = MCValue::get(Result);
} else {
if (!Layout)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index bca4085..f0a6bb9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCMCEXPR_H
-#define PPCMCEXPR_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCEXPR_H
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
@@ -34,6 +34,8 @@ private:
const MCExpr *Expr;
bool IsDarwin;
+ int64_t EvaluateAsInt64(int64_t Value) const;
+
explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
bool _IsDarwin)
: Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
@@ -78,7 +80,8 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
@@ -87,6 +90,8 @@ public:
// There are no TLS PPCMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+ bool EvaluateAsConstant(int64_t &Res) const;
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
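
Editor's note: the EvaluateAsInt64() helper that the refactored EvaluateAsRelocatableImpl() now calls applies the standard PowerPC @l/@h/@ha/@higher/... slices to a constant. A standalone sketch of those computations with a worked value (the function names are illustrative, not the patch's):

#include <cstdint>
#include <cstdio>

// The 16-bit slices used by the VK_PPC_* kinds in the hunk above.
static int64_t lo16(int64_t V)     { return V & 0xffff; }
static int64_t hi16(int64_t V)     { return (V >> 16) & 0xffff; }
static int64_t ha16(int64_t V)     { return ((V + 0x8000) >> 16) & 0xffff; }
static int64_t higher16(int64_t V) { return (V >> 32) & 0xffff; }

int main() {
  int64_t V = 0x12348765;
  // @ha compensates for the sign extension of @l: (ha16 << 16) + sext16(lo16) == V.
  std::printf("lo=0x%llx hi=0x%llx ha=0x%llx higher=0x%llx\n",
              (long long)lo16(V), (long long)hi16(V),
              (long long)ha16(V), (long long)higher16(V));
  return 0;
}
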
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 7057797..00be8f4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -16,12 +16,16 @@
#include "PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -75,7 +79,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
if (TheTriple.isOSDarwin())
MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
else
- MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple);
+ MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
@@ -125,11 +129,20 @@ public:
void emitMachine(StringRef CPU) override {
OS << "\t.machine " << CPU << '\n';
}
+ void emitAbiVersion(int AbiVersion) override {
+ OS << "\t.abiversion " << AbiVersion << '\n';
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n';
+ }
};
class PPCTargetELFStreamer : public PPCTargetStreamer {
public:
PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
Streamer.EmitSymbolValue(&S, 8);
@@ -138,6 +151,39 @@ public:
// FIXME: Is there anything to do in here or does this directive only
// limit the parser?
}
+ void emitAbiVersion(int AbiVersion) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ Flags &= ~ELF::EF_PPC64_ABI;
+ Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
+ MCA.setELFHeaderEFlags(Flags);
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S);
+
+ int64_t Res;
+ if (!LocalOffset->EvaluateAsAbsolute(Res, MCA))
+ report_fatal_error(".localentry expression must be absolute.");
+
+ unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
+ if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
+ report_fatal_error(".localentry expression cannot be encoded.");
+
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(Data) << 2;
+ Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+ Other |= Encoded;
+ MCELF::setOther(Data, Other >> 2);
+
+ // For GAS compatibility, unless we already saw a .abiversion directive,
+ // set e_flags to indicate ELFv2 ABI.
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ if ((Flags & ELF::EF_PPC64_ABI) == 0)
+ MCA.setELFHeaderEFlags(Flags | 2);
+ }
};
class PPCTargetMachOStreamer : public PPCTargetStreamer {
@@ -150,25 +196,27 @@ public:
// FIXME: We should update the CPUType, CPUSubType in the Object file if
// the new values are different from the defaults.
}
+ void emitAbiVersion(int AbiVersion) override {
+ llvm_unreachable("Unknown pseudo-op: .abiversion");
+ }
+ void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) override {
+ llvm_unreachable("Unknown pseudo-op: .localentry");
+ }
};
}
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
if (Triple(TT).isOSDarwin()) {
MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
new PPCTargetMachOStreamer(*S);
return S;
}
- MCStreamer *S =
- createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
new PPCTargetELFStreamer(*S);
return S;
}
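
Editor's note: the ELF streamer's emitLocalEntry() relies on ELF::encodePPC64LocalEntryOffset()/decodePPC64LocalEntryOffset() to squeeze the local-entry offset into the three STO_PPC64_LOCAL bits of st_other; per the ELFv2 ABI the decodable offsets are small powers of two. A hedged sketch of the decode direction only, following the documented ABI formula rather than the exact LLVM helpers (which may differ in detail):

// Decode the 3-bit STO_PPC64_LOCAL value back into the byte offset between the
// global and local entry points, per the ELFv2 formula ((1 << value) >> 2) << 2:
// 0 -> 0, 2 -> 4, 3 -> 8 (the common addis+addi prefix), 4 -> 16, and so on;
// values 1 and 7 are effectively reserved.
static unsigned decodeLocalEntryOffset(unsigned StOther) {
  unsigned Val = (StOther & 0xe0u) >> 5;   // bits 5-7 of st_other
  return ((1u << Val) >> 2) << 2;
}

int main() { return decodeLocalEntryOffset(3u << 5) == 8 ? 0 : 1; }
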
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 474395b..68f7f7a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCMCTARGETDESC_H
-#define PPCMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index cff27ba..df2f14a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -80,7 +80,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
-/// Outline based on PPCELFObjectWriter::getRelocTypeInner().
+/// Outline based on PPCELFObjectWriter::GetRelocType().
static unsigned getRelocType(const MCValue &Target,
const MCFixupKind FixupKind, // from
// Fixup.getKind()
@@ -360,7 +360,7 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
+ if (!SD->getSymbol().isUndefined())
FixedValue -= Layout.getSymbolOffset(SD);
} else {
// The index is the section ordinal (1-based).
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 10e328a..6075631 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
-#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCPREDICATES_H
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index c966748..cf516f4 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -13,7 +13,7 @@ TARGET = PPC
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
- PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
+ PPCGenAsmWriter.inc \
PPCGenInstrInfo.inc PPCGenDAGISel.inc \
PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index c42c5be..8fb33df 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_H
-#define LLVM_TARGET_POWERPC_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPC_H
+#define LLVM_LIB_TARGET_POWERPC_PPC_H
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
@@ -26,7 +26,6 @@ namespace llvm {
class PassRegistry;
class FunctionPass;
class ImmutablePass;
- class JITCodeEmitter;
class MachineInstr;
class AsmPrinter;
class MCInst;
@@ -41,8 +40,6 @@ namespace llvm {
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
- FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
- JITCodeEmitter &MCE);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
@@ -60,10 +57,11 @@ namespace llvm {
// PPC Specific MachineOperand flags.
MO_NO_FLAG,
- /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the "FOO$stub" symbol. This is used for calls
- /// and jumps to external functions on Tiger and earlier.
- MO_DARWIN_STUB = 1,
+ /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" or "FOO@plt" symbol. This is
+ /// used for calls and jumps to external functions on Tiger and earlier, and
+ /// for PIC calls on Linux and ELF systems.
+ MO_PLT_OR_STUB = 1,
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
@@ -95,7 +93,12 @@ namespace llvm {
MO_TOC_LO = 7 << 4,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
- MO_TLS = 8 << 4
+ MO_TLS = 8 << 4,
+
+ // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
+ // call sequences.
+ MO_TLSLD = 9 << 4,
+ MO_TLSGD = 10 << 4
};
} // end namespace PPCII
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index a9842b2..46d56a4 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -56,6 +56,8 @@ def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
"Use condition-register bits individually">;
def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
"Enable Altivec instructions">;
+def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true",
+ "Enable SPE instructions">;
def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
@@ -88,11 +90,23 @@ def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
+def FeatureMSYNC : SubtargetFeature<"msync", "HasOnlyMSYNC", "true",
+ "Has only the msync instruction instead of sync",
+ [FeatureBookE]>;
+def FeatureE500 : SubtargetFeature<"e500", "IsE500", "true",
+ "Enable E500/E500mc instructions">;
+def FeaturePPC4xx : SubtargetFeature<"ppc4xx", "IsPPC4xx", "true",
+ "Enable PPC 4xx instructions">;
+def FeaturePPC6xx : SubtargetFeature<"ppc6xx", "IsPPC6xx", "true",
+ "Enable PPC 6xx instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
+def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
+ "Enable POWER8 vector instructions",
+ [FeatureVSX, FeatureAltivec]>;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
@@ -105,7 +119,16 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
// CMPB p6, p6x, p7 cmpb
// DFP p6, p6x, p7 decimal floating-point instructions
// POPCNTB p5 through p7 popcntb and related instructions
-// VSX p7 vector-scalar instruction set
+
+//===----------------------------------------------------------------------===//
+// ABI Selection //
+//===----------------------------------------------------------------------===//
+
+def FeatureELFv1 : SubtargetFeature<"elfv1", "TargetABI", "PPC_ABI_ELFv1",
+ "Use the ELFv1 ABI">;
+
+def FeatureELFv2 : SubtargetFeature<"elfv2", "TargetABI", "PPC_ABI_ELFv2",
+ "Use the ELFv2 ABI">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -178,10 +201,12 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
+ FeatureBookE, FeatureMSYNC,
+ DeprecatedMFTB]>;
def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE, DeprecatedMFTB]>;
+ FeatureBookE, FeatureMSYNC,
+ DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index fd044d9..5648873 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "PPC.h"
#include "InstPrinter/PPCInstPrinter.h"
+#include "PPCMachineFunctionInfo.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCSubtarget.h"
@@ -27,10 +28,12 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -100,9 +103,11 @@ namespace {
}
bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
void EmitFunctionEntryLabel() override;
+ void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
};
@@ -142,7 +147,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -270,6 +275,18 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
printOperand(MI, OpNo, O);
return false;
}
+ case 'U': // Print 'u' for update form.
+ case 'X': // Print 'x' for indexed form.
+ {
+ // FIXME: Currently for PowerPC memory operands are always loaded
+ // into a register, so we never get an update or indexed form.
+ // This is bad even for offset forms, since even if we know we
+ // have a value in -16(r1), we will generate a load into r<n>
+ // and then load from 0(r<n>). Until that issue is fixed,
+ // tolerate 'U' and 'X' but don't output anything.
+ assert(MI->getOperand(OpNo).isReg());
+ return false;
+ }
}
}
@@ -285,7 +302,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
// To avoid name clash check if the name already exists.
@@ -306,12 +323,35 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
+ const Module *M = MF->getFunction()->getParent();
+ PICLevel::Level PL = M->getPICLevel();
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
+ case PPC::MoveGOTtoLR: {
+ // Transform %LR = MoveGOTtoLR
+ // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4
+ // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding
+ // _GLOBAL_OFFSET_TABLE_) has exactly one instruction:
+ // blrl
+ // This will return the pointer to _GLOBAL_OFFSET_TABLE_@local
+ MCSymbol *GOTSymbol =
+ OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol,
+ MCSymbolRefExpr::VK_PPC_LOCAL,
+ OutContext),
+ MCConstantExpr::Create(4, OutContext),
+ OutContext);
+
+ // Emit the 'bl'.
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL).addExpr(OffsExpr));
+ return;
+ }
case PPC::MovePCtoLR:
case PPC::MovePCtoLR8: {
// Transform %LR = MovePCtoLR
@@ -330,11 +370,85 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitLabel(PICBase);
return;
}
+ case PPC::UpdateGBR: {
+ // Transform %Rd = UpdateGBR(%Rt, %Ri)
+ // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri)
+ // add %Rd, %Rt, %Ri
+ // Get the offset from the GOT Base Register to the GOT
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+ MCSymbol *PICOffset =
+ MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+ TmpInst.setOpcode(PPC::LWZ);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(),
+ MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCOperand TR = TmpInst.getOperand(1);
+ const MCOperand PICR = TmpInst.getOperand(0);
+
+ // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri)
+ TmpInst.getOperand(1) =
+ MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp, PB, OutContext));
+ TmpInst.getOperand(0) = TR;
+ TmpInst.getOperand(2) = PICR;
+ EmitToStreamer(OutStreamer, TmpInst);
+
+ TmpInst.setOpcode(PPC::ADD4);
+ TmpInst.getOperand(0) = PICR;
+ TmpInst.getOperand(1) = TR;
+ TmpInst.getOperand(2) = PICR;
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LWZtoc: {
+ // Transform %R3 = LWZtoc <ga:@min1>, %R2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
+
+ // Change the opcode to LWZ, and the global address operand to be a
+ // reference to the GOT entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LWZ);
+ const MachineOperand &MO = MI->getOperand(1);
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+
+ if (PL == PICLevel::Small) {
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_GOT,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ } else {
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ OutContext);
+ Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ }
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
case PPC::LDtocJTI:
case PPC::LDtocCPT:
+ case PPC::LDtocBA:
case PPC::LDtoc: {
// Transform %X3 = LDtoc <ga:@min1>, %X2
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD, and the global address operand to be a
// reference to the TOC entry we will synthesize later.
@@ -342,7 +456,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(1);
// Map symbol -> label of TOC entry
- assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress());
MCSymbol *MOSymbol = nullptr;
if (MO.isGlobal())
MOSymbol = getSymbol(MO.getGlobal());
@@ -350,6 +464,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
@@ -363,7 +479,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::ADDIStocHA: {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to ADDIS8. If the global address is external, has
// common linkage, is a non-local function address, or is a jump table
@@ -371,7 +487,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
const MachineOperand &MO = MI->getOperand(2);
- assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+ MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
@@ -391,9 +508,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
+ else if (MO.isBlockAddress())
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
- MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
+ MO.isJTI() || MO.isBlockAddress() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -405,19 +525,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::LDtocL: {
// Transform %Xd = LDtocL <ga:@sym>, %Xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD. If the global address is external, has
// common linkage, or is a jump table address, then reference the
// associated TOC entry. Otherwise reference the symbol directly.
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
- assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() ||
+ MO.isBlockAddress()) &&
"Invalid operand for LDtocL!");
MCSymbol *MOSymbol = nullptr;
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isBlockAddress()) {
+ MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
if (TM.getCodeModel() == CodeModel::Large)
@@ -442,7 +567,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::ADDItocL: {
// Transform %Xd = ADDItocL %Xs, <ga:@sym>
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to ADDI8. If the global address is external, then
// generate a TOC entry and reference that. Otherwise reference the
@@ -493,7 +618,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::LDgotTprelL:
case PPC::LDgotTprelL32: {
// Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
// Change the opcode to LD.
TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
@@ -508,6 +633,34 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case PPC::PPC32PICGOT: {
+ MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTRef = OutContext.CreateTempSymbol();
+ MCSymbol *NextInstr = OutContext.CreateTempSymbol();
+
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
+ MCSymbolRefExpr::Create(GOTRef, OutContext),
+ OutContext);
+ OutStreamer.EmitLabel(GOTRef);
+ OutStreamer.EmitValue(OffsExpr, 4);
+ OutStreamer.EmitLabel(NextInstr);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
case PPC::PPC32GOT: {
MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
const MCExpr *SymGotTlsL =
@@ -541,40 +694,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsGD));
return;
}
- case PPC::ADDItlsgdL: {
+ case PPC::ADDItlsgdL:
// Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDItlsgdL32: {
+ // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsgd
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
- OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymGotTlsGD));
- return;
- }
- case PPC::GETtlsADDR: {
- // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
- // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
- StringRef Name = "__tls_get_addr";
- MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
- const MCSymbolRefExpr *TlsRef =
- MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
- const MachineOperand &MO = MI->getOperand(2);
- const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymVar =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO :
+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
- .addExpr(TlsRef)
- .addExpr(SymVar));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
return;
}
case PPC::ADDIStlsldHA: {
@@ -593,72 +731,63 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsLD));
return;
}
- case PPC::ADDItlsldL: {
+ case PPC::ADDItlsldL:
// Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDItlsldL32: {
+ // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsld
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
- OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymGotTlsLD));
- return;
- }
- case PPC::GETtlsldADDR: {
- // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
- // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld)
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
-
- StringRef Name = "__tls_get_addr";
- MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
- const MCSymbolRefExpr *TlsRef =
- MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
- const MachineOperand &MO = MI->getOperand(2);
- const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = getSymbol(GValue);
- const MCExpr *SymVar =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO :
+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS)
- .addExpr(TlsRef)
- .addExpr(SymVar));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
return;
}
- case PPC::ADDISdtprelHA: {
+ case PPC::ADDISdtprelHA:
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDISdtprelHA32: {
+ // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
+ // Into: %Rd = ADDIS %R3, sym@dtprel@ha
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(PPC::X3)
- .addExpr(SymDtprel));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3)
+ .addExpr(SymDtprel));
return;
}
- case PPC::ADDIdtprelL: {
+ case PPC::ADDIdtprelL:
// Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
// Into: %Xd = ADDI8 %Xs, sym@dtprel@l
- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ case PPC::ADDIdtprelL32: {
+ // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@dtprel@l
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8)
- .addReg(MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addExpr(SymDtprel));
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
return;
}
case PPC::MFOCRF:
@@ -713,14 +842,77 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
- LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
EmitToStreamer(OutStreamer, TmpInst);
}
+void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget.isELFv2ABI()) {
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+ if (TS)
+ TS->emitAbiVersion(2);
+ }
+
+ if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
+ if (M.getPICLevel() == PICLevel::Small)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
+ OutStreamer.SwitchSection(OutContext.getELFSection(".got2",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly()));
+
+ MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".LTOC"));
+ MCSymbol *CurrentPos = OutContext.CreateTempSymbol();
+
+ OutStreamer.EmitLabel(CurrentPos);
+
+ // The GOT pointer points to the middle of the GOT, in order to reference the
+ // entire 64kB range. 0x8000 is the midpoint.
+ const MCExpr *tocExpr =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext),
+ MCConstantExpr::Create(0x8000, OutContext),
+ OutContext);
+
+ OutStreamer.EmitAssignment(TOCSym, tocExpr);
+
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
- if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label.
+ // linux/ppc32 - Normal entry label.
+ if (!Subtarget.isPPC64() &&
+ (TM.getRelocationModel() != Reloc::PIC_ ||
+ MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
return AsmPrinter::EmitFunctionEntryLabel();
-
+
+ if (!Subtarget.isPPC64()) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+ if (PPCFI->usesPICBase()) {
+ MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ OutStreamer.EmitLabel(RelocSymbol);
+
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".LTOC")),
+ OutContext),
+ MCSymbolRefExpr::Create(PICBase, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(OffsExpr, 4);
+ OutStreamer.EmitLabel(CurrentFnSym);
+ return;
+ } else
+ return AsmPrinter::EmitFunctionEntryLabel();
+ }
+
+ // ELFv2 ABI - Normal entry label.
+ if (Subtarget.isELFv2ABI())
+ return AsmPrinter::EmitFunctionEntryLabel();
+
// Emit an official procedure descriptor.
MCSectionSubPair Current = OutStreamer.getCurrentSection();
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
@@ -752,15 +944,22 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
bool isPPC64 = TD->getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer());
- if (isPPC64 && !TOC.empty()) {
- const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
+ if (!TOC.empty()) {
+ const MCSectionELF *Section;
+
+ if (isPPC64)
+ Section = OutStreamer.getContext().getELFSection(".toc",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ else
+ Section = OutStreamer.getContext().getELFSection(".got2",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getReadOnly());
OutStreamer.SwitchSection(Section);
@@ -768,8 +967,11 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
- MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
- TS.emitTCEntry(*S);
+ MCSymbol *S = I->first;
+ if (isPPC64)
+ TS.emitTCEntry(*S);
+ else
+ OutStreamer.EmitSymbolValue(S, 4);
}
}
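To make the 64-bit/32-bit split above concrete, a hedged sketch of the assembly each branch produces for one TOC entry (assuming the usual ELF asm-streamer behaviour for emitTCEntry; the symbol names are made up):
// 64-bit (isPPC64): emitTCEntry writes a .tc directive into .toc:
//     .section .toc,"aw"
//   .LC0:
//     .tc foo[TC],foo
//
// 32-bit: the entry is emitted as a plain 4-byte word in .got2:
//     .section .got2,"aw"
//   .LC0:
//     .long foo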
@@ -795,6 +997,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
+/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
+void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+ // In the ELFv2 ABI, in functions that use the TOC register, we need to
+ // provide two entry points. The ABI guarantees that when calling the
+ // local entry point, r2 is set up by the caller to contain the TOC base
+ // for this function, and when calling the global entry point, r12 is set
+ // up by the caller to hold the address of the global entry point. We
+ // thus emit a prefix sequence along the following lines:
+ //
+ // func:
+ // # global entry point
+ // addis r2,r12,(.TOC.-func)@ha
+ // addi r2,r2,(.TOC.-func)@l
+ // .localentry func, .-func
+ // # local entry point, followed by function body
+ //
+ // This ensures we have r2 set up correctly while executing the function
+ // body, no matter which entry point is called.
+ if (Subtarget.isELFv2ABI()
+ // Only do all that if the function uses r2 in the first place.
+ && !MF->getRegInfo().use_empty(PPC::X2)) {
+
+ MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(GlobalEntryLabel);
+ const MCSymbolRefExpr *GlobalEntryLabelExp =
+ MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
+
+ MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
+ GlobalEntryLabelExp, OutContext);
+
+ const MCExpr *TOCDeltaHi =
+ PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
+
+ const MCExpr *TOCDeltaLo =
+ PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
+
+ MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(LocalEntryLabel);
+ const MCSymbolRefExpr *LocalEntryLabelExp =
+ MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
+ const MCExpr *LocalOffsetExp =
+ MCBinaryExpr::CreateSub(LocalEntryLabelExp,
+ GlobalEntryLabelExp, OutContext);
+
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+ if (TS)
+ TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
+ }
+}
+
/// EmitFunctionBodyEnd - Print the traceback table before the .size
/// directive.
///
@@ -886,7 +1150,8 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 =
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
bool isDarwin = Subtarget.isDarwin();
const TargetLoweringObjectFileMachO &TLOFMacho =
@@ -1022,7 +1287,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 =
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index ee90671..41594be 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-branch-select"
@@ -64,7 +65,7 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
const PPCInstrInfo *TII =
- static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+ static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index ec1e34d..5f3b176 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -214,7 +214,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
if (Function *F = CI->getCalledFunction()) {
// Most intrinsics don't become function calls, but some might.
@@ -384,10 +384,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
if (!TM)
return true;
- const TargetLowering *TLI = TM->getTargetLowering();
+ const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();
- if (TLI->supportJumpTables() &&
- SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+ if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
return true;
}
}
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index d48164d..cf8fee4 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -14,9 +14,15 @@
/// CCIfSubtarget - Match if the current subtarget has a feature F.
class CCIfSubtarget<string F, CCAction A>
- : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+ : CCIf<!strconcat("static_cast<const PPCSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
class CCIfNotSubtarget<string F, CCAction A>
- : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+ : CCIf<!strconcat("!static_cast<const PPCSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).",
+ F),
+ A>;
//===----------------------------------------------------------------------===//
// Return Value Calling Convention
@@ -31,13 +37,18 @@ def RetCC_PPC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ // Floating point types returned as "direct" go into F1 .. F8; note that
+ // only the ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
- CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
-
- // Vector types are always returned in V2.
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>,
- CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>>
+ // Vector types returned as "direct" go into V2 .. V9; note that only the
+ // ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
]>;
@@ -69,10 +80,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[i32], CCPromoteToType<i64>>,
CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
- CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>,
- CCIfType<[v2f64, v2i64], CCAssignToReg<[VSH2]>>
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
deleted file mode 100644
index 0875523..0000000
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
-// JIT-compile bitcode to native PowerPC.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPC.h"
-#include "PPCRelocations.h"
-#include "PPCTargetMachine.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
-using namespace llvm;
-
-namespace {
- class PPCCodeEmitter : public MachineFunctionPass {
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- MachineModuleInfo *MMI;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
-
- /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
- /// its address in the function into this pointer.
- void *MovePCtoLROffset;
- public:
-
- PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
-
- MachineRelocation GetRelocation(const MachineOperand &MO,
- unsigned RelocID) const;
-
- /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getAbsDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
-
- unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const;
-
- const char *getPassName() const override {
- return "PowerPC Machine Code Emitter";
- }
-
- /// runOnMachineFunction - emits the given MachineFunction to memory
- ///
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- /// emitBasicBlock - emits the given MachineBasicBlock to memory
- ///
- void emitBasicBlock(MachineBasicBlock &MBB);
- };
-}
-
-char PPCCodeEmitter::ID = 0;
-
-/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
-/// to the specified MCE object.
-FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new PPCCodeEmitter(TM, JCE);
-}
-
-bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
- MF.getTarget().getRelocationModel() != Reloc::Static) &&
- "JIT relocation model must be set to static or default!");
-
- MMI = &getAnalysis<MachineModuleInfo>();
- MCE.setModuleInfo(MMI);
- do {
- MovePCtoLROffset = nullptr;
- MCE.startFunction(MF);
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- emitBasicBlock(*BB);
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
- MCE.StartMachineBasicBlock(&MBB);
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
- const MachineInstr &MI = *I;
- MCE.processDebugLoc(MI.getDebugLoc(), true);
- switch (MI.getOpcode()) {
- default:
- MCE.emitWordBE(getBinaryCodeForInstr(MI));
- break;
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::EH_LABEL:
- MCE.emitLabel(MI.getOperand(0).getMCSymbol());
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- break; // pseudo opcode, no side effects
- case PPC::MovePCtoLR:
- case PPC::MovePCtoLR8:
- assert(TM.getRelocationModel() == Reloc::PIC_);
- MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
- MCE.emitWordBE(0x48000005); // bl 1
- break;
- }
- MCE.processDebugLoc(MI.getDebugLoc(), false);
- }
-}
-
-unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
- MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
- (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
-}
-
-MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
- unsigned RelocID) const {
- // If in PIC mode, we need to encode the negated address of the
- // 'movepctolr' into the unrelocated field. After relocation, we'll have
- // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
- // field, we get &gv. This doesn't happen for branch relocations, which are
- // always implicitly pc relative.
- intptr_t Cst = 0;
- if (TM.getRelocationModel() == Reloc::PIC_) {
- assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
- Cst = -(intptr_t)MovePCtoLROffset - 4;
- }
-
- if (MO.isGlobal())
- return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
- const_cast<GlobalValue *>(MO.getGlobal()),
- Cst, isa<Function>(MO.getGlobal()));
- if (MO.isSymbol())
- return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- RelocID, MO.getSymbolName(), Cst);
- if (MO.isCPI())
- return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- RelocID, MO.getIndex(), Cst);
-
- if (MO.isMBB())
- return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- RelocID, MO.getMBB());
-
- assert(MO.isJTI());
- return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- RelocID, MO.getIndex(), Cst);
-}
-
-unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
-}
-
-unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
-}
-
-unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- unsigned RelocID;
- switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) {
- default: llvm_unreachable("Unsupported target operand flags!");
- case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break;
- case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break;
- }
-
- MCE.addRelocation(GetRelocation(MO, RelocID));
- return 0;
-}
-
-unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // Encode (imm, reg) as a memri, which has the low 16-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
-
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isImm())
- return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
-
- // Add a fixup for the displacement field.
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
- return RegBits;
-}
-
-unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- // Encode (imm, reg) as a memrix, which has the low 14-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
-
- const MachineOperand &MO = MI.getOperand(OpNo);
- if (MO.isImm())
- return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits;
-
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
- return RegBits;
-}
-
-
-unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("TLS not supported on the old JIT.");
- return 0;
-}
-
-unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("TLS not supported on the old JIT.");
- return 0;
-}
-
-unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
-
- if (MO.isReg()) {
- // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
- // The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
- MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
- MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
- }
-
- assert(MO.isImm() &&
- "Relocation required in an instruction that we cannot encode!");
- return MO.getImm();
-}
-
-#include "PPCGenCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 92a0ec1..1149354 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -39,7 +39,7 @@
//===----------------------------------------------------------------------===//
//
// TBD:
-// FastLowerArguments: Handle simple cases.
+// fastLowerArguments: Handle simple cases.
// PPCMaterializeGV: Handle TLS.
// SelectCall: Handle function pointers.
// SelectCall: Handle multi-register return values.
@@ -92,30 +92,29 @@ class PPCFastISel final : public FastISel {
public:
explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
- : FastISel(FuncInfo, LibInfo),
- TM(FuncInfo.MF->getTarget()),
- TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()),
- PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
- Context(&FuncInfo.Fn->getContext()) { }
+ : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
+ TII(*TM.getSubtargetImpl()->getInstrInfo()),
+ TLI(*TM.getSubtargetImpl()->getTargetLowering()),
+ PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
+ Context(&FuncInfo.Fn->getContext()) {}
// Backend specific FastISel code.
private:
- bool TargetSelectInstruction(const Instruction *I) override;
- unsigned TargetMaterializeConstant(const Constant *C) override;
- unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ bool fastSelectInstruction(const Instruction *I) override;
+ unsigned fastMaterializeConstant(const Constant *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
- bool FastLowerArguments() override;
- unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
- unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ bool fastLowerArguments() override;
+ unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
+ unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
- unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
- unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
@@ -153,7 +152,7 @@ class PPCFastISel final : public FastISel {
unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
- unsigned PPCMaterializeInt(const Constant *C, MVT VT);
+ unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
@@ -560,7 +559,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -707,7 +706,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- FastEmitBranch(FBB, DbgLoc);
+ fastEmitBranch(FBB, DbgLoc);
FuncInfo.MBB->addSuccessor(TBB);
return true;
@@ -715,7 +714,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
- FastEmitBranch(Target, DbgLoc);
+ fastEmitBranch(Target, DbgLoc);
return true;
}
@@ -838,7 +837,7 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) {
return false;
// No code is generated for a FP extend.
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, SrcReg);
return true;
}
@@ -860,12 +859,12 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
.addReg(SrcReg);
- UpdateValueMap(I, DestReg);
+ updateValueMap(I, DestReg);
return true;
}
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
@@ -898,10 +897,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
if (SrcVT == MVT::i32) {
if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
- Addr.Offset = 4;
+ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
} else if (PPCSubTarget->hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
- Addr.Offset = 4;
+ Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
}
}
@@ -979,13 +978,13 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(FPReg);
- UpdateValueMap(I, DestReg);
+ updateValueMap(I, DestReg);
return true;
}
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
-// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
@@ -1080,7 +1079,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (IntReg == 0)
return false;
- UpdateValueMap(I, IntReg);
+ updateValueMap(I, IntReg);
return true;
}
@@ -1169,7 +1168,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
ResultReg)
.addReg(SrcReg1)
.addImm(Imm);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -1185,7 +1184,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1200,10 +1199,12 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
unsigned &NumBytes,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
CCInfo.AllocateStack(LinkageSize, 8);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
@@ -1232,6 +1233,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
NumBytes = std::max(NumBytes, LinkageSize + 64);
// Issue CALLSEQ_START.
@@ -1318,7 +1320,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// any real difficulties there.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
CCValAssign &VA = RVLocs[0];
assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
@@ -1364,7 +1366,7 @@ void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
assert(ResultReg && "ResultReg unset!");
UsedRegs.push_back(SourcePhysReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
}
}
@@ -1408,7 +1410,7 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
RetVT != MVT::f64) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
if (RVLocs.size() > 1)
return false;
@@ -1498,6 +1500,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (PPCSubTarget->isELFv2ABI())
+ MIB.addReg(PPC::X2, RegState::Implicit);
+
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
@@ -1531,7 +1537,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
const Value *RV = Ret->getOperand(0);
@@ -1541,13 +1547,23 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
// Special case for returning a constant integer of any size.
// Materialize the constant as an i64 and copy it to the return
- // register. This avoids an unnecessary extend or truncate.
+ // register. We still need to worry about extending the sign correctly: if
+ // the constant is a single bit (i.e. a boolean), we cannot let
+ // PPCMaterializeInt sign-extend it, because negations of the returned value
+ // are implemented as a flip of the least significant bit and would then be
+ // incorrect.
if (isa<ConstantInt>(*RV)) {
const Constant *C = cast<Constant>(RV);
- unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
- unsigned RetReg = ValLocs[0].getLocReg();
+
+ CCValAssign &VA = ValLocs[0];
+
+ unsigned RetReg = VA.getLocReg();
+ unsigned SrcReg = PPCMaterializeInt(C, MVT::i64,
+ VA.getLocInfo() == CCValAssign::SExt);
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+ TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+
RetRegs.push_back(RetReg);
} else {
@@ -1714,7 +1730,7 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
SrcReg = ResultReg;
}
- UpdateValueMap(I, SrcReg);
+ updateValueMap(I, SrcReg);
return true;
}
@@ -1753,13 +1769,13 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
-bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
+bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Load:
@@ -2007,7 +2023,8 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
+ bool UseSExt) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
@@ -2031,7 +2048,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
- .addImm(CI->getSExtValue());
+ .addImm(UseSExt ? CI->getSExtValue() : CI->getZExtValue());
return ImmReg;
}
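A minimal sketch (not part of the patch; the helper names are hypothetical) of why the UseSExt choice matters for the i1 return path handled earlier in SelectRet:
#include <cstdint>
// An i1 'true' materialized with getSExtValue() becomes all-ones, while
// getZExtValue() yields 1; picking the wrong one breaks callers that negate
// the value by flipping only the least significant bit.
constexpr int64_t materializeAsSExt(bool B) { return B ? -1 : 0; }
constexpr int64_t materializeAsZExt(bool B) { return B ?  1 : 0; }
static_assert(materializeAsSExt(true) == -1, "sign-extended i1 true");
static_assert(materializeAsZExt(true) ==  1, "zero-extended i1 true");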
@@ -2048,7 +2065,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
-unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
@@ -2067,7 +2084,7 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
// Materialize the address created by an alloca into a register, and
// return the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -2167,7 +2184,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
-bool PPCFastISel::FastLowerArguments() {
+bool PPCFastISel::fastLowerArguments() {
// Defer to normal argument lowering for now. It's reasonably
// efficient. Consider doing something like ARM to handle the
// case where all args fit in registers, no varargs, no float
@@ -2177,7 +2194,7 @@ bool PPCFastISel::FastLowerArguments() {
// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
-unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
if (Opc != ISD::Constant)
return 0;
@@ -2214,7 +2231,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
// assigning R0 or X0 to the output register for GPRC and G8RC
// register classes, as any such result could be used in ADDI, etc.,
// where those regs have another meaning.
-unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
@@ -2227,27 +2244,27 @@ unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
+ return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
Op0, Op0IsKill, Imm);
}
// Override for instructions with one register operand to avoid use of
// R0/X0. The automatic infrastructure isn't aware of the context so
// we must be conservative.
-unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill) {
const TargetRegisterClass *UseRC =
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+ return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
}
// Override for instructions with two register operands to avoid use
// of R0/X0. The automatic infrastructure isn't aware of the context
// so we must be conservative.
-unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass* RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
@@ -2255,7 +2272,7 @@ unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
- return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+ return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
Op1, Op1IsKill);
}
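Background for the GPRC_NOR0/G8RC_NOX0 substitution in the three overrides above (a sketch from the ISA, not introduced by this patch): in D-form encodings an RA field of 0 means the literal value 0, not the contents of r0, so letting the allocator hand back R0/X0 here could silently change semantics. For example:
//   addi 3, 0, 16    ; RA == 0  =>  r3 = 0 + 16, r0 is never read
//   addi 3, 4, 16    ; RA == 4  =>  r3 = r4 + 16
// Hence results that may feed an ADDI or a memory base operand are constrained
// to the *_NOR0 / *_NOX0 register classes.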
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 65e9cf2..dc87a6c 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -254,7 +254,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// transform this into the appropriate ORI instruction.
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
MachineFunction *MF = MI->getParent()->getParent();
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned UsedRegMask = 0;
@@ -372,7 +372,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
@@ -400,7 +400,8 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Maximum call frame needs to be at least big enough for linkage area.
unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI());
+ Subtarget.isDarwinABI(),
+ Subtarget.isELFv2ABI());
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
@@ -459,9 +460,9 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
bool HasBP = RegInfo->hasBasePointer(MF);
- unsigned BPReg = HasBP ? (unsigned) PPC::R30 : FPReg;
+ unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -497,21 +498,23 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
bool needsFrameMoves = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
assert((isDarwinABI || isSVR4ABI) &&
"Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
@@ -546,7 +549,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
@@ -602,7 +605,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+ isDarwinABI,
+ isPIC);
}
}
@@ -623,6 +628,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
"Prologue CR saving supported only in 64-bit mode");
if (!MustSaveCRs.empty()) { // will only occur for PPC64
+ // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
+ // If only one or two CR fields are clobbered, it could be more
+ // efficient to use mfocrf to selectively save just those fields.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
@@ -791,8 +799,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
+ // the whole CR word. In the ELFv2 ABI, every CR that was
+ // actually saved gets its own CFI record.
+ unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(PPC::CR2, true), 8));
+ nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
@@ -812,9 +824,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no terminator");
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl;
@@ -839,6 +851,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -849,7 +862,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
@@ -890,7 +903,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
BPOffset = FFI->getObjectOffset(BPIndex);
} else {
BPOffset =
- PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+ isDarwinABI,
+ isPIC);
}
}
@@ -1054,7 +1069,7 @@ void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1067,6 +1082,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPSI = FI->getFramePointerSaveIndex();
bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
MachineFrameInfo *MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
@@ -1081,7 +1097,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int BPSI = FI->getBasePointerSaveIndex();
if (!BPSI && RegInfo->hasBasePointer(MF)) {
- int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC);
// Allocate the frame index for the base pointer save area.
BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
// Save the result.
@@ -1185,7 +1201,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
int64_t LowerBound = 0;
@@ -1220,7 +1236,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const PPCRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
if (RegInfo->hasBasePointer(MF)) {
HasGPSaveArea = true;
@@ -1368,7 +1384,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -1430,7 +1446,7 @@ restoreCRs(bool isPPC64, bool is31,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL;
unsigned RestoreOp, MoveReg;
@@ -1463,7 +1479,7 @@ void PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
@@ -1513,7 +1529,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ *static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 7a226f7..c482588 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_FRAMEINFO_H
-#define POWERPC_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
#include "PPC.h"
#include "llvm/ADT/STLExtras.h"
@@ -76,8 +76,8 @@ public:
/// getTOCSaveOffset - Return the previous frame offset to save the
/// TOC register -- 64-bit SVR4 ABI only.
- static unsigned getTOCSaveOffset(void) {
- return 40;
+ static unsigned getTOCSaveOffset(bool isELFv2ABI) {
+ return isELFv2ABI ? 24 : 40;
}
/// getFramePointerSaveOffset - Return the previous frame offset to save the
@@ -97,19 +97,22 @@ public:
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ static unsigned getBasePointerSaveOffset(bool isPPC64,
+ bool isDarwinABI,
+ bool isPIC) {
if (isDarwinABI)
return isPPC64 ? -16U : -8U;
// SVR4 ABI: First slot in the general register save area.
- return isPPC64 ? -16U : -8U;
+ return isPPC64 ? -16U : isPIC ? -12U : -8U;
}
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
- static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI,
+ bool isELFv2ABI) {
if (isDarwinABI || isPPC64)
- return 6 * (isPPC64 ? 8 : 4);
+ return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4);
// SVR4 ABI:
return 8;
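To spell out the numbers the two helpers above now encode (a sketch, not part of the patch; linkageSize is a hypothetical mirror of getLinkageSize, and the layout follows the 64-bit ELF ABI documents):
// ELFv1 linkage area (48 bytes)            ELFv2 linkage area (32 bytes)
//   SP+0   back chain                        SP+0   back chain
//   SP+8   CR save                           SP+8   CR save
//   SP+16  LR save                           SP+16  LR save
//   SP+24  reserved (compiler)               SP+24  TOC save   <- offset 24
//   SP+32  reserved (linker)
//   SP+40  TOC save   <- offset 40
constexpr unsigned linkageSize(bool IsPPC64, bool IsDarwinABI, bool IsELFv2ABI) {
  return (IsDarwinABI || IsPPC64) ? (IsELFv2ABI ? 4u : 6u) * (IsPPC64 ? 8u : 4u)
                                  : 8u; // 32-bit SVR4 keeps its 8-byte area
}
static_assert(linkageSize(true,  false, false) == 48, "64-bit ELFv1");
static_assert(linkageSize(true,  false, true)  == 32, "64-bit ELFv2");
static_assert(linkageSize(false, true,  false) == 24, "32-bit Darwin");
static_assert(linkageSize(false, false, false) ==  8, "32-bit SVR4");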
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 23f76c1..4b50214 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCHAZRECS_H
-#define PPCHAZRECS_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
+#define LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -76,10 +76,10 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
public:
PPCHazardRecognizer970(const ScheduleDAG &DAG);
- virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
- virtual void EmitInstruction(SUnit *SU) override;
- virtual void AdvanceCycle() override;
- virtual void Reset() override;
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void Reset() override;
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 4881b3f..49ba58b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -26,6 +27,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -56,16 +58,16 @@ namespace {
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm),
- PPCLowering(TM.getTargetLowering()),
- PPCSubTarget(TM.getSubtargetImpl()) {
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(TM.getSubtargetImpl()->getTargetLowering()),
+ PPCSubTarget(TM.getSubtargetImpl()) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- PPCLowering = TM.getTargetLowering();
+ PPCLowering = TM.getSubtargetImpl()->getTargetLowering();
PPCSubTarget = TM.getSubtargetImpl();
SelectionDAGISel::runOnMachineFunction(MF);
@@ -232,7 +234,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
@@ -268,16 +270,34 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
- const TargetInstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ const Module *M = MF->getFunction()->getParent();
DebugLoc dl;
if (PPCLowering->getPointerTy() == MVT::i32) {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
- BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
- BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ if (PPCSubTarget->isTargetELF()) {
+ GlobalBaseReg = PPC::R30;
+ if (M->getPICLevel() == PICLevel::Small) {
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, dl,
+ TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
+ .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ }
+ } else {
+ GlobalBaseReg =
+ RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ }
} else {
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
@@ -650,94 +670,105 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
- bool HasVSX) {
- switch (CC) {
- case ISD::SETEQ:
- case ISD::SETUEQ:
- case ISD::SETNE:
- case ISD::SETUNE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPEQUB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPEQUH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPEQUW;
- // v4f32 != v4f32 could be translate to unordered not equal
- else if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPEQDP;
- break;
- case ISD::SETLT:
- case ISD::SETGT:
- case ISD::SETLE:
- case ISD::SETGE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPGTSB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPGTSH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPGTSW;
- else if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGTDP;
- break;
- case ISD::SETULT:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPGTUB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPGTUH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPGTUW;
- break;
- case ISD::SETOEQ:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPEQDP;
- break;
- case ISD::SETOLT:
- case ISD::SETOGT:
- case ISD::SETOLE:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGTDP;
- break;
- case ISD::SETOGE:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGEDP;
- break;
- default:
- break;
- }
- llvm_unreachable("Invalid integer vector compare condition");
-}
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+ bool HasVSX, bool &Swap, bool &Negate) {
+ Swap = false;
+ Negate = false;
-// getVCmpEQInst: return the equal compare instruction for the specified vector
-// type. Since this is for altivec specific code, only support the altivec
-// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
- switch (VecVT) {
- case MVT::v16i8:
- return PPC::VCMPEQUB;
- case MVT::v8i16:
- return PPC::VCMPEQUH;
- case MVT::v4i32:
- return PPC::VCMPEQUW;
- case MVT::v4f32:
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- case MVT::v2f64:
- return PPC::XVCMPEQDP;
- default:
- llvm_unreachable("Invalid integer vector compare condition");
+ if (VecVT.isFloatingPoint()) {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+ case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+ case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid floating-point vector compare condition");
+ } else {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+ case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETUEQ:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPEQUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPEQUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPEQUW;
+ break;
+ case ISD::SETGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTSB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTSH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTSW;
+ break;
+ case ISD::SETUGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTUW;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid integer vector compare condition");
}
}
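// A minimal sketch of the calling protocol for the refactored helper above,
// assuming it is exercised from code in this file where the static helper and
// the PPC/ISD enumerators are visible (hypothetical usage, not part of the
// patch): the caller swaps its operands when Swap is set and feeds the compare
// through vnor/xxlnor when Negate is set.
static void exampleVCmpInstProtocol() {
  // v16i8 SETGE has no direct AltiVec instruction, so the helper rewrites it
  // as NOT(GT) on swapped operands, i.e. vnor(vcmpgtsb(RHS, LHS)).
  bool Swap, Negate;
  unsigned Opc = getVCmpInst(MVT::v16i8, ISD::SETGE, /*HasVSX=*/false,
                             Swap, Negate);
  assert(Opc == PPC::VCMPGTSB && Swap && Negate &&
         "v16i8 SETGE should become a swapped, negated SETGT");
  (void)Opc; (void)Swap; (void)Negate;
}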
@@ -829,60 +860,20 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
- MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
- unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX());
-
- switch (CC) {
- case ISD::SETEQ:
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- case ISD::SETNE:
- case ISD::SETONE:
- case ISD::SETUNE: {
- SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
- PPC::VNOR,
- VecVT, VCmp, VCmp);
- }
- case ISD::SETLT:
- case ISD::SETOLT:
- case ISD::SETULT:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
- case ISD::SETGT:
- case ISD::SETOGT:
- case ISD::SETUGT:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- case ISD::SETGE:
- case ISD::SETOGE:
- case ISD::SETUGE: {
- // Small optimization: Altivec provides a 'Vector Compare Greater Than
- // or Equal To' instruction (vcmpgefp), so in this case there is no
- // need for extra logic for the equal compare.
- if (VecVT.getSimpleVT().isFloatingPoint()) {
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- } else {
- SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
- SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
- PPC::VOR,
- VecVT, VCmpGT, VCmpEQ);
- }
- }
- case ISD::SETLE:
- case ISD::SETOLE:
- case ISD::SETULE: {
- SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
- SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
- PPC::VOR,
- VecVT, VCmpLE, VCmpEQ);
- }
- default:
- llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+ bool Swap, Negate;
+ unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+ PPCSubTarget->hasVSX(), Swap, Negate);
+ if (Swap)
+ std::swap(LHS, RHS);
+
+ if (Negate) {
+ SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+ PPC::VNOR,
+ VecVT, VCmp, VCmp);
}
+
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
}
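// The unordered predicates that used to be marked Expand for v4f32/v2f64 are
// now handled via the Negate path above (e.g. SETULT selects XVCMPGESP or
// VCMPGEFP followed by XXLNOR/VNOR). A stand-alone sanity check, independent
// of LLVM, that the identities relied on (ULT == !OGE, ULE == !OGT) hold in
// the presence of NaNs:
#include <cassert>
#include <cmath>

int main() {
  const float Vals[] = {1.0f, 2.0f, std::nanf("")};
  for (float A : Vals)
    for (float B : Vals) {
      bool OGE = (A >= B);                                  // ordered compares:
      bool OGT = (A > B);                                   // false if any NaN
      bool ULT = std::isnan(A) || std::isnan(B) || A < B;   // unordered-or-lt
      bool ULE = std::isnan(A) || std::isnan(B) || A <= B;  // unordered-or-le
      assert(ULT == !OGE && ULE == !OGT);
    }
  return 0;
}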
if (PPCSubTarget->useCRBits())
@@ -924,6 +915,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return nullptr; // Already selected.
}
+ // In case any misguided DAG-level optimizations form an ADD with a
+ // TargetConstant operand, crash here instead of miscompiling (by selecting
+ // an r+r add instead of some kind of r+i add).
+ if (N->getOpcode() == ISD::ADD &&
+ N->getOperand(1).getOpcode() == ISD::TargetConstant)
+ llvm_unreachable("Invalid ADD with TargetConstant operand");
+
switch (N->getOpcode()) {
default: break;
@@ -1331,7 +1329,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
else if (N->getValueType(0) == MVT::f32)
SelectCCOp = PPC::SELECT_CC_F4;
else if (N->getValueType(0) == MVT::f64)
- SelectCCOp = PPC::SELECT_CC_F8;
+ if (PPCSubTarget->hasVSX())
+ SelectCCOp = PPC::SELECT_CC_VSFRC;
+ else
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else if (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)
+ SelectCCOp = PPC::SELECT_CC_VSRC;
else
SelectCCOp = PPC::SELECT_CC_VRRC;
@@ -1445,11 +1449,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
case PPCISD::TOC_ENTRY: {
- assert (PPCSubTarget->isPPC64() && "Only supported for 64-bit ABI");
+ assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
+ "Only supported for 64-bit ABI and 32-bit SVR4");
+ if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
+ SDValue GA = N->getOperand(0);
+ return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
+ N->getOperand(1));
+ }
// For medium and large code model, we generate two instructions as
// described below. Otherwise we allow SelectCodeCommon to handle this,
- // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
CodeModel::Model CModel = TM.getCodeModel();
if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
@@ -1466,7 +1476,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
- if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+ if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
+ CModel == CodeModel::Large)
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
@@ -1483,6 +1494,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
+ case PPCISD::PPC32_PICGOT: {
+ // Generate a PIC-safe GOT reference.
+ assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
+ "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+ return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32);
+ }
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
// the first operand is odd or even, positive or negative.
@@ -1683,7 +1700,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
- case PPC::SELECT_VRRC: {
+ case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSRC: {
SDValue Op = MachineNode->getOperand(0);
if (Op.isMachineOpcode()) {
if (Op.getMachineOpcode() == PPC::CRSET)
@@ -1989,6 +2008,8 @@ void PPCDAGToDAGISel::PeepholeCROps() {
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSRC:
if (Op1Set)
ResNode = MachineNode->getOperand(1).getNode();
else if (Op1Unset)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bc057bf..e93bdaf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -39,6 +39,10 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+// FIXME: Remove this once soft-float is supported.
+static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
+cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);
+
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -51,19 +55,10 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- // If it isn't a Mach-O file then it's going to be a linux ELF
- // object file.
- if (TT.isOSDarwin())
- return new TargetLoweringObjectFileMachO();
-
- return new PPC64LinuxTargetObjectFile();
-}
-
-PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
+ : TargetLowering(TM),
Subtarget(*TM.getSubtargetImpl()) {
- setPow2DivIsCheap();
+ setPow2SDivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
@@ -453,6 +448,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
@@ -526,11 +523,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
-
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
@@ -561,11 +553,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
-
setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
@@ -617,15 +604,22 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
}
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ if (!isPPC64) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
// Altivec instructions set fields to all zeros or all ones.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ if (!isPPC64) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ }
+
if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
@@ -685,11 +679,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
- if (isPPC64 && Subtarget.isJITCodeModel())
- // Temporary workaround for the inability of PPC64 JIT to handle jump
- // tables.
- setSupportJumpTables(false);
-
setInsertFencesForAtomic(true);
if (Subtarget.enableMachineScheduler())
@@ -782,6 +771,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
+ case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
+ case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
@@ -811,10 +802,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
- case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
- case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
@@ -828,6 +817,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
return VT.changeVectorElementTypeToInteger();
}
+bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
@@ -853,14 +847,27 @@ static bool isConstantOrUndef(int Op, int Val) {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
- if (!isUnary) {
+ bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
for (unsigned i = 0; i != 16; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+j))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
- } else {
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 1;
for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
@@ -871,27 +878,34 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- unsigned j, k;
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
- j = 0;
- k = 1;
- } else {
- j = 2;
- k = 3;
- }
- if (!isUnary) {
+ bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ if (ShuffleKind == 0) {
+ if (IsLE)
+ return false;
for (unsigned i = 0; i != 16; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+k))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
- } else {
+ } else if (ShuffleKind == 2) {
+ if (!IsLE)
+ return false;
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = IsLE ? 0 : 2;
for (unsigned i = 0; i != 8; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+k) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
- !isConstantOrUndef(N->getMaskElt(i+9), i*2+k))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
return false;
}
return true;
@@ -919,38 +933,63 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
- if (!isUnary)
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ else
+ return false;
} else {
- if (!isUnary)
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ else
+ return false;
}
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary, SelectionDAG &DAG) {
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
- if (!isUnary)
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ else
+ return false;
} else {
- if (!isUnary)
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ else
+ return false;
}
}
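// The ShuffleKind parameter threaded through the pack/merge predicates above
// encodes: 0 = big-endian, two distinct inputs; 1 = either endianness, both
// inputs identical (unary); 2 = little-endian, two distinct inputs with the
// operands swapped in PPCInstrAltivec.td. A stand-alone illustration (not LLVM
// code) of the byte-index patterns isVPKUHUMShuffleMask accepts for each kind:
#include <cassert>

int main() {
  int Kind0[16], Kind2[16], Kind1BE[16];
  for (int i = 0; i != 16; ++i) {
    Kind0[i] = i * 2 + 1;          // BE, two inputs: odd bytes <1,3,...,31>
    Kind2[i] = i * 2;              // LE, two inputs (swapped): <0,2,...,30>
    Kind1BE[i] = (i % 8) * 2 + 1;  // unary on BE: low half repeated twice
  }
  assert(Kind0[15] == 31 && Kind2[15] == 30 && Kind1BE[15] == 15);
  return 0;
}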
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
+/// The ShuffleKind distinguishes between big-endian operations with two
+/// different inputs (0), either-endian operations with two identical inputs
+/// (1), and little-endian operations with two different inputs (2). For the
+/// latter, the input operands are swapped (see PPCInstrAltivec.td).
+int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
if (N->getValueType(0) != MVT::v16i8)
return -1;
@@ -968,38 +1007,26 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
- if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
-
- ShiftAmt += i;
-
- if (!isUnary) {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
- return -1;
- } else {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
- return -1;
- }
-
- } else { // Big Endian
+ ShiftAmt -= i;
+ bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
+ isLittleEndian();
+
+ if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else if (ShuffleKind == 1) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ } else
+ return -1;
- ShiftAmt -= i;
+ if (ShuffleKind == 2 && isLE)
+ ShiftAmt = 16 - ShiftAmt;
- if (!isUnary) {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
- return -1;
- } else {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
- return -1;
- }
- }
return ShiftAmt;
}
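// Worked example for the rewritten vsldoi shift-amount logic, assuming the
// first defined mask element is at index 0 (stand-alone C++, not LLVM code):
#include <cassert>

int main() {
  int Mask[16];
  for (int i = 0; i != 16; ++i)
    Mask[i] = 3 + i;                         // <3,4,...,18>

  unsigned ShiftAmt = Mask[0];               // "ShiftAmt -= i" is a no-op at i==0
  for (int i = 1; i != 16; ++i)
    assert(Mask[i] == int(ShiftAmt) + i);    // the "consecutive" check above

  // Matched as ShuffleKind 2 (little-endian, swapped operands), the same mask
  // reports 16 - 3 = 13, per the final adjustment in the function.
  assert(ShiftAmt == 3 && 16 - ShiftAmt == 13);
  return 0;
}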
@@ -1055,7 +1082,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
@@ -1331,7 +1358,13 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
- Base = N.getOperand(0);
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
Disp = DAG.getTargetConstant(imm, N.getValueType());
return true;
}
@@ -1491,10 +1524,9 @@ static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
- // Don't use the pic base if not in PIC relocation model. Or if we are on a
- // non-darwin platform. We don't support PIC on other platforms yet.
- bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
- TM.getSubtarget<PPCSubtarget>().isDarwin();
+ // Don't use the pic base if not in PIC relocation model.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+
if (isPIC) {
HiOpFlags |= PPCII::MO_PIC_FLAG;
LoOpFlags |= PPCII::MO_PIC_FLAG;
@@ -1550,6 +1582,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(CP);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
SDValue CPIHi =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
SDValue CPILo =
@@ -1571,6 +1612,15 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(GA);
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
@@ -1579,8 +1629,16 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
+ BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
+ const BlockAddress *BA = BASDN->getBlockAddress();
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual BlockAddress is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
@@ -1589,6 +1647,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
+// Generate a call to __tls_get_addr for the given GOT entry Op.
+std::pair<SDValue,SDValue>
+PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
+ SelectionDAG &DAG) const {
+
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Op;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::C, IntPtrTy,
+ DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
+ std::move(Args), 0);
+
+ return LowerCallTo(CLI);
+}
+
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
@@ -1601,6 +1680,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
bool is64bit = Subtarget.isPPC64();
+ const Module *M = DAG.getMachineFunction().getFunction()->getParent();
+ PICLevel::Level picLevel = M->getPICLevel();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
@@ -1632,50 +1713,46 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::GeneralDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
- GOTReg, TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLSGD);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ if (picLevel == PICLevel::Small)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
- GOTEntryHi, TGA);
-
- // We need a chain node, and don't have one handy. The underlying
- // call has no side effects, so using the function entry node
- // suffices.
- SDValue Chain = DAG.getEntryNode();
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
- SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
- SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
- PtrVT, ParmReg, TGA);
- // The return value from GET_TLS_ADDR really is in X3 already, but
- // some hacks are needed here to tie everything together. The extra
- // copies dissolve during subsequent transforms.
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
- return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ GOTPtr, TGA);
+ std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+ return CallResult.first;
}
if (Model == TLSModel::LocalDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
- GOTReg, TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLSLD);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ if (picLevel == PICLevel::Small)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
- GOTEntryHi, TGA);
-
- // We need a chain node, and don't have one handy. The underlying
- // call has no side effects, so using the function entry node
- // suffices.
- SDValue Chain = DAG.getEntryNode();
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
- SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
- SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
- PtrVT, ParmReg, TGA);
- // The return value from GET_TLSLD_ADDR really is in X3 already, but
- // some hacks are needed here to tie everything together. The extra
- // copies dissolve during subsequent transforms.
- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ GOTPtr, TGA);
+ std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
+ SDValue TLSAddr = CallResult.first;
+ SDValue Chain = CallResult.second;
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
- Chain, ParmReg, TGA);
+ Chain, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
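// Both dynamic TLS models now funnel through an ordinary call built by
// lowerTLSCall instead of the removed GET_TLS_ADDR/GET_TLSLD_ADDR pseudos and
// the manual X3 copies. Roughly, a general-dynamic access compiled with -fPIC
// ends up as the usual ELF sequence; the relocation spellings below are an
// assumption for illustration, not taken from this patch's tests:
//
//   addis r3, r2, X@got@tlsgd@ha     // PPCISD::ADDIS_TLSGD_HA (64-bit)
//   addi  r3, r3, X@got@tlsgd@l      // PPCISD::ADDI_TLSGD_L
//   bl    __tls_get_addr(X@tlsgd)    // the lowered libcall (CALL_NOP_TLS)
//   nop
//
// Source-level trigger for the sketch above:
thread_local int X;
int *addrOfX() { return &X; }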
@@ -1700,6 +1777,14 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
+ GSDN->getOffset(),
+ PPCII::MO_PIC_FLAG);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+ }
+
SDValue GAHi =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
SDValue GALo =
@@ -1794,7 +1879,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// gpr_index
SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
VAListPtr, MachinePointerInfo(SV), MVT::i8,
- false, false, 0);
+ false, false, false, 0);
InChain = GprIndex.getValue(1);
if (VT == MVT::i64) {
@@ -1817,7 +1902,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// fpr
SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
FprPtr, MachinePointerInfo(SV), MVT::i8,
- false, false, 0);
+ false, false, false, 0);
InChain = FprIndex.getValue(1);
SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
@@ -2127,14 +2212,19 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned ArgSize = ArgVT.getStoreSize();
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
- ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ // Round up to multiples of the pointer size, except for array members,
+ // which are always packed.
+ if (!Flags.isInConsecutiveRegs())
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
return ArgSize;
}
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
-static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
unsigned Align = PtrByteSize;
@@ -2156,14 +2246,78 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
}
}
+ // Array members are always packed to their original alignment.
+ if (Flags.isInConsecutiveRegs()) {
+ // If the array member was split into multiple registers, the first
+ // needs to be aligned to the size of the full type. (Except for
+ // ppcf128, which is only aligned as its f64 components.)
+ if (Flags.isSplit() && OrigVT != MVT::ppcf128)
+ Align = OrigVT.getStoreSize();
+ else
+ Align = ArgVT.getStoreSize();
+ }
+
return Align;
}
+/// CalculateStackSlotUsed - Return whether this argument will use its
+/// stack slot (instead of being passed in registers). ArgOffset,
+/// AvailableFPRs, and AvailableVRs must hold the current argument
+/// position, and will be updated to account for this argument.
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize,
+ unsigned LinkageSize,
+ unsigned ParamAreaSize,
+ unsigned &ArgOffset,
+ unsigned &AvailableFPRs,
+ unsigned &AvailableVRs) {
+ bool UseMemory = false;
+
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ // If there's no space left in the argument save area, we must
+ // use memory (this check also catches zero-sized arguments).
+ if (ArgOffset >= LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // Allocate argument on the stack.
+ ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // If we overran the argument save area, we must use memory
+  // (this check catches arguments passed partially in memory).
+ if (ArgOffset > LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // However, if the argument is actually passed in an FPR or a VR,
+ // we don't use memory after all.
+ if (!Flags.isByVal()) {
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (AvailableFPRs > 0) {
+ --AvailableFPRs;
+ return false;
+ }
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ if (AvailableVRs > 0) {
+ --AvailableVRs;
+ return false;
+ }
+ }
+
+ return UseMemory;
+}
+
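// Worked example of the new ELFv2 decision, with assumed numbers (non-vararg
// function taking two doubles; hypothetical, not from this patch's tests):
// the 32-byte linkage area plus an 8-doubleword parameter save area give a
// 96-byte ceiling, both arguments stay in FPRs, so HasParameterArea remains
// false and only the linkage area needs to be reserved.
#include <cassert>

int main() {
  unsigned LinkageSize = 32, ParamAreaSize = 8 * 8;   // ELFv2, 8 GPR slots
  unsigned ArgOffset = LinkageSize, AvailableFPRs = 13;
  bool HasParameterArea = false;                      // isELFv2ABI && !isVarArg

  for (int i = 0; i != 2; ++i) {
    // Each f64 is 8-byte aligned, never reaches LinkageSize + ParamAreaSize,
    // and an FPR is still free, so CalculateStackSlotUsed returns false.
    ArgOffset += 8;
    --AvailableFPRs;
  }

  assert(!HasParameterArea && ArgOffset == 48 && AvailableFPRs == 11);
  return 0;
}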
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const TargetMachine &Target,
unsigned NumBytes) {
- unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign =
+ Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign - 1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
return NumBytes;
@@ -2240,11 +2394,11 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2315,7 +2469,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -2348,7 +2502,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
- const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+ unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+ if (DisablePPCFloatInVariadic)
+ NumFPArgRegs = 0;
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
NumGPArgRegs));
@@ -2357,7 +2513,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
- NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+ NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
FuncInfo->setVarArgsStackOffset(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
@@ -2399,7 +2555,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
- SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
@@ -2437,6 +2593,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -2448,8 +2605,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
- unsigned ArgOffset = LinkageSize;
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -2471,12 +2628,29 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+ // Do a first pass over the arguments to determine whether the ABI
+ // guarantees that our caller has allocated the parameter save area
+ // on its stack frame. In the ELFv1 ABI, this is always the case;
+ // in the ELFv2 ABI, it is true if this is a vararg function or if
+ // any parameter is located in a stack slot.
+
+ bool HasParameterArea = !isELFv2ABI || isVarArg;
+ unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
+ unsigned NumBytes = LinkageSize;
+ unsigned AvailableFPRs = Num_FPR_Regs;
+ unsigned AvailableVRs = Num_VR_Regs;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i)
+ if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytes, AvailableFPRs, AvailableVRs))
+ HasParameterArea = true;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -2484,6 +2658,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
+ EVT OrigVT = Ins[ArgNo].ArgVT;
unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
@@ -2492,7 +2667,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
/* Respect alignment of argument on the stack. */
unsigned Align =
- CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
+ CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
unsigned CurArgOffset = ArgOffset;
@@ -2520,15 +2695,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
- // All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize < PtrByteSize && !isLittleEndian)
- CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
- // The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ // Create a stack object covering all stack doublewords occupied
+ // by the argument. If the argument is (fully or partially) on
+ // the stack, or if the argument is fully in registers but the
+ // caller has allocated the parameter save anyway, we can refer
+ // directly to the caller's stack frame. Otherwise, create a
+ // local copy in our own frame.
+ int FI;
+ if (HasParameterArea ||
+ ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
+ FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
+ else
+ FI = MFI->CreateStackObject(ArgSize, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(FIN);
- if (ObjSize < 8) {
+ // Handle aggregates smaller than 8 bytes.
+ if (ObjSize < PtrByteSize) {
+ // The value of the object is its address, which differs from the
+ // address of the enclosing doubleword on big-endian systems.
+ SDValue Arg = FIN;
+ if (!isLittleEndian) {
+ SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
+ Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
+ }
+ InVals.push_back(Arg);
+
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -2537,18 +2728,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
- Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
// store the whole register as-is to the parameter save area
- // slot. The address of the parameter was already calculated
- // above (InVals.push_back(FIN)) to be the right-justified
- // offset within the slot. For this store, we need a new
- // frame index that points at the beginning of the slot.
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ // slot.
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(FuncArg),
false, false, 0);
@@ -2562,27 +2748,29 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
+ // The value of the object is its address, which is the address of
+ // its first stack doubleword.
+ InVals.push_back(FIN);
+
+ // Store whatever pieces of the object are in registers to memory.
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
- // Store whatever pieces of the object are in registers
- // to memory. ArgOffset will be the address of the beginning
- // of the object.
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, j),
- false, false, 0);
- MemOps.push_back(Store);
- ++GPR_idx;
- ArgOffset += PtrByteSize;
- } else {
- ArgOffset += ArgSize - j;
+ if (GPR_idx == Num_GPR_Regs)
break;
+
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Addr = FIN;
+ if (j) {
+ SDValue Off = DAG.getConstant(j, PtrVT);
+ Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(FuncArg, j),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
}
+ ArgOffset += ArgSize;
continue;
}
@@ -2591,6 +2779,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::i1:
case MVT::i32:
case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
@@ -2608,6 +2799,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::f32:
case MVT::f64:
+ // These can be scalar arguments or elements of a float array type
+      // passed directly. The latter are used to implement ELFv2 homogeneous
+ // float aggregates.
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
@@ -2620,12 +2814,32 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
+ } else if (GPR_idx != Num_GPR_Regs) {
+ // This can only ever happen in the presence of f32 array types,
+ // since otherwise we never run out of FPRs before running out
+ // of GPRs.
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::f32) {
+ if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
+ ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
needsLoad = true;
- ArgSize = PtrByteSize;
}
- ArgOffset += 8;
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
break;
case MVT::v4f32:
case MVT::v4i32:
@@ -2633,6 +2847,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+      // passed directly. The latter are used to implement ELFv2 homogeneous
+ // vector aggregates.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
@@ -2662,7 +2879,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea;
- MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+ if (HasParameterArea)
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+ else
+ MinReservedArea = LinkageSize;
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
@@ -2723,7 +2943,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
@@ -2849,7 +3070,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -3336,6 +3557,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -3352,42 +3574,41 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
}
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
- // Use indirect calls for ALL functions calls in JIT mode, since the
- // far-call stubs may be outside relocation limits for a BL instruction.
- if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
- unsigned OpFlags = 0;
- if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (Subtarget.getTargetTriple().isMacOSX() &&
- Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
- (G->getGlobal()->isDeclaration() ||
- G->getGlobal()->isWeakForLinker())) {
- // PC-relative references to external symbols should go through $stub,
- // unless we're building with the leopard linker or later, which
- // automatically synthesizes these stubs.
- OpFlags = PPCII::MO_DARWIN_STUB;
- }
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
- // every direct call is) turn it into a TargetGlobalAddress /
- // TargetExternalSymbol node so that legalize doesn't hack it.
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType(),
- 0, OpFlags);
- needIndirectCall = false;
+ unsigned OpFlags = 0;
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ !G->getGlobal()->hasLocalLinkage() &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(), 0, OpFlags);
+ needIndirectCall = false;
}
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (Subtarget.getTargetTriple().isMacOSX() &&
- Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
- OpFlags = PPCII::MO_DARWIN_STUB;
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
@@ -3400,7 +3621,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
- if (isSVR4ABI && isPPC64) {
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
@@ -3480,7 +3701,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -3491,6 +3712,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
+
+ // If this is a call to __tls_get_addr, find the symbol whose address
+ // is to be taken and add it to the list. This will be used to
+ // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
+ // We find the symbol by walking the chain to the CopyFromReg, walking
+ // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
+ // pulling the symbol from that node.
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
+ assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
+ SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
+ SDValue TGTAddr = AddI->getOperand(1);
+ assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
+ "Didn't find target global TLS address where we expected one");
+ Ops.push_back(TGTAddr);
+ CallOpc = PPCISD::CALL_TLS;
+ }
}
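  // Sketch of what the walk just above extracts (assumed node shapes): operand
  // 2 of the chain node is the value copied into X3/R3, i.e. the
  // ADDI_TLSGD_L / ADDI_TLSLD_L GOT-entry computation, and its operand 1 is
  // the TargetGlobalTLSAddress that becomes the extra CALL_TLS operand used
  // when the @tlsgd/@tlsld relocation is emitted on the call.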
// If this is a tail call add stack pointer delta.
if (isTailCall)
@@ -3502,6 +3740,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (Callee.getNode() && isELFv2ABI)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
return CallOpc;
}
@@ -3522,8 +3764,8 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
@@ -3571,6 +3813,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals) const {
+
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
@@ -3589,7 +3833,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3636,7 +3881,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
- }
+ } else if (CallOpc == PPCISD::CALL_TLS)
+ // For 64-bit SVR4, TLS calls are always non-local.
+ CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
@@ -3646,7 +3893,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
@@ -3735,11 +3982,12 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+ PtrByteSize);
if (isVarArg) {
// Handle fixed and variable vector arguments differently.
@@ -3776,7 +4024,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ByValArgLocs, *DAG.getContext());
+ ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
@@ -3948,6 +4196,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
@@ -3966,21 +4215,27 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with at least 48 bytes, which
- // is reserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
+ // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
+ // area is 32 bytes reserved space for [SP][CR][LR][TOC].
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
unsigned NumBytes = LinkageSize;
// Add up all the space actually used.
for (unsigned i = 0; i != NumOps; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
/* Respect alignment of argument on the stack. */
- unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
NumBytes = ((NumBytes + Align - 1) / Align) * Align;
NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
}
unsigned NumBytesActuallyUsed = NumBytes;
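To make the getLinkageSize() change concrete: ELFv1 reserves six doublewords, ELFv2 drops the two unused words. A minimal hedged sketch of the 64-bit SVR4 computation (hypothetical helper, not the actual PPCFrameLowering code):

// Hypothetical helper, for illustration only; the real logic lives in
// PPCFrameLowering::getLinkageSize(IsPPC64, IsDarwin, IsELFv2ABI).
static unsigned linkageSize64SVR4(bool IsELFv2ABI) {
  // ELFv1: [SP][CR][LR][2 x unused][TOC] = 6 doublewords = 48 bytes.
  // ELFv2: [SP][CR][LR][TOC]             = 4 doublewords = 32 bytes.
  return (IsELFv2ABI ? 4 : 6) * 8;
}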
@@ -3990,6 +4245,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
// Tail call needs the stack to be aligned.
@@ -4056,10 +4312,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
/* Respect alignment of argument on the stack. */
unsigned Align =
- CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
/* Compute GPR index associated with argument offset. */
@@ -4103,7 +4361,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
@@ -4199,6 +4457,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::i1:
case MVT::i32:
case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
} else {
@@ -4209,39 +4470,70 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
break;
case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
+ case MVT::f64: {
+ // These can be scalar arguments or elements of a float array type
+      // passed directly. The latter are used to implement ELFv2 homogeneous
+ // float aggregates.
+
+ // Named arguments go into FPRs first, and once they overflow, the
+ // remaining arguments go into GPRs and then the parameter save area.
+ // Unnamed arguments for vararg functions always go to GPRs and
+      // then the parameter save area. For now, we always put arguments for
+      // vararg routines in both locations (FPR *and* GPR or stack slot).
+ bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+
+ // First load the argument into the next available FPR.
+ if (FPR_idx != NumFPRs)
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
- if (isVarArg) {
- // A single float or an aggregate containing only a single float
- // must be passed right-justified in the stack doubleword, and
- // in the GPR, if one is available.
- SDValue StoreOff;
- if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
- !isLittleEndian) {
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
- StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- } else
- StoreOff = PtrOff;
-
- SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
- MachinePointerInfo(), false, false, 0);
- MemOpChains.push_back(Store);
-
- // Float varargs are always shadowed in available integer registers
- if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
- MachinePointerInfo(), false, false,
- false, 0);
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
- }
- }
+ // Next, load the argument into GPR or stack slot if needed.
+ if (!NeedGPROrStack)
+ ;
+ else if (GPR_idx != NumGPRs) {
+ // In the non-vararg case, this can only ever happen in the
+ // presence of f32 array types, since otherwise we never run
+ // out of FPRs before running out of GPRs.
+ SDValue ArgVal;
+
+ // Double values are always passed in a single GPR.
+ if (Arg.getValueType() != MVT::f32) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+
+ // Non-array float values are extended and passed in a GPR.
+ } else if (!Flags.isInConsecutiveRegs()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+
+ // If we have an array of floats, we collect every odd element
+ // together with its predecessor into one GPR.
+ } else if (ArgOffset % PtrByteSize != 0) {
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (!isLittleEndian)
+ std::swap(Lo, Hi);
+ ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+
+ // The final element, if even, goes into the first half of a GPR.
+ } else if (Flags.isInConsecutiveRegsLast()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+ if (!isLittleEndian)
+ ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+
+ // Non-final even elements are skipped; they will be handled
+          // together with the subsequent argument on the next go-around.
+ } else
+ ArgVal = SDValue();
+
+ if (ArgVal.getNode())
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
} else {
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
- if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
+ if (Arg.getValueType() == MVT::f32 &&
+ !isLittleEndian && !Flags.isInConsecutiveRegs()) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
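As a hedged illustration (standalone sketch, not SelectionDAG code) of how two consecutive f32 array elements end up sharing one GPR in the code above: the element at the even offset and its odd successor are combined, with the halves swapped on big-endian exactly as the std::swap(Lo, Hi) above does.

#include <cstdint>
#include <cstring>
#include <utility>

// Returns the 64-bit GPR image holding two adjacent f32 array elements.
// Prev is the element at the even (doubleword-aligned) offset, Cur its odd
// successor. Mirrors the BITCAST / std::swap / BUILD_PAIR sequence above.
static uint64_t packFloatPair(float Prev, float Cur, bool IsLittleEndian) {
  uint32_t Lo, Hi;
  std::memcpy(&Lo, &Prev, sizeof(Lo));
  std::memcpy(&Hi, &Cur, sizeof(Hi));
  if (!IsLittleEndian)
    std::swap(Lo, Hi);              // big-endian: earlier element in the high half
  return (uint64_t(Hi) << 32) | Lo; // BUILD_PAIR(Lo, Hi)
}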
@@ -4250,14 +4542,25 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
- ArgOffset += 8;
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
break;
+ }
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+      // passed directly. The latter are used to implement ELFv2 homogeneous
+ // vector aggregates.
+
// For a varargs call, named arguments go into VRs or on the stack as
// usual; unnamed arguments always go to the stack or the corresponding
// GPRs when within range. For now, we always put the value in both
@@ -4328,11 +4631,16 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
+ // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
+ // This does not mean the MTCTR instruction must use R12; it's easier
+ // to model this as an extra parameter, so do that.
+ if (isELFv2ABI)
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -4383,7 +4691,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
unsigned NumBytes = LinkageSize;
// Add up all the space actually used.
@@ -4522,7 +4831,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -4751,8 +5060,7 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
- RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_PPC);
}
@@ -4764,8 +5072,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
SDLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
SDValue Flag;
@@ -5773,15 +6081,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
- PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
- PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
- PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) {
+ PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
return Op;
}
}
@@ -5789,15 +6097,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
- if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
- PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
- PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG))
+ unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
+ if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
return Op;
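The boolean isUnary parameter has become an unsigned ShuffleKind here (and in the PatFrags in PPCInstrAltivec.td further down). Judging from the call sites in this patch, the encoding is:

//   0 - two-input shuffle, operands in their natural (big-endian) order
//   1 - unary shuffle: both inputs are the same vector
//   2 - two-input shuffle whose operands must be swapped (little-endian lowering)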
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
@@ -6252,11 +6561,44 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
// Other Lowering Code
//===----------------------------------------------------------------------===//
+static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Function *Func = Intrinsic::getDeclaration(M, Id);
+ return Builder.CreateCall(Func);
+}
+
+// The mappings for emitLeading/TrailingFence are taken from
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (Ord == SequentiallyConsistent)
+ return callIntrinsic(Builder, Intrinsic::ppc_sync);
+ else if (isAtLeastRelease(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ else
+ return nullptr;
+}
+
+Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord, bool IsStore,
+ bool IsLoad) const {
+ if (IsLoad && isAtLeastAcquire(Ord))
+ return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+ // FIXME: this is too conservative, a dependent branch + isync is enough.
+ // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+ // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+ // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ else
+ return nullptr;
+}
+
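A self-contained hedged sketch of the ordering-to-fence mapping the two hooks above implement (a local enum is used here to stay independent of LLVM headers; the strings name the PPC instructions the intrinsics expand to):

enum class Ordering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };

// Fence emitted before the atomic access (cf. emitLeadingFence above).
static const char *leadingFence(Ordering O) {
  if (O == Ordering::SeqCst)
    return "sync";
  if (O == Ordering::Release || O == Ordering::AcquireRelease)
    return "lwsync";
  return nullptr; // no leading fence needed
}

// Fence emitted after the atomic access (cf. emitTrailingFence above); only
// loads need one, and the lwsync is conservative per the FIXME.
static const char *trailingFence(Ordering O, bool IsLoad) {
  if (IsLoad && (O == Ordering::Acquire || O == Ordering::AcquireRelease ||
                 O == Ordering::SeqCst))
    return "lwsync";
  return nullptr;
}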
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
bool is64bit, unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -6318,7 +6660,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
bool is8bit, // operation
unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
@@ -6446,7 +6789,8 @@ llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -6545,7 +6889,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
const PPCRegisterInfo *TRI =
- static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+ getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -6594,7 +6938,8 @@ MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -6613,7 +6958,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
- unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 :
+ (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
+ PPC::R29 : PPC::R30);
MachineInstrBuilder MIB;
@@ -6703,7 +7051,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return emitEHSjLjLongJmp(MI, BB);
}
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
@@ -6726,7 +7075,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII =
+ getTargetMachine().getSubtargetImpl()->getInstrInfo();
TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
Cond, MI->getOperand(2).getReg(),
MI->getOperand(3).getReg());
@@ -6735,11 +7085,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_VSRC ||
MI->getOpcode() == PPC::SELECT_I4 ||
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
- MI->getOpcode() == PPC::SELECT_VRRC) {
+ MI->getOpcode() == PPC::SELECT_VRRC ||
+ MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
@@ -6770,7 +7124,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
- MI->getOpcode() == PPC::SELECT_VRRC) {
+ MI->getOpcode() == PPC::SELECT_VRRC ||
+ MI->getOpcode() == PPC::SELECT_VSFRC ||
+ MI->getOpcode() == PPC::SELECT_VSRC) {
BuildMI(BB, dl, TII->get(PPC::BC))
.addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
} else {
@@ -7131,151 +7487,54 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
-SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
- DAGCombinerInfo &DCI) const {
- if (DCI.isAfterLegalizeVectorOps())
- return SDValue();
-
- EVT VT = Op.getValueType();
-
- if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
- (VT == MVT::f64 && Subtarget.hasFRE()) ||
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ EVT VT = Operand.getValueType();
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal, we need to find the zero of the function:
- // F(X) = A X - 1 [which has a zero at X = 1/A]
- // =>
- // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
- // does not require additional intermediate precision]
-
// Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. The minimum architected relative
- // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
- // 23 digits and double has 52 digits.
- int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+ // correct after every iteration. For both FRE and FRSQRTE, the minimum
+ // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+ // 2^-14. IEEE float has 23 digits and double has 52 digits.
+ RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- ++Iterations;
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(Op);
-
- SDValue FPOne =
- DAG.getConstantFP(1.0, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 &&
- "Unknown vector type");
- FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- FPOne, FPOne, FPOne, FPOne);
- }
-
- SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
- DCI.AddToWorklist(Est.getNode());
-
- // Newton iterations: Est = Est + Est (1 - Arg * Est)
- for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
- DCI.AddToWorklist(Est.getNode());
- }
-
- return Est;
+ ++RefinementSteps;
+ UseOneConstNR = true;
+ return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
-
return SDValue();
}
-SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
- DAGCombinerInfo &DCI) const {
- if (DCI.isAfterLegalizeVectorOps())
- return SDValue();
-
- EVT VT = Op.getValueType();
-
- if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ EVT VT = Operand.getValueType();
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal sqrt, we need to find the zero of the function:
- // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
- // =>
- // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
- // As a result, we precompute A/2 prior to the iteration loop.
-
// Convergence is quadratic, so we essentially double the number of digits
- // correct after every iteration. The minimum architected relative
- // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
- // 23 digits and double has 52 digits.
- int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+ // correct after every iteration. For both FRE and FRSQRTE, the minimum
+ // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+ // 2^-14. IEEE float has 23 digits and double has 52 digits.
+ RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- ++Iterations;
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(Op);
-
- SDValue FPThreeHalves =
- DAG.getConstantFP(1.5, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 &&
- "Unknown vector type");
- FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- FPThreeHalves, FPThreeHalves,
- FPThreeHalves, FPThreeHalves);
- }
-
- SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
- DCI.AddToWorklist(Est.getNode());
-
- // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
- // this entire sequence requires only one FP constant.
- SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
- DCI.AddToWorklist(HalfArg.getNode());
-
- HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
- DCI.AddToWorklist(HalfArg.getNode());
-
- // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
- for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
- DCI.AddToWorklist(NewEst.getNode());
-
- Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
- DCI.AddToWorklist(Est.getNode());
- }
-
- return Est;
+ ++RefinementSteps;
+ return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
-
return SDValue();
}
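To make the "convergence is quadratic" comments above concrete: if the initial estimate has relative error e, one Newton-Raphson step leaves an error on the order of e^2, so k refinement steps give roughly e^(2^k). A hedged back-of-the-envelope check of the step counts chosen above (numbers computed here, not quoted from the patch):

  e = 2^-5  (no hasRecipPrec): 3 steps -> ~2^-40, enough for float's 24-bit
            significand; the extra f64 step gives 4 -> ~2^-80 for double's 53 bits.
  e = 2^-14 (hasRecipPrec):    1 step  -> ~2^-28 for float; 2 steps -> ~2^-56
            for double.

This is consistent with RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3, plus one extra step for f64.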
-// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
-// not enforce equality of the chain operands.
-static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
unsigned Bytes, int Dist,
SelectionDAG &DAG) {
- EVT VT = LS->getMemoryVT();
if (VT.getSizeInBits() / 8 != Bytes)
return false;
- SDValue Loc = LS->getBasePtr();
SDValue BaseLoc = Base->getBasePtr();
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
@@ -7306,11 +7565,77 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
return false;
}
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
+ EVT VT = LS->getMemoryVT();
+ SDValue Loc = LS->getBasePtr();
+ return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
+ }
+
+ if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ EVT VT;
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default: return false;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_vsx_lxvw4x:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_lxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_altivec_lvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_lvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_lvewx:
+ VT = MVT::i32;
+ break;
+ }
+
+ return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
+ }
+
+ if (N->getOpcode() == ISD::INTRINSIC_VOID) {
+ EVT VT;
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default: return false;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_vsx_stxvw4x:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_altivec_stvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_stvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_stvewx:
+ VT = MVT::i32;
+ break;
+ }
+
+ return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
+ }
+
+ return false;
+}
+
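A note on the operand indices used above, inferred from the node layouts rather than stated in the patch:

//   INTRINSIC_W_CHAIN (lvx, lxvd2x, ...):   { Chain, IntrinsicID, Ptr }        -> Ptr is getOperand(2)
//   INTRINSIC_VOID    (stvx, stxvd2x, ...): { Chain, IntrinsicID, Value, Ptr } -> Ptr is getOperand(3)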
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
-// token factors and other loads (but nothing else). As a result, a true
-// results indicates that it is safe to create a new consecutive load adjacent
-// to the load provided.
+// token factors and other loads (but nothing else). As a result, a true result
+// indicates that it is safe to create a new consecutive load adjacent to the
+// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
SDValue Chain = LD->getChain();
EVT VT = LD->getMemoryVT();
@@ -7324,10 +7649,10 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
// nodes just above the top-level loads and token factors.
while (!Queue.empty()) {
SDNode *ChainNext = Queue.pop_back_val();
- if (!Visited.insert(ChainNext))
+ if (!Visited.insert(ChainNext).second)
continue;
- if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+ if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
@@ -7355,17 +7680,17 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
while (!Queue.empty()) {
SDNode *LoadRoot = Queue.pop_back_val();
- if (!Visited.insert(LoadRoot))
+ if (!Visited.insert(LoadRoot).second)
continue;
- if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+ if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
for (SDNode::use_iterator UI = LoadRoot->use_begin(),
UE = LoadRoot->use_end(); UI != UE; ++UI)
- if (((isa<LoadSDNode>(*UI) &&
- cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ if (((isa<MemSDNode>(*UI) &&
+ cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
Queue.push_back(*UI);
}
@@ -7485,7 +7810,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
SDValue BinOp = BinOps.back();
BinOps.pop_back();
- if (!Visited.insert(BinOp.getNode()))
+ if (!Visited.insert(BinOp.getNode()).second)
continue;
PromOps.push_back(BinOp);
@@ -7699,7 +8024,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
SDValue BinOp = BinOps.back();
BinOps.pop_back();
- if (!Visited.insert(BinOp.getNode()))
+ if (!Visited.insert(BinOp.getNode()).second)
continue;
PromOps.push_back(BinOp);
@@ -7936,92 +8261,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETCC:
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
- case ISD::FDIV: {
- assert(TM.Options.UnsafeFPMath &&
- "Reciprocal estimates require UnsafeFPMath");
-
- if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
- N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
- DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
- N->getValueType(0), RV);
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
- N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
- SDValue RV =
- DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
- DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
- N->getValueType(0), RV,
- N->getOperand(1).getOperand(1));
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
- }
-
- SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
- N->getOperand(0), RV);
- }
-
- }
- break;
- case ISD::FSQRT: {
- assert(TM.Options.UnsafeFPMath &&
- "Reciprocal estimates require UnsafeFPMath");
-
- // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
- // reciprocal sqrt.
- SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
- if (RV.getNode()) {
- DCI.AddToWorklist(RV.getNode());
- RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode()) {
- // Unfortunately, RV is now NaN if the input was exactly 0. Select out
- // this case and force the answer to 0.
-
- EVT VT = RV.getValueType();
-
- SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
- if (VT.isVector()) {
- assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
- Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
- }
-
- SDValue ZeroCmp =
- DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
- N->getOperand(0), Zero, ISD::SETEQ);
- DCI.AddToWorklist(ZeroCmp.getNode());
- DCI.AddToWorklist(RV.getNode());
-
- RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
- ZeroCmp, Zero, RV);
- return RV;
- }
- }
-
- }
- break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
@@ -8112,6 +8351,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ // P8 and later hardware should just use LOAD.
+ !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
(VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
@@ -8149,17 +8390,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Intrinsic::ppc_altivec_lvsl);
SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
- // Refine the alignment of the original load (a "new" load created here
- // which was identical to the first except for the alignment would be
- // merged with the existing node regardless).
+ // Create the new MMO for the new base load. It is like the original MMO,
+ // but represents an area in memory almost twice the vector size centered
+ // on the original address. If the address is unaligned, we might start
+ // reading up to (sizeof(vector)-1) bytes below the address of the
+ // original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(LD->getPointerInfo(),
- LD->getMemOperand()->getFlags(),
- LD->getMemoryVT().getStoreSize(),
- ABIAlignment);
- LD->refineAlignment(MMO);
- SDValue BaseLoad = SDValue(LD, 0);
+ MachineMemOperand *BaseMMO =
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ -LD->getMemoryVT().getStoreSize()+1,
+ 2*LD->getMemoryVT().getStoreSize()-1);
+
+ // Create the new base load.
+ SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
+ getPointerTy());
+ SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
+ SDValue BaseLoad =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(MVT::v4i32, MVT::Other),
+ BaseLoadOps, MVT::v4i32, BaseMMO);
// Note that the value of IncOffset (which is provided to the next
// load's pointer info offset value, and thus used to calculate the
@@ -8181,21 +8430,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ MachineMemOperand *ExtraMMO =
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ 1, 2*LD->getMemoryVT().getStoreSize()-1);
+ SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue ExtraLoad =
- DAG.getLoad(VT, dl, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(IncOffset),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), ABIAlignment);
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
+ DAG.getVTList(MVT::v4i32, MVT::Other),
+ ExtraLoadOps, MVT::v4i32, ExtraMMO);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
BaseLoad.getValue(1), ExtraLoad.getValue(1));
- if (BaseLoad.getValueType() != MVT::v4i32)
- BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
-
- if (ExtraLoad.getValueType() != MVT::v4i32)
- ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
-
// Because vperm has a big-endian bias, we must reverse the order
// of the input vectors and complement the permute control vector
// when generating little endian code. We have already handled the
@@ -8212,36 +8458,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
- // Now we need to be really careful about how we update the users of the
- // original load. We cannot just call DCI.CombineTo (or
- // DAG.ReplaceAllUsesWith for that matter), because the load still has
- // uses created here (the permutation for example) that need to stay.
- SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- while (UI != UE) {
- SDUse &Use = UI.getUse();
- SDNode *User = *UI;
- // Note: BaseLoad is checked here because it might not be N, but a
- // bitcast of N.
- if (User == Perm.getNode() || User == BaseLoad.getNode() ||
- User == TF.getNode() || Use.getResNo() > 1) {
- ++UI;
- continue;
- }
-
- SDValue To = Use.getResNo() ? TF : Perm;
- ++UI;
-
- SmallVector<SDValue, 8> Ops;
- for (const SDUse &O : User->ops()) {
- if (O == Use)
- Ops.push_back(To);
- else
- Ops.push_back(O);
- }
-
- DAG.UpdateNodeOperands(User, Ops);
- }
-
+ // The output of the permutation is our loaded result, the TokenFactor is
+ // our new chain.
+ DCI.CombineTo(N, Perm, TF);
return SDValue(N, 0);
}
}
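For reference, the rewritten combine above builds the classic Altivec unaligned-load idiom directly out of lvx memory intrinsics (sketched here from the surrounding code, not quoted from the patch; on little endian the operand order and the permute control are adjusted as the earlier comment notes):

//   lvsl  vPerm, 0, rAddr          permute control derived from the misalignment
//   lvx   v0,    0, rAddr          aligned block containing the first wanted byte
//   lvx   v1, rInc, rAddr          the following aligned block (offset as computed above)
//   vperm vResult, v0, v1, vPerm   select the 16 bytes actually requested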
@@ -8659,7 +8878,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
@@ -8872,6 +9092,92 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
+bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::ppc_altivec_lvebx:
+ case Intrinsic::ppc_altivec_lvehx:
+ case Intrinsic::ppc_altivec_lvewx:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvw4x: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_lvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_lvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_lvewx:
+ VT = MVT::i32;
+ break;
+ case Intrinsic::ppc_vsx_lxvd2x:
+ VT = MVT::v2f64;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = -VT.getStoreSize()+1;
+ Info.size = 2*VT.getStoreSize()-1;
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ case Intrinsic::ppc_altivec_stvebx:
+ case Intrinsic::ppc_altivec_stvehx:
+ case Intrinsic::ppc_altivec_stvewx:
+ case Intrinsic::ppc_vsx_stxvd2x:
+ case Intrinsic::ppc_vsx_stxvw4x: {
+ EVT VT;
+ switch (Intrinsic) {
+ case Intrinsic::ppc_altivec_stvebx:
+ VT = MVT::i8;
+ break;
+ case Intrinsic::ppc_altivec_stvehx:
+ VT = MVT::i16;
+ break;
+ case Intrinsic::ppc_altivec_stvewx:
+ VT = MVT::i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x:
+ VT = MVT::v2f64;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = -VT.getStoreSize()+1;
+ Info.size = 2*VT.getStoreSize()-1;
+ Info.align = 1;
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
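The offset/size pair set above describes the bytes an lvx/stvx-style access may touch. Worked numbers for a 16-byte type such as v4i32 (computed here for illustration):

//   VT.getStoreSize() == 16
//   Info.offset = -16 + 1 = -15
//   Info.size   = 2*16 - 1 = 31
// so the memory operand covers [addr-15, addr+15]; because these instructions
// ignore the low four address bits, the aligned 16-byte block they actually
// access always lies inside that window.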
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
@@ -8931,9 +9237,10 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
-bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
if (DisablePPCUnaligned)
return false;
@@ -8948,7 +9255,8 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
- if (VT != MVT::v2f64 && VT != MVT::v2i64)
+ if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
+ VT != MVT::v4f32 && VT != MVT::v4i32)
return false;
} else {
return false;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index df05aa5..bb4d1f1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
-#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
+#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#include "PPC.h"
#include "PPCInstrInfo.h"
@@ -99,6 +99,10 @@ namespace llvm {
/// SVR4 calls.
CALL, CALL_NOP,
+ /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
+ /// to access TLS variables.
+ CALL_TLS, CALL_NOP_TLS,
+
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
@@ -181,6 +185,10 @@ namespace llvm {
/// on PPC32.
PPC32_GOT,
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+ /// local dynamic TLS on PPC32.
+ PPC32_PICGOT,
+
/// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
/// TLS model, produces an ADDIS8 instruction that adds the GOT
/// base to sym\@got\@tprel\@ha.
@@ -210,10 +218,6 @@ namespace llvm {
/// sym\@got\@tlsgd\@l.
ADDI_TLSGD_L,
- /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsgd).
- GET_TLS_ADDR,
-
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
@@ -224,10 +228,6 @@ namespace llvm {
/// sym\@got\@tlsld\@l.
ADDI_TLSLD_L,
- /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym\@tlsld).
- GET_TLSLD_ADDR,
-
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
/// local-dynamic TLS model, produces an ADDIS8 instruction
/// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
@@ -297,27 +297,28 @@ namespace llvm {
namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
- bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary, SelectionDAG &DAG);
+ unsigned ShuffleKind, SelectionDAG &DAG);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary, SelectionDAG &DAG);
+ unsigned ShuffleKind, SelectionDAG &DAG);
- /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
- /// amount, otherwise return -1.
- int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG);
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
+ /// shift amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
@@ -344,7 +345,7 @@ namespace llvm {
const PPCSubtarget &Subtarget;
public:
- explicit PPCTargetLowering(PPCTargetMachine &TM);
+ explicit PPCTargetLowering(const PPCTargetMachine &TM);
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
@@ -355,6 +356,11 @@ namespace llvm {
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+    /// Return true if the target always benefits from combining into FMA for a
+ /// given value type. This must typically return false on targets where FMA
+ /// takes more cycles to execute than FADD.
+ bool enableAggressiveFMAFusion(EVT VT) const override;
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -403,6 +409,11 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+ Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+ bool IsStore, bool IsLoad) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
@@ -472,6 +483,10 @@ namespace llvm {
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const override;
+
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
@@ -490,9 +505,10 @@ namespace llvm {
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
- bool allowsUnalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align = 1,
+ bool *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -510,6 +526,20 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const override;
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
+ // We support any array type as "consecutive" block in the parameter
+ // save area. The element type defines the alignment requirement and
+ // whether the argument should go in GPRs, FPRs, or VRs if available.
+ //
+ // Note that clang uses this capability both to implement the ELFv2
+ // homogeneous float/vector aggregate ABI, and to avoid having to use
+ // "byval" when passing aggregates that might fully fit in registers.
+ return Ty->isArrayTy();
+ }
+
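A hedged source-level illustration (not part of the patch) of what returning true for array types enables: an ELFv2 homogeneous float aggregate can be passed as a float array in consecutive FPRs instead of being forced to memory with "byval".

struct Vec3 { float x, y, z; };

// Under the ELFv2 ABI, a is expected to arrive in f1-f3 and b in f4-f6.
float dot(Vec3 a, Vec3 b) {
  return a.x * b.x + a.y * b.y + a.z * b.z;
}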
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -533,6 +563,8 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
+ SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
@@ -666,8 +698,12 @@ namespace llvm {
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
- SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+
+ SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const override;
+ SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 9318f70..9a19abb 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -188,6 +188,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
+def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
+ (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
+
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@@ -786,7 +789,7 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
-// The following three definitions are selected for small code model only.
+// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
@@ -801,8 +804,12 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+                   "#LDtocBA",
+ [(set i64:$rD,
+ (PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in
+let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
"ld 2, $src", IIC_LdStLD,
[(PPCload_toc ixaddr:$src)]>, isPPC64;
@@ -872,11 +879,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsADDR",
- [(set i64:$rD,
- (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
@@ -887,11 +889,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
- "#GETtlsldADDR",
- [(set i64:$rD,
- (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
- isPPC64;
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
@@ -1135,3 +1132,9 @@ def : Pat<(i64 (unaligned4load xoaddr:$src)),
def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>;
+// 64-bit atomic loads and stores
+def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>;
+def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>;
+
+def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index dce46d8..4ef08eb 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -22,110 +22,143 @@ def vnot_ppc : PatFrag<(ops node:$in),
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
- *CurDAG);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
}]>;
def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
- *CurDAG);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
- *CurDAG);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
}]>;
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
- *CurDAG);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
}]>;
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
}]>;
def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
}]>;
def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
}]>;
def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
}]>;
def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
}]>;
def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
}]>;
def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
}]>;
def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
}]>;
def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
- *CurDAG);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
}]>;
def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
}]>;
def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
}]>;
def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
- *CurDAG);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
+}]>;
+
+
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
+}]>;
+def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
}]>;
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG));
}]>;
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1;
+ return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1;
}], VSLDOI_get_imm>;
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG));
}]>;
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1;
+ return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1;
}], VSLDOI_unary_get_imm>;
+/// VSLDOI_swapped* - These fragments are provided for little-endian, where
+/// the inputs must be swapped for correct semantics.
+def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG));
+}]>;
+def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;
+}], VSLDOI_get_imm>;
+
+
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
@@ -242,48 +275,64 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
let Predicates = [HasAltivec] in {
-let isCodeGenOnly = 1 in {
-def DSS : DSS_Form<822, (outs),
- (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSSALL : DSS_Form<822, (outs),
- (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DST : DSS_Form<342, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTT : DSS_Form<342, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTST : DSS_Form<374, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
-def DSTSTT : DSS_Form<374, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
- Deprecated<DeprecatedDST>;
+def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
+ "dss $STRM", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dss imm:$STRM)]>,
+ Deprecated<DeprecatedDST> {
+ let A = 0;
+ let B = 0;
+}
-def DST64 : DSS_Form<342, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+def DSSALL : DSS_Form<1, 822, (outs), (ins),
+ "dssall", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dssall)]>,
+ Deprecated<DeprecatedDST> {
+ let STRM = 0;
+ let A = 0;
+ let B = 0;
+}
+
+def DST : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTT64 : DSS_Form<342, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTT : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTST64 : DSS_Form<374, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTST : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTSTT64 : DSS_Form<374, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+
+def DSTSTT : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
+
+let isCodeGenOnly = 1 in {
+  // The very same instructions as above, but formally matching 64-bit registers.
+ def DST64 : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTT64 : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTST64 : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i64:$rA, i32:$rB,
+ imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
+
+ def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i64:$rA, i32:$rB,
+ imm:$STRM)]>,
+ Deprecated<DeprecatedDST>;
}
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
@@ -731,30 +780,6 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
// Additional Altivec Patterns
//
-// DS* intrinsics
-def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
-def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
-
-// * 32-bit
-def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
- (DST 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
- (DSTT 1, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
- (DSTST 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
- (DSTSTT 1, imm:$STRM, $rA, $rB)>;
-
-// * 64-bit
-def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
- (DST64 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
- (DSTT64 1, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
- (DSTST64 0, imm:$STRM, $rA, $rB)>;
-def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
- (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
-
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
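The dst/dss definitions rewritten earlier in this file now carry their intrinsic patterns directly, which is why the separate DS* Pat<> entries above are removed. A minimal usage sketch, assuming the standard AltiVec builtins that lower to the int_ppc_altivec_dst*/dss* intrinsics (not part of the patch itself):

#include <altivec.h>

// Hypothetical example: each builtin lowers to the corresponding
// int_ppc_altivec_* intrinsic, now matched by the DST/DSTST/DSS/DSSALL
// instruction definitions themselves.
void stream_touch(const int *p) {
  vec_dst(p, 0x10010100, 0);    // dst   rA, rB, 0  (stream control word chosen arbitrarily)
  vec_dstst(p, 0x10010100, 1);  // dstst rA, rB, 1
  vec_dss(0);                   // dss 0
  vec_dssall();                 // dssall
}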
@@ -789,6 +814,16 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
(VPKUHUM $vA, $vA)>;
+// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
+// These fragments are matched for little-endian, where the inputs must
+// be swapped for correct semantics.
+def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB),
+ (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>;
+def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUWUM $vB, $vA)>;
+def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUHUM $vB, $vA)>;
+
// Match vmrg*(x,x)
def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
(VMRGLB $vA, $vA)>;
@@ -803,6 +838,22 @@ def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
(VMRGHW $vA, $vA)>;
+// Match vmrg*(y,x), i.e., swapped operands. These fragments
+// are matched for little-endian, where the inputs must be
+// swapped for correct semantics.
+def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLB $vB, $vA)>;
+def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLH $vB, $vA)>;
+def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLW $vB, $vA)>;
+def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHB $vB, $vA)>;
+def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHH $vB, $vA)>;
+def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHW $vB, $vA)>;
+
// Logical Operations
def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
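For context on the *_swapped fragments and patterns added in this file: on a little-endian subtarget, element-order-sensitive AltiVec operations select the same instructions with their register operands exchanged. A hedged C++ illustration (the vec_mergeh mapping shown is the expected outcome, not something the patch states explicitly):

#include <altivec.h>

// Hypothetical example: merge-high in array element order.
__vector unsigned int merge_high(__vector unsigned int a, __vector unsigned int b) {
  // Big-endian:    vmrghw a, b
  // Little-endian: expected to select vmrglw b, a via the swapped patterns.
  return vec_mergeh(a, b);
}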
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index b424d11..cf71b1c 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -17,8 +17,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_INSTRBUILDER_H
-#define POWERPC_INSTRBUILDER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRBUILDER_H
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 1e4396c..aa68497 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -380,6 +380,11 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asms
let Inst{31} = RC;
}
+class XForm_tlb<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : XForm_base_r3xo<31, xo, OOL, IOL, asmstr, itin, []> {
+ let RST = 0;
+}
+
// This is the same as XForm_base_r3xo, but the first two operands are swapped
// when code is emitted.
class XForm_base_r3xo_swapped
@@ -417,6 +422,22 @@ class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let B = 0;
}
+class XForm_tlbws<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<1> WS;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{20} = WS;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -457,6 +478,52 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_icbt<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<4> CT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6} = 0;
+ let Inst{7-10} = CT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_sr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<4> SR;
+
+ let Inst{6-10} = RS;
+ let Inst{12-15} = SR;
+ let Inst{21-30} = xo;
+}
+
+class XForm_mbar<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> MO;
+
+ let Inst{6-10} = MO;
+ let Inst{21-30} = xo;
+}
+
+class XForm_srin<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<5> RB;
+
+ let Inst{6-10} = RS;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+}
+
class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -764,10 +831,9 @@ class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
// DSS_Form - Form X instruction, used for altivec dss* instructions.
-class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<31, OOL, IOL, asmstr, itin> {
- bits<1> T;
bits<2> STRM;
bits<5> A;
bits<5> B;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9bac91d..daf8790 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -75,7 +75,7 @@ PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
const InstrItineraryData *II =
- &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
+ static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG);
}
@@ -331,6 +331,11 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opcode));
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(PPC::NOP);
+}
+
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
@@ -1617,6 +1622,7 @@ protected:
bool Changed = false;
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE; ++I) {
MachineInstr *MI = I;
@@ -1682,16 +1688,26 @@ protected:
// In theory, there could be other uses of the addend copy before this
// fma. We could deal with this, but that would require additional
// logic below and I suspect it will not occur in any relevant
- // situations.
- bool OtherUsers = false;
+ // situations. Additionally, check whether the copy source is killed
+ // prior to the fma. In order to replace the addend here with the
+ // source of the copy, it must still be live here. We can't use
+ // interval testing for a physical register, so as long as we're
+ // walking the MIs we may as well test liveness here.
+ bool OtherUsers = false, KillsAddendSrc = false;
for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
- J != JE; --J)
+ J != JE; --J) {
if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
OtherUsers = true;
break;
}
+ if (J->modifiesRegister(AddendSrcReg, TRI) ||
+ J->killsRegister(AddendSrcReg, TRI)) {
+ KillsAddendSrc = true;
+ break;
+ }
+ }
- if (OtherUsers)
+ if (OtherUsers || KillsAddendSrc)
continue;
// Find one of the product operands that is killed by this instruction.
@@ -1712,10 +1728,11 @@ protected:
if (!KilledProdOp)
continue;
- // In order to replace the addend here with the source of the copy,
- // it must still be live here.
- if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx))
- continue;
+ // For virtual registers, verify that the addend source register
+ // is live here (as should have been assured above).
+ assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) ||
+ LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) &&
+ "Addend source register is not live!");
// Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
@@ -1737,6 +1754,12 @@ protected:
unsigned OldFMAReg = MI->getOperand(0).getReg();
+ // The transformation doesn't work well with things like:
+ // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5;
+ // so leave such things alone.
+ if (OldFMAReg == KilledProdReg)
+ continue;
+
assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
"Addend copy not tied to old FMA output!");
@@ -1827,7 +1850,7 @@ public:
LIS = &getAnalysis<LiveIntervals>();
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -1980,7 +2003,7 @@ public:
// If we don't have VSX on the subtarget, don't do anything.
if (!TM->getSubtargetImpl()->hasVSX())
return false;
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -2057,7 +2080,7 @@ public:
// If we don't have VSX don't bother doing anything here.
if (!TM->getSubtargetImpl()->hasVSX())
return false;
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
@@ -2214,7 +2237,7 @@ protected:
public:
bool runOnMachineFunction(MachineFunction &MF) override {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 83f14c6..4d310fe 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC_INSTRUCTIONINFO_H
-#define POWERPC_INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
#include "PPC.h"
#include "PPCRegisterInfo.h"
@@ -228,6 +228,8 @@ public:
/// instruction may be. This returns the maximum number of bytes.
///
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ void getNoopForMachoTarget(MCInst &NopInst) const override;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index c2e3382..8c76c46 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -57,6 +57,9 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
+def tocentry32 : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
@@ -107,10 +110,8 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
-def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
-def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
[SDNPHasChain]>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
@@ -133,9 +134,15 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -417,6 +424,15 @@ def u2imm : Operand<i32> {
let PrintMethod = "printU2ImmOperand";
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+
+def PPCU4ImmAsmOperand : AsmOperandClass {
+ let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u4imm : Operand<i32> {
+ let PrintMethod = "printU4ImmOperand";
+ let ParserMatchClass = PPCU4ImmAsmOperand;
+}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands";
@@ -446,7 +462,7 @@ def u6imm : Operand<i32> {
}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
@@ -456,7 +472,7 @@ def s16imm : Operand<i32> {
}
def PPCU16ImmAsmOperand : AsmOperandClass {
let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addU16ImmOperands";
}
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
@@ -466,7 +482,7 @@ def u16imm : Operand<i32> {
}
def PPCS17ImmAsmOperand : AsmOperandClass {
let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def s17imm : Operand<i32> {
// This operand type is used for addis/lis to allow the assembler parser
@@ -542,7 +558,7 @@ def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
def PPCDispRIOperand : AsmOperandClass {
let Name = "DispRI"; let PredicateMethod = "isS16Imm";
- let RenderMethod = "addImmOperands";
+ let RenderMethod = "addS16ImmOperands";
}
def dispRI : Operand<iPTR> {
let ParserMatchClass = PPCDispRIOperand;
@@ -554,6 +570,27 @@ def PPCDispRIXOperand : AsmOperandClass {
def dispRIX : Operand<iPTR> {
let ParserMatchClass = PPCDispRIXOperand;
}
+def PPCDispSPE8Operand : AsmOperandClass {
+ let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE8 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE8Operand;
+}
+def PPCDispSPE4Operand : AsmOperandClass {
+ let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE4 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE4Operand;
+}
+def PPCDispSPE2Operand : AsmOperandClass {
+ let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
+ let RenderMethod = "addImmOperands";
+}
+def dispSPE2 : Operand<iPTR> {
+ let ParserMatchClass = PPCDispSPE2Operand;
+}
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
@@ -571,6 +608,21 @@ def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let EncoderMethod = "getMemRIXEncoding";
let DecoderMethod = "decodeMemRIXOperands";
}
+def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE8DisEncoding";
+}
+def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE4DisEncoding";
+}
+def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getSPE2DisEncoding";
+}
// A single-register address. This is used with the SjLj
// pseudo-instructions.
@@ -585,6 +637,12 @@ def tlsreg32 : Operand<i32> {
let EncoderMethod = "getTLSRegEncoding";
let ParserMatchClass = PPCTLSRegOperand;
}
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
// PowerPC Predicate operand.
def pred : Operand<OtherVT> {
@@ -611,6 +669,12 @@ def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
+def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">;
+def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">;
+def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">;
+def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
+def IsE500 : Predicate<"PPCSubTarget->isE500()">;
+def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -967,6 +1031,9 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let Defs = [LR] in
def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
+let Defs = [LR] in
+ def MoveGOTtoLR : Pseudo<(outs), (ins), "#MoveGOTtoLR", []>,
+ PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
@@ -1068,6 +1135,8 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
"bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
let isCodeGenOnly = 1 in {
+ def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func),
+ "bl $func", IIC_BrB, []>;
def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
@@ -1243,8 +1312,15 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
+def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
+ "icbt $CT, $src", IIC_LdStLoad>, Requires<[IsBookE]>;
+
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
- (DCBT xoaddr:$dst)>;
+ (DCBT xoaddr:$dst)>; // data prefetch for loads
+def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)),
+ (DCBTST xoaddr:$dst)>; // data prefetch for stores
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)),
+ (ICBT 0, xoaddr:$dst)>; // inst prefetch (for read)
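A small sketch of source-level prefetches that are expected to reach the three patterns above, assuming the usual lowering of __builtin_prefetch to the llvm.prefetch intrinsic with cache type 1 (data); the instruction-cache form (cache type 0, selecting ICBT) is only reachable through the intrinsic directly. Not part of the patch:

// Hypothetical example.
void warm(const char *p, char *q) {
  __builtin_prefetch(p, /*rw=*/0, /*locality=*/3);  // data read  -> dcbt
  __builtin_prefetch(q, /*rw=*/1, /*locality=*/3);  // data write -> dcbtst
}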
// Atomic operations
let usesCustomInserter = 1 in {
@@ -1628,17 +1704,19 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
"stmw $rS, $dst", IIC_LdStLMW, []>;
def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
- "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>;
+ "sync $L", IIC_LdStSync, []>;
let isCodeGenOnly = 1 in {
def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
- "msync", IIC_LdStSync, []>, Requires<[IsBookE]> {
+ "msync", IIC_LdStSync, []> {
let L = 0;
}
}
-def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
+def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
@@ -2355,6 +2433,8 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
+def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
+ (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -2393,13 +2473,47 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
[(set i32:$rD, (PPCppc32GOT))]>;
+// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
+// This uses two output registers, the first as the real output, the second as a
+// temporary register, used internally in code generation.
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+ []>, NoEncode<"$rT">;
+
def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
- "#LDgotTprelL32",
- [(set i32:$rD,
- (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+ "#LDgotTprelL32",
+ [(set i32:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
(ADD4TLS $in, tglobaltlsaddr:$g)>;
+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsgdL32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsldL32",
+ [(set i32:$rD,
+ (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDIdtprelL32",
+ [(set i32:$rD,
+ (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDISdtprelHA32",
+ [(set i32:$rD,
+ (PPCaddisDtprelHA i32:$reg,
+ tglobaltlsaddr:$disp))]>;
+
+// Support for Position-independent code
+def LWZtoc : Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+ "#LWZtoc",
+ [(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+// Get Global (GOT) Base Register offset, from the word immediately preceding
+// the function label.
+def UpdateGBR : Pseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+
+
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
@@ -2434,8 +2548,15 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
+// Only seq_cst fences require the heavyweight sync (SYNC 0).
+// All others can use the lightweight sync (SYNC 1).
+// source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+// The rule for seq_cst is duplicated to work with both 64 bits and 32 bits
+// versions of Power.
+def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>;
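A hedged illustration of the fence mapping these patterns encode (seq_cst fences get the heavyweight sync, everything else the lightweight one); not part of the patch:

#include <atomic>

// Hypothetical example.
void fences() {
  std::atomic_thread_fence(std::memory_order_seq_cst); // -> sync 0 (hwsync)
  std::atomic_thread_fence(std::memory_order_acquire); // -> sync 1 (lwsync)
  std::atomic_thread_fence(std::memory_order_release); // -> sync 1 (lwsync)
}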
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@@ -2454,6 +2575,7 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
(FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
include "PPCInstrAltivec.td"
+include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
@@ -2970,6 +3092,16 @@ def : Pat<(i1 (not (trunc i64:$in))),
// PowerPC Instructions used for assembler/disassembler only
//
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def LSWI : XForm_base_r3xo<31, 597, (outs gprc:$RT), (ins gprc:$A, u5imm:$B),
+ "lswi $RT, $A, $B", IIC_LdStLoad, []>;
+
+// FIXME: For B=0 or B > 8, the registers following RT are used.
+// WARNING: Do not add patterns for this instruction without fixing this.
+def STSWI : XForm_base_r3xo<31, 725, (outs), (ins gprc:$RT, gprc:$A, u5imm:$B),
+ "stswi $RT, $A, $B", IIC_LdStLoad, []>;
+
def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
"isync", IIC_SprISYNC, []>;
@@ -2982,9 +3114,47 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
"wait $L", IIC_LdStLoad, []>;
+def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO),
+ "mbar $MO", IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def MTSR: XForm_sr<31, 210, (outs), (ins gprc:$RS, u4imm:$SR),
+ "mtsr $SR, $RS", IIC_SprMTSR>;
+
+def MFSR: XForm_sr<31, 595, (outs gprc:$RS), (ins u4imm:$SR),
+ "mfsr $RS, $SR", IIC_SprMFSR>;
+
+def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB),
+ "mtsrin $RS, $RB", IIC_SprMTSR>;
+
+def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB),
+ "mfsrin $RS, $RB", IIC_SprMFSR>;
+
def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
"mtmsr $RS, $L", IIC_SprMTMSR>;
+def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS),
+ "wrtee $RS", IIC_SprMTMSR>, Requires<[IsBookE]> {
+ let L = 0;
+}
+
+def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>,
+ Requires<[IsBookE]> {
+ bits<1> E;
+
+ let Inst{16} = E;
+ let Inst{21-30} = 163;
+}
+
+def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B),
+ "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B),
+ "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
+
def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
"mfmsr $RT", IIC_SprMFMSR, []>;
@@ -3002,15 +3172,66 @@ def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
+def TLBIA : XForm_0<31, 370, (outs), (ins),
+ "tlbia", IIC_SprTLBIA, []>;
+
def TLBSYNC : XForm_0<31, 566, (outs), (ins),
"tlbsync", IIC_SprTLBSYNC, []>;
def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
"tlbiel $RB", IIC_SprTLBIEL, []>;
+def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB),
+ "tlbld $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB),
+ "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
+
def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
"tlbie $RB,$RS", IIC_SprTLBIE, []>;
+def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B",
+ IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B",
+ IIC_LdStLoad>, Requires<[IsBookE]>;
+
+def TLBRE : XForm_24_eieio<31, 946, (outs), (ins),
+ "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBWE : XForm_24_eieio<31, 978, (outs), (ins),
+ "tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>;
+
+def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS),
+ "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS),
+ "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+
+def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
+ "tlbsx $RST, $A, $B", IIC_LdStLoad, []>,
+ Requires<[IsPPC4xx]>;
+def TLBSX2D : XForm_base_r3xo<31, 914, (outs),
+ (ins gprc:$RST, gprc:$A, gprc:$B),
+ "tlbsx. $RST, $A, $B", IIC_LdStLoad, []>,
+ Requires<[IsPPC4xx]>, isDOT;
+
+def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>;
+
+def RFI : XForm_0<19, 50, (outs), (ins), "rfi", IIC_SprRFI, []>,
+ Requires<[IsBookE]>;
+def RFCI : XForm_0<19, 51, (outs), (ins), "rfci", IIC_BrB, []>,
+ Requires<[IsBookE]>;
+
+def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>,
+ Requires<[IsE500]>;
+def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>,
+ Requires<[IsE500]>;
+
+def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>;
+def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR),
+ "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>;
+
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
//
@@ -3033,15 +3254,17 @@ class PPCAsmPseudo<string asm, dag iops>
def : InstAlias<"sc", (SC 0)>;
-def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
-def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
-def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
-def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
+def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[HasSYNC]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>;
def : InstAlias<"wait", (WAIT 0)>;
def : InstAlias<"waitrsv", (WAIT 1)>;
def : InstAlias<"waitimpl", (WAIT 2)>;
+def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>;
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -3050,9 +3273,57 @@ def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>
def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>;
+def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>;
+
+def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>;
+def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>;
+
+def : InstAlias<"mtdsisr $Rx", (MTSPR 18, gprc:$Rx)>;
+def : InstAlias<"mfdsisr $Rx", (MFSPR gprc:$Rx, 18)>;
+
+def : InstAlias<"mtdar $Rx", (MTSPR 19, gprc:$Rx)>;
+def : InstAlias<"mfdar $Rx", (MFSPR gprc:$Rx, 19)>;
+
+def : InstAlias<"mtdec $Rx", (MTSPR 22, gprc:$Rx)>;
+def : InstAlias<"mfdec $Rx", (MFSPR gprc:$Rx, 22)>;
+
+def : InstAlias<"mtsdr1 $Rx", (MTSPR 25, gprc:$Rx)>;
+def : InstAlias<"mfsdr1 $Rx", (MFSPR gprc:$Rx, 25)>;
+
+def : InstAlias<"mtsrr0 $Rx", (MTSPR 26, gprc:$Rx)>;
+def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>;
+
+def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>;
+def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>;
+
+def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>;
+def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>;
+
+def : InstAlias<"mtamr $Rx", (MTSPR 29, gprc:$Rx)>;
+def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>;
+
+def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>;
+def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>;
+
def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>;
def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
+def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>;
+def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>;
+
+def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+
def : InstAlias<"xnop", (XORI R0, R0, 0)>;
def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
@@ -3063,6 +3334,60 @@ def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+foreach BATR = 0-3 in {
+ def : InstAlias<"mtdbatu "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 536)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfdbatu $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 536)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtdbatl "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 537)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfdbatl $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 537)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtibatu "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 528)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfibatu $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 528)))>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mtibatl "#BATR#", $Rx",
+ (MTSPR !add(BATR, !add(BATR, 529)), gprc:$Rx)>,
+ Requires<[IsPPC6xx]>;
+ def : InstAlias<"mfibatl $Rx, "#BATR,
+ (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 529)))>,
+ Requires<[IsPPC6xx]>;
+}
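The nested !add above computes 2*BATR plus the base SPR number. A quick spot-check of that arithmetic, as a hypothetical standalone C++ snippet (not part of the patch):

// DBATnU = 536 + 2n, DBATnL = 537 + 2n, IBATnU = 528 + 2n, IBATnL = 529 + 2n.
static_assert(536 + 2 * 2 == 540, "mtdbatu 2, rX expands to mtspr 540, rX");
static_assert(529 + 2 * 3 == 535, "mfibatl rX, 3 expands to mfspr rX, 535");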
+
+foreach BR = 0-7 in {
+ def : InstAlias<"mfbr"#BR#" $Rx",
+ (MFDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+ def : InstAlias<"mtbr"#BR#" $Rx",
+ (MTDCR gprc:$Rx, !add(BR, 0x80))>,
+ Requires<[IsPPC4xx]>;
+}
+
+def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>;
+
+def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>;
+def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>;
+
+def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>;
+def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>;
+
def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
@@ -3082,25 +3407,25 @@ def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
-def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
-def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
-def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
-def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
-
-def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
-def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
-def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
-def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
+def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>;
+def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>;
-def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
-def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
-def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
-def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
-
-def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
-def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
-def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
-def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
+foreach SPRG = 0-3 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>;
+}
+foreach SPRG = 4-7 in {
+ def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+ def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>,
+ Requires<[IsBookE]>;
+}
def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
@@ -3119,6 +3444,15 @@ def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbrelo $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 1)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>,
+ Requires<[IsPPC4xx]>;
+def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>,
+ Requires<[IsPPC4xx]>;
+
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
@@ -3367,3 +3701,18 @@ defm : TrapExtendedMnemonic<"lnl", 5>;
defm : TrapExtendedMnemonic<"lng", 6>;
defm : TrapExtendedMnemonic<"u", 31>;
+// Atomic loads
+def : Pat<(atomic_load_8 iaddr:$src), (LBZ memri:$src)>;
+def : Pat<(atomic_load_16 iaddr:$src), (LHZ memri:$src)>;
+def : Pat<(atomic_load_32 iaddr:$src), (LWZ memri:$src)>;
+def : Pat<(atomic_load_8 xaddr:$src), (LBZX memrr:$src)>;
+def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>;
+def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>;
+
+// Atomic stores
+def : Pat<(atomic_store_8 iaddr:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>;
+def : Pat<(atomic_store_8 xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;
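These patterns only select the memory access itself; a hedged sketch of the relaxed atomics they are expected to cover (aligned loads and stores of at most word size are naturally single-copy atomic on PPC32, and stronger orderings add fences separately). Not part of the patch:

#include <atomic>

// Hypothetical example.
int load_relaxed(const std::atomic<int> *p) {
  return p->load(std::memory_order_relaxed);   // -> lwz / lwzx
}
void store_relaxed(std::atomic<short> *p, short v) {
  p->store(v, std::memory_order_relaxed);      // -> sth / sthx
}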
diff --git a/lib/Target/PowerPC/PPCInstrSPE.td b/lib/Target/PowerPC/PPCInstrSPE.td
new file mode 100644
index 0000000..cc3a4d2
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrSPE.td
@@ -0,0 +1,447 @@
+//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Signal Processing Engine extension to
+// the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+class EVXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = [];
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-31} = xo;
+}
+
+class EVXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : EVXForm_1<xo, OOL, IOL, asmstr, itin> {
+ let RB = 0;
+}
+
+class EVXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<3> crD;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = [];
+
+ let Inst{6-8} = crD;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-31} = xo;
+}
+
+class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin> : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<21> D;
+
+ let Pattern = [];
+
+ let Inst{6-10} = RT;
+ let Inst{20} = D{0};
+ let Inst{19} = D{1};
+ let Inst{18} = D{2};
+ let Inst{17} = D{3};
+ let Inst{16} = D{4};
+ let Inst{15} = D{5};
+ let Inst{14} = D{6};
+ let Inst{13} = D{7};
+ let Inst{12} = D{8};
+ let Inst{11} = D{9};
+ let Inst{11-20} = D{0-9};
+ let Inst{21-31} = xo;
+}
+
+let Predicates = [HasSPE], isAsmParserOnly = 1 in {
+
+def EVLDD : EVXForm_D<769, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldd $RT, $dst", IIC_VecFP>;
+def EVLDW : EVXForm_D<771, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldw $RT, $dst", IIC_VecFP>;
+def EVLDH : EVXForm_D<773, (outs gprc:$RT), (ins spe8dis:$dst),
+ "evldh $RT, $dst", IIC_VecFP>;
+def EVLHHESPLAT : EVXForm_D<777, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhesplat $RT, $dst", IIC_VecFP>;
+def EVLHHOUSPLAT : EVXForm_D<781, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhousplat $RT, $dst", IIC_VecFP>;
+def EVLHHOSSPLAT : EVXForm_D<783, (outs gprc:$RT), (ins spe2dis:$dst),
+ "evlhhossplat $RT, $dst", IIC_VecFP>;
+def EVLWHE : EVXForm_D<785, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhe $RT, $dst", IIC_VecFP>;
+def EVLWHOU : EVXForm_D<789, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhou $RT, $dst", IIC_VecFP>;
+def EVLWHOS : EVXForm_D<791, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhos $RT, $dst", IIC_VecFP>;
+def EVLWWSPLAT : EVXForm_D<793, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwwsplat $RT, $dst", IIC_VecFP>;
+def EVLWHSPLAT : EVXForm_D<797, (outs gprc:$RT), (ins spe4dis:$dst),
+ "evlwhsplat $RT, $dst", IIC_VecFP>;
+
+def EVSTDD : EVXForm_D<801, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdd $RT, $dst", IIC_VecFP>;
+def EVSTDH : EVXForm_D<805, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdh $RT, $dst", IIC_VecFP>;
+def EVSTDW : EVXForm_D<803, (outs), (ins gprc:$RT, spe8dis:$dst),
+ "evstdw $RT, $dst", IIC_VecFP>;
+def EVSTWHE : EVXForm_D<817, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwhe $RT, $dst", IIC_VecFP>;
+def EVSTWHO : EVXForm_D<821, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwho $RT, $dst", IIC_VecFP>;
+def EVSTWWE : EVXForm_D<825, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwwe $RT, $dst", IIC_VecFP>;
+def EVSTWWO : EVXForm_D<829, (outs), (ins gprc:$RT, spe4dis:$dst),
+ "evstwwo $RT, $dst", IIC_VecFP>;
+
+def EVMRA : EVXForm_1<1220, (outs gprc:$RT), (ins gprc:$RA),
+ "evmra $RT, $RA", IIC_VecFP> {
+ let RB = 0;
+}
+
+def BRINC : EVXForm_1<527, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "brinc $RT, $RA, $RB", IIC_VecFP>;
+def EVABS : EVXForm_2<520, (outs gprc:$RT), (ins gprc:$RA),
+ "evabs $RT, $RA", IIC_VecFP>;
+
+def EVADDIW : EVXForm_1<514, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evaddiw $RT, $RB, $RA", IIC_VecFP>;
+def EVADDSMIAAW : EVXForm_2<1225, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddsmiaaw $RT, $RA", IIC_VecFP>;
+def EVADDSSIAAW : EVXForm_2<1217, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddssiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUSIAAW : EVXForm_2<1216, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddusiaaw $RT, $RA", IIC_VecFP>;
+def EVADDUMIAAW : EVXForm_2<1224, (outs gprc:$RT), (ins gprc:$RA),
+ "evaddumiaaw $RT, $RA", IIC_VecFP>;
+def EVADDW : EVXForm_1<512, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evaddw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVAND : EVXForm_1<529, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evand $RT, $RA, $RB", IIC_VecFP>;
+def EVANDC : EVXForm_1<530, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evandc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVCMPEQ : EVXForm_3<564, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpeq $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTS : EVXForm_3<561, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpgts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPGTU : EVXForm_3<560, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpgtu $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTS : EVXForm_3<563, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmplts $crD, $RA, $RB", IIC_VecFP>;
+def EVCMPLTU : EVXForm_3<562, (outs crrc:$crD), (ins gprc:$RA, gprc:$RB),
+ "evcmpltu $crD, $RA, $RB", IIC_VecFP>;
+
+def EVCNTLSW : EVXForm_2<526, (outs gprc:$RT), (ins gprc:$RA),
+ "evcntlsw $RT, $RA", IIC_VecFP>;
+def EVCNTLZW : EVXForm_2<525, (outs gprc:$RT), (ins gprc:$RA),
+ "evcntlzw $RT, $RA", IIC_VecFP>;
+
+def EVDIVWS : EVXForm_1<1222, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evdivws $RT, $RA, $RB", IIC_VecFP>;
+def EVDIVWU : EVXForm_1<1223, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evdivwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEQV : EVXForm_1<537, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "eveqv $RT, $RA, $RB", IIC_VecFP>;
+
+def EVEXTSB : EVXForm_2<522, (outs gprc:$RT), (ins gprc:$RA),
+ "evextsb $RT, $RA", IIC_VecFP>;
+def EVEXTSH : EVXForm_2<523, (outs gprc:$RT), (ins gprc:$RA),
+ "evextsh $RT, $RA", IIC_VecFP>;
+
+def EVLDDX : EVXForm_1<768, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlddx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDWX : EVXForm_1<770, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evldwx $RT, $RA, $RB", IIC_VecFP>;
+def EVLDHX : EVXForm_1<772, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evldhx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHESPLATX : EVXForm_1<776, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhesplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOUSPLATX : EVXForm_1<780, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhousplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLHHOSSPLATX : EVXForm_1<782, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlhhossplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHEX : EVXForm_1<784, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOUX : EVXForm_1<788, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhoux $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHOSX : EVXForm_1<790, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhosx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWWSPLATX : EVXForm_1<792, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwwsplatx $RT, $RA, $RB", IIC_VecFP>;
+def EVLWHSPLATX : EVXForm_1<796, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evlwhsplatx $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMERGEHI : EVXForm_1<556, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergehi $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELO : EVXForm_1<557, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergelo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGEHILO : EVXForm_1<558, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergehilo $RT, $RA, $RB", IIC_VecFP>;
+def EVMERGELOHI : EVXForm_1<559, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmergelohi $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHEGSMFAA : EVXForm_1<1323, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMFAN : EVXForm_1<1451, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAA : EVXForm_1<1321, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGSMIAN : EVXForm_1<1449, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAA : EVXForm_1<1320, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEGUMIAN : EVXForm_1<1448, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhegumian $RT, $RA, $RB", IIC_VecFP>;
+
+def EVMHESMF : EVXForm_1<1035, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFA : EVXForm_1<1067, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFAAW : EVXForm_1<1291, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMFANW : EVXForm_1<1419, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMI : EVXForm_1<1033, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIA : EVXForm_1<1065, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIAAW : EVXForm_1<1289, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESMIANW : EVXForm_1<1417, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhesmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSF : EVXForm_1<1027, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFA : EVXForm_1<1059, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFAAW : EVXForm_1<1283, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSFANW : EVXForm_1<1411, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIAAW : EVXForm_1<1281, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHESSIANW : EVXForm_1<1409, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhessianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMI : EVXForm_1<1032, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIA : EVXForm_1<1064, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIAAW : EVXForm_1<1288, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUMIANW : EVXForm_1<1416, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIAAW : EVXForm_1<1280, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHEUSIANW : EVXForm_1<1408, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmheusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAA : EVXForm_1<1327, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMFAN : EVXForm_1<1455, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAA : EVXForm_1<1325, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGSMIAN : EVXForm_1<1453, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAA : EVXForm_1<1324, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOGUMIAN : EVXForm_1<1452, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhogumian $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMF : EVXForm_1<1039, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFA : EVXForm_1<1071, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFAAW : EVXForm_1<1295, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMFANW : EVXForm_1<1423, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMI : EVXForm_1<1037, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIA : EVXForm_1<1069, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIAAW : EVXForm_1<1293, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSMIANW : EVXForm_1<1421, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhosmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSF : EVXForm_1<1031, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossf $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFA : EVXForm_1<1063, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFAAW : EVXForm_1<1287, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSFANW : EVXForm_1<1415, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossfanw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIAAW : EVXForm_1<1285, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOSSIANW : EVXForm_1<1413, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhossianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMI : EVXForm_1<1036, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIA : EVXForm_1<1068, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIAAW : EVXForm_1<1292, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUMIANW : EVXForm_1<1420, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhoumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIAAW : EVXForm_1<1284, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhousiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMHOUSIANW : EVXForm_1<1412, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmhousianw $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVMWHSMF : EVXForm_1<1103, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMFA : EVXForm_1<1135, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMI : EVXForm_1<1101, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSMIA : EVXForm_1<1133, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSF : EVXForm_1<1095, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHSSFA : EVXForm_1<1127, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMI : EVXForm_1<1100, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWHUMIA : EVXForm_1<1132, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwhumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIAAW : EVXForm_1<1353, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlsmiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSMIANW : EVXForm_1<1481, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlsmianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIAAW : EVXForm_1<1345, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlssiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLSSIANW : EVXForm_1<1473, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlssianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMI : EVXForm_1<1096, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIA : EVXForm_1<1128, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIAAW : EVXForm_1<1352, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUMIANW : EVXForm_1<1480, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlumianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIAAW : EVXForm_1<1344, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlusiaaw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWLUSIANW : EVXForm_1<1472, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwlusianw $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMF : EVXForm_1<1115, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFA : EVXForm_1<1147, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAA : EVXForm_1<1371, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMFAN : EVXForm_1<1499, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMI : EVXForm_1<1113, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIA : EVXForm_1<1145, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAA : EVXForm_1<1369, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSMIAN : EVXForm_1<1497, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwsmian $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSF : EVXForm_1<1107, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssf $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFA : EVXForm_1<1139, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAA : EVXForm_1<1363, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWSSFAN : EVXForm_1<1491, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwssfan $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMI : EVXForm_1<1112, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumi $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIA : EVXForm_1<1144, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumia $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAA : EVXForm_1<1368, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumiaa $RT, $RA, $RB", IIC_VecFP>;
+def EVMWUMIAN : EVXForm_1<1496, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evmwumian $RT, $RA, $RB", IIC_VecFP>;
+
+
+def EVNAND : EVXForm_1<542, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evnand $RT, $RA, $RB", IIC_VecFP>;
+
+def EVNEG : EVXForm_2<521, (outs gprc:$RT), (ins gprc:$RA),
+ "evneg $RT, $RA", IIC_VecFP>;
+
+def EVNOR : EVXForm_1<536, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evnor $RT, $RA, $RB", IIC_VecFP>;
+def EVOR : EVXForm_1<535, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evor $RT, $RA, $RB", IIC_VecFP>;
+def EVORC : EVXForm_1<539, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evorc $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRLWI : EVXForm_1<554, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evrlwi $RT, $RA, $RB", IIC_VecFP>;
+def EVRLW : EVXForm_1<552, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evrlw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVRNDW : EVXForm_2<524, (outs gprc:$RT), (ins gprc:$RA),
+ "evrndw $RT, $RA", IIC_VecFP>;
+
+def EVSLWI : EVXForm_1<550, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evslwi $RT, $RA, $RB", IIC_VecFP>;
+def EVSLW : EVXForm_1<548, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evslw $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSPLATFI : EVXForm_2<555, (outs gprc:$RT), (ins i32imm:$RA),
+ "evsplatfi $RT, $RA", IIC_VecFP>;
+def EVSPLATI : EVXForm_2<553, (outs gprc:$RT), (ins i32imm:$RA),
+ "evsplati $RT, $RA", IIC_VecFP>;
+
+def EVSRWIS : EVXForm_1<547, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evsrwis $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWIU : EVXForm_1<546, (outs gprc:$RT), (ins gprc:$RA, u5imm:$RB),
+ "evsrwiu $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWS : EVXForm_1<545, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsrws $RT, $RA, $RB", IIC_VecFP>;
+def EVSRWU : EVXForm_1<544, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsrwu $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSTDDX : EVXForm_1<800, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstddx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDHX : EVXForm_1<804, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstdhx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTDWX : EVXForm_1<802, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstdwx $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHEX : EVXForm_1<816, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwhex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWHOX : EVXForm_1<820, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwhox $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWEX : EVXForm_1<824, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwwex $RT, $RA, $RB", IIC_VecFP>;
+def EVSTWWOX : EVXForm_1<828, (outs), (ins gprc:$RT, gprc:$RA, gprc:$RB),
+ "evstwwox $RT, $RA, $RB", IIC_VecFP>;
+
+def EVSUBFSSIAAW : EVXForm_2<1219, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfssiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFSMIAAW : EVXForm_2<1227, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfsmiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUMIAAW : EVXForm_2<1226, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfumiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFUSIAAW : EVXForm_2<1218, (outs gprc:$RT), (ins gprc:$RA),
+ "evsubfusiaaw $RT, $RA", IIC_VecFP>;
+def EVSUBFW : EVXForm_1<516, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evsubfw $RT, $RA, $RB", IIC_VecFP>;
+def EVSUBIFW : EVXForm_1<518, (outs gprc:$RT), (ins u5imm:$RA, gprc:$RB),
+ "evsubifw $RT, $RA, $RB", IIC_VecFP>;
+def EVXOR : EVXForm_1<534, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "evxor $RT, $RA, $RB", IIC_VecFP>;
+
+} // HasSPE
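
The SPE definitions above all follow one shape: EVXForm_1 takes an 11-bit extended opcode plus RT/RA/RB operands, EVXForm_2 drops RB. A minimal sketch of how such a word is assumed to encode, taking the standard EVX layout (primary opcode 4 in the top six bits, 5-bit RT/RA/RB fields, extended opcode in the low 11 bits); the helper name is illustrative only:

#include <cassert>
#include <cstdint>

// Illustrative only: assemble one EVX-form word from its fields, mirroring
// the bit layout EVXForm_1<xo, ...> is assumed to describe.
static uint32_t encodeEVX(uint32_t xo, uint32_t rt, uint32_t ra, uint32_t rb) {
  assert(xo < 2048 && rt < 32 && ra < 32 && rb < 32);
  return (4u << 26)    // primary opcode 4 (SPE)
       | (rt << 21)    // RT
       | (ra << 16)    // RA
       | (rb << 11)    // RB
       | xo;           // 11-bit extended opcode, e.g. 534 for evxor
}

int main() {
  // evxor r3, r4, r5 under the assumed layout.
  uint32_t Word = encodeEVX(534, 3, 4, 5);
  assert((Word >> 26) == 4 && (Word & 0x7FF) == 534);
  return 0;
}
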
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 49bcc48..2c8f998 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -47,23 +47,24 @@ let Uses = [RM] in {
// Load indexed instructions
let mayLoad = 1, canFoldAsLoad = 1 in {
- def LXSDX : XForm_1<31, 588,
+ def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
- def LXVD2X : XForm_1<31, 844,
+ def LXVD2X : XX1Form<31, 844,
(outs vsrc:$XT), (ins memrr:$src),
"lxvd2x $XT, $src", IIC_LdStLFD,
- [(set v2f64:$XT, (load xoaddr:$src))]>;
+ [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
- def LXVDSX : XForm_1<31, 332,
+ def LXVDSX : XX1Form<31, 332,
(outs vsrc:$XT), (ins memrr:$src),
"lxvdsx $XT, $src", IIC_LdStLFD, []>;
- def LXVW4X : XForm_1<31, 780,
+ def LXVW4X : XX1Form<31, 780,
(outs vsrc:$XT), (ins memrr:$src),
- "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+ "lxvw4x $XT, $src", IIC_LdStLFD,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
}
// Store indexed instructions
@@ -76,11 +77,12 @@ let Uses = [RM] in {
def STXVD2X : XX1Form<31, 972,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvd2x $XT, $dst", IIC_LdStSTFD,
- [(store v2f64:$XT, xoaddr:$dst)]>;
+ [(int_ppc_vsx_stxvd2x v2f64:$XT, xoaddr:$dst)]>;
def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst),
- "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+ "stxvw4x $XT, $dst", IIC_LdStSTFD,
+ [(int_ppc_vsx_stxvw4x v4i32:$XT, xoaddr:$dst)]>;
}
// Add/Mul Instructions
@@ -641,24 +643,36 @@ let Uses = [RM] in {
let isCommutable = 1 in {
def XSMAXDP : XX3Form<60, 160,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmaxdp vsfrc:$XA, vsfrc:$XB))]>;
def XSMINDP : XX3Form<60, 168,
(outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
- "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xsmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsfrc:$XT,
+ (int_ppc_vsx_xsmindp vsfrc:$XA, vsfrc:$XB))]>;
def XVMAXDP : XX3Form<60, 224,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxdp vsrc:$XA, vsrc:$XB))]>;
def XVMINDP : XX3Form<60, 232,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmindp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmindp vsrc:$XA, vsrc:$XB))]>;
def XVMAXSP : XX3Form<60, 192,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvmaxsp vsrc:$XA, vsrc:$XB))]>;
def XVMINSP : XX3Form<60, 200,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+ "xvminsp $XT, $XA, $XB", IIC_VecFP,
+ [(set vsrc:$XT,
+ (int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
} // isCommutable
} // Uses = [RM]
@@ -715,6 +729,31 @@ let Uses = [RM] in {
(outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
} // neverHasSideEffects
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+
+ def SELECT_CC_VSRC: Pseudo<(outs vsrc:$dst),
+ (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
+ "#SELECT_CC_VSRC",
+ []>;
+ def SELECT_VSRC: Pseudo<(outs vsrc:$dst),
+ (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
+ "#SELECT_VSRC",
+ [(set v2f64:$dst,
+ (select i1:$cond, v2f64:$T, v2f64:$F))]>;
+ def SELECT_CC_VSFRC: Pseudo<(outs f8rc:$dst),
+ (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSFRC",
+ []>;
+ def SELECT_VSFRC: Pseudo<(outs f8rc:$dst),
+ (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+ "#SELECT_VSFRC",
+ [(set f64:$dst,
+ (select i1:$cond, f64:$T, f64:$F))]>;
+} // usesCustomInserter
} // AddedComplexity
def : InstAlias<"xvmovdp $XT, $XB",
@@ -811,6 +850,49 @@ def : Pat<(sext_inreg v2i64:$C, v2i32),
def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
(XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+// Loads.
+def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+
+// Selects.
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
+ (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
+ (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// Divides.
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
+ (XVDIVSP $A, $B)>;
+def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
+ (XVDIVDP $A, $B)>;
+
} // AddedComplexity
} // HasVSX
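
The selectcc patterns added above pick one CR-bit logical op per condition code. Assuming crandc, crorc, and creqv carry their usual PowerPC semantics on a single bit (a & ~b, a | ~b, ~(a ^ b)), the operand order in each pattern follows from boolean identities on the i1 comparison. A small self-contained sketch that checks them:

#include <cassert>

// Illustrative check of the identities behind the selectcc patterns above.
static bool crandc(bool a, bool b) { return a && !b; }
static bool crorc(bool a, bool b)  { return a || !b; }
static bool creqv(bool a, bool b)  { return a == b; }

int main() {
  for (int l = 0; l <= 1; ++l)
    for (int r = 0; r <= 1; ++r) {
      bool lhs = l, rhs = r;
      assert((lhs < rhs)  == crandc(rhs, lhs)); // SETLT -> CRANDC $rhs, $lhs
      assert((lhs <= rhs) == crorc(rhs, lhs));  // SETLE -> CRORC  $rhs, $lhs
      assert((lhs == rhs) == creqv(lhs, rhs));  // SETEQ -> CREQV  $lhs, $rhs
      assert((lhs >= rhs) == crorc(lhs, rhs));  // SETGE -> CRORC  $lhs, $rhs
      assert((lhs > rhs)  == crandc(lhs, rhs)); // SETGT -> CRANDC $lhs, $rhs
      assert((lhs != rhs) == (lhs ^ rhs));      // SETNE -> CRXOR  $lhs, $rhs
    }
  return 0;
}
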
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
deleted file mode 100644
index e5f113a..0000000
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
-//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the 32-bit PowerPC target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PPCJITInfo.h"
-#include "PPCRelocations.h"
-#include "PPCSubtarget.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
- : Subtarget(STI), is64Bit(STI.isPPC64()) {
- useGOT = 0;
-}
-
-#define BUILD_ADDIS(RD,RS,IMM16) \
- ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
-#define BUILD_ORI(RD,RS,UIMM16) \
- ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
-#define BUILD_ORIS(RD,RS,UIMM16) \
- ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
-#define BUILD_RLDICR(RD,RS,SH,ME) \
- ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
- (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
-#define BUILD_MTSPR(RS,SPR) \
- ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
-#define BUILD_BCCTRx(BO,BI,LINK) \
- ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
-#define BUILD_B(TARGET, LINK) \
- ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
-
-// Pseudo-ops
-#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
-#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
-#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
-#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
-
-static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
- intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
- unsigned *AtI = (unsigned*)(intptr_t)At;
-
- if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
- AtI[0] = BUILD_B(Offset, isCall); // b/bl target
- } else if (!is64Bit) {
- AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
- AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
- AtI[2] = BUILD_MTCTR(12); // mtctr r12
- AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
- } else {
- AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
- AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
- AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
- AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
- AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
- AtI[5] = BUILD_MTCTR(12); // mtctr r12
- AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
- }
-}
-
-extern "C" void PPC32CompilationCallback();
-extern "C" void PPC64CompilationCallback();
-
-// The first clause of the preprocessor directive looks wrong, but it is
-// necessary when compiling this code on non-PowerPC hosts.
-#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__)
-void PPC32CompilationCallback() {
- llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!");
-}
-#elif !defined(__ELF__)
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring.
-asm(
- ".text\n"
- ".align 2\n"
- ".globl _PPC32CompilationCallback\n"
-"_PPC32CompilationCallback:\n"
- // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
- // FIXME: need to save v[0-19] for altivec?
- // FIXME: could shrink frame
- // Set up a proper stack frame
- // FIXME Layout
- // PowerPC32 ABI linkage - 24 bytes
- // parameters - 32 bytes
- // 13 double registers - 104 bytes
- // 8 int registers - 32 bytes
- "mflr r0\n"
- "stw r0, 8(r1)\n"
- "stwu r1, -208(r1)\n"
- // Save all int arg registers
- "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
- "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
- "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
- "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
- // Save all call-clobbered FP regs.
- "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
- "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
- "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
- "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
- "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
- "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
- "stfd f1, 72(r1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 0.
- "mr r3, r0\n"
- "lwz r2, 208(r1)\n" // stub's frame
- "lwz r4, 8(r2)\n" // stub's lr
- "li r5, 0\n" // 0 == 32 bit
- "bl _LLVMPPCCompilationCallback\n"
- "mtctr r3\n"
- // Restore all int arg registers
- "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
- "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
- "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
- "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
- // Restore all FP arg registers
- "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
- "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
- "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
- "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
- "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
- "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
- "lfd f1, 72(r1)\n"
- // Pop 3 frames off the stack and branch to target
- "lwz r1, 208(r1)\n"
- "lwz r2, 8(r1)\n"
- "mtlr r2\n"
- "bctr\n"
- );
-
-#else
-// ELF PPC 32 support
-
-// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
-// write our own wrapper, which does things our way, so we have complete control
-// over register saving and restoring.
-asm(
- ".text\n"
- ".align 2\n"
- ".globl PPC32CompilationCallback\n"
-"PPC32CompilationCallback:\n"
- // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
- // FIXME: need to save v[0-19] for altivec?
- // FIXME: could shrink frame
- // Set up a proper stack frame
- // FIXME Layout
- // 8 double registers - 64 bytes
- // 8 int registers - 32 bytes
- "mflr 0\n"
- "stw 0, 4(1)\n"
- "stwu 1, -104(1)\n"
- // Save all int arg registers
- "stw 10, 100(1)\n" "stw 9, 96(1)\n"
- "stw 8, 92(1)\n" "stw 7, 88(1)\n"
- "stw 6, 84(1)\n" "stw 5, 80(1)\n"
- "stw 4, 76(1)\n" "stw 3, 72(1)\n"
- // Save all call-clobbered FP regs.
- "stfd 8, 64(1)\n"
- "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
- "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
- "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
- "stfd 1, 8(1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 0.
- "mr 3, 0\n"
- "lwz 5, 104(1)\n" // stub's frame
- "lwz 4, 4(5)\n" // stub's lr
- "li 5, 0\n" // 0 == 32 bit
- "bl LLVMPPCCompilationCallback\n"
- "mtctr 3\n"
- // Restore all int arg registers
- "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
- "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
- "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
- "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
- // Restore all FP arg registers
- "lfd 8, 64(1)\n"
- "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
- "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
- "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
- "lfd 1, 8(1)\n"
- // Pop 3 frames off the stack and branch to target
- "lwz 1, 104(1)\n"
- "lwz 0, 4(1)\n"
- "mtlr 0\n"
- "bctr\n"
- );
-#endif
-
-#if !defined(__powerpc64__) && !defined(__ppc64__)
-void PPC64CompilationCallback() {
- llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!");
-}
-#else
-# ifdef __ELF__
-asm(
- ".text\n"
- ".align 2\n"
- ".globl PPC64CompilationCallback\n"
-#if _CALL_ELF == 2
- ".type PPC64CompilationCallback,@function\n"
-"PPC64CompilationCallback:\n"
-#else
- ".section \".opd\",\"aw\",@progbits\n"
- ".align 3\n"
-"PPC64CompilationCallback:\n"
- ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
- ".size PPC64CompilationCallback,24\n"
- ".previous\n"
- ".align 4\n"
- ".type PPC64CompilationCallback,@function\n"
-".L.PPC64CompilationCallback:\n"
-#endif
-# else
-asm(
- ".text\n"
- ".align 2\n"
- ".globl _PPC64CompilationCallback\n"
-"_PPC64CompilationCallback:\n"
-# endif
- // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
- // FIXME: need to save v[0-19] for altivec?
- // Set up a proper stack frame
- // Layout
- // PowerPC64 ABI linkage - 48 bytes
- // parameters - 64 bytes
- // 13 double registers - 104 bytes
- // 8 int registers - 64 bytes
- "mflr 0\n"
- "std 0, 16(1)\n"
- "stdu 1, -280(1)\n"
- // Save all int arg registers
- "std 10, 272(1)\n" "std 9, 264(1)\n"
- "std 8, 256(1)\n" "std 7, 248(1)\n"
- "std 6, 240(1)\n" "std 5, 232(1)\n"
- "std 4, 224(1)\n" "std 3, 216(1)\n"
- // Save all call-clobbered FP regs.
- "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
- "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
- "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
- "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
- "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
- "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
- "stfd 1, 112(1)\n"
- // Arguments to Compilation Callback:
- // r3 - our lr (address of the call instruction in stub plus 4)
- // r4 - stub's lr (address of instruction that called the stub plus 4)
- // r5 - is64Bit - always 1.
- "mr 3, 0\n" // return address (still in r0)
- "ld 5, 280(1)\n" // stub's frame
- "ld 4, 16(5)\n" // stub's lr
- "li 5, 1\n" // 1 == 64 bit
-# ifdef __ELF__
- "bl LLVMPPCCompilationCallback\n"
- "nop\n"
-# else
- "bl _LLVMPPCCompilationCallback\n"
-# endif
- "mtctr 3\n"
- // Restore all int arg registers
- "ld 10, 272(1)\n" "ld 9, 264(1)\n"
- "ld 8, 256(1)\n" "ld 7, 248(1)\n"
- "ld 6, 240(1)\n" "ld 5, 232(1)\n"
- "ld 4, 224(1)\n" "ld 3, 216(1)\n"
- // Restore all FP arg registers
- "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
- "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
- "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
- "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
- "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
- "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
- "lfd 1, 112(1)\n"
- // Pop 3 frames off the stack and branch to target
- "ld 1, 280(1)\n"
- "ld 0, 16(1)\n"
- "mtlr 0\n"
- // XXX: any special TOC handling in the ELF case for JIT?
- "bctr\n"
- );
-#endif
-
-extern "C" {
-LLVM_LIBRARY_VISIBILITY void *
-LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
- // Adjust the pointer to the address of the call instruction in the stub
- // emitted by emitFunctionStub, rather than the instruction after it.
- unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
- unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
-
- void *Target = JITCompilerFunction(StubCallAddr);
-
- // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
- // it to branch directly to the destination. If so, rewrite it so it does not
- // need to go through the stub anymore.
- unsigned OrigCallInst = *OrigCallAddr;
- if ((OrigCallInst >> 26) == 18) { // Direct call.
- intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
-
- if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
- // Clear the original target out.
- OrigCallInst &= (63 << 26) | 3;
- // Fill in the new target.
- OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
- // Replace the call.
- *OrigCallAddr = OrigCallInst;
- }
- }
-
- // Assert that we are coming from a stub that was created with our
- // emitFunctionStub.
- if ((*StubCallAddr >> 26) == 18)
- StubCallAddr -= 3;
- else {
- assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
- StubCallAddr -= is64Bit ? 9 : 6;
- }
-
- // Rewrite the stub with an unconditional branch to the target, for any users
- // who took the address of the stub.
- EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
-
- // Put the address of the target function to call and the address to return to
- // after calling the target function in a place that is easy to get on the
- // stack after we restore all regs.
- return Target;
-}
-}
-
-
-
-TargetJITInfo::LazyResolverFn
-PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
- JITCompilerFunction = Fn;
- return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
-}
-
-TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
- // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
- // instructions to save the caller's address if this is a lazy-compilation
- // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
- // into a register and jump through it.
- StubLayout Result = {10*4, 4};
- return Result;
-}
-
-#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
-defined(__APPLE__)
-extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
-#endif
-
-void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) {
- // If this is just a call to an external function, emit a branch instead of a
- // call. The code is the same except for one bit of the last instruction.
- if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
- Fn != (void*)(intptr_t)PPC64CompilationCallback) {
- void *Addr = (void*)JCE.getCurrentPCValue();
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(Addr, 7*4);
- return Addr;
- }
-
- void *Addr = (void*)JCE.getCurrentPCValue();
- if (is64Bit) {
- JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
- } else if (Subtarget.isDarwinABI()){
- JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
- } else {
- JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
- JCE.emitWordBE(0x7d6802a6); // mflr r11
- JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
- }
- intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- JCE.emitWordBE(0);
- EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
- sys::Memory::InvalidateInstructionCache(Addr, 10*4);
- return Addr;
-}
-
-
-void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
- intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
- switch ((PPC::RelocationType)MR->getRelocationType()) {
- default: llvm_unreachable("Unknown relocation type!");
- case PPC::reloc_pcrel_bx:
- // PC-relative relocation for b and bl instructions.
- ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
- assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
- "Relocation out of range!");
- *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
- break;
- case PPC::reloc_pcrel_bcx:
- // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
- // bcx instructions.
- ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
- assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
- "Relocation out of range!");
- *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
- break;
- case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
- case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
- ResultPtr += MR->getConstantVal();
-
- // If this is a high-part access, get the high-part.
- if (MR->getRelocationType() == PPC::reloc_absolute_high) {
- // If the low part will have a carry (really a borrow) from the low
- // 16-bits into the high 16, add a bit to borrow from.
- if (((int)ResultPtr << 16) < 0)
- ResultPtr += 1 << 16;
- ResultPtr >>= 16;
- }
-
- // Do the addition then mask, so the addition does not overflow the 16-bit
- // immediate section of the instruction.
- unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
- unsigned HighBits = *RelocPos & ~65535;
- *RelocPos = LowBits | HighBits; // Slam into low 16-bits
- break;
- }
- case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
- ResultPtr += MR->getConstantVal();
- // Do the addition then mask, so the addition does not overflow the 16-bit
- // immediate section of the instruction.
- unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
- unsigned HighBits = *RelocPos & 0xFFFF0003;
- *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
- break;
- }
- }
- }
-}
-
-void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
- sys::Memory::InvalidateInstructionCache(Old, 7*4);
-}
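
The high/low relocation pair in the relocate() code removed above relies on the low 16 bits being consumed as a signed immediate, which is why reloc_absolute_high adds 0x10000 before shifting when bit 15 of the target is set. A minimal sketch of that adjustment (plain arithmetic, no LLVM APIs):

#include <cassert>
#include <cstdint>

// Split a 32-bit address into an addis/addi pair: the low half is a signed
// 16-bit immediate, so the high half must absorb the borrow when bit 15 is set.
static void splitHighLow(uint32_t Addr, uint16_t &Hi, int16_t &Lo) {
  Lo = static_cast<int16_t>(Addr & 0xFFFF);      // sign-extended by hardware
  uint32_t HiPart = Addr;
  if (Addr & 0x8000)                             // same condition as the removed
    HiPart += 0x10000;                           //   ((int)x << 16) < 0 test
  Hi = static_cast<uint16_t>(HiPart >> 16);
}

int main() {
  uint16_t Hi; int16_t Lo;
  splitHighLow(0x10008000u, Hi, Lo);
  // addis reg, 0, Hi ; addi reg, reg, Lo reconstructs the original address.
  assert((uint32_t)((Hi << 16) + Lo) == 0x10008000u);
  assert(Hi == 0x1001 && Lo == -0x8000);
  return 0;
}
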
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
deleted file mode 100644
index b6b37ff..0000000
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PowerPC implementation of the TargetJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef POWERPC_JITINFO_H
-#define POWERPC_JITINFO_H
-
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
-class PPCSubtarget;
-class PPCJITInfo : public TargetJITInfo {
-protected:
- PPCSubtarget &Subtarget;
- bool is64Bit;
-
-public:
- PPCJITInfo(PPCSubtarget &STI);
-
- StubLayout getStubLayout() override;
- void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) override;
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
- void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
- unsigned char *GOTBase) override;
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-};
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index f8e84a5..880b520 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
@@ -37,14 +38,16 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
const TargetMachine &TM = AP.TM;
Mangler *Mang = AP.Mang;
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
MCContext &Ctx = AP.OutContext;
+ bool isDarwin = Triple(TM.getTargetTriple()).isOSDarwin();
SmallString<128> Name;
StringRef Suffix;
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB)
- Suffix = "$stub";
- else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) {
+ if (isDarwin)
+ Suffix = "$stub";
+ } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
Suffix = "$non_lazy_ptr";
if (!Suffix.empty())
@@ -68,7 +71,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
// If the target flags on the operand changes the name of the symbol, do that
// before we return the symbol.
- if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) {
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI(AP).getFnStubEntry(Sym);
if (StubSym.getPointer())
@@ -134,8 +137,17 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
+ case PPCII::MO_TLSGD:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
+ break;
+ case PPCII::MO_TLSLD:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
+ break;
}
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
+ RefKind = MCSymbolRefExpr::VK_PLT;
+
const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
if (!MO.isJTI() && MO.getOffset())
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 6a0aec8..4aff95a 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,8 +8,17 @@
//===----------------------------------------------------------------------===//
#include "PPCMachineFunctionInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
void PPCFunctionInfo::anchor() { }
+MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
+ const DataLayout *DL = MF.getSubtarget().getDataLayout();
+ return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
+ Twine(MF.getFunctionNumber())+"$poff");
+}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 33f843d..83de799 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPC_MACHINE_FUNCTION_INFO_H
-#define PPC_MACHINE_FUNCTION_INFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
@@ -92,6 +92,12 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// 64-bit SVR4 ABI.
SmallVector<unsigned, 3> MustSaveCRs;
+ /// Hold onto our MachineFunction context.
+ MachineFunction &MF;
+
+ /// Whether this uses the PIC Base register or not.
+ bool UsesPICBase;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -109,7 +115,9 @@ public:
VarArgsStackOffset(0),
VarArgsNumGPR(0),
VarArgsNumFPR(0),
- CRSpillFrameIndex(0) {}
+ CRSpillFrameIndex(0),
+ MF(MF),
+ UsesPICBase(0) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -170,6 +178,11 @@ public:
const SmallVectorImpl<unsigned> &
getMustSaveCRs() const { return MustSaveCRs; }
void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
+
+ void setUsesPICBase(bool uses) { UsesPICBase = uses; }
+ bool usesPICBase() const { return UsesPICBase; }
+
+ MCSymbol *getPICOffsetSymbol() const;
};
} // end of namespace llvm
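
The symbol built by getPICOffsetSymbol() above is just the private-global prefix, the function number, and "$poff". Assuming an ELF private prefix of ".L" and function number 0, the resulting label would look like the string below; this only mirrors the string construction, it does not use the LLVM APIs:

#include <cassert>
#include <string>

int main() {
  std::string Prefix = ".L";        // assumed ELF private-global prefix
  unsigned FunctionNumber = 0;      // illustrative function number
  std::string PoffLabel = Prefix + std::to_string(FunctionNumber) + "$poff";
  assert(PoffLabel == ".L0$poff");
  return 0;
}
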
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
index 17b836d..8a1d680 100644
--- a/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCPERFECTSHUFFLE_H
+
// 31 entries have cost 0
// 292 entries have cost 1
// 1384 entries have cost 2
@@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = {
835584U, // <u,u,u,u>: Cost 0 copy LHS
0
};
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index eca774e..9b9966f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -140,8 +140,8 @@ PPCRegisterInfo::getNoPreservedMask() const {
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
@@ -199,7 +199,16 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (PPCFI->needsFP(MF))
Reserved.set(PPC::R31);
- if (hasBasePointer(MF))
+ if (hasBasePointer(MF)) {
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ Reserved.set(PPC::R29);
+ else
+ Reserved.set(PPC::R30);
+ }
+
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -214,7 +223,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
unsigned
PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const unsigned DefaultSafety = 1;
switch (RC->getID()) {
@@ -278,7 +287,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Determine whether 64-bit pointers are used.
bool LP64 = Subtarget.isPPC64();
DebugLoc dl = MI.getDebugLoc();
@@ -289,7 +298,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned FrameSize = MFI->getStackSize();
// Get stack alignments.
- unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
"Maximum call-frame size not sufficiently aligned");
@@ -394,7 +406,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -438,7 +450,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -511,7 +523,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -554,7 +566,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -601,7 +613,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -626,7 +638,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -706,7 +718,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
// Get the instruction info.
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -831,7 +843,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (!Subtarget.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
@@ -843,7 +855,14 @@ unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
if (!hasBasePointer(MF))
return getFrameRegister(MF);
- return Subtarget.isPPC64() ? PPC::X30 : PPC::R30;
+ if (Subtarget.isPPC64())
+ return PPC::X30;
+
+ if (Subtarget.isSVR4ABI() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ return PPC::R29;
+
+ return PPC::R30;
}
bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
@@ -868,7 +887,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -885,16 +907,6 @@ bool PPCRegisterInfo::
needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
assert(Offset < 0 && "Local offset must be negative");
- unsigned FIOperandNum = 0;
- while (!MI->getOperand(FIOperandNum).isFI()) {
- ++FIOperandNum;
- assert(FIOperandNum < MI->getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
- Offset += MI->getOperand(OffsetOperandNo).getImm();
-
// It's the load/store FI references that cause issues, as it can be difficult
// to materialize the offset if it won't fit in the literal field. Estimate
// based on the size of the local frame and some conservative assumptions
@@ -916,8 +928,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
- const PPCFrameLowering *PPCFI =
- static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
unsigned StackEst =
PPCFI->determineFrameLayout(MF, false, true);
@@ -951,7 +963,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
@@ -976,7 +988,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.constrainRegClass(BaseReg,
@@ -985,6 +997,16 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
int64_t Offset) const {
+ unsigned FIOperandNum = 0;
+ while (!MI->getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+ Offset += MI->getOperand(OffsetOperandNo).getImm();
+
return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 13a35f6..c182f95 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC32_REGISTERINFO_H
-#define POWERPC32_REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
#include "PPC.h"
#include "llvm/ADT/DenseMap.h"
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
deleted file mode 100644
index 0b392f9..0000000
--- a/lib/Target/PowerPC/PPCRelocations.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PowerPC 32-bit target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCRELOCATIONS_H
-#define PPCRELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-// Hack to rid us of a PPC pre-processor symbol which is erroneously
-// defined in a PowerPC header file (bug in Linux/PPC)
-#ifdef PPC
-#undef PPC
-#endif
-
-namespace llvm {
- namespace PPC {
- enum RelocationType {
- // reloc_vanilla - A standard relocation, where the address of the
- // relocated object completely overwrites the address of the relocation.
- reloc_vanilla,
-
- // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
- reloc_pcrel_bx,
-
- // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
- // and other bcx instructions.
- reloc_pcrel_bcx,
-
- // reloc_absolute_high - Absolute relocation, for the loadhi instruction
- // (which is really addis). Add the high 16-bits of the specified global
- // address into the low 16-bits of the instruction.
- reloc_absolute_high,
-
- // reloc_absolute_low - Absolute relocation, for the la instruction (which
- // is really an addi). Add the low 16-bits of the specified global
- // address into the low 16-bits of the instruction.
- reloc_absolute_low,
-
- // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
- // instruction which have two implicit zero bits.
- reloc_absolute_low_ix
- };
- }
-}
-
-#endif
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 1221d41..7f80121 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -106,6 +106,7 @@ def IIC_SprSLBIE : InstrItinClass;
def IIC_SprSLBMTE : InstrItinClass;
def IIC_SprSLBMFEE : InstrItinClass;
def IIC_SprSLBIA : InstrItinClass;
+def IIC_SprTLBIA : InstrItinClass;
def IIC_SprTLBIEL : InstrItinClass;
def IIC_SprTLBIE : InstrItinClass;
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index b2e7f3b..2c1378d 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPCCSELECTIONDAGINFO_H
-#define POWERPCCSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 2e1b74a..04e7ec6 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -33,13 +33,12 @@ using namespace llvm;
#include "PPCGenSubtargetInfo.inc"
/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const PPCSubtarget &ST) {
- const Triple &T = ST.getTargetTriple();
-
+static std::string getDataLayoutString(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
std::string Ret;
// Most PPC* platforms are big endian, PPC64LE is little endian.
- if (ST.isLittleEndian())
+ if (T.getArch() == Triple::ppc64le)
Ret = "e";
else
Ret = "E";
@@ -48,18 +47,18 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) {
// PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
// pointers.
- if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ if (!is64Bit || T.getOS() == Triple::Lv2)
Ret += "-p:32:32";
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
- if (ST.isPPC64() || ST.isSVR4ABI())
+ if (is64Bit || !T.isOSDarwin())
Ret += "-i64:64";
else
Ret += "-f64:32:64";
// PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
- if (ST.isPPC64())
+ if (is64Bit)
Ret += "-n32:64";
else
Ret += "-n32";
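
For reference, the decisions visible in this hunk produce datalayout prefixes along the following lines. This sketch covers only the choices shown above (endianness, pointer width, i64/f64 alignment, native integer widths); the real function appends further components that are not part of this hunk:

#include <cassert>
#include <string>

// Sketch of the datalayout prefix implied by the hunk above for common
// PowerPC configurations; only the visible decisions are modeled.
static std::string dataLayoutPrefix(bool Is64Bit, bool IsLittle, bool IsDarwin,
                                    bool IsLv2 = false) {
  std::string Ret = IsLittle ? "e" : "E";
  if (!Is64Bit || IsLv2)
    Ret += "-p:32:32";
  if (Is64Bit || !IsDarwin)
    Ret += "-i64:64";
  else
    Ret += "-f64:32:64";
  Ret += Is64Bit ? "-n32:64" : "-n32";
  return Ret;
}

int main() {
  assert(dataLayoutPrefix(false, false, false) == "E-p:32:32-i64:64-n32");
  assert(dataLayoutPrefix(true,  false, false) == "E-i64:64-n32:64");
  assert(dataLayoutPrefix(true,  true,  false) == "e-i64:64-n32:64");
  return 0;
}
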
@@ -70,47 +69,20 @@ static std::string getDataLayoutString(const PPCSubtarget &ST) {
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
+ initSubtargetFeatures(CPU, FS);
return *this;
}
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, PPCTargetMachine &TM,
- bool is64Bit, CodeGenOpt::Level OptLevel)
- : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
- OptLevel(OptLevel),
- FrameLowering(initializeSubtargetDependencies(CPU, FS)),
- DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+ const std::string &FS, const PPCTargetMachine &TM)
+ : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT),
+ DL(getDataLayoutString(TargetTriple)),
+ IsPPC64(TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le),
+ TargetABI(PPC_ABI_UNKNOWN),
+ FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this),
TLInfo(TM), TSInfo(&DL) {}
-/// SetJITMode - This is called to inform the subtarget info that we are
-/// producing code for the JIT.
-void PPCSubtarget::SetJITMode() {
- // JIT mode doesn't want lazy resolver stubs, it knows exactly where
- // everything is. This matters for PPC64, which codegens in PIC mode without
- // stubs.
- HasLazyResolverStubs = false;
-
- // Calls to external functions need to use indirect calls
- IsJITCodeModel = true;
-}
-
-void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-cpu");
- Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-features");
- std::string CPU =
- !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
- std::string FS =
- !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty()) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
- }
-}
-
void PPCSubtarget::initializeEnvironment() {
StackAlignment = 16;
DarwinDirective = PPC::DIR_NONE;
@@ -119,8 +91,10 @@ void PPCSubtarget::initializeEnvironment() {
Use64BitRegs = false;
UseCRBits = false;
HasAltivec = false;
+ HasSPE = false;
HasQPX = false;
HasVSX = false;
+ HasP8Vector = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -136,13 +110,16 @@ void PPCSubtarget::initializeEnvironment() {
HasPOPCNTD = false;
HasLDBRX = false;
IsBookE = false;
+ HasOnlyMSYNC = false;
+ IsPPC4xx = false;
+ IsPPC6xx = false;
+ IsE500 = false;
DeprecatedMFTB = false;
DeprecatedDST = false;
HasLazyResolverStubs = false;
- IsJITCodeModel = false;
}
-void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
if (CPUName.empty())
@@ -156,35 +133,13 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
- // Make sure 64-bit features are available when CPUname is generic
- std::string FullFS = FS;
-
- // If we are generating code for ppc64, verify that options make sense.
- if (IsPPC64) {
- Has64BitSupport = true;
- // Silently force 64-bit register use on ppc64.
- Use64BitRegs = true;
- if (!FullFS.empty())
- FullFS = "+64bit," + FullFS;
- else
- FullFS = "+64bit";
- }
-
- // At -O2 and above, track CR bits as individual registers.
- if (OptLevel >= CodeGenOpt::Default) {
- if (!FullFS.empty())
- FullFS = "+crbits," + FullFS;
- else
- FullFS = "+crbits";
- }
-
// Parse features string.
- ParseSubtargetFeatures(CPUName, FullFS);
+ ParseSubtargetFeatures(CPUName, FS);
// If the user requested use of 64-bit regs, but the cpu selected doesn't
// support it, ignore.
- if (use64BitRegs() && !has64BitSupport())
- Use64BitRegs = false;
+ if (IsPPC64 && has64BitSupport())
+ Use64BitRegs = true;
// Set up darwin-specific properties.
if (isDarwin())
@@ -201,8 +156,20 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// FIXME: For now, we disable VSX in little-endian mode until endian
// issues in those instructions can be addressed.
- if (IsLittleEndian)
+ if (IsLittleEndian) {
HasVSX = false;
+ HasP8Vector = false;
+ }
+
+ // Determine default ABI.
+ if (TargetABI == PPC_ABI_UNKNOWN) {
+ if (!isDarwin() && IsPPC64) {
+ if (IsLittleEndian)
+ TargetABI = PPC_ABI_ELFv2;
+ else
+ TargetABI = PPC_ABI_ELFv1;
+ }
+ }
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
@@ -213,31 +180,13 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
// We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
return false;
- // If symbol visibility is hidden, the extra load is not needed if
- // the symbol is definitely defined in the current translation unit.
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ bool isDecl = GV->isDeclaration();
if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
return false;
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage() || isDecl;
}
-bool PPCSubtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_ALL;
-
- CriticalPathRCs.clear();
-
- if (isPPC64())
- CriticalPathRCs.push_back(&PPC::G8RCRegClass);
- else
- CriticalPathRCs.push_back(&PPC::GPRCRegClass);
-
- return OptLevel >= CodeGenOpt::Default;
-}
-
// Embedded cores need aggressive scheduling (and some others also benefit).
static bool needsAggressiveScheduling(unsigned Directive) {
switch (Directive) {
@@ -259,6 +208,19 @@ bool PPCSubtarget::enableMachineScheduler() const {
return needsAggressiveScheduling(DarwinDirective);
}
+// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
+
+PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
+ return TargetSubtargetInfo::ANTIDEP_ALL;
+}
+
+void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(isPPC64() ?
+ &PPC::G8RCRegClass : &PPC::GPRCRegClass);
+}
+
void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 2a16699..1df19c3 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPCSUBTARGET_H
-#define POWERPCSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSUBTARGET_H
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCISelLowering.h"
-#include "PPCJITInfo.h"
#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
@@ -66,6 +65,12 @@ class TargetMachine;
class PPCSubtarget : public PPCGenSubtargetInfo {
protected:
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ // Calculates type size & alignment
+ const DataLayout DL;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned StackAlignment;
@@ -83,8 +88,10 @@ protected:
bool UseCRBits;
bool IsPPC64;
bool HasAltivec;
+ bool HasSPE;
bool HasQPX;
bool HasVSX;
+ bool HasP8Vector;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -97,22 +104,23 @@ protected:
bool HasPOPCNTD;
bool HasLDBRX;
bool IsBookE;
+ bool HasOnlyMSYNC;
+ bool IsE500;
+ bool IsPPC4xx;
+ bool IsPPC6xx;
bool DeprecatedMFTB;
bool DeprecatedDST;
bool HasLazyResolverStubs;
- bool IsJITCodeModel;
bool IsLittleEndian;
- /// TargetTriple - What processor and OS we're targeting.
- Triple TargetTriple;
-
- /// OptLevel - What default optimization level we're emitting code for.
- CodeGenOpt::Level OptLevel;
+ enum {
+ PPC_ABI_UNKNOWN,
+ PPC_ABI_ELFv1,
+ PPC_ABI_ELFv2
+ } TargetABI;
PPCFrameLowering FrameLowering;
- const DataLayout DL;
PPCInstrInfo InstrInfo;
- PPCJITInfo JITInfo;
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
@@ -121,17 +129,12 @@ public:
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
- CodeGenOpt::Level OptLevel);
+ const std::string &FS, const PPCTargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// SetJITMode - This is called to inform the subtarget info that we are
- /// producing code for the JIT.
- void SetJITMode();
-
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
@@ -143,24 +146,32 @@ public:
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
-
- const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const DataLayout *getDataLayout() const { return &DL; }
- const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
- PPCJITInfo *getJITInfo() { return &JITInfo; }
- const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
- const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ const PPCFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const PPCTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const PPCSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
- /// \brief Reset the features for the PowerPC target.
- void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
void initializeEnvironment();
- void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
public:
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
@@ -186,9 +197,6 @@ public:
bool hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const;
- // isJITCodeModel - True if we're generating code for the JIT
- bool isJITCodeModel() const { return IsJITCodeModel; }
-
// isLittleEndian - True if generating little-endian code
bool isLittleEndian() const { return IsLittleEndian; }
@@ -205,13 +213,19 @@ public:
bool hasFPRND() const { return HasFPRND; }
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
+ bool hasSPE() const { return HasSPE; }
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
+ bool hasP8Vector() const { return HasP8Vector; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
+ bool hasOnlyMSYNC() const { return HasOnlyMSYNC; }
+ bool isPPC4xx() const { return IsPPC4xx; }
+ bool isPPC6xx() const { return IsPPC6xx; }
+ bool isE500() const { return IsE500; }
bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
@@ -222,18 +236,22 @@ public:
/// isBGQ - True if this is a BG/Q platform.
bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
-
- /// enablePostRAScheduler - True at 'More' optimization.
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const override;
+ bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
bool enableEarlyIfConversion() const override { return hasISEL(); }
// Scheduling customization.
bool enableMachineScheduler() const override;
+ // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+ bool enablePostMachineScheduler() const override;
+ AntiDepBreakMode getAntiDepBreakMode() const override;
+ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+
void overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9563b90..f15189c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
#include "PPC.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
@@ -37,15 +39,54 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
+static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, StringRef TT) {
+ std::string FullFS = FS;
+ Triple TargetTriple(TT);
+
+  // Make sure 64-bit features are available when the CPU name is generic
+ if (TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le) {
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
+
+ if (OL >= CodeGenOpt::Default) {
+ if (!FullFS.empty())
+ FullFS = "+crbits," + FullFS;
+ else
+ FullFS = "+crbits";
+ }
+ return FullFS;
+}
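
As a rough illustration of what computeFSAdditions produces, the standalone sketch below mirrors the string handling for an empty user feature string on a 64-bit triple at -O2 or above. It is not part of the patch and uses no LLVM types.

// Standalone sketch of the feature-string additions above.
#include <iostream>
#include <string>

static std::string addFeature(std::string FS, const std::string &Feature) {
  return FS.empty() ? Feature : Feature + "," + FS;
}

int main() {
  std::string FS;                     // user-supplied features, empty here
  FS = addFeature(FS, "+64bit");      // ppc64/ppc64le triples
  FS = addFeature(FS, "+crbits");     // CodeGenOpt::Default and above
  std::cout << FS << "\n";            // prints "+crbits,+64bit"
}
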
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ // If it isn't a Mach-O file then it's going to be a linux ELF
+ // object file.
+ if (TT.isOSDarwin())
+ return make_unique<TargetLoweringObjectFileMachO>();
+
+ return make_unique<PPC64LinuxTargetObjectFile>();
+}
+
+// The FeatureString here is a little subtle. We are modifying the feature string
+// with what are (currently) non-function specific overrides as it goes into the
+// LLVMTargetMachine constructor and then using the stored value in the
+// Subtarget constructor below it.
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
+ CM, OL),
+ TLOF(createTLOF(Triple(getTargetTriple()))),
+ Subtarget(TT, CPU, TargetFS, *this) {
initAsmInfo();
}
+PPCTargetMachine::~PPCTargetMachine() {}
+
void PPC32TargetMachine::anchor() { }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
@@ -53,7 +94,7 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
}
void PPC64TargetMachine::anchor() { }
@@ -63,9 +104,34 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
}
+const PPCSubtarget *
+PPCTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+ }
+ return I.get();
+}
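
The per-function lookup above memoizes subtargets by the concatenated CPU and feature strings, so functions whose target-cpu/target-features attributes match share one PPCSubtarget. The sketch below illustrates that caching pattern with plain standard-library types; the names are illustrative, not LLVM APIs, and it is not part of the patch.

// Standalone sketch of the per-function subtarget caching pattern.
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct Subtarget {
  std::string CPU, FS;
  Subtarget(std::string C, std::string F) : CPU(std::move(C)), FS(std::move(F)) {}
};

class TargetMachineSketch {
  // One entry per distinct CPU + feature-string combination.
  mutable std::unordered_map<std::string, std::unique_ptr<Subtarget>> SubtargetMap;
public:
  const Subtarget *getSubtargetFor(const std::string &CPU,
                                   const std::string &FS) const {
    auto &I = SubtargetMap[CPU + FS];
    if (!I)
      I = std::make_unique<Subtarget>(CPU, FS);
    return I.get();
  }
};

int main() {
  TargetMachineSketch TM;
  const Subtarget *A = TM.getSubtargetFor("pwr8", "+64bit,+crbits");
  const Subtarget *B = TM.getSubtargetFor("pwr8", "+64bit,+crbits");
  std::cout << (A == B) << "\n";   // 1: identical attributes share one subtarget
}
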
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
@@ -86,6 +152,7 @@ public:
return *getPPCTargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addILPOpts() override;
bool addInstSelector() override;
@@ -99,6 +166,11 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
return new PPCPassConfig(this, PM);
}
+void PPCPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+ TargetPassConfig::addIRPasses();
+}
+
bool PPCPassConfig::addPreISel() {
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoops(getPPCTargetMachine()));
@@ -148,18 +220,6 @@ bool PPCPassConfig::addPreEmitPass() {
return false;
}
-bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
- // writing?
- Subtarget.SetJITMode();
-
- // Machine code emitter pass for PowerPC.
- PM.add(createPPCJITCodeEmitterPass(*this, JCE));
-
- return false;
-}
-
void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// Add first the target-independent BasicTTI pass, then our PPC pass. This
// allows the PPC pass to delegate to the target independent layer when
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 4c7029c..5095d73 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPC_TARGETMACHINE_H
-#define PPC_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETMACHINE_H
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
@@ -24,46 +24,30 @@ namespace llvm {
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
class PPCTargetMachine : public LLVMTargetMachine {
- PPCSubtarget Subtarget;
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ PPCSubtarget Subtarget;
+
+ mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64Bit);
+ CodeGenOpt::Level OL);
- const PPCInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const PPCFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
- const PPCTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const PPCRegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
+ ~PPCTargetMachine() override;
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const InstrItineraryData *getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
+ const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) override;
/// \brief Register PPC analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
index 3e71bbc..cd84da2 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H
-#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 74b5f45..6493713 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPCTARGETSTREAMER_H
-#define PPCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
#include "llvm/MC/MCStreamer.h"
@@ -19,6 +19,8 @@ public:
virtual ~PPCTargetStreamer();
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
+ virtual void emitAbiVersion(int AbiVersion) = 0;
+ virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0;
};
}
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 007901b..37624ed 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -38,6 +38,7 @@ void initializePPCTTIPass(PassRegistry &);
namespace {
class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
+ const TargetMachine *TM;
const PPCSubtarget *ST;
const PPCTargetLowering *TLI;
@@ -47,16 +48,16 @@ public:
}
PPCTTI(const PPCTargetMachine *TM)
- : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializePPCTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override {
+ void initializePass() override {
pushTTIStack(this);
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
@@ -64,7 +65,7 @@ public:
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;
@@ -79,33 +80,31 @@ public:
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const override;
- virtual PopcntSupportKind
- getPopcntSupport(unsigned TyWidth) const override;
- virtual void getUnrollingPreferences(
- Loop *L, UnrollingPreferences &UP) const override;
+ PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
+ void getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const override;
/// @}
/// \name Vector TTI Implementations
/// @{
- virtual unsigned getNumberOfRegisters(bool Vector) const override;
- virtual unsigned getRegisterBitWidth(bool Vector) const override;
- virtual unsigned getMaximumUnrollFactor() const override;
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind,
- OperandValueKind) const override;
- virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
- int Index, Type *SubTp) const override;
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override;
- virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const override;
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const override;
- virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const override;
+ unsigned getNumberOfRegisters(bool Vector) const override;
+ unsigned getRegisterBitWidth(bool Vector) const override;
+ unsigned getMaxInterleaveFactor() const override;
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override;
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const override;
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const override;
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const override;
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const override;
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const override;
/// @}
};
@@ -271,8 +270,9 @@ unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return PPCTTI::getIntImmCost(Imm, Ty);
}
-void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
- if (ST->getDarwinDirective() == PPC::DIR_A2) {
+void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const {
+ if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
UP.Partial = UP.Runtime = true;
@@ -297,7 +297,7 @@ unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
}
-unsigned PPCTTI::getMaximumUnrollFactor() const {
+unsigned PPCTTI::getMaxInterleaveFactor() const {
unsigned Directive = ST->getDarwinDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
@@ -318,14 +318,15 @@ unsigned PPCTTI::getMaximumUnrollFactor() const {
return 2;
}
-unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
+unsigned PPCTTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
- Op2Info);
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
}
unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 713fc4b..261075e 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -8,8 +8,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_H
-#define AMDGPU_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPU_H
+#define LLVM_LIB_TARGET_R600_AMDGPU_H
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -39,6 +39,8 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIShrinkInstructionsPass();
+FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRLiveRangesPass();
@@ -48,10 +50,14 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
+void initializeSILoadStoreOptimizerPass(PassRegistry &);
+extern char &SILoadStoreOptimizerID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
+ModulePass *createAMDGPUAlwaysInlinePass();
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
ImmutablePass *
@@ -63,6 +69,14 @@ extern char &SIFixSGPRLiveRangesID;
extern Target TheAMDGPUTarget;
+namespace AMDGPU {
+enum TargetIndex {
+ TI_CONSTDATA_START
+};
+}
+
+#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel"
+
} // End namespace llvm
namespace ShaderType {
@@ -118,4 +132,4 @@ enum AddressSpaces {
} // namespace AMDGPUAS
-#endif // AMDGPU_H
+#endif
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 6ff9ab7..4cf1243 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -25,6 +25,11 @@ def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
"false",
"Disable IR Structurizer">;
+def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
+ "EnablePromoteAlloca",
+ "true",
+ "Enable promote alloca pass">;
+
// Target features
def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
@@ -37,6 +42,20 @@ def FeatureFP64 : SubtargetFeature<"fp64",
"true",
"Enable double precision operations">;
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64Denormals",
+ "true",
+ "Enable double precision denormal handling",
+ [FeatureFP64]>;
+
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+ "FP32Denormals",
+ "true",
+ "Enable single precision denormal handling">;
+
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
"Is64bit",
"true",
@@ -62,6 +81,17 @@ def FeatureCFALUBug : SubtargetFeature<"cfalubug",
"true",
"GPU has CF_ALU bug">;
+// XXX - This should probably be removed once enabled by default
+def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
+ "EnableLoadStoreOpt",
+ "true",
+ "Enable SI load/store optimizer pass">;
+
+def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
+ "FlatAddressSpace",
+ "true",
+ "Support flat address space">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
@@ -111,19 +141,28 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
>;
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
- [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768]>;
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
+ FeatureWavefrontSize64]>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
- [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536]>;
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
let guessInstructionProperties = 1;
}
+def AMDGPUAsmParser : AsmParser {
+ // Some of the R600 registers have the same name, so this crashes.
+ // For example T0_XYZW and T0_XY both have the asm name T0.
+ let ShouldEmitMatchRegisterName = 0;
+}
+
def AMDGPU : Target {
// Pull in Instruction Info:
let InstructionSet = AMDGPUInstrInfo;
+ let AssemblyParsers = [AMDGPUAsmParser];
}
// Dummy Instruction itineraries for pseudo instructions
diff --git a/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
new file mode 100644
index 0000000..b545b45
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAlwaysInlinePass.cpp
@@ -0,0 +1,66 @@
+//===-- AMDGPUAlwaysInlinePass.cpp - Always Inline Pass --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass marks all internal functions as always_inline and creates
+/// duplicates of all other functions and marks the duplicates as always_inline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAlwaysInline : public ModulePass {
+
+ static char ID;
+
+public:
+ AMDGPUAlwaysInline() : ModulePass(ID) { }
+ bool runOnModule(Module &M) override;
+ const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
+};
+
+} // End anonymous namespace
+
+char AMDGPUAlwaysInline::ID = 0;
+
+bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+
+ std::vector<Function*> FuncsToClone;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
+ FuncsToClone.push_back(&F);
+ }
+
+ for (Function *F : FuncsToClone) {
+ ValueToValueMapTy VMap;
+ Function *NewFunc = CloneFunction(F, VMap, false);
+ NewFunc->setLinkage(GlobalValue::InternalLinkage);
+ F->getParent()->getFunctionList().push_back(NewFunc);
+ F->replaceAllUsesWith(NewFunc);
+ }
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function &F = *I;
+ if (F.hasLocalLinkage()) {
+ F.addFnAttr(Attribute::AlwaysInline);
+ }
+ }
+ return false;
+}
+
+ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
+ return new AMDGPUAlwaysInline();
+}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index a6e217b..5511d7c 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -16,7 +16,6 @@
//===----------------------------------------------------------------------===//
//
-
#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
@@ -26,6 +25,7 @@
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
@@ -48,11 +48,28 @@ using namespace llvm;
// precision, and leaves single precision to flush all and does not report
// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
// CL_FP_DENORM for both.
-static uint32_t getFPMode(MachineFunction &) {
+//
+// FIXME: It seems some instructions do not support single precision denormals
+// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
+// and sin_f32, cos_f32 on most parts).
+
+// We want to use these instructions, and using fp32 denormals also causes
+// instructions to run at the double precision rate for the device so it's
+// probably best to just report no single precision denormals.
+static uint32_t getFPMode(const MachineFunction &F) {
+ const AMDGPUSubtarget& ST = F.getTarget().getSubtarget<AMDGPUSubtarget>();
+ // TODO: Is there any real use for the flush in only / flush out only modes?
+
+ uint32_t FP32Denormals =
+ ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
+ uint32_t FP64Denormals =
+ ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT;
+
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
- FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
- FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
+ FP_DENORM_MODE_SP(FP32Denormals) |
+ FP_DENORM_MODE_DP(FP64Denormals);
}
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
@@ -69,10 +86,24 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
}
+void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+
+ // This label is used to mark the end of the .text section.
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ OutStreamer.SwitchSection(TLOF.getTextSection());
+ MCSymbol *EndOfTextLabel =
+ OutContext.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ OutStreamer.EmitLabel(EndOfTextLabel);
+}
+
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+
+ // The starting address of all shader programs must be 256 bytes aligned.
+ MF.setAlignment(8);
+
SetupMachineFunction(MF);
- OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
+ EmitFunctionHeader();
MCContext &Context = getObjFileLowering().getContext();
const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
@@ -115,6 +146,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
false);
+ OutStreamer.emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
+ false);
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
OutStreamer.emitRawComment(
@@ -145,25 +178,21 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
+void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
unsigned MaxGPR = 0;
bool killPixel = false;
- const R600RegisterInfo * RI =
- static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ const R600RegisterInfo *RI = static_cast<const R600RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
if (MI.getOpcode() == AMDGPU::KILLGT)
killPixel = true;
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- MachineOperand & MO = MI.getOperand(op_idx);
+ const MachineOperand &MO = MI.getOperand(op_idx);
if (!MO.isReg())
continue;
unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
@@ -179,7 +208,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
unsigned RsrcReg;
if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
// Evergreen / Northern Islands
- switch (MFI->ShaderType) {
+ switch (MFI->getShaderType()) {
default: // Fall through
case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
@@ -188,7 +217,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
}
} else {
// R600 / R700
- switch (MFI->ShaderType) {
+ switch (MFI->getShaderType()) {
default: // Fall through
case ShaderType::GEOMETRY: // Fall through
case ShaderType::COMPUTE: // Fall through
@@ -203,34 +232,30 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
- if (MFI->ShaderType == ShaderType::COMPUTE) {
+ if (MFI->getShaderType() == ShaderType::COMPUTE) {
OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
}
}
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
- MachineFunction &MF) const {
+ const MachineFunction &MF) const {
uint64_t CodeSize = 0;
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
- const SIRegisterInfo * RI =
- static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
+ bool FlatUsed = false;
+ const SIRegisterInfo *RI = static_cast<const SIRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
// TODO: CodeSize should account for multiple functions.
CodeSize += MI.getDesc().Size;
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- MachineOperand &MO = MI.getOperand(op_idx);
+ const MachineOperand &MO = MI.getOperand(op_idx);
unsigned width = 0;
bool isSGPR = false;
@@ -242,6 +267,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
reg == AMDGPU::VCC_HI) {
VCCUsed = true;
continue;
+ } else if (reg == AMDGPU::FLAT_SCR ||
+ reg == AMDGPU::FLAT_SCR_LO ||
+ reg == AMDGPU::FLAT_SCR_HI) {
+ FlatUsed = true;
+ continue;
}
switch (reg) {
@@ -302,8 +332,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (VCCUsed)
MaxSGPR += 2;
- ProgInfo.NumVGPR = MaxVGPR;
- ProgInfo.NumSGPR = MaxSGPR;
+ if (FlatUsed)
+ MaxSGPR += 2;
+
+ // We found the maximum register index. They start at 0, so add one to get the
+ // number of registers.
+ ProgInfo.NumVGPR = MaxVGPR + 1;
+ ProgInfo.NumSGPR = MaxSGPR + 1;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
@@ -315,16 +350,21 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// Do not clamp NAN to 0.
ProgInfo.DX10Clamp = 0;
+ const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
+
+ ProgInfo.FlatUsed = FlatUsed;
+ ProgInfo.VCCUsed = VCCUsed;
ProgInfo.CodeLen = CodeSize;
}
-void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
+void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg;
- switch (MFI->ShaderType) {
+ switch (MFI->getShaderType()) {
default: // Fall through
case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
@@ -341,15 +381,31 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
LDSAlignShift = 9;
}
- unsigned LDSBlocks =
- RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+ unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
+ MFI->getMaximumWorkGroupSize(MF);
- if (MFI->ShaderType == ShaderType::COMPUTE) {
+ unsigned LDSBlocks =
+ RoundUpToAlignment(MFI->LDSSize + LDSSpillSize,
+ 1 << LDSAlignShift) >> LDSAlignShift;
+
+ // Scratch is allocated in 256 dword blocks.
+ unsigned ScratchAlignShift = 10;
+ // We need to program the hardware with the amount of scratch memory that
+ // is used by the entire wave. KernelInfo.ScratchSize is the amount of
+ // scratch memory used per thread.
+ unsigned ScratchBlocks =
+ RoundUpToAlignment(KernelInfo.ScratchSize * STM.getWavefrontSize(),
+ 1 << ScratchAlignShift) >> ScratchAlignShift;
+
+ unsigned VGPRBlocks = (KernelInfo.NumVGPR - 1) / 4;
+ unsigned SGPRBlocks = (KernelInfo.NumSGPR - 1) / 8;
+
+ if (MFI->getShaderType() == ShaderType::COMPUTE) {
OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
const uint32_t ComputePGMRSrc1 =
- S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
- S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
+ S_00B848_VGPRS(VGPRBlocks) |
+ S_00B848_SGPRS(SGPRBlocks) |
S_00B848_PRIORITY(KernelInfo.Priority) |
S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
S_00B848_PRIV(KernelInfo.Priv) |
@@ -360,14 +416,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
+ const uint32_t ComputePGMRSrc2 =
+ S_00B84C_LDS_SIZE(LDSBlocks) |
+ S_00B02C_SCRATCH_EN(ScratchBlocks > 0);
+
+ OutStreamer.EmitIntValue(ComputePGMRSrc2, 4);
+
+ OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
+ OutStreamer.EmitIntValue(S_00B860_WAVESIZE(ScratchBlocks), 4);
+
+ // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
+ // 0" comment but I don't see a corresponding field in the register spec.
} else {
OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
- S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
+ OutStreamer.EmitIntValue(S_00B028_VGPRS(VGPRBlocks) |
+ S_00B028_SGPRS(SGPRBlocks), 4);
}
- if (MFI->ShaderType == ShaderType::PIXEL) {
+ if (MFI->getShaderType() == ShaderType::PIXEL) {
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
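
The program-info arithmetic above packs per-wave resource usage into block-granular fields: LDS in 1 << LDSAlignShift byte granules (512 bytes in the branch shown), scratch as the per-thread size times the wavefront size rounded to 256-dword (1024-byte) blocks, and VGPR/SGPR counts converted with (N - 1) / 4 and (N - 1) / 8. The standalone sketch below walks one hypothetical kernel through those calculations; the input numbers are made up, RoundUpToAlignment is re-derived locally, and none of this is part of the patch.

#include <cstdint>
#include <iostream>

static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  unsigned LDSAlignShift = 9;            // 512-byte LDS granules (branch shown above)
  unsigned ScratchAlignShift = 10;       // scratch: 256-dword (1024-byte) blocks
  unsigned WavefrontSize = 64;

  unsigned LDSSize = 3000, LDSSpillSize = 0;   // hypothetical per-kernel values
  unsigned ScratchSizePerThread = 68;          // hypothetical per-thread scratch
  unsigned NumVGPR = 37, NumSGPR = 22;         // hypothetical register counts

  unsigned LDSBlocks =
      roundUpToAlignment(LDSSize + LDSSpillSize, 1 << LDSAlignShift) >> LDSAlignShift;
  unsigned ScratchBlocks =
      roundUpToAlignment(ScratchSizePerThread * WavefrontSize,
                         1 << ScratchAlignShift) >> ScratchAlignShift;
  unsigned VGPRBlocks = (NumVGPR - 1) / 4;     // granule encoding used above
  unsigned SGPRBlocks = (NumSGPR - 1) / 8;

  std::cout << LDSBlocks << " " << ScratchBlocks << " "
            << VGPRBlocks << " " << SGPRBlocks << "\n";   // prints: 6 5 9 2
}
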
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index c1acb6e..b9a0767 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -12,11 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_ASMPRINTER_H
-#define AMDGPU_ASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
+#define LLVM_LIB_TARGET_R600_AMDGPUASMPRINTER_H
#include "llvm/CodeGen/AsmPrinter.h"
-#include <string>
#include <vector>
namespace llvm {
@@ -33,6 +32,9 @@ private:
DX10Clamp(0),
DebugMode(0),
IEEEMode(0),
+ ScratchSize(0),
+ FlatUsed(false),
+ VCCUsed(false),
CodeLen(0) {}
// Fields set in PGM_RSRC1 pm4 packet.
@@ -44,20 +46,24 @@ private:
uint32_t DX10Clamp;
uint32_t DebugMode;
uint32_t IEEEMode;
+ uint32_t ScratchSize;
+
+ bool FlatUsed;
// Bonus information for debugging.
+ bool VCCUsed;
uint64_t CodeLen;
};
- void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const;
- void findNumUsedRegistersSI(MachineFunction &MF,
+ void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
+ void findNumUsedRegistersSI(const MachineFunction &MF,
unsigned &NumSGPR,
unsigned &NumVGPR) const;
/// \brief Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.
- void EmitProgramInfoR600(MachineFunction &MF);
- void EmitProgramInfoSI(MachineFunction &MF, const SIProgramInfo &KernelInfo);
+ void EmitProgramInfoR600(const MachineFunction &MF);
+ void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
@@ -71,6 +77,8 @@ public:
/// Implemented in AMDGPUMCInstLower.cpp
void EmitInstruction(const MachineInstr *MI) override;
+ void EmitEndOfAsmFile(Module &M) override;
+
protected:
bool DisasmEnabled;
std::vector<std::string> DisasmLines, HexLines;
@@ -79,4 +87,4 @@ protected:
} // End anonymous llvm
-#endif //AMDGPU_ASMPRINTER_H
+#endif
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
index 5f8ad8c..6ffa7a0 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -59,16 +59,24 @@ def CC_AMDGPU_Kernel : CallingConv<[
]>;
def CC_AMDGPU : CallingConv<[
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() >= "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
- "ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() < "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
- "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
- "ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
- ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>,
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
- ".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_R600>>
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() >="
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "State.getMachineFunction().getInfo<SIMachineFunctionInfo>()"
+ "->getShaderType() == ShaderType::COMPUTE",
+ CCDelegateTo<CC_AMDGPU_Kernel>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() < "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
+ "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()"
+ "->getShaderType() == ShaderType::COMPUTE",
+ CCDelegateTo<CC_AMDGPU_Kernel>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS",
+ CCDelegateTo<CC_SI>>,
+ CCIf<"static_cast<const AMDGPUSubtarget&>"
+ "(State.getMachineFunction().getSubtarget()).getGeneration() < "
+ "AMDGPUSubtarget::SOUTHERN_ISLANDS",
+ CCDelegateTo<CC_R600>>
]>;
diff --git a/lib/Target/R600/AMDGPUFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h
index d18ede5..15a6636 100644
--- a/lib/Target/R600/AMDGPUFrameLowering.h
+++ b/lib/Target/R600/AMDGPUFrameLowering.h
@@ -12,8 +12,8 @@
/// machine.
//
//===----------------------------------------------------------------------===//
-#ifndef AMDILFRAME_LOWERING_H
-#define AMDILFRAME_LOWERING_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
+#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -42,4 +42,4 @@ public:
bool hasFP(const MachineFunction &MF) const override;
};
} // namespace llvm
-#endif // AMDILFRAME_LOWERING_H
+#endif
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index b4d79e5..90b6672 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -16,9 +16,13 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
+#include "SIDefines.h"
#include "SIISelLowering.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"
@@ -61,6 +65,7 @@ private:
static bool checkPrivateAddress(const MachineMemOperand *Op);
static bool isGlobalStore(const StoreSDNode *N);
+ static bool isFlatStore(const StoreSDNode *N);
static bool isPrivateStore(const StoreSDNode *N);
static bool isLocalStore(const StoreSDNode *N);
static bool isRegionStore(const StoreSDNode *N);
@@ -68,24 +73,46 @@ private:
bool isCPLoad(const LoadSDNode *N) const;
bool isConstantLoad(const LoadSDNode *N, int cbID) const;
bool isGlobalLoad(const LoadSDNode *N) const;
+ bool isFlatLoad(const LoadSDNode *N) const;
bool isParamLoad(const LoadSDNode *N) const;
bool isPrivateLoad(const LoadSDNode *N) const;
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
- /// \returns True if the current basic block being selected is at control
- /// flow depth 0. Meaning that the current block dominates the
- // exit block.
- bool isCFDepth0() const;
-
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
- SDValue &ImmOffset) const;
+ bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ unsigned OffsetBits) const;
+ bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+ bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &Offset,
+ SDValue &SLC) const;
+ bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &ImmOffset) const;
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
+ SDValue &Offset, SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const;
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset, SDValue &GLC) const;
+ SDNode *SelectAddrSpaceCast(SDNode *N);
+ bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Omod) const;
SDNode *SelectADD_SUB_I64(SDNode *N);
SDNode *SelectDIV_SCALE(SDNode *N);
@@ -125,7 +152,8 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
switch (N->getMachineOpcode()) {
default: {
- const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
+ const MCInstrDesc &Desc =
+ TM.getSubtargetImpl()->getInstrInfo()->get(N->getMachineOpcode());
unsigned OpIdx = Desc.getNumDefs() + OpNo;
if (OpIdx >= Desc.getNumOperands())
return nullptr;
@@ -133,15 +161,17 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
if (RegClass == -1)
return nullptr;
- return TM.getRegisterInfo()->getRegClass(RegClass);
+ return TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RegClass);
}
case AMDGPU::REG_SEQUENCE: {
unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(RCID);
+ const TargetRegisterClass *SuperRC =
+ TM.getSubtargetImpl()->getRegisterInfo()->getRegClass(RCID);
SDValue SubRegOp = N->getOperand(OpNo + 1);
unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
- return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
+ return TM.getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
+ SuperRC, SubRegIdx);
}
}
}
@@ -229,10 +259,10 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
unsigned RegClassID;
- const AMDGPURegisterInfo *TRI =
- static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
- const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ const AMDGPURegisterInfo *TRI = static_cast<const AMDGPURegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ const SIRegisterInfo *SIRI = static_cast<const SIRegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
@@ -460,7 +490,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
case AMDGPUISD::DIV_SCALE: {
return SelectDIV_SCALE(N);
}
+ case ISD::CopyToReg: {
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+ Lowering.legalizeTargetIndependentNode(N, *CurDAG);
+ break;
+ }
+ case ISD::ADDRSPACECAST:
+ return SelectAddrSpaceCast(N);
}
+
return SelectCode(N);
}
@@ -498,6 +537,10 @@ bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}
+bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
@@ -529,6 +572,10 @@ bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}
+bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
@@ -558,23 +605,16 @@ bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
const Value *MemVal = N->getMemOperand()->getValue();
if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
!checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
!checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
!checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
!checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
- !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)){
+ !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
return true;
}
return false;
}
-bool AMDGPUDAGToDAGISel::isCFDepth0() const {
- // FIXME: Figure out a way to use DominatorTree analysis here.
- const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
- const Function *Fn = FuncInfo->Fn;
- return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
-}
-
-
const char *AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
@@ -677,14 +717,9 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
- unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
+ unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
- if (!isCFDepth0()) {
- Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
- CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
- }
-
SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
SDValue Carry(AddLo, 1);
SDNode *AddHi
@@ -711,71 +746,401 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
-
+ const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
SDValue Ops[] = {
- N->getOperand(0),
- N->getOperand(1),
- N->getOperand(2),
- Zero,
- Zero,
- Zero,
- Zero
+ Zero, // src0_modifiers
+ N->getOperand(0), // src0
+ Zero, // src1_modifiers
+ N->getOperand(1), // src1
+ Zero, // src2_modifiers
+ N->getOperand(2), // src2
+ False, // clamp
+ Zero // omod
};
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
-static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
- return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
- Ptr), 0);
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ unsigned OffsetBits) const {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
+ (OffsetBits == 8 && !isUInt<8>(Offset)))
+ return false;
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+ return true;
+
+  // On Southern Islands, instructions with a negative base value and an
+  // offset don't seem to work.
+ return CurDAG->SignBitIsZero(Base);
+}
+
+bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
+ // (add n0, c0)
+ Base = N0;
+ Offset = N1;
+ return true;
+ }
+ }
+
+ // If we have a constant address, prefer to put the constant into the
+ // offset. This can save moves to load the constant address since multiple
+ // operations can share the zero base address register, and enables merging
+ // into read2 / write2 instructions.
+ if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ if (isUInt<16>(CAddr->getZExtValue())) {
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ SDLoc(Addr), MVT::i32, Zero);
+ Base = SDValue(MovZero, 0);
+ Offset = Addr;
+ return true;
+ }
+ }
+
+ // default case
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+}
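+// A worked example of the selection above: a DS access at the fixed address
+// 0x100 selects Base = (V_MOV_B32 0) and Offset = 0x100, so other
+// fixed-address accesses can share the same zero base register and later be
+// merged into read2 / write2 instructions.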
+
+bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
+ SDValue &Offset0,
+ SDValue &Offset1) const {
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ unsigned DWordOffset0 = C1->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+ // (add n0, c0)
+ if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
+ Base = N0;
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ return true;
+ }
+ }
+
+ if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+ assert(4 * DWordOffset0 == CAddr->getZExtValue());
+
+ if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ MachineSDNode *MovZero
+ = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ SDLoc(Addr), MVT::i32, Zero);
+ Base = SDValue(MovZero, 0);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ return true;
+ }
+ }
+
+ // default case
+ Base = Addr;
+ Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
+ return true;
+}
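+// A worked example of the 64-bit DS selection above: a constant byte offset
+// of 40 yields DWordOffset0 = 10 and DWordOffset1 = 11, i.e. the two
+// consecutive dwords covering bytes 40-47, which is the form a read2 / write2
+// pair expects.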
+
+static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
+ return isUInt<12>(Imm->getZExtValue());
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
- SDValue &Offset,
- SDValue &ImmOffset) const {
+void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64,
+ SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const {
SDLoc DL(Addr);
+ GLC = CurDAG->getTargetConstant(0, MVT::i1);
+ SLC = CurDAG->getTargetConstant(0, MVT::i1);
+ TFE = CurDAG->getTargetConstant(0, MVT::i1);
+
+ Idxen = CurDAG->getTargetConstant(0, MVT::i1);
+ Offen = CurDAG->getTargetConstant(0, MVT::i1);
+ Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
+ SOffset = CurDAG->getTargetConstant(0, MVT::i32);
+
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
- if (isUInt<12>(C1->getZExtValue())) {
+ if (isLegalMUBUFImmOffset(C1)) {
if (N0.getOpcode() == ISD::ADD) {
- // (add (add N2, N3), C1)
+ // (add (add N2, N3), C1) -> addr64
SDValue N2 = N0.getOperand(0);
SDValue N3 = N0.getOperand(1);
- Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
- Offset = N3;
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
- return true;
+ Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+ Ptr = N2;
+ VAddr = N3;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return;
}
- // (add N0, C1)
- Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));;
- Offset = N0;
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
- return true;
+ // (add N0, C1) -> offset
+ VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ Ptr = N0;
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return;
}
}
if (Addr.getOpcode() == ISD::ADD) {
- // (add N0, N1)
+ // (add N0, N1) -> addr64
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
- Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
- Offset = N1;
- ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+ Ptr = N0;
+ VAddr = N1;
+ Offset = CurDAG->getTargetConstant(0, MVT::i16);
+ return;
+ }
+
+ // default case -> offset
+ VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+ Ptr = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i16);
+
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr,
+ SDValue &Offset) const {
+ SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
+
+ SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE);
+
+ ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
+ if (C->getSExtValue()) {
+ SDLoc DL(Addr);
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
return true;
}
- // default case
- Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
- Offset = Addr;
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+ SDValue &VAddr, SDValue &Offset,
+ SDValue &SLC) const {
+ SLC = CurDAG->getTargetConstant(0, MVT::i1);
+
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &ImmOffset) const {
+
+ SDLoc DL(Addr);
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ unsigned ScratchPtrReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+ unsigned ScratchOffsetReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+ Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
+ ScratchOffsetReg, MVT::i32);
+
+ SDValue ScratchPtr =
+ CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
+ Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
+ SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
+
+ // (add n0, c1)
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+
+ if (isLegalMUBUFImmOffset(C1)) {
+ VAddr = Addr.getOperand(0);
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return true;
+ }
+ }
+
+ // (add FI, n0)
+ if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ isa<FrameIndexSDNode>(Addr.getOperand(0))) {
+ VAddr = Addr.getOperand(1);
+ ImmOffset = Addr.getOperand(0);
+ return true;
+ }
+
+ // (FI)
+ if (isa<FrameIndexSDNode>(Addr)) {
+ VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
+ CurDAG->getConstant(0, MVT::i32)), 0);
+ ImmOffset = Addr;
+ return true;
+ }
+
+ // (node)
+ VAddr = Addr;
ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
return true;
}
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+ SDValue &SOffset, SDValue &Offset,
+ SDValue &GLC, SDValue &SLC,
+ SDValue &TFE) const {
+ SDValue Ptr, VAddr, Offen, Idxen, Addr64;
+
+ SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE);
+
+ if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
+ !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
+ !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
+ uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
+ APInt::getAllOnesValue(32).getZExtValue(); // Size
+ SDLoc DL(Addr);
+
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+ SDValue &Soffset, SDValue &Offset,
+ SDValue &GLC) const {
+ SDValue SLC, TFE;
+
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
+
+// FIXME: This is incorrect and only enough to be able to compile.
+SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
+ AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
+ SDLoc DL(N);
+
+ assert(Subtarget.hasFlatAddressSpace() &&
+ "addrspacecast only supported with flat address space!");
+
+ assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+ ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
+ "Cannot cast address space to / from constant address!");
+
+ assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
+ ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
+ "Can only cast to / from flat address space!");
+
+  // The flat instructions read the address as the index of the VGPR holding
+  // the address, so casting should just reinterpret the base VGPR; insert a
+  // trunc / bitcast / zext as needed.
+
+ SDValue Src = ASC->getOperand(0);
+ EVT DestVT = ASC->getValueType(0);
+ EVT SrcVT = Src.getValueType();
+
+ unsigned SrcSize = SrcVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+
+ if (SrcSize > DestSize) {
+ assert(SrcSize == 64 && DestSize == 32);
+ return CurDAG->getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG,
+ DL,
+ DestVT,
+ Src,
+ CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32));
+ }
+
+
+ if (DestSize > SrcSize) {
+ assert(SrcSize == 32 && DestSize == 64);
+
+ SDValue RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
+
+ const SDValue Ops[] = {
+ RC,
+ Src,
+ CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
+ CurDAG->getConstant(0, MVT::i32)), 0),
+ CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ };
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ SDLoc(N), N->getValueType(0), Ops);
+ }
+
+ assert(SrcSize == 64 && DestSize == 64);
+ return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+
+ unsigned Mods = 0;
+
+ Src = In;
+
+ if (Src.getOpcode() == ISD::FNEG) {
+ Mods |= SISrcMods::NEG;
+ Src = Src.getOperand(0);
+ }
+
+ if (Src.getOpcode() == ISD::FABS) {
+ Mods |= SISrcMods::ABS;
+ Src = Src.getOperand(0);
+ }
+
+ SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
+
+ return true;
+}
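+// For illustration: an input of (fneg (fabs x)) is selected as Src = x with
+// SrcMods = NEG | ABS, while a plain x is selected as Src = x with
+// SrcMods = 0. The order matters; (fabs (fneg x)) only strips the fabs.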
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods, SDValue &Clamp,
+ SDValue &Omod) const {
+ // FIXME: Handle Clamp and Omod
+ Clamp = CurDAG->getTargetConstant(0, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, MVT::i32);
+
+ return SelectVOP3Mods(In, Src, SrcMods);
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ SDValue &Omod) const {
+ // FIXME: Handle Omod
+ Omod = CurDAG->getTargetConstant(0, MVT::i32);
+
+ return SelectVOP3Mods(In, Src, SrcMods);
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 0ada7a3..2f95b74 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -21,7 +21,6 @@
#include "AMDGPUSubtarget.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -104,7 +103,7 @@ EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
}
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
- TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ TargetLowering(TM) {
Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
@@ -131,6 +130,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FREM, MVT::f32, Custom);
+ setOperationAction(ISD::FREM, MVT::f64, Custom);
+
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -242,6 +244,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
}
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
setOperationAction(ISD::SREM, VT, Expand);
@@ -271,15 +279,23 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ROTL, MVT::i64, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i64, Expand);
setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ if (!Subtarget->hasFFBH())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+
+ if (!Subtarget->hasFFBL())
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+
static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v4i32
};
@@ -300,7 +316,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SUB, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
- // TODO: Implement custom UREM / SREM routines.
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
@@ -332,12 +347,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
for (MVT VT : FloatVectorTypes) {
setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FMINNUM, VT, Expand);
+ setOperationAction(ISD::FMAXNUM, VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
@@ -360,21 +378,25 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::STORE);
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
+ // SI at least has hardware support for floating point exceptions, but no way
+ // of using or handling them is implemented. They are also optional in OpenCL
+ // (Section 7.3)
+ setHasFloatingPointExceptions(false);
+
setSelectIsExpensive(false);
PredictableSelectIsExpensive = false;
// There are no integer divide instructions, and these expand to a pretty
// large sequence of instructions.
setIntDivIsCheap(false);
- setPow2DivIsCheap(false);
-
- // TODO: Investigate this when 64-bit divides are implemented.
- addBypassSlowDiv(64, 32);
+ setPow2SDivIsCheap(false);
// FIXME: Need to really handle these.
MaxStoresPerMemcpy = 4096;
@@ -426,12 +448,12 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
assert(VT.isFloatingPoint());
- return VT == MVT::f32;
+ return VT == MVT::f32 || VT == MVT::f64;
}
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
assert(VT.isFloatingPoint());
- return VT == MVT::f32;
+ return VT == MVT::f32 || VT == MVT::f64;
}
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
@@ -531,16 +553,18 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
+ case ISD::FREM: return LowerFREM(Op, DAG);
case ISD::FCEIL: return LowerFCEIL(Op, DAG);
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
}
return Op;
}
@@ -595,7 +619,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
const SDValue &InitPtr,
SDValue Chain,
SelectionDAG &DAG) const {
- const DataLayout *TD = getTargetMachine().getDataLayout();
+ const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
SDLoc DL(InitPtr);
Type *InitTy = Init->getType();
@@ -668,22 +692,35 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
llvm_unreachable("Unhandled constant initializer");
}
+static bool hasDefinedInitializer(const GlobalValue *GV) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar || !GVar->hasInitializer())
+ return false;
+
+ if (isa<UndefValue>(GVar->getInitializer()))
+ return false;
+
+ return true;
+}
+
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
- const DataLayout *TD = getTargetMachine().getDataLayout();
+ const DataLayout *TD = getTargetMachine().getSubtargetImpl()->getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
switch (G->getAddressSpace()) {
- default: llvm_unreachable("Global Address lowering not implemented for this "
- "address space");
case AMDGPUAS::LOCAL_ADDRESS: {
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
+ // TODO: We could emit code to handle the initialization somewhere.
+ if (hasDefinedInitializer(GV))
+ break;
+
unsigned Offset;
if (MFI->LocalMemoryObjects.count(GV) == 0) {
uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
@@ -695,7 +732,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
Offset = MFI->LocalMemoryObjects[GV];
}
- return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
+ return DAG.getConstant(Offset, getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
}
case AMDGPUAS::CONSTANT_ADDRESS: {
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
@@ -737,6 +774,12 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
}
}
+
+ const Function &Fn = *DAG.getMachineFunction().getFunction();
+ DiagnosticInfoUnsupported BadInit(Fn,
+ "initializer for address space");
+ DAG.getContext()->diagnose(BadInit);
+ return SDValue();
}
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
@@ -767,8 +810,8 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ getTargetMachine().getSubtargetImpl()->getFrameLowering());
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
@@ -810,13 +853,21 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// first parameter must be the same as the first instruction.
SDValue Numerator = Op.getOperand(1);
SDValue Denominator = Op.getOperand(2);
+
+  // Note this order is opposite of the machine instruction's operands,
+ // which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
+ // intrinsic has the numerator as the first operand to match a normal
+ // division operation.
+
SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
- return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
- Src0, Denominator, Numerator);
+ return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
+ Denominator, Numerator);
}
case Intrinsic::AMDGPU_div_fmas:
+ // FIXME: Dropping bool parameter. Work is needed to support the implicit
+ // read from VCC.
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
@@ -840,6 +891,10 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::AMDGPU_rsq_clamped:
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+ case Intrinsic::AMDGPU_ldexp:
+ return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+
case AMDGPUIntrinsic::AMDGPU_imax:
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
@@ -945,21 +1000,16 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
}
/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
- SelectionDAG &DAG) const {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue True = N->getOperand(2);
- SDValue False = N->getOperand(3);
- SDValue CC = N->getOperand(4);
-
- if (VT != MVT::f32 ||
- !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
- }
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
@@ -975,15 +1025,19 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
case ISD::SETTRUE2:
case ISD::SETUO:
case ISD::SETO:
- llvm_unreachable("Operation should already be optimised!");
+ break;
case ISD::SETULE:
case ISD::SETULT:
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ // We need to permute the operands to get the correct NaN behavior. The
+ // selected operand is the second one based on the failing compare with NaN,
+ // so permute it based on the compare type the hardware uses.
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE:
@@ -991,8 +1045,9 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
case ISD::SETOGE:
case ISD::SETUGT:
case ISD::SETOGT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
}
case ISD::SETCC_INVALID:
llvm_unreachable("Invalid setcc condcode!");
@@ -1000,12 +1055,53 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
return SDValue();
}
-SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
- SelectionDAG &DAG) const {
- LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
- EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+/// \brief Generate Min/Max node
+SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCOpcode) {
+ case ISD::SETULE:
+ case ISD::SETULT: {
+ unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETLE:
+ case ISD::SETLT: {
+ unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETGT:
+ case ISD::SETGE: {
+ unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT: {
+ unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
+ default:
+ return SDValue();
+ }
+}
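+// For example, (select (setlt a, b), a, b) combines to (smin a, b) and
+// (select (setult a, b), b, a) combines to (umax a, b); all other condition
+// codes are left alone.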
+
+SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
+ SelectionDAG &DAG) const {
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ EVT MemVT = Load->getMemoryVT();
+ EVT MemEltVT = MemVT.getVectorElementType();
+
EVT LoadVT = Op.getValueType();
- EVT EltVT = Op.getValueType().getVectorElementType();
+ EVT EltVT = LoadVT.getVectorElementType();
EVT PtrVT = Load->getBasePtr().getValueType();
unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
@@ -1013,17 +1109,19 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
SmallVector<SDValue, 8> Chains;
SDLoc SL(Op);
+ unsigned MemEltSize = MemEltVT.getStoreSize();
+ MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
- for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ for (unsigned i = 0; i < NumElts; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
- DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
+ DAG.getConstant(i * MemEltSize, PtrVT));
SDValue NewLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
Load->getChain(), Ptr,
- MachinePointerInfo(Load->getMemOperand()->getValue()),
+ SrcValue.getWithOffset(i * MemEltSize),
MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->getAlignment());
+ Load->isInvariant(), Load->getAlignment());
Loads.push_back(NewLoad.getValue(0));
Chains.push_back(NewLoad.getValue(1));
}
@@ -1036,6 +1134,55 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
return DAG.getMergeValues(Ops, SL);
}
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // If this is a 2 element vector, we really want to scalarize and not create
+ // weird 1 element vectors.
+ if (VT.getVectorNumElements() == 2)
+ return ScalarizeVectorLoad(Op, DAG);
+
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ SDValue BasePtr = Load->getBasePtr();
+ EVT PtrVT = BasePtr.getValueType();
+ EVT MemVT = Load->getMemoryVT();
+ SDLoc SL(Op);
+ MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
+
+ EVT LoVT, HiVT;
+ EVT LoMemVT, HiMemVT;
+ SDValue Lo, Hi;
+
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
+ SDValue LoLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
+ Load->getChain(), BasePtr,
+ SrcValue,
+ LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->isInvariant(), Load->getAlignment());
+
+ SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+
+ SDValue HiLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
+ Load->getChain(), HiPtr,
+ SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+ HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->isInvariant(), Load->getAlignment());
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
+ DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
+ LoLoad.getValue(1), HiLoad.getValue(1))
+ };
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -1094,8 +1241,8 @@ SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
Store->getAlignment());
}
-SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::ScalarizeVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
EVT EltVT = Store->getValue().getValueType().getVectorElementType();
@@ -1105,21 +1252,77 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SmallVector<SDValue, 8> Chains;
+ unsigned EltSize = MemEltVT.getStoreSize();
+ MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Store->getValue(), DAG.getConstant(i, MVT::i32));
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
- Store->getBasePtr(),
- DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
- PtrVT));
- Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
- MachinePointerInfo(Store->getMemOperand()->getValue()),
- MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
- Store->getAlignment()));
+ Store->getValue(),
+ DAG.getConstant(i, MVT::i32));
+
+ SDValue Offset = DAG.getConstant(i * MemEltVT.getStoreSize(), PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Store->getBasePtr(), Offset);
+ SDValue NewStore =
+ DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
+ SrcValue.getWithOffset(i * EltSize),
+ MemEltVT, Store->isNonTemporal(), Store->isVolatile(),
+ Store->getAlignment());
+ Chains.push_back(NewStore);
}
+
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
}
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ SDValue Val = Store->getValue();
+ EVT VT = Val.getValueType();
+
+ // If this is a 2 element vector, we really want to scalarize and not create
+ // weird 1 element vectors.
+ if (VT.getVectorNumElements() == 2)
+ return ScalarizeVectorStore(Op, DAG);
+
+ EVT MemVT = Store->getMemoryVT();
+ SDValue Chain = Store->getChain();
+ SDValue BasePtr = Store->getBasePtr();
+ SDLoc SL(Op);
+
+ EVT LoVT, HiVT;
+ EVT LoMemVT, HiMemVT;
+ SDValue Lo, Hi;
+
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
+
+ EVT PtrVT = BasePtr.getValueType();
+ SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(LoMemVT.getStoreSize(), PtrVT));
+
+ MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+ SDValue LoStore
+ = DAG.getTruncStore(Chain, SL, Lo,
+ BasePtr,
+ SrcValue,
+ LoMemVT,
+ Store->isNonTemporal(),
+ Store->isVolatile(),
+ Store->getAlignment());
+ SDValue HiStore
+ = DAG.getTruncStore(Chain, SL, Hi,
+ HiPtr,
+ SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+ HiMemVT,
+ Store->isNonTemporal(),
+ Store->isVolatile(),
+ Store->getAlignment());
+
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
+}
+
+
SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -1165,22 +1368,8 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, DL);
}
- // Lower loads constant address space global variable loads
- if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalVariable>(
- GetUnderlyingObject(Load->getMemOperand()->getValue()))) {
-
-
- SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
- getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
- Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
- DAG.getConstant(2, MVT::i32));
- return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
- Load->getChain(), Ptr,
- DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
- }
-
- if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
return SDValue();
@@ -1231,7 +1420,7 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
- return SplitVectorStore(Op, DAG);
+ return ScalarizeVectorStore(Op, DAG);
}
EVT MemVT = Store->getMemoryVT();
@@ -1276,249 +1465,179 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+// This is a shortcut for integer division because we have fast i32<->f32
+// conversions, and fast f32 reciprocal instructions. The fractional part of a
+// float is enough to accurately represent up to a 24-bit integer.
+SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const {
SDLoc DL(Op);
- EVT OVT = Op.getValueType();
+ EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- MVT INTTY;
- MVT FLTTY;
- if (!OVT.isVector()) {
- INTTY = MVT::i32;
- FLTTY = MVT::f32;
- } else if (OVT.getVectorNumElements() == 2) {
- INTTY = MVT::v2i32;
- FLTTY = MVT::v2f32;
- } else if (OVT.getVectorNumElements() == 4) {
- INTTY = MVT::v4i32;
- FLTTY = MVT::v4f32;
- }
- unsigned bitsize = OVT.getScalarType().getSizeInBits();
- // char|short jq = ia ^ ib;
- SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
-
- // jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
-
- // jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
-
- // jq = (int)jq
- jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+ MVT IntVT = MVT::i32;
+ MVT FltVT = MVT::f32;
+
+ ISD::NodeType ToFp = sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
+ ISD::NodeType ToInt = sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
+
+ if (VT.isVector()) {
+ unsigned NElts = VT.getVectorNumElements();
+ IntVT = MVT::getVectorVT(MVT::i32, NElts);
+ FltVT = MVT::getVectorVT(MVT::f32, NElts);
+ }
+
+ unsigned BitSize = VT.getScalarType().getSizeInBits();
+
+ SDValue jq = DAG.getConstant(1, IntVT);
+
+ if (sign) {
+ // char|short jq = ia ^ ib;
+ jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, VT, jq, DAG.getConstant(BitSize - 2, VT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, VT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, IntVT);
+ }
// int ia = (int)LHS;
- SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+ SDValue ia = sign ?
+ DAG.getSExtOrTrunc(LHS, DL, IntVT) : DAG.getZExtOrTrunc(LHS, DL, IntVT);
   // int ib = (int)RHS;
- SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+ SDValue ib = sign ?
+ DAG.getSExtOrTrunc(RHS, DL, IntVT) : DAG.getZExtOrTrunc(RHS, DL, IntVT);
// float fa = (float)ia;
- SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+ SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
// float fb = (float)ib;
- SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+ SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
// float fq = native_divide(fa, fb);
- SDValue fq = DAG.getNode(ISD::FMUL, DL, FLTTY,
- fa, DAG.getNode(AMDGPUISD::RCP, DL, FLTTY, fb));
+ SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
+ fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
// fq = trunc(fq);
- fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
// float fqneg = -fq;
- SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
// float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
- DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FltVT,
+ DAG.getNode(ISD::FMUL, DL, FltVT, fqneg, fb), fa);
// int iq = (int)fq;
- SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+ SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
// fr = fabs(fr);
- fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+ fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
// fb = fabs(fb);
- fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+ fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
- // int cv = fr >= fb;
- SDValue cv;
- if (INTTY == MVT::i32) {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- } else {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- }
- // jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
- DAG.getConstant(0, OVT));
- // dst = iq + jq;
- iq = DAG.getSExtOrTrunc(iq, DL, OVT);
- iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
- return iq;
-}
-
-SDValue AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSDIV32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r0, r0, r1
- // ixor r10, r10, r11
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSelectCC(DL,
- r0, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, OVT),
- DAG.getConstant(0, OVT),
- ISD::SETLT);
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT);
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSelectCC(DL,
- r1, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, OVT),
- DAG.getConstant(0, OVT),
- ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+ // int cv = fr >= fb;
+ SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
- // udiv r0, r0, r1
- r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, VT));
- // ixor r10, r10, r11
- r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+ // dst = trunc/extend to legal type
+ iq = sign ? DAG.getSExtOrTrunc(iq, DL, VT) : DAG.getZExtOrTrunc(iq, DL, VT);
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+ // dst = iq + jq;
+ SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
+  // Rem needs compensation; it's easier to recompute it.
+ SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
+ Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
-SDValue AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue(Op.getNode(), 0);
+ SDValue Res[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Res, DL);
}
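+// A worked example of the 24-bit expansion above, for 100 udiv 7: fa = 100.0,
+// fb = 7.0, and fq = trunc(100.0 * rcp(7.0)) is 13.0 or 14.0 depending on how
+// the reciprocal rounds. fr = |mad(-fq, fb, fa)|, and the (fr >= fb) check
+// adds jq back exactly when the truncated quotient came out one too small,
+// giving Div = 14 and Rem = 100 - 14 * 7 = 2 either way.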
-SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
- EVT OVT = Op.getValueType().getScalarType();
-
- if (OVT == MVT::i64)
- return LowerSDIV64(Op, DAG);
-
- if (OVT.getScalarType() == MVT::i32)
- return LowerSDIV32(Op, DAG);
-
- if (OVT == MVT::i16 || OVT == MVT::i8) {
- // FIXME: We should be checking for the masked bits. This isn't reached
- // because i8 and i16 are not legal types.
- return LowerSDIV24(Op, DAG);
- }
+void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) const {
+ assert(Op.getValueType() == MVT::i64);
- return SDValue(Op.getNode(), 0);
-}
-
-SDValue AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSREM32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r20, r0, r1
- // umul r20, r20, r1
- // sub r0, r0, r20
- // iadd r0, r0, r10
- // ixor DST, r0, r10
+ EVT VT = Op.getValueType();
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
- // mov r0, LHS
- SDValue r0 = LHS;
+ SDValue one = DAG.getConstant(1, HalfVT);
+ SDValue zero = DAG.getConstant(0, HalfVT);
- // mov r1, RHS
- SDValue r1 = RHS;
+  // HiLo split
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
+ SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+ SDValue RHS = Op.getOperand(1);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
+ SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+ // Get Speculative values
+ SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
+ SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+ SDValue REM_Hi = zero;
+ SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+ SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
+ SDValue DIV_Lo = zero;
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ const unsigned halfBitWidth = HalfVT.getSizeInBits();
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+ for (unsigned i = 0; i < halfBitWidth; ++i) {
+ SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
+ // Get Value of high bit
+ SDValue HBit;
+ if (halfBitWidth == 32 && Subtarget->hasBFE()) {
+ HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
+ } else {
+ HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ }
- // udiv r20, r0, r1
- SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+ SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
+ DAG.getConstant(halfBitWidth - 1, HalfVT));
+ REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
+ REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
- // umul r20, r20, r1
- r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+ REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
+ REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
- // sub r0, r0, r20
- r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
+ SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
+ SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE);
-SDValue AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue(Op.getNode(), 0);
-}
+ DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
-SDValue AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
- EVT OVT = Op.getValueType();
+ // Update REM
- if (OVT.getScalarType() == MVT::i64)
- return LowerSREM64(Op, DAG);
+ SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
- if (OVT.getScalarType() == MVT::i32)
- return LowerSREM32(Op, DAG);
+ REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
+ REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
+ REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
+ }
- return SDValue(Op.getNode(), 0);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
+ Results.push_back(DIV);
+ Results.push_back(REM);
}
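+// The loop above is a bit-serial restoring division of the low half: each
+// iteration shifts the next bit of LHS_Lo into the running remainder and,
+// whenever the remainder reaches RHS, subtracts RHS and sets the matching
+// quotient bit. A small analogue: 13 udiv 3 walks the bits 1,1,0,1 with
+// remainders 1, 0 (quotient bit set), 0, 1, giving DIV = 4 and REM = 1.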
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
@@ -1526,15 +1645,31 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDLoc DL(Op);
EVT VT = Op.getValueType();
+ if (VT == MVT::i64) {
+ SmallVector<SDValue, 2> Results;
+ LowerUDIVREM64(Op, DAG, Results);
+ return DAG.getMergeValues(Results, DL);
+ }
+
SDValue Num = Op.getOperand(0);
SDValue Den = Op.getOperand(1);
+ if (VT == MVT::i32) {
+ if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
+ DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
+ // TODO: We technically could do this for i64, but shouldn't that just be
+ // handled by something generally reducing 64-bit division on 32-bit
+ // values to 32-bit?
+ return LowerDIVREM24(Op, DAG, false);
+ }
+ }
+
// RCP = URECIP(Den) = 2^32 / Den + e
// e is rounding error.
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
- // RCP_LO = umulo(RCP, Den) */
- SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
+ // RCP_LO = mul(RCP, Den) */
+ SDValue RCP_LO = DAG.getNode(ISD::MUL, DL, VT, RCP, Den);
// RCP_HI = mulhu (RCP, Den) */
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
@@ -1565,7 +1700,7 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
// Num_S_Remainder = Quotient * Den
- SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
+ SDValue Num_S_Remainder = DAG.getNode(ISD::MUL, DL, VT, Quotient, Den);
// Remainder = Num - Num_S_Remainder
SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
@@ -1630,12 +1765,22 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue NegOne = DAG.getConstant(-1, VT);
-
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
+ if (VT == MVT::i32) {
+ if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
+ DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
+ // TODO: We technically could do this for i64, but shouldn't that just be
+ // handled by something generally reducing 64-bit division on 32-bit
+ // values to 32-bit?
+ return LowerDIVREM24(Op, DAG, true);
+ }
+ }
+
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue NegOne = DAG.getConstant(-1, VT);
+
SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
@@ -1663,6 +1808,20 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
return DAG.getMergeValues(Res, DL);
}
+// (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y))
+SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ EVT VT = Op.getValueType();
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
+ SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
+
+ return DAG.getNode(ISD::FSUB, SL, VT, X, Mul);
+}
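+// A worked example of the frem expansion above: frem(7.5, 2.0) computes
+// fdiv = 3.75, ftrunc = 3.0, fmul = 6.0 and fsub = 1.5; the result keeps the
+// sign of the first operand, e.g. frem(-7.5, 2.0) = -1.5.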
+
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
@@ -1705,7 +1864,7 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
const unsigned ExpBits = 11;
// Extract the exponent.
- SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_I32, SL, MVT::i32,
+ SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
Hi,
DAG.getConstant(FractBits - 32, MVT::i32),
DAG.getConstant(ExpBits, MVT::i32));
@@ -1796,13 +1955,43 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
+SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
+ SL, MVT::f64, Hi);
+
+ SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
+
+ SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
+ DAG.getConstant(32, MVT::i32));
+
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
+}
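+// The conversion above splits the i64 into 32-bit halves, converts each half
+// exactly (both fit in the f64 mantissa) and recombines them as
+// ldexp(Hi, 32) + Lo. For example, 0x0000000100000002 becomes
+// 1.0 * 2^32 + 2.0 = 4294967298.0.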
+
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue S0 = Op.getOperand(0);
- SDLoc DL(Op);
- if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
+ if (S0.getValueType() != MVT::i64)
return SDValue();
+ EVT DestVT = Op.getValueType();
+ if (DestVT == MVT::f64)
+ return LowerINT_TO_FP64(Op, DAG, false);
+
+ assert(DestVT == MVT::f32);
+
+ SDLoc DL(Op);
+
// f32 uint_to_fp i64
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
DAG.getConstant(0, MVT::i32));
@@ -1815,16 +2004,62 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
}
-SDValue AMDGPUTargetLowering::ExpandSIGN_EXTEND_INREG(SDValue Op,
- unsigned BitsDiff,
- SelectionDAG &DAG) const {
- MVT VT = Op.getSimpleValueType();
- SDLoc DL(Op);
- SDValue Shift = DAG.getConstant(BitsDiff, VT);
- // Shift left by 'Shift' bits.
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Op.getOperand(0), Shift);
- // Signed shift Right by 'Shift' bits.
- return DAG.getNode(ISD::SRA, DL, VT, Shl, Shift);
+SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
+ return LowerINT_TO_FP64(Op, DAG, true);
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ SDLoc SL(Op);
+
+ SDValue Src = Op.getOperand(0);
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ SDValue K0
+ = DAG.getConstantFP(BitsToDouble(UINT64_C(0x3df0000000000000)), MVT::f64);
+ SDValue K1
+ = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), MVT::f64);
+
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
+
+ SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
+
+
+ SDValue Fma = DAG.getNode(ISD::FMA, SL, MVT::f64, FloorMul, K1, Trunc);
+
+ SDValue Hi = DAG.getNode(Signed ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, SL,
+ MVT::i32, FloorMul);
+ SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
+
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
+}
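+// In the expansion above K0 is 2^-32 and K1 is -2^32, so FloorMul holds the
+// high 32 bits of the truncated value and the fma leaves the low 32 bits.
+// For example, 4294967298.0 splits into Hi = 1 and Lo = 2, i.e. the i64
+// 0x0000000100000002.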
+
+SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+
+ if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
+ return LowerFP64_TO_INT(Op, DAG, true);
+
+ return SDValue();
+}
+
+SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Src = Op.getOperand(0);
+
+ if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
+ return LowerFP64_TO_INT(Op, DAG, false);
+
+ return SDValue();
}
SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
@@ -1890,13 +2125,64 @@ template <typename IntTy>
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
uint32_t Offset, uint32_t Width) {
if (Width + Offset < 32) {
- IntTy Result = (Src0 << (32 - Offset - Width)) >> (32 - Width);
+ uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
+ IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
return DAG.getConstant(Result, MVT::i32);
}
return DAG.getConstant(Src0 >> Offset, MVT::i32);
}
+static bool usesAllNormalStores(SDNode *LoadVal) {
+ for (SDNode::use_iterator I = LoadVal->use_begin(); !I.atEnd(); ++I) {
+ if (!ISD::isNormalStore(*I))
+ return false;
+ }
+
+ return true;
+}
+
+// If we have a copy of an illegal type, replace it with a load / store of an
+// equivalently sized legal type. This avoids intermediate bit pack / unpack
+// instructions emitted when handling extloads and truncstores. Ideally we could
+// recognize the pack / unpack pattern to eliminate it.
+SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ SDValue Value = SN->getValue();
+ EVT VT = Value.getValueType();
+
+ if (isTypeLegal(VT) || SN->isVolatile() || !ISD::isNormalLoad(Value.getNode()))
+ return SDValue();
+
+ LoadSDNode *LoadVal = cast<LoadSDNode>(Value);
+ if (LoadVal->isVolatile() || !usesAllNormalStores(LoadVal))
+ return SDValue();
+
+ EVT MemVT = LoadVal->getMemoryVT();
+
+ SDLoc SL(N);
+ SelectionDAG &DAG = DCI.DAG;
+ EVT LoadVT = getEquivalentMemType(*DAG.getContext(), MemVT);
+
+ SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD,
+ LoadVT, SL,
+ LoadVal->getChain(),
+ LoadVal->getBasePtr(),
+ LoadVal->getOffset(),
+ LoadVT,
+ LoadVal->getMemOperand());
+
+ SDValue CastLoad = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad.getValue(0));
+ DCI.CombineTo(LoadVal, CastLoad, NewLoad.getValue(1), false);
+
+ return DAG.getStore(SN->getChain(), SL, NewLoad,
+ SN->getBasePtr(), SN->getMemOperand());
+}
+
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
@@ -1929,7 +2215,7 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
}
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
+ DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
@@ -1945,9 +2231,51 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
simplifyI24(N1, DCI);
return SDValue();
}
- case ISD::SELECT_CC: {
- return CombineMinMax(N, DAG);
+ case ISD::SELECT_CC: {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::f32 ||
+ (VT == MVT::f64 &&
+ Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+ SDValue CC = N->getOperand(4);
+
+ return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+ }
+
+ break;
+ }
+ case ISD::SELECT: {
+ SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ SDValue CC = Cond.getOperand(2);
+
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+
+ if (VT == MVT::f32 ||
+ (VT == MVT::f64 &&
+ Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
+ return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+ }
+
+ // TODO: Implement min / max Evergreen instructions.
+ if (VT == MVT::i32 &&
+ Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+ }
}
+
+ break;
+ }
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
@@ -1992,41 +2320,47 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
}
- if (ConstantSDNode *Val = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
if (Signed) {
return constantFoldBFE<int32_t>(DAG,
- Val->getSExtValue(),
+ CVal->getSExtValue(),
OffsetVal,
WidthVal);
}
return constantFoldBFE<uint32_t>(DAG,
- Val->getZExtValue(),
+ CVal->getZExtValue(),
OffsetVal,
WidthVal);
}
- APInt Demanded = APInt::getBitsSet(32,
- OffsetVal,
- OffsetVal + WidthVal);
-
if ((OffsetVal + WidthVal) >= 32) {
SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32);
return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
BitsFrom, ShiftVal);
}
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
- TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
+ if (BitsFrom.hasOneUse()) {
+ APInt Demanded = APInt::getBitsSet(32,
+ OffsetVal,
+ OffsetVal + WidthVal);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
+ TLI.SimplifyDemandedBits(BitsFrom, Demanded,
+ KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
}
break;
}
+
+ case ISD::STORE:
+ return performStoreCombine(N, DCI);
}
return SDValue();
}
@@ -2117,12 +2451,19 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(CLAMP)
- NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(MAD)
+ NODE_NAME_CASE(FMAX_LEGACY)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
- NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(FMIN_LEGACY)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(FMAX3)
+ NODE_NAME_CASE(SMAX3)
+ NODE_NAME_CASE(UMAX3)
+ NODE_NAME_CASE(FMIN3)
+ NODE_NAME_CASE(SMIN3)
+ NODE_NAME_CASE(UMIN3)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)
@@ -2132,6 +2473,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(RSQ)
NODE_NAME_CASE(RSQ_LEGACY)
NODE_NAME_CASE(RSQ_CLAMPED)
+ NODE_NAME_CASE(LDEXP)
NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(BFE_U32)
NODE_NAME_CASE(BFE_I32)
@@ -2157,6 +2499,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVT_F32_UBYTE2)
NODE_NAME_CASE(CVT_F32_UBYTE3)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
+ NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}
@@ -2225,17 +2568,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
unsigned BitWidth = 32;
uint32_t Width = CWidth->getZExtValue() & 0x1f;
- if (Width == 0) {
- KnownZero = APInt::getAllOnesValue(BitWidth);
- KnownOne = APInt::getNullValue(BitWidth);
- return;
- }
- // FIXME: This could do a lot more. If offset is 0, should be the same as
- // sign_extend_inreg implementation, but that involves duplicating it.
- if (Opc == AMDGPUISD::BFE_I32)
- KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
- else
+ if (Opc == AMDGPUISD::BFE_U32)
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
break;
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 98a92ad..36b4ee6 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUISELLOWERING_H
-#define AMDGPUISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_AMDGPUISELLOWERING_H
#include "llvm/Target/TargetLowering.h"
@@ -43,48 +43,52 @@ private:
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
- SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
- unsigned BitsDiff,
- SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
protected:
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT);
- /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
- /// MachineFunction.
- ///
- /// \returns a RegisterSDNode representing Reg.
- virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
- const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const;
- SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
- SelectionDAG &DAG) const;
- /// \brief Split a vector load into multiple scalar loads.
- SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
+ virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+ SelectionDAG &DAG) const;
+
+ /// \brief Split a vector load into a scalar load of each component.
+ SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector load into 2 loads of half the vector.
+ SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector store into a scalar store of each component.
+ SDValue ScalarizeVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Split a vector store into 2 stores of half the vector.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
+ void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
@@ -138,7 +142,23 @@ public:
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const;
+ SDValue CombineFMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const;
+ SDValue CombineIMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const;
+
const char* getTargetNodeName(unsigned Opcode) const override;
virtual SDNode *PostISelFolding(MachineSDNode *N,
@@ -155,10 +175,16 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- virtual unsigned ComputeNumSignBitsForTargetNode(
- SDValue Op,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const override;
+ unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
+ /// MachineFunction.
+ ///
+ /// \returns a RegisterSDNode representing Reg.
+ virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const;
};
namespace AMDGPUISD {
@@ -174,17 +200,24 @@ enum {
DWORDADDR,
FRACT,
CLAMP,
+ MAD, // Multiply + add with same result as the separate operations.
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
// Denormals handled on some parts.
COS_HW,
SIN_HW,
- FMAX,
+ FMAX_LEGACY,
SMAX,
UMAX,
- FMIN,
+ FMIN_LEGACY,
SMIN,
UMIN,
+ FMAX3,
+ SMAX3,
+ UMAX3,
+ FMIN3,
+ SMIN3,
+ UMIN3,
URECIP,
DIV_SCALE,
DIV_FMAS,
@@ -197,6 +230,7 @@ enum {
RSQ,
RSQ_LEGACY,
RSQ_CLAMPED,
+ LDEXP,
DOT4,
BFE_U32, // Extract range of bits with zero extension to 32-bits.
BFE_I32, // Extract range of bits with sign extension to 32-bits.
@@ -232,6 +266,8 @@ enum {
/// T2|v.z| | | |
/// T3|v.w| | | |
BUILD_VERTICAL_VECTOR,
+ /// Pointer to the start of the shader's constant data.
+ CONST_DATA_PTR,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
@@ -244,4 +280,4 @@ enum {
} // End namespace llvm
-#endif // AMDGPUISELLOWERING_H
+#endif
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index fef5b8c..a8fc614 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -86,21 +86,6 @@ AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// TODO: Implement this function
return nullptr;
}
-bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
- MachineBasicBlock &MBB) const {
- while (iter != MBB.end()) {
- switch (iter->getOpcode()) {
- default:
- break;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32:
- case AMDGPU::BRANCH:
- return true;
- };
- ++iter;
- }
- return false;
-}
void
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -147,7 +132,6 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
} else if (isRegisterStore(*MI)) {
int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::val);
- AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
@@ -215,15 +199,30 @@ AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
return 0;
}
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
- int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const {
- assert(Offset2 > Offset1
- && "Second offset should be larger than first offset!");
- // If we have less than 16 loads in a row, and the offsets are within 16,
- // then schedule together.
- // TODO: Make the loads schedule near if it fits in a cacheline
- return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+bool AMDGPUInstrInfo::enableClusterLoads() const {
+ return true;
+}
+
+// FIXME: This behaves strangely. If, for example, you have 32 loads and stores,
+// the first 16 loads will be interleaved with the stores, and the next 16 will
+// be clustered as expected. It should really split into two batches of 16 stores.
+//
+// Loads are clustered until this returns false, rather than trying to schedule
+// groups of stores. This also means we have to decide whether loads from
+// different address spaces should be clustered, and how to handle ones that
+// might cause bank conflicts.
+//
+// This might be deprecated so it might not be worth that much effort to fix.
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
+ int64_t Offset0, int64_t Offset1,
+ unsigned NumLoads) const {
+ assert(Offset1 > Offset0 &&
+ "Second offset should be larger than first offset!");
+  // If we have 16 or fewer loads in a row, and the offsets are within 64
+ // bytes, then schedule together.
+
+ // A cacheline is 64 bytes (for global memory).
+ return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
bool
@@ -320,7 +319,10 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
return -1;
}
- Offset = MF.getTarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
+ Offset = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getFrameIndexOffset(MF, -1);
return getIndirectIndexBegin(MF) + Offset;
}
@@ -335,7 +337,7 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
}
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
-// header files, so we need to wrap it in a function that takes unsigned
+// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 95dc8c1..da9833d 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -13,10 +13,9 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUINSTRUCTIONINFO_H
-#define AMDGPUINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUINSTRINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUINSTRINFO_H
-#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <map>
@@ -41,8 +40,6 @@ class MachineInstrBuilder;
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
const AMDGPURegisterInfo RI;
- bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
- MachineBasicBlock &MBB) const;
virtual void anchor();
protected:
const AMDGPUSubtarget &ST;
@@ -74,11 +71,6 @@ public:
LiveVariables *LV) const override;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const = 0;
-
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -101,6 +93,7 @@ protected:
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const override;
+public:
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
int getIndirectIndexBegin(const MachineFunction &MF) const;
@@ -109,7 +102,6 @@ protected:
/// read or write or -1 if indirect addressing is not used by this program.
int getIndirectIndexEnd(const MachineFunction &MF) const;
-public:
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -120,6 +112,9 @@ public:
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = nullptr) const override;
+
+ bool enableClusterLoads() const override;
+
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
@@ -144,7 +139,6 @@ public:
// Pure virtual functions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//
- virtual unsigned getIEQOpcode() const = 0;
virtual bool isMov(unsigned opcode) const = 0;
/// \brief Calculate the "Indirect Address" for the given \p RegIndex and
@@ -197,4 +191,4 @@ namespace AMDGPU {
#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
-#endif // AMDGPUINSTRINFO_H
+#endif
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 934d59d..4ee0f2b 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -23,6 +23,10 @@ def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;
+def AMDGPULdExpOp : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
+>;
+
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
@@ -34,6 +38,9 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
+def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
+
// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
@@ -49,12 +56,18 @@ def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
-// out = max(a, b) a and b are floats
-def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
+def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
+
+// out = max(a, b) a and b are floats, where a nan comparison fails.
+// This is not commutative because this gives the second operand:
+// x < nan ? x : nan -> nan
+// nan < x ? nan : x -> x
+def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
+ [SDNPAssociative]
>;
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
+def AMDGPUmad : SDNode<"AMDGPUISD::MAD", SDTFPTernaryOp, []>;
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
@@ -66,12 +79,12 @@ def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
-// out = min(a, b) a and b are floats
-def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
+// out = min(a, b) a and b are floats, where a nan comparison fails.
+def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
+ [SDNPAssociative]
>;
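A minimal scalar sketch of the legacy select semantics the two comments above describe: a comparison involving NaN is false, so the select falls through to its second operand, which is why these nodes cannot be marked commutative. fminLegacy and fmaxLegacy are hypothetical host-side helpers, not backend code.

#include <cmath>
#include <cstdio>

static float fminLegacy(float A, float B) { return A < B ? A : B; }
static float fmaxLegacy(float A, float B) { return A > B ? A : B; }

int main() {
  float Nan = std::nanf("");
  printf("%f\n", fminLegacy(1.0f, Nan));  // nan : 1.0 < nan is false
  printf("%f\n", fminLegacy(Nan, 1.0f));  // 1.0 : nan < 1.0 is false
  printf("%f\n", fmaxLegacy(2.0f, Nan));  // nan : 2.0 > nan is false
}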
-// out = min(a, b) a snd b are signed ints
+// out = min(a, b) a and b are signed ints
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
@@ -81,6 +94,37 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
+// FIXME: TableGen doesn't like commutative instructions with more
+// than 2 operands.
+// out = max(a, b, c) a, b and c are floats
+def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = max(a, b, c) a, b, and c are signed ints
+def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = max(a, b, c) a, b and c are unsigned ints
+def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are floats
+def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are signed ints
+def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
+
+// out = min(a, b, c) a, b and c are unsigned ints
+def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
+ [/*SDNPCommutative, SDNPAssociative*/]
+>;
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
SDTIntToFPOp, []>;
@@ -127,7 +171,7 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
-// src1: dst - rat offset (aka pointer) in dwords
+// src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
SDTypeProfile<0, 2, []>,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index b86b781..c215865 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -23,6 +23,8 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio
let Pattern = pattern;
let Itinerary = NullALU;
+ let isCodeGenOnly = 1;
+
let TSFlags{63} = isRegisterLoad;
let TSFlags{62} = isRegisterStore;
}
@@ -34,9 +36,15 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
}
+def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
+def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
+def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
+
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
+let OperandType = "OPERAND_IMMEDIATE" in {
+
def u32imm : Operand<i32> {
let PrintMethod = "printU32ImmOperand";
}
@@ -49,6 +57,8 @@ def u8imm : Operand<i8> {
let PrintMethod = "printU8ImmOperand";
}
+} // End OperandType = "OPERAND_IMMEDIATE"
+
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
@@ -125,13 +135,35 @@ def COND_NE : PatLeaf <
def COND_NULL : PatLeaf <
(cond),
- [{return false;}]
+ [{(void)N; return false;}]
>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
+class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+}]>;
+
+class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
+ (ops node:$ptr), (op node:$ptr)
+>;
+
+class PrivateStore <SDPatternOperator op> : PrivateMemOp <
+ (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
+>;
+
+def extloadi8_private : PrivateLoad <extloadi8>;
+def sextloadi8_private : PrivateLoad <sextloadi8>;
+def extloadi16_private : PrivateLoad <extloadi16>;
+def sextloadi16_private : PrivateLoad <sextloadi16>;
+def load_private : PrivateLoad <load>;
+
+def truncstorei8_private : PrivateStore <truncstorei8>;
+def truncstorei16_private : PrivateStore <truncstorei16>;
+def store_private : PrivateStore <store>;
+
def global_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isGlobalStore(dyn_cast<StoreSDNode>(N));
@@ -165,6 +197,14 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@@ -193,6 +233,14 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@@ -218,6 +266,11 @@ def az_extloadi32_global : PatFrag<(ops node:$ptr),
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def az_extloadi32_flat : PatFrag<(ops node:$ptr),
+ (az_extloadi32 node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def az_extloadi32_constant : PatFrag<(ops node:$ptr),
(az_extloadi32 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
@@ -233,6 +286,16 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
+def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
def local_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isLocalStore(dyn_cast<StoreSDNode>(N));
@@ -252,6 +315,17 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
+ return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
+}]>;
+
+def local_load_aligned8bytes : Aligned8Bytes <
+ (ops node:$ptr), (local_load node:$ptr)
+>;
+
+def local_store_aligned8bytes : Aligned8Bytes <
+ (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
+>;
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
@@ -277,6 +351,7 @@ def mskor_global : PatFrag<(ops node:$val, node:$ptr),
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
+
def atomic_cmp_swap_32_local :
PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
@@ -293,6 +368,45 @@ def atomic_cmp_swap_64_local :
AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def flat_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
+ (AMDGPUstore_mskor node:$val, node:$ptr), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+}]>;
+
+class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
+>;
+
+def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
+def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
+def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
+def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
+def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
+def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
+def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
+def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
+def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
+def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
+
+//===----------------------------------------------------------------------===//
+// Misc Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def fmad : PatFrag <
+ (ops node:$src0, node:$src1, node:$src2),
+ (fadd (fmul node:$src0, node:$src1), node:$src2)
+>;
class Constants {
int TWO_PI = 0x40c90fdb;
@@ -412,8 +526,9 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
// BFI_INT patterns
-multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
-
+multiclass BFIPatterns <Instruction BFI_INT,
+ Instruction LoadImm32,
+ RegisterClass RC64> {
// Definition from ISA doc:
// (y & x) | (z & ~x)
def : Pat <
@@ -435,8 +550,8 @@ multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
def : Pat <
(f64 (fcopysign f64:$src0, f64:$src1)),
- (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
+ (REG_SEQUENCE RC64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
(BFI_INT (LoadImm32 0x7fffffff),
(i32 (EXTRACT_SUBREG $src0, sub1)),
(i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
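Using the BFI definition stated earlier in this multiclass, (y & x) | (z & ~x), the fcopysign pattern above keeps src0's low dword and inserts src1's sign bit into the high dword under the mask 0x7fffffff. A minimal host-side sketch, assuming IEEE-754 doubles; copysignModel is a hypothetical helper for illustration only.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Low 32 bits come from src0 unchanged; the high 32 bits are
// BFI(0x7fffffff, hi(src0), hi(src1)): magnitude from src0, sign from src1.
static double copysignModel(double Src0, double Src1) {
  uint64_t A, B;
  std::memcpy(&A, &Src0, sizeof(A));
  std::memcpy(&B, &Src1, sizeof(B));
  uint32_t HiA = static_cast<uint32_t>(A >> 32);
  uint32_t HiB = static_cast<uint32_t>(B >> 32);
  uint32_t Hi = (HiA & 0x7fffffffu) | (HiB & ~0x7fffffffu);  // the BFI step
  uint64_t R = (static_cast<uint64_t>(Hi) << 32) | (A & 0xffffffffu);
  double Out;
  std::memcpy(&Out, &R, sizeof(Out));
  return Out;
}

int main() {
  printf("%f\n", copysignModel(2.5, -1.0));  // -2.500000
  printf("%f\n", copysignModel(-2.5, 1.0));  //  2.500000
}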
diff --git a/lib/Target/R600/AMDGPUIntrinsicInfo.cpp b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
index 58916a9..e94bb60 100644
--- a/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
+++ b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
#include "AMDGPUGenIntrinsics.inc"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo()
: TargetIntrinsicInfo() {}
std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
diff --git a/lib/Target/R600/AMDGPUIntrinsicInfo.h b/lib/Target/R600/AMDGPUIntrinsicInfo.h
index 5be68a2..4c95b5e 100644
--- a/lib/Target/R600/AMDGPUIntrinsicInfo.h
+++ b/lib/Target/R600/AMDGPUIntrinsicInfo.h
@@ -11,8 +11,8 @@
/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
//
//===-----------------------------------------------------------------------===//
-#ifndef AMDGPU_INTRINSICINFO_H
-#define AMDGPU_INTRINSICINFO_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUINTRINSICINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUINTRINSICINFO_H
#include "llvm/IR/Intrinsics.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -33,7 +33,7 @@ enum ID {
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
public:
- AMDGPUIntrinsicInfo(TargetMachine *tm);
+ AMDGPUIntrinsicInfo();
std::string getName(unsigned IntrId, Type **Tys = nullptr,
unsigned numTys = 0) const override;
unsigned lookupName(const char *Name, unsigned Len) const override;
@@ -45,4 +45,4 @@ public:
} // end namespace llvm
-#endif // AMDGPU_INTRINSICINFO_H
+#endif
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index d934676..eee9c29 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -13,9 +13,6 @@
let TargetPrefix = "AMDGPU", isTarget = 1 in {
- def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_abs : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index ac82e88..bca027f 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -22,7 +22,9 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
@@ -77,6 +79,20 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(GV->getName()));
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(Sym, Ctx));
+ break;
+ }
+ case MachineOperand::MO_TargetIndex: {
+ assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ MCOp = MCOperand::CreateExpr(Expr);
+ break;
+ }
}
OutMI.addOperand(MCOp);
}
@@ -88,7 +104,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
#ifdef _DEBUG
StringRef Err;
- if (!TM.getInstrInfo()->verifyInstruction(MI, Err)) {
+ if (!TM.getSubtargetImpl()->getInstrInfo()->verifyInstruction(MI, Err)) {
errs() << "Warning: Illegal instruction detected: " << Err << "\n";
MI->dump();
}
@@ -112,8 +128,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
std::string &DisasmLine = DisasmLines.back();
raw_string_ostream DisasmStream(DisasmLine);
- AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *TM.getInstrInfo(),
- *TM.getRegisterInfo());
+ AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
+ *TM.getSubtargetImpl()->getInstrInfo(),
+ *TM.getSubtargetImpl()->getRegisterInfo());
InstPrinter.printInst(&TmpInst, DisasmStream, StringRef());
// Disassemble instruction/operands to hex representation.
diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
index 58fe34d..00d1f1b 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.h
+++ b/lib/Target/R600/AMDGPUMCInstLower.h
@@ -8,8 +8,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_MCINSTLOWER_H
-#define AMDGPU_MCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUMCINSTLOWER_H
+#define LLVM_LIB_TARGET_R600_AMDGPUMCINSTLOWER_H
namespace llvm {
@@ -45,4 +45,4 @@ public:
} // End namespace llvm
-#endif //AMDGPU_MCINSTLOWER_H
+#endif
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
index 14171f4..0f3f9e2 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -10,9 +10,11 @@ static const char *const ShaderTypeAttribute = "ShaderType";
void AMDGPUMachineFunction::anchor() {}
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
- MachineFunctionInfo() {
- ShaderType = ShaderType::COMPUTE;
- LDSSize = 0;
+ MachineFunctionInfo(),
+ ShaderType(ShaderType::COMPUTE),
+ LDSSize(0),
+ ScratchSize(0),
+ IsKernel(true) {
AttributeSet Set = MF.getFunction()->getAttributes();
Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
ShaderTypeAttribute);
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
index fea0b39..f5e4694 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.h
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -10,8 +10,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUMACHINEFUNCTION_H
-#define AMDGPUMACHINEFUNCTION_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUMACHINEFUNCTION_H
+#define LLVM_LIB_TARGET_R600_AMDGPUMACHINEFUNCTION_H
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
@@ -20,15 +20,26 @@ namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
virtual void anchor();
+ unsigned ShaderType;
+
public:
AMDGPUMachineFunction(const MachineFunction &MF);
- unsigned ShaderType;
/// A map to keep track of local memory objects and their offsets within
/// the local memory space.
std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
+
+ /// Start of implicit kernel args
+ unsigned ABIArgOffset;
+
+ unsigned getShaderType() const {
+ return ShaderType;
+ }
+
+ unsigned ScratchSize;
+ bool IsKernel;
};
}
-#endif // AMDGPUMACHINEFUNCTION_H
+#endif
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index 218750d..b81fef4 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -36,11 +36,9 @@ class AMDGPUPromoteAlloca : public FunctionPass,
public:
AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
LocalMemAvailable(0) { }
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
- virtual const char *getPassName() const {
- return "AMDGPU Promote Alloca";
- }
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
void visitAlloca(AllocaInst &I);
};
@@ -107,14 +105,16 @@ static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
ArrayTy->getArrayNumElements());
}
-static Value* calculateVectorIndex(Value *Ptr,
- std::map<GetElementPtrInst*, Value*> GEPIdx) {
+static Value *
+calculateVectorIndex(Value *Ptr,
+ const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
if (isa<AllocaInst>(Ptr))
return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
- return GEPIdx[GEP];
+ auto I = GEPIdx.find(GEP);
+ return I == GEPIdx.end() ? nullptr : I->second;
}
static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
@@ -234,7 +234,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
return true;
}
-static void collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
+static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
+ bool Success = true;
for (User *User : Val->users()) {
if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
continue;
@@ -242,11 +243,20 @@ static void collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
WorkList.push_back(User);
continue;
}
+
+ // FIXME: Correctly handle ptrtoint instructions.
+ Instruction *UseInst = dyn_cast<Instruction>(User);
+ if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
+ return false;
+
if (!User->getType()->isPointerTy())
continue;
+
WorkList.push_back(User);
- collectUsesWithPtrTypes(User, WorkList);
+
+ Success &= collectUsesWithPtrTypes(User, WorkList);
}
+ return Success;
}
void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
@@ -274,6 +284,13 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
return;
}
+ std::vector<Value*> WorkList;
+
+ if (!collectUsesWithPtrTypes(&I, WorkList)) {
+ DEBUG(dbgs() << " Do not know how to convert all uses\n");
+ return;
+ }
+
DEBUG(dbgs() << "Promoting alloca to local memory\n");
LocalMemAvailable -= AllocaSize;
@@ -320,10 +337,6 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
I.replaceAllUsesWith(Offset);
I.eraseFromParent();
- std::vector<Value*> WorkList;
-
- collectUsesWithPtrTypes(Offset, WorkList);
-
for (std::vector<Value*>::iterator i = WorkList.begin(),
e = WorkList.end(); i != e; ++i) {
Value *V = *i;
@@ -331,6 +344,13 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
if (!Call) {
Type *EltTy = V->getType()->getPointerElementType();
PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+
+ // The operand's value should be corrected on its own.
+ if (isa<AddrSpaceCastInst>(V))
+ continue;
+
+ // FIXME: It doesn't really make sense to try to do this for all
+ // instructions.
V->mutateType(NewTy);
continue;
}
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index 4731595..f27576a 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUREGISTERINFO_H
-#define AMDGPUREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUREGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_AMDGPUREGISTERINFO_H
#include "llvm/ADT/BitVector.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -62,4 +62,4 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
} // End namespace llvm
-#endif // AMDIDSAREGISTERINFO_H
+#endif
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index b83c290..9d09a19 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -13,8 +13,14 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
#include "SIInstrInfo.h"
+#include "SIISelLowering.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -25,29 +31,66 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
-AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
- AMDGPUGenSubtargetInfo(TT, GPU, FS),
- DevName(GPU),
- Is64bit(false),
- DumpCode(false),
- R600ALUInst(false),
- HasVertexCache(false),
- TexVTXClauseSize(0),
- Gen(AMDGPUSubtarget::R600),
- FP64(false),
- CaymanISA(false),
- EnableIRStructurizer(true),
- EnableIfCvt(true),
- WavefrontSize(0),
- CFALUBug(false),
- LocalMemorySize(0),
- InstrItins(getInstrItineraryForCPU(GPU)) {
- ParseSubtargetFeatures(GPU, FS);
+static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
+ std::string Ret = "e-p:32:32";
+
+ if (ST.is64bit()) {
+ // 32-bit private, local, and region pointers. 64-bit global and constant.
+ Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
+ }
+
+ Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+ "-v512:512-v1024:1024-v2048:2048-n32:64";
+
+ return Ret;
+}
+
+AMDGPUSubtarget &
+AMDGPUSubtarget::initializeSubtargetDependencies(StringRef GPU, StringRef FS) {
+ // Determine default and user-specified characteristics
+ // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
+ // enabled, but some instructions do not respect them and they run at the
+ // double precision rate, so don't enable by default.
+ //
+ // We want to be able to turn these off, but making this a subtarget feature
+ // for SI has the unhelpful behavior that it unsets everything else if you
+ // disable it.
+
+ SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
+ FullFS += FS;
+
+ ParseSubtargetFeatures(GPU, FullFS);
+
+  // FIXME: I don't think Evergreen has any useful support for
+  // denormals, but this should be checked. Should we issue a warning somewhere
+ // if someone tries to enable these?
+ if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ FP32Denormals = false;
+ FP64Denormals = false;
+ }
+ return *this;
+}
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
+ TargetMachine &TM)
+ : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
+ DumpCode(false), R600ALUInst(false), HasVertexCache(false),
+ TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
+ FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
+ FlatAddressSpace(false), EnableIRStructurizer(true),
+ EnablePromoteAlloca(false), EnableIfCvt(true),
+ EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+ DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ 64 * 16, // Maximum stack alignment (long16)
+ 0),
+ InstrItins(getInstrItineraryForCPU(GPU)) {
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
InstrInfo.reset(new R600InstrInfo(*this));
+ TLInfo.reset(new R600TargetLowering(TM));
} else {
InstrInfo.reset(new SIInstrInfo(*this));
+ TLInfo.reset(new SITargetLowering(TM));
}
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 0c388b3..f71d80a 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -12,10 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUSUBTARGET_H
-#define AMDGPUSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
+#define LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
#include "AMDGPU.h"
+#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "R600ISelLowering.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -23,14 +28,10 @@
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
-#define MAX_CB_SIZE (1 << 16)
-
namespace llvm {
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
- std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
-
public:
enum Generation {
R600 = 0,
@@ -50,24 +51,43 @@ private:
short TexVTXClauseSize;
Generation Gen;
bool FP64;
+ bool FP64Denormals;
+ bool FP32Denormals;
bool CaymanISA;
+ bool FlatAddressSpace;
bool EnableIRStructurizer;
+ bool EnablePromoteAlloca;
bool EnableIfCvt;
+ bool EnableLoadStoreOpt;
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
+ const DataLayout DL;
+ AMDGPUFrameLowering FrameLowering;
+ std::unique_ptr<AMDGPUTargetLowering> TLInfo;
+ std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
InstrItineraryData InstrItins;
public:
- AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
+ AMDGPUSubtarget &initializeSubtargetDependencies(StringRef GPU, StringRef FS);
- const AMDGPUInstrInfo *getInstrInfo() const {
+ const AMDGPUFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const AMDGPUInstrInfo *getInstrInfo() const override {
return InstrInfo.get();
}
-
- const InstrItineraryData &getInstrItineraryData() const {
- return InstrItins;
+ const AMDGPURegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo->getRegisterInfo();
+ }
+ AMDGPUTargetLowering *getTargetLowering() const override {
+ return TLInfo.get();
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
}
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -81,7 +101,7 @@ public:
}
short getTexVTXClauseSize() const {
- return TexVTXClauseSize;
+ return TexVTXClauseSize;
}
Generation getGeneration() const {
@@ -96,6 +116,18 @@ public:
return CaymanISA;
}
+ bool hasFP32Denormals() const {
+ return FP32Denormals;
+ }
+
+ bool hasFP64Denormals() const {
+ return FP64Denormals;
+ }
+
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
}
@@ -112,8 +144,10 @@ public:
if (Size == 32)
return (getGeneration() >= EVERGREEN);
- assert(Size == 64);
- return (getGeneration() >= SOUTHERN_ISLANDS);
+ if (Size == 64)
+ return (getGeneration() >= SOUTHERN_ISLANDS);
+
+ return false;
}
bool hasMulU24() const {
@@ -125,14 +159,30 @@ public:
hasCaymanISA());
}
+ bool hasFFBL() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasFFBH() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
bool IsIRStructurizerEnabled() const {
return EnableIRStructurizer;
}
+ bool isPromoteAllocaEnabled() const {
+ return EnablePromoteAlloca;
+ }
+
bool isIfCvtEnabled() const {
return EnableIfCvt;
}
+ bool loadStoreOptEnabled() const {
+ return EnableLoadStoreOpt;
+ }
+
unsigned getWavefrontSize() const {
return WavefrontSize;
}
@@ -171,4 +221,4 @@ public:
} // End namespace llvm
-#endif // AMDGPUSUBTARGET_H
+#endif
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 8aab944..b2cd988 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -22,6 +22,7 @@
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Verifier.h"
@@ -33,7 +34,6 @@
#include "llvm/Transforms/Scalar.h"
#include <llvm/CodeGen/Passes.h>
-
using namespace llvm;
extern "C" void LLVMInitializeR600Target() {
@@ -49,46 +49,20 @@ static MachineSchedRegistry
SchedCustomRegistry("r600", "Run R600's custom scheduler",
createR600MachineScheduler);
-static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
- std::string Ret = "e-p:32:32";
-
- if (ST.is64bit()) {
- // 32-bit private, local, and region pointers. 64-bit global and constant.
- Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
- }
-
- Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
- "-v512:512-v1024:1024-v2048:2048-n32:64";
-
- return Ret;
-}
-
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OptLevel
-)
-:
- LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- Subtarget(TT, CPU, FS),
- Layout(computeDataLayout(Subtarget)),
- FrameLowering(TargetFrameLowering::StackGrowsUp,
- 64 * 16 // Maximum stack alignment (long16)
- , 0),
- IntrinsicInfo(this),
- InstrItins(&Subtarget.getInstrItineraryData()) {
- // TLInfo uses InstrInfo so it must be initialized after.
- if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- TLInfo.reset(new R600TargetLowering(*this));
- } else {
- TLInfo.reset(new SITargetLowering(*this));
- }
+ StringRef CPU, StringRef FS,
+ TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ TLOF(new TargetLoweringObjectFileELF()),
+ Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
AMDGPUTargetMachine::~AMDGPUTargetMachine() {
+ delete TLOF;
}
namespace {
@@ -109,7 +83,8 @@ public:
return nullptr;
}
- virtual void addCodeGenPrepare();
+ void addIRPasses() override;
+ void addCodeGenPrepare() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addPreRegAlloc() override;
@@ -135,10 +110,26 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
PM.add(createAMDGPUTargetTransformInfoPass(this));
}
+void AMDGPUPassConfig::addIRPasses() {
+ // Function calls are not supported, so make sure we inline everything.
+ addPass(createAMDGPUAlwaysInlinePass());
+ addPass(createAlwaysInlinerPass());
+ // We need to add the barrier noop pass, otherwise adding the function
+  // inlining pass will cause all of the PassConfig's passes to be run
+  // one function at a time, which means if we have a module with two
+ // functions, then we will generate code for the first function
+ // without ever running any passes on the second.
+ addPass(createBarrierNoopPass());
+ TargetPassConfig::addIRPasses();
+}
+
void AMDGPUPassConfig::addCodeGenPrepare() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- addPass(createAMDGPUPromoteAlloca(ST));
- addPass(createSROAPass());
+ if (ST.isPromoteAllocaEnabled()) {
+ addPass(createAMDGPUPromoteAlloca(ST));
+ addPass(createSROAPass());
+ }
+
TargetPassConfig::addCodeGenPrepare();
}
@@ -159,8 +150,15 @@ AMDGPUPassConfig::addPreISel() {
}
bool AMDGPUPassConfig::addInstSelector() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
- addPass(createSILowerI1CopiesPass());
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ addPass(createSILowerI1CopiesPass());
+ addPass(createSIFixSGPRCopiesPass(*TM));
+ }
+
return false;
}
@@ -170,12 +168,18 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createR600VectorRegMerger(*TM));
} else {
- addPass(createSIFixSGPRCopiesPass(*TM));
- // SIFixSGPRCopies can generate a lot of duplicate instructions,
- // so we need to run MachineCSE afterwards.
- addPass(&MachineCSEID);
- initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
- insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID);
+ if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
+
+ // This should be run after scheduling, but before register allocation. It
+      // also needs extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+ }
+
+ addPass(createSIShrinkInstructionsPass());
+ addPass(createSIFixSGPRLiveRangesPass());
}
return false;
}
@@ -183,6 +187,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
bool AMDGPUPassConfig::addPostRegAlloc() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ addPass(createSIShrinkInstructionsPass());
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createSIInsertWaits(*TM));
}
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index 3bb15be..1b3dbce 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_TARGET_MACHINE_H
-#define AMDGPU_TARGET_MACHINE_H
+#ifndef LLVM_LIB_TARGET_R600_AMDGPUTARGETMACHINE_H
+#define LLVM_LIB_TARGET_R600_AMDGPUTARGETMACHINE_H
#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
@@ -25,47 +25,30 @@
namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
-
+ TargetLoweringObjectFile *TLOF;
AMDGPUSubtarget Subtarget;
- const DataLayout Layout;
- AMDGPUFrameLowering FrameLowering;
AMDGPUIntrinsicInfo IntrinsicInfo;
- std::unique_ptr<AMDGPUTargetLowering> TLInfo;
- const InstrItineraryData *InstrItins;
public:
AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- const AMDGPUFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
- return &IntrinsicInfo;
- }
- const AMDGPUInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
const AMDGPUSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const AMDGPURegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
- AMDGPUTargetLowering *getTargetLowering() const override {
- return TLInfo.get();
- }
- const InstrItineraryData *getInstrItineraryData() const override {
- return InstrItins;
+ const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
+ return &IntrinsicInfo;
}
- const DataLayout *getDataLayout() const override { return &Layout; }
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
/// \brief Register R600 analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF;
+ }
};
} // End namespace llvm
-#endif // AMDGPU_TARGET_MACHINE_H
+#endif
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index ea78f43..e7bc006 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -52,7 +52,7 @@ public:
AMDGPUTTI(const AMDGPUTargetMachine *TM)
: ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
}
@@ -74,10 +74,14 @@ public:
bool hasBranchDivergence() const override;
- void getUnrollingPreferences(Loop *L,
+ void getUnrollingPreferences(const Function *F, Loop *L,
UnrollingPreferences &UP) const override;
- /// @}
+ PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override;
+
+ unsigned getNumberOfRegisters(bool Vector) const override;
+ unsigned getRegisterBitWidth(bool Vector) const override;
+ unsigned getMaxInterleaveFactor() const override;
};
} // end anonymous namespace
@@ -93,16 +97,20 @@ llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
bool AMDGPUTTI::hasBranchDivergence() const { return true; }
-void AMDGPUTTI::getUnrollingPreferences(Loop *L,
+void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
UnrollingPreferences &UP) const {
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *BB = *BI;
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
- if (!GEP)
+ UP.Threshold = 300; // Twice the default.
+ UP.Count = UINT_MAX;
+ UP.Partial = true;
+
+ // TODO: Do we want runtime unrolling?
+
+ for (const BasicBlock *BB : L->getBlocks()) {
+ for (const Instruction &I : *BB) {
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
+ if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
+
const Value *Ptr = GEP->getPointerOperand();
const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
if (Alloca) {
@@ -116,8 +124,34 @@ void AMDGPUTTI::getUnrollingPreferences(Loop *L,
//
// Don't use the maximum allowed value here as it will make some
// programs way too big.
- UP.Threshold = 500;
+ UP.Threshold = 800;
}
}
}
}
+
+AMDGPUTTI::PopcntSupportKind
+AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software;
+}
+
+unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const {
+ if (Vec)
+ return 0;
+
+ // Number of VGPRs on SI.
+ if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 256;
+
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned AMDGPUTTI::getRegisterBitWidth(bool) const {
+ return 32;
+}
+
+unsigned AMDGPUTTI::getMaxInterleaveFactor() const {
+ // Semi-arbitrary large amount.
+ return 64;
+}
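
The unrolling preferences above (base Threshold of 300, raised to 800 when a loop contains a GEP into a private-address-space alloca) are meant to fully unroll small loops that index a stack array with the induction variable, so that promote-alloca/SROA can turn the array into registers instead of scratch memory. A minimal, hypothetical kernel-style loop of the kind this heuristic targets (illustration only, not taken from the patch):

// Hypothetical example: a small private (stack) array indexed by the loop
// counter. After full unrolling every index is a constant, so the alloca can
// be promoted to registers rather than lowered to private (scratch) memory.
void blur3(const float *in, float *out, int n) {
  float window[3];                 // becomes an alloca in the private address space
  for (int i = 1; i + 1 < n; ++i) {
    for (int j = 0; j < 3; ++j)    // loop-variant index into 'window'
      window[j] = in[i + j - 1];   // -> the 800 threshold applies to this loop
    out[i] = (window[0] + window[1] + window[2]) / 3.0f;
  }
}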
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index f3a0391..ee6e8ec 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -11,6 +11,7 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "R600InstrInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -160,7 +161,7 @@ public:
bool prepare();
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
@@ -337,7 +338,7 @@ protected:
void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr);
MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
- /// This is work around solution for findNearestCommonDominator not avaiable
+ /// This is work around solution for findNearestCommonDominator not available
/// to post dom a proper fix should go to Dominators.h.
MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2);
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
new file mode 100644
index 0000000..7ad815d
--- /dev/null
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -0,0 +1,320 @@
+//===-- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "AMDGPUGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &_MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool parseCnt(int64_t &IntVal);
+ OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+};
+
+class AMDGPUOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate
+ } Kind;
+
+public:
+ AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct ImmOp {
+ int64_t Val;
+ };
+
+ union {
+ TokOp Tok;
+ ImmOp Imm;
+ };
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ }
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("addRegOperands");
+ }
+ StringRef getToken() const {
+ return StringRef(Tok.Data, Tok.Length);
+ }
+ bool isToken() const override {
+ return Kind == Token;
+ }
+
+ bool isImm() const override {
+ return Kind == Immediate;
+ }
+
+ int64_t getImm() const {
+ return Imm.Val;
+ }
+
+ bool isReg() const override {
+ return false;
+ }
+
+ unsigned getReg() const override {
+ return 0;
+ }
+
+ bool isMem() const override {
+ return false;
+ }
+
+ SMLoc getStartLoc() const override {
+ return SMLoc();
+ }
+
+ SMLoc getEndLoc() const override {
+ return SMLoc();
+ }
+
+ void print(raw_ostream &OS) const override { }
+
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
+ Op->Imm.Val = Val;
+ return Op;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
+ auto Res = llvm::make_unique<AMDGPUOperand>(Token);
+ Res->Tok.Data = Str.data();
+ Res->Tok.Length = Str.size();
+ return Res;
+ }
+
+ bool isSWaitCnt() const;
+};
+
+}
+
+bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ return true;
+}
+
+
+bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_InvalidOperand: {
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ }
+ return Error(IDLoc, "invalid operand for instruction");
+ }
+ }
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
+ return true;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+
+ // Try to parse with a custom parser
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+ // If we successfully parsed the operand or if there was an error parsing,
+ // we are done.
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+ return ResTy;
+
+ switch(getLexer().getKind()) {
+ case AsmToken::Integer: {
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+ Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
+ return MatchOperand_Success;
+ }
+ default:
+ return MatchOperand_NoMatch;
+ }
+}
+
+bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
+ // Add the instruction mnemonic
+ Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+ switch (Res) {
+ case MatchOperand_Success: return false;
+ case MatchOperand_ParseFail: return Error(NameLoc,
+ "Failed parsing operand");
+ case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// s_waitcnt
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
+ StringRef CntName = Parser.getTok().getString();
+ int64_t CntVal;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LParen))
+ return true;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Integer))
+ return true;
+
+ if (getParser().parseAbsoluteExpression(CntVal))
+ return true;
+
+ if (getLexer().isNot(AsmToken::RParen))
+ return true;
+
+ Parser.Lex();
+ if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
+ Parser.Lex();
+
+ int CntShift;
+ int CntMask;
+
+ if (CntName == "vmcnt") {
+ CntMask = 0xf;
+ CntShift = 0;
+ } else if (CntName == "expcnt") {
+ CntMask = 0x7;
+ CntShift = 4;
+ } else if (CntName == "lgkmcnt") {
+ CntMask = 0x7;
+ CntShift = 8;
+ } else {
+ return true;
+ }
+
+ IntVal &= ~(CntMask << CntShift);
+ IntVal |= (CntVal << CntShift);
+ return false;
+}
+
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
+ // Disable all counters by default.
+ // vmcnt [3:0]
+ // expcnt [6:4]
+ // lgkmcnt [10:8]
+ int64_t CntVal = 0x77f;
+
+ switch(getLexer().getKind()) {
+ default: return MatchOperand_ParseFail;
+ case AsmToken::Integer:
+ // The operand can be an integer value.
+ if (getParser().parseAbsoluteExpression(CntVal))
+ return MatchOperand_ParseFail;
+ break;
+
+ case AsmToken::Identifier:
+ do {
+ if (parseCnt(CntVal))
+ return MatchOperand_ParseFail;
+ } while(getLexer().isNot(AsmToken::EndOfStatement));
+ break;
+ }
+ Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
+ return MatchOperand_Success;
+}
+
+bool AMDGPUOperand::isSWaitCnt() const {
+ return isImm();
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializeR600AsmParser() {
+ RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AMDGPUGenAsmMatcher.inc"
+
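
For reference, a standalone sketch of the bit packing that parseCnt() and parseSWaitCntOps() above implement for the s_waitcnt immediate (my own example, not part of the patch): vmcnt lives in bits [3:0], expcnt in bits [6:4], lgkmcnt in bits [10:8], and 0x77f leaves every counter disabled.

#include <cassert>
#include <cstdint>

// Pack an s_waitcnt immediate the same way parseCnt() does: start from the
// all-counters-disabled value 0x77f, then clear and re-set each named field.
static uint16_t packWaitcnt(unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Imm = 0x77f;
  Imm = (Imm & ~(0xfu << 0)) | ((Vmcnt   & 0xf) << 0);
  Imm = (Imm & ~(0x7u << 4)) | ((Expcnt  & 0x7) << 4);
  Imm = (Imm & ~(0x7u << 8)) | ((Lgkmcnt & 0x7) << 8);
  return static_cast<uint16_t>(Imm);
}

int main() {
  // "s_waitcnt vmcnt(0) lgkmcnt(0)" leaves expcnt at its disabled value (7).
  assert(packWaitcnt(0, 7, 0) == 0x070);
  return 0;
}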
diff --git a/lib/Target/R600/AsmParser/CMakeLists.txt b/lib/Target/R600/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..1b42af7
--- /dev/null
+++ b/lib/Target/R600/AsmParser/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMR600AsmParser
+ AMDGPUAsmParser.cpp
+ )
diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/Target/R600/AsmParser/LLVMBuild.txt
index dd22f1b..940e4ce 100644
--- a/lib/ExecutionEngine/JIT/LLVMBuild.txt
+++ b/lib/Target/R600/AsmParser/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/ExecutionEngine/JIT/LLVMBuild.txt ------------------*- Conf -*--===;
+;===- ./lib/Target/R600/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -17,6 +17,7 @@
[component_0]
type = Library
-name = JIT
-parent = ExecutionEngine
-required_libraries = CodeGen Core ExecutionEngine Support
+name = R600AsmParser
+parent = R600
+required_libraries = MC MCParser R600Desc R600Info Support
+add_to_library_groups = R600
diff --git a/lib/Target/Hexagon/InstPrinter/Makefile b/lib/Target/R600/AsmParser/Makefile
index 20331d8..e6689b5 100644
--- a/lib/Target/Hexagon/InstPrinter/Makefile
+++ b/lib/Target/R600/AsmParser/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Hexagon/InstPrinter/Makefile ----------------------------===##
+##===- lib/Target/R600/AsmParser/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -7,9 +7,9 @@
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
-LIBRARYNAME = LLVMHexagonAsmPrinter
+LIBRARYNAME = LLVMR600AsmParser
-# Hack: we need to include 'main' Hexagon target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+# Hack: we need to include 'main' R600 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 4d16082..ed0a216 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -6,13 +6,15 @@ tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
-tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
add_public_tablegen_target(AMDGPUCommonTableGen)
add_llvm_target(R600CodeGen
AMDILCFGStructurizer.cpp
+ AMDGPUAlwaysInlinePass.cpp
AMDGPUAsmPrinter.cpp
AMDGPUFrameLowering.cpp
AMDGPUIntrinsicInfo.cpp
@@ -44,13 +46,16 @@ add_llvm_target(R600CodeGen
SIInsertWaits.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
+ SILoadStoreOptimizer.cpp
SILowerControlFlow.cpp
SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
+ SIShrinkInstructions.cpp
SITypeRewriter.cpp
)
+add_subdirectory(AsmParser)
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td
index 2630345..58b5ce2 100644
--- a/lib/Target/R600/CaymanInstructions.td
+++ b/lib/Target/R600/CaymanInstructions.td
@@ -46,6 +46,8 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
+defm : RsqPat<RECIPSQRT_IEEE_cm, f32>;
+
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index dcb7e98..f24f76b 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -69,6 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+defm : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
@@ -256,6 +257,12 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
let Predicates = [isEGorCayman] in {
+// Should be predicated on FeatureFP64
+// def FMA_64 : R600_3OP <
+// 0xA, "FMA_64",
+// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
+// >;
+
// BFE_UINT - bit_extract, an optimization for mask and shift
// Src0 = Input
// Src1 = Offset
@@ -295,7 +302,7 @@ def : Pat<(i32 (sext_inreg i32:$src, i8)),
def : Pat<(i32 (sext_inreg i32:$src, i16)),
(BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
-defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32, R600_Reg64>;
def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
[(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
@@ -312,6 +319,7 @@ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
def : ROTRPattern <BIT_ALIGN_INT_eg>;
def MULADD_eg : MULADD_Common<0x14>;
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+def FMA_eg : FMA_Common<0x7>;
def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
@@ -328,6 +336,9 @@ defm CUBE_eg : CUBE_Common<0xC0>;
def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
+def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
+def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
+
let hasSideEffects = 1 in {
def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>;
}
@@ -463,21 +474,47 @@ class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
let DisableEncoding = "$dst";
}
-class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
+class R600_LDS_1A2D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
+ string dst =""> :
R600_LDS <
- lds_op,
- (outs),
+ lds_op, outs,
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
- " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
+ " "#name# "$last "#dst#"$src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
pattern> {
+
+ field string BaseOp;
+
+ let LDS_1A1D = 0;
let LDS_1A2D = 1;
}
+class R600_LDS_1A2D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A2D <lds_op, (outs), name, pattern> {
+ let BaseOp = name;
+}
+
+class R600_LDS_1A2D_RET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A2D <lds_op, (outs R600_Reg32:$dst), name, pattern> {
+
+ let BaseOp = name;
+ let usesCustomInserter = 1;
+ let DisableEncoding = "$dst";
+}
+
def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
+def LDS_AND : R600_LDS_1A1D_NORET <0x9, "LDS_AND", [] >;
+def LDS_OR : R600_LDS_1A1D_NORET <0xa, "LDS_OR", [] >;
+def LDS_XOR : R600_LDS_1A1D_NORET <0xb, "LDS_XOR", [] >;
+def LDS_WRXCHG: R600_LDS_1A1D_NORET <0xd, "LDS_WRXCHG", [] >;
+def LDS_CMPST: R600_LDS_1A2D_NORET <0x10, "LDS_CMPST", [] >;
+def LDS_MIN_INT : R600_LDS_1A1D_NORET <0x5, "LDS_MIN_INT", [] >;
+def LDS_MAX_INT : R600_LDS_1A1D_NORET <0x6, "LDS_MAX_INT", [] >;
+def LDS_MIN_UINT : R600_LDS_1A1D_NORET <0x7, "LDS_MIN_UINT", [] >;
+def LDS_MAX_UINT : R600_LDS_1A1D_NORET <0x8, "LDS_MAX_UINT", [] >;
def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
[(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
>;
@@ -493,6 +530,33 @@ def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
[(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
>;
+def LDS_AND_RET : R600_LDS_1A1D_RET <0x29, "LDS_AND",
+ [(set i32:$dst, (atomic_load_and_local i32:$src0, i32:$src1))]
+>;
+def LDS_OR_RET : R600_LDS_1A1D_RET <0x2a, "LDS_OR",
+ [(set i32:$dst, (atomic_load_or_local i32:$src0, i32:$src1))]
+>;
+def LDS_XOR_RET : R600_LDS_1A1D_RET <0x2b, "LDS_XOR",
+ [(set i32:$dst, (atomic_load_xor_local i32:$src0, i32:$src1))]
+>;
+def LDS_MIN_INT_RET : R600_LDS_1A1D_RET <0x25, "LDS_MIN_INT",
+ [(set i32:$dst, (atomic_load_min_local i32:$src0, i32:$src1))]
+>;
+def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT",
+ [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))]
+>;
+def LDS_MIN_UINT_RET : R600_LDS_1A1D_RET <0x27, "LDS_MIN_UINT",
+ [(set i32:$dst, (atomic_load_umin_local i32:$src0, i32:$src1))]
+>;
+def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT",
+ [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))]
+>;
+def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG",
+ [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))]
+>;
+def LDS_CMPST_RET : R600_LDS_1A2D_RET <0x30, "LDS_CMPST",
+ [(set i32:$dst, (atomic_cmp_swap_32_local i32:$src0, i32:$src1, i32:$src2))]
+>;
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
@@ -526,7 +590,7 @@ def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
// SHA-256 Patterns
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
-def : FROUNDPat <CNDGE_eg>;
+def : FROUNDPat <CNDGE_eg, CNDGT_eg>;
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 0927040..64fe726 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -9,6 +9,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUInstPrinter.h"
+#include "SIDefines.h"
+
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -40,6 +42,81 @@ void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
}
+void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
+}
+
+void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
+}
+
+void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " offen";
+}
+
+void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " idxen";
+}
+
+void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " addr64";
+}
+
+void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset:";
+ printU16ImmOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ uint16_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm != 0) {
+ O << " offset:";
+ printU16ImmDecOperand(MI, OpNo, O);
+ }
+}
+
+void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << " offset0:";
+ printU8ImmDecOperand(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << " offset1:";
+ printU8ImmDecOperand(MI, OpNo, O);
+}
+
+void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " glc";
+}
+
+void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " slc";
+}
+
+void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " tfe";
+}
+
void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
switch (reg) {
case AMDGPU::VCC:
@@ -54,6 +131,27 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
case AMDGPU::M0:
O << "m0";
return;
+ case AMDGPU::FLAT_SCR:
+ O << "flat_scratch";
+ return;
+ case AMDGPU::VCC_LO:
+ O << "vcc_lo";
+ return;
+ case AMDGPU::VCC_HI:
+ O << "vcc_hi";
+ return;
+ case AMDGPU::EXEC_LO:
+ O << "exec_lo";
+ return;
+ case AMDGPU::EXEC_HI:
+ O << "exec_hi";
+ return;
+ case AMDGPU::FLAT_SCR_LO:
+ O << "flat_scratch_lo";
+ return;
+ case AMDGPU::FLAT_SCR_HI:
+ O << "flat_scratch_hi";
+ return;
default:
break;
}
@@ -117,19 +215,27 @@ void AMDGPUInstPrinter::printImmediate(uint32_t Imm, raw_ostream &O) {
return;
}
- if (Imm == FloatToBits(1.0f) ||
- Imm == FloatToBits(-1.0f) ||
- Imm == FloatToBits(0.5f) ||
- Imm == FloatToBits(-0.5f) ||
- Imm == FloatToBits(2.0f) ||
- Imm == FloatToBits(-2.0f) ||
- Imm == FloatToBits(4.0f) ||
- Imm == FloatToBits(-4.0f)) {
- O << BitsToFloat(Imm);
- return;
+ if (Imm == FloatToBits(0.0f))
+ O << "0.0";
+ else if (Imm == FloatToBits(1.0f))
+ O << "1.0";
+ else if (Imm == FloatToBits(-1.0f))
+ O << "-1.0";
+ else if (Imm == FloatToBits(0.5f))
+ O << "0.5";
+ else if (Imm == FloatToBits(-0.5f))
+ O << "-0.5";
+ else if (Imm == FloatToBits(2.0f))
+ O << "2.0";
+ else if (Imm == FloatToBits(-2.0f))
+ O << "-2.0";
+ else if (Imm == FloatToBits(4.0f))
+ O << "4.0";
+ else if (Imm == FloatToBits(-4.0f))
+ O << "-4.0";
+ else {
+ O << formatHex(static_cast<uint64_t>(Imm));
}
-
- O << formatHex(static_cast<uint64_t>(Imm));
}
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -149,25 +255,30 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else if (Op.isImm()) {
printImmediate(Op.getImm(), O);
} else if (Op.isFPImm()) {
- O << Op.getFPImm();
+
+ // We special case 0.0 because otherwise it will be printed as an integer.
+ if (Op.getFPImm() == 0.0)
+ O << "0.0";
+ else
+ printImmediate(FloatToBits(Op.getFPImm()), O);
} else if (Op.isExpr()) {
const MCExpr *Exp = Op.getExpr();
Exp->print(O);
} else {
- assert(!"unknown operand type in printOperand");
+ llvm_unreachable("unknown operand type in printOperand");
}
}
void AMDGPUInstPrinter::printOperandAndMods(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
- if (InputModifiers & 0x1)
- O << "-";
- if (InputModifiers & 0x2)
- O << "|";
+ if (InputModifiers & SISrcMods::NEG)
+ O << '-';
+ if (InputModifiers & SISrcMods::ABS)
+ O << '|';
printOperand(MI, OpNo + 1, O);
- if (InputModifiers & 0x2)
- O << "|";
+ if (InputModifiers & SISrcMods::ABS)
+ O << '|';
}
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
@@ -181,7 +292,7 @@ void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
} else if (Imm == 0) {
O << "P10";
} else {
- assert(!"Invalid interpolation parameter slot");
+ llvm_unreachable("Invalid interpolation parameter slot");
}
}
@@ -214,6 +325,23 @@ void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
printIfSet(MI, OpNo, O, "_SAT");
}
+void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " clamp";
+}
+
+void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int Imm = MI->getOperand(OpNo).getImm();
+ if (Imm == SIOutMods::MUL2)
+ O << " mul:2";
+ else if (Imm == SIOutMods::MUL4)
+ O << " mul:4";
+ else if (Imm == SIOutMods::DIV2)
+ O << " div:2";
+}
+
void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int32_t Imm = MI->getOperand(OpNo).getImm();
@@ -281,7 +409,7 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
sel -= 512;
int cb = sel >> 12;
sel &= 4095;
- O << cb << "[" << sel << "]";
+ O << cb << '[' << sel << ']';
} else if (sel >= 448) {
sel -= 448;
O << sel;
@@ -290,7 +418,7 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
}
if (sel >= 0)
- O << "." << chans[chan];
+ O << '.' << chans[chan];
}
void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
@@ -323,25 +451,25 @@ void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
unsigned Sel = MI->getOperand(OpNo).getImm();
switch (Sel) {
case 0:
- O << "X";
+ O << 'X';
break;
case 1:
- O << "Y";
+ O << 'Y';
break;
case 2:
- O << "Z";
+ O << 'Z';
break;
case 3:
- O << "W";
+ O << 'W';
break;
case 4:
- O << "0";
+ O << '0';
break;
case 5:
- O << "1";
+ O << '1';
break;
case 7:
- O << "_";
+ O << '_';
break;
default:
break;
@@ -353,10 +481,10 @@ void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
unsigned CT = MI->getOperand(OpNo).getImm();
switch (CT) {
case 0:
- O << "U";
+ O << 'U';
break;
case 1:
- O << "N";
+ O << 'N';
break;
default:
break;
@@ -368,10 +496,10 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
int KCacheMode = MI->getOperand(OpNo).getImm();
if (KCacheMode > 0) {
int KCacheBank = MI->getOperand(OpNo - 2).getImm();
- O << "CB" << KCacheBank <<":";
+ O << "CB" << KCacheBank << ':';
int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
- int LineSize = (KCacheMode == 1)?16:32;
- O << KCacheAddr * 16 << "-" << KCacheAddr * 16 + LineSize;
+ int LineSize = (KCacheMode == 1) ? 16 : 32;
+ O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
}
}
@@ -415,12 +543,26 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
unsigned Vmcnt = SImm16 & 0xF;
unsigned Expcnt = (SImm16 >> 4) & 0xF;
unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
- if (Vmcnt != 0xF)
- O << "vmcnt(" << Vmcnt << ") ";
- if (Expcnt != 0x7)
- O << "expcnt(" << Expcnt << ") ";
- if (Lgkmcnt != 0x7)
- O << "lgkmcnt(" << Lgkmcnt << ")";
+
+ bool NeedSpace = false;
+
+ if (Vmcnt != 0xF) {
+ O << "vmcnt(" << Vmcnt << ')';
+ NeedSpace = true;
+ }
+
+ if (Expcnt != 0x7) {
+ if (NeedSpace)
+ O << ' ';
+ O << "expcnt(" << Expcnt << ')';
+ NeedSpace = true;
+ }
+
+ if (Lgkmcnt != 0x7) {
+ if (NeedSpace)
+ O << ' ';
+ O << "lgkmcnt(" << Lgkmcnt << ')';
+ }
}
#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 6ca7170..4c06ac0 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -10,8 +10,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUINSTPRINTER_H
-#define AMDGPUINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
+#define LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
@@ -34,7 +34,19 @@ public:
private:
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printImmediate(uint32_t Imm, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -45,6 +57,8 @@ private:
StringRef Asm, StringRef Default = "");
static void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printClampSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printOModSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -65,4 +79,4 @@ private:
} // End namespace llvm
-#endif // AMDGPUINSTRPRINTER_H
+#endif
diff --git a/lib/Target/R600/LLVMBuild.txt b/lib/Target/R600/LLVMBuild.txt
index 408ed75..f3f254f 100644
--- a/lib/Target/R600/LLVMBuild.txt
+++ b/lib/Target/R600/LLVMBuild.txt
@@ -16,17 +16,18 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = R600
parent = Target
+has_asmparser = 1
has_asmprinter = 1
[component_1]
type = Library
name = R600CodeGen
parent = R600
-required_libraries = Analysis AsmPrinter CodeGen Core MC R600AsmPrinter R600Desc R600Info Scalar SelectionDAG Support Target TransformUtils
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC R600AsmParser R600AsmPrinter R600Desc R600Info Scalar SelectionDAG Support Target TransformUtils
add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 489cec7..5fb311b 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -9,9 +9,11 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/TargetRegistry.h"
@@ -43,7 +45,7 @@ public:
AMDGPUAsmBackend(const Target &T)
: MCAsmBackend() {}
- unsigned getNumFixupKinds() const override { return 0; };
+ unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; };
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -55,9 +57,9 @@ public:
assert(!"Not implemented");
}
bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
- return true;
- }
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
};
} //End anonymous namespace
@@ -73,9 +75,50 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
- uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
- assert(Fixup.getKind() == FK_PCRel_4);
- *Dst = (Value - 4) / 4;
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("Unknown fixup kind");
+ case AMDGPU::fixup_si_sopp_br: {
+ uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
+ *Dst = (Value - 4) / 4;
+ break;
+ }
+
+ case AMDGPU::fixup_si_rodata: {
+ uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
+ *Dst = Value;
+ break;
+ }
+
+ case AMDGPU::fixup_si_end_of_text: {
+ uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
+ // The value points to the last instruction in the text section, so we
+ // need to add 4 bytes to get to the start of the constants.
+ *Dst = Value + 4;
+ break;
+ }
+ }
+}
+
+const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
+ MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_si_rodata", 0, 32, 0 },
+ { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
+bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ for (unsigned i = 0; i < Count; ++i)
+ OW->Write8(0);
+
+ return true;
}
//===----------------------------------------------------------------------===//
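
A small sketch of the fixup_si_sopp_br arithmetic handled above, under my reading that 'Value' is the byte distance from the branch instruction to its target (illustration only, not code from the patch): the encoded simm16 counts 4-byte words starting after the 4-byte branch instruction itself.

#include <cassert>
#include <cstdint>

// Mirror of the "(Value - 4) / 4" computation in applyFixup(): subtract the
// size of the s_branch instruction, then convert the byte offset to dwords.
static int16_t encodeSoppBranchImm(int64_t ByteOffsetToTarget) {
  return static_cast<int16_t>((ByteOffsetToTarget - 4) / 4);
}

int main() {
  assert(encodeSoppBranchImm(0x10) == 3); // target 16 bytes ahead -> imm 3
  assert(encodeSoppBranchImm(4) == 0);    // branch to the next instruction -> imm 0
  return 0;
}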
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 53b0e85..5fb94d5 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
using namespace llvm;
@@ -21,7 +22,7 @@ public:
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override {
- llvm_unreachable("Not implemented");
+ return Fixup.getKind();
}
};
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h
new file mode 100644
index 0000000..01021d6
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h
@@ -0,0 +1,34 @@
+//===-- AMDGPUFixupKinds.h - AMDGPU Specific Fixup Entries ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUFIXUPKINDS_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace AMDGPU {
+enum Fixups {
+ /// 16-bit PC relative fixup for SOPP branch instructions.
+ fixup_si_sopp_br = FirstTargetFixupKind,
+
+ /// fixup for global addresses with constant initializers
+ fixup_si_rodata,
+
+ /// fixup for offset from instruction to end of text section
+ fixup_si_end_of_text,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 78bbe0a..3c2b889 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -11,21 +11,14 @@
#include "AMDGPUMCAsmInfo.h"
using namespace llvm;
-AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfoELF() {
HasSingleParameterDotFile = false;
//===------------------------------------------------------------------===//
- HasSubsectionsViaSymbols = true;
- HasMachoZeroFillDirective = false;
- HasMachoTBSSDirective = false;
- HasStaticCtorDtorReferenceInStaticMode = false;
- LinkerRequiresNonEmptyDwarfLines = true;
MaxInstLength = 16;
SeparatorString = "\n";
CommentString = ";";
- LabelSuffix = ":";
InlineAsmStart = ";#ASMSTART";
InlineAsmEnd = ";#ASMEND";
- AssemblerDialect = 0;
//===--- Data Emission Directives -------------------------------------===//
ZeroDirective = ".zero";
@@ -35,28 +28,15 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
Data16bitsDirective = ".short\t";
Data32bitsDirective = ".long\t";
Data64bitsDirective = ".quad\t";
- GPRel32Directive = nullptr;
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
- //===--- Alignment Information ----------------------------------------===//
- AlignmentIsInBytes = true;
- TextAlignFillValue = 0;
-
//===--- Global Variable Emission Directives --------------------------===//
- GlobalDirective = ".global";
- HasSetDirective = false;
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
WeakRefDirective = ".weakref\t";
//===--- Dwarf Emission Directives -----------------------------------===//
- HasLEB128 = true;
SupportsDebugInformation = true;
}
-
-const MCSection*
-AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
- return nullptr;
-}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
index 59aebec..8f75c76 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -1,4 +1,4 @@
-//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===//
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,18 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUMCASMINFO_H
-#define AMDGPUMCASMINFO_H
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCASMINFO_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
-class AMDGPUMCAsmInfo : public MCAsmInfo {
+// If you need to create another MCAsmInfo class that inherits from MCAsmInfo,
+// you will need to make sure your new class sets PrivateGlobalPrefix to
+// a prefix that won't appear in a function name. The default value
+// for PrivateGlobalPrefix is 'L', so it will consider any function starting
+// with 'L' as a local symbol.
+class AMDGPUMCAsmInfo : public MCAsmInfoELF {
public:
explicit AMDGPUMCAsmInfo(StringRef &TT);
- const MCSection* getNonexecutableStackSection(MCContext &CTX) const override;
};
} // namespace llvm
-#endif // AMDGPUMCASMINFO_H
+#endif
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 6a5cd67..c957427 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AMDGPUCODEEMITTER_H
-#define AMDGPUCODEEMITTER_H
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,8 +37,14 @@ public:
const MCSubtargetInfo &STI) const {
return 0;
}
+
+ virtual unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 0;
+ }
};
} // End namespace llvm
-#endif // AMDGPUCODEEMITTER_H
+#endif
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 38a2956..8731055 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -84,12 +84,9 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false);
+ raw_ostream &_OS, MCCodeEmitter *_Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, false);
}
extern "C" void LLVMInitializeR600TargetMC() {
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index f6b3376..c019766 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
//
-#ifndef AMDGPUMCTARGETDESC_H
-#define AMDGPUMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
#include "llvm/ADT/StringRef.h"
@@ -55,4 +55,4 @@ MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
-#endif // AMDGPUMCTARGETDESC_H
+#endif
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index ee02111..999fd0d 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -13,8 +13,11 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
+#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
@@ -39,6 +42,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
+ MCContext &Ctx;
/// \brief Can this operand also contain immediate values?
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -49,7 +53,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
MCContext &ctx)
- : MCII(mcii), MRI(mri) { }
+ : MCII(mcii), MRI(mri), Ctx(ctx) { }
~SIMCCodeEmitter() { }
@@ -62,6 +66,12 @@ public:
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+
+ /// \brief Use a fixup to encode the simm16 field for SOPP branch
+ /// instructions.
+ unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
};
} // End anonymous namespace
@@ -75,12 +85,13 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
unsigned OpNo) const {
-
unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
return (AMDGPU::SSrc_32RegClassID == RegClass) ||
(AMDGPU::SSrc_64RegClassID == RegClass) ||
(AMDGPU::VSrc_32RegClassID == RegClass) ||
- (AMDGPU::VSrc_64RegClassID == RegClass);
+ (AMDGPU::VSrc_64RegClassID == RegClass) ||
+ (AMDGPU::VCSrc_32RegClassID == RegClass) ||
+ (AMDGPU::VCSrc_64RegClassID == RegClass);
}
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
@@ -90,6 +101,8 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
Imm.I = MO.getImm();
else if (MO.isFPImm())
Imm.F = MO.getFPImm();
+ else if (MO.isExpr())
+ return 255;
else
return ~0;
@@ -157,8 +170,13 @@ void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
IntFloatUnion Imm;
if (Op.isImm())
Imm.I = Op.getImm();
- else
+ else if (Op.isFPImm())
Imm.F = Op.getFPImm();
+ else {
+ assert(Op.isExpr());
+ // This will be replaced with a fixup value.
+ Imm.I = 0;
+ }
for (unsigned j = 0; j < 4; j++) {
OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
@@ -169,6 +187,21 @@ void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
}
+unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ return getMachineOpValue(MI, MO, Fixups, STI);
+}
+
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -177,10 +210,19 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
return MRI.getEncodingValue(MO.getReg());
if (MO.isExpr()) {
- const MCExpr *Expr = MO.getExpr();
- MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
- return 0;
+ const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
+ MCFixupKind Kind;
+ const MCSymbol *Sym =
+ Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+
+ if (&Expr->getSymbol() == Sym) {
+ // Add the offset to the beginning of the constant values.
+ Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
+ } else {
+ // This is used for constant data stored in .rodata.
+ Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
+ }
+ Fixups.push_back(MCFixup::Create(4, Expr, Kind, MI.getLoc()));
}
// Figure out the operand number, needed for isSrcOperand check
diff --git a/lib/Target/R600/Makefile b/lib/Target/R600/Makefile
index 1b3ebbe..64a7c8c 100644
--- a/lib/Target/R600/Makefile
+++ b/lib/Target/R600/Makefile
@@ -16,8 +16,8 @@ BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \
AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
- AMDGPUGenAsmWriter.inc
+ AMDGPUGenAsmWriter.inc AMDGPUGenAsmMatcher.inc
-DIRS = InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
index 92bf0df..f07be00 100644
--- a/lib/Target/R600/R600ClauseMergePass.cpp
+++ b/lib/Target/R600/R600ClauseMergePass.cpp
@@ -18,6 +18,7 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -167,7 +168,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
}
bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index d98a6db..edaf278 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -336,7 +336,7 @@ private:
getHWInstrDesc(IsTex?CF_TC:CF_VC))
.addImm(0) // ADDR
.addImm(AluInstCount - 1); // COUNT
- return ClauseFile(MIb, ClauseContent);
+ return ClauseFile(MIb, std::move(ClauseContent));
}
void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
@@ -426,7 +426,7 @@ private:
}
assert(ClauseContent.size() < 128 && "ALU clause is too big");
ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
- return ClauseFile(ClauseHead, ClauseContent);
+ return ClauseFile(ClauseHead, std::move(ClauseContent));
}
void
@@ -459,11 +459,9 @@ private:
void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
}
- void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
- const {
- for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
- It != E; ++It) {
- MachineInstr *MI = *It;
+ void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
+ unsigned Addr) const {
+ for (MachineInstr *MI : MIs) {
CounterPropagateAddr(MI, Addr);
}
}
@@ -477,18 +475,19 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
- TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI = static_cast<const R600RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- CFStack CFStack(ST, MFI->ShaderType);
+ CFStack CFStack(ST, MFI->getShaderType());
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
++MB) {
MachineBasicBlock &MBB = *MB;
unsigned CfCount = 0;
std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
std::vector<MachineInstr * > IfThenElseStack;
- if (MFI->ShaderType == 1) {
+ if (MFI->getShaderType() == ShaderType::VERTEX) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
getHWInstrDesc(CF_CALL_FS));
CfCount++;
@@ -542,7 +541,7 @@ public:
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::set<MachineInstr *>());
Pair.second.insert(MIb);
- LoopStack.push_back(Pair);
+ LoopStack.push_back(std::move(Pair));
MI->eraseFromParent();
CfCount++;
break;
@@ -550,7 +549,7 @@ public:
case AMDGPU::ENDLOOP: {
CFStack.popLoop();
std::pair<unsigned, std::set<MachineInstr *> > Pair =
- LoopStack.back();
+ std::move(LoopStack.back());
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
index f2f28fe..51d87ed 100644
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -8,8 +8,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef R600DEFINES_H_
-#define R600DEFINES_H_
+#ifndef LLVM_LIB_TARGET_R600_R600DEFINES_H
+#define LLVM_LIB_TARGET_R600_R600DEFINES_H
#include "llvm/MC/MCRegisterInfo.h"
@@ -168,4 +168,4 @@ namespace OpName {
#define R_0288E8_SQ_LDS_ALLOC 0x0288E8
-#endif // R600DEFINES_H_
+#endif
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
index 38afebe..fdc2030 100644
--- a/lib/Target/R600/R600EmitClauseMarkers.cpp
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -19,6 +19,7 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -297,7 +298,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index 732b06d..211d392 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -19,6 +19,7 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -65,7 +66,7 @@ void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
}
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
const R600RegisterInfo &TRI = TII->getRegisterInfo();
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 7f3560a..a214e53 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -19,6 +19,7 @@
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -82,6 +83,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
@@ -189,7 +192,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
+ static_cast<const R600InstrInfo *>(MF->getSubtarget().getInstrInfo());
switch (MI->getOpcode()) {
default:
@@ -199,7 +202,10 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
assert(DstIdx != -1);
MachineInstrBuilder NewMI;
- if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
+ // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
+ // LDS_1A2D support and remove this special case.
+ if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
+ MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
return BB;
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
@@ -642,8 +648,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
MachineSDNode *interp;
if (ijb < 0) {
const MachineFunction &MF = DAG.getMachineFunction();
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo *>(
+ MF.getSubtarget().getInstrInfo());
interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
return DAG.getTargetExtractSubreg(
@@ -803,6 +809,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
+ case Intrinsic::AMDGPU_read_workdim:
+ return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
@@ -839,8 +848,20 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
default:
AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
return;
- case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ case ISD::FP_TO_UINT:
+ if (N->getValueType(0) == MVT::i1) {
+ Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ return;
+ }
+ // Fall-through. Since we don't care about out of bounds values
+ // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
+ // considers some extra cases which are not necessary here.
+ case ISD::FP_TO_SINT: {
+ SDValue Result;
+ if (expandFP_TO_SINT(N, Result, DAG))
+ Results.push_back(Result);
return;
+ }
case ISD::UDIV: {
SDValue Op = SDValue(N, 0);
SDLoc DL(Op);
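The fall-through above relies on the observation that, when out-of-range inputs are undefined, a float-to-unsigned conversion can be lowered as a float-to-signed one. A hedged host-side sketch of that equivalence, assuming the input is known to fit in the signed range:

    // Sketch only: for inputs in [0, 2^63) converting through int64_t yields
    // the same value as a direct unsigned conversion, which is why the i64
    // FP_TO_UINT case can reuse expandFP_TO_SINT when out-of-range values
    // don't matter.
    #include <cassert>
    #include <cstdint>

    static uint64_t fptoui_via_sint(double X) {
      return static_cast<uint64_t>(static_cast<int64_t>(X));
    }

    int main() {
      assert(fptoui_via_sint(12345.0) == 12345u);
      assert(fptoui_via_sint(0.0) == 0u);
      return 0;
    }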
@@ -886,74 +907,7 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::UDIVREM: {
SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
-
- SDValue one = DAG.getConstant(1, HalfVT);
- SDValue zero = DAG.getConstant(0, HalfVT);
-
- //HiLo split
- SDValue LHS = N->getOperand(0);
- SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
- SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
-
- SDValue RHS = N->getOperand(1);
- SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
- SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
-
- // Get Speculative values
- SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
- SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
-
- SDValue REM_Hi = zero;
- SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
-
- SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
- SDValue DIV_Lo = zero;
-
- const unsigned halfBitWidth = HalfVT.getSizeInBits();
-
- for (unsigned i = 0; i < halfBitWidth; ++i) {
- SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
- // Get Value of high bit
- SDValue HBit;
- if (halfBitWidth == 32 && Subtarget->hasBFE()) {
- HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
- } else {
- HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- }
-
- SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
- DAG.getConstant(halfBitWidth - 1, HalfVT));
- REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
- REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
-
- REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
- REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
-
-
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
-
- SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
- SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
-
- DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
-
- // Update REM
-
- SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
-
- REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
- REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
- REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
- }
-
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
- SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
- Results.push_back(DIV);
- Results.push_back(REM);
+ LowerUDIVREM64(Op, DAG, Results);
break;
}
}
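The removed DAG expansion above is a restoring shift-subtract division built out of 32-bit halves; the patch moves that expansion behind LowerUDIVREM64. A plain C++ sketch of the same algorithm (illustrative, not the helper's actual code; assumes a nonzero divisor):

    #include <cassert>
    #include <cstdint>

    // Restoring shift-subtract division: shift one dividend bit at a time into
    // the remainder and set a quotient bit whenever the divisor fits.
    static void udivrem64(uint64_t LHS, uint64_t RHS,
                          uint64_t &Div, uint64_t &Rem) {
      Div = 0;
      Rem = 0;
      for (int i = 63; i >= 0; --i) {
        Rem = (Rem << 1) | ((LHS >> i) & 1);
        if (Rem >= RHS) {
          Rem -= RHS;
          Div |= uint64_t(1) << i;
        }
      }
    }

    int main() {
      uint64_t D, R;
      udivrem64(1000000007ull * 3 + 11, 1000000007ull, D, R);
      assert(D == 3 && R == 11);
      return 0;
    }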
@@ -1415,8 +1369,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
- getTargetMachine().getFrameLowering());
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ getTargetMachine().getSubtargetImpl()->getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1512,10 +1466,23 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(Ops, DL);
}
+ // Lower loads of constant address space global variables.
+ if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ isa<GlobalVariable>(
+ GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {
+
+ SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
+ getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
+ Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
+ DAG.getConstant(2, MVT::i32));
+ return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
+ LoadNode->getChain(), Ptr,
+ DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
+ }
if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
SDValue MergedValues[2] = {
- SplitVectorLoad(Op, DAG),
+ ScalarizeVectorLoad(Op, DAG),
Chain
};
return DAG.getMergeValues(MergedValues, DL);
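The new block above rewrites a constant-address-space load of a global variable as a REGISTER_LOAD whose index is the byte address shifted right by two, i.e. a dword index. A tiny sketch of that address conversion (assumed layout: one 32-bit register per dword):

    #include <cassert>
    #include <cstdint>

    // Byte address -> dword register index, as done by the SRL-by-2 above.
    static uint32_t byteAddrToDwordIndex(uint32_t ByteAddr) {
      return ByteAddr >> 2;
    }

    int main() {
      assert(byteAddrToDwordIndex(0) == 0);
      assert(byteAddrToDwordIndex(4) == 1);
      assert(byteAddrToDwordIndex(40) == 10);
      return 0;
    }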
@@ -1585,6 +1552,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
LoadNode->getPointerInfo(), MemVT,
LoadNode->isVolatile(),
LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
LoadNode->getAlignment());
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
@@ -1599,8 +1567,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
- getTargetMachine().getFrameLowering());
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ getTargetMachine().getSubtargetImpl()->getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
@@ -1663,10 +1631,10 @@ SDValue R600TargetLowering::LowerFormalArguments(
SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
MachineFunction &MF = DAG.getMachineFunction();
- unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
SmallVector<ISD::InputArg, 8> LocalIns;
@@ -1676,10 +1644,15 @@ SDValue R600TargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT VT = Ins[i].VT;
- EVT MemVT = LocalIns[i].VT;
+ const ISD::InputArg &In = Ins[i];
+ EVT VT = In.VT;
+ EVT MemVT = VA.getLocVT();
+ if (!VT.isVector() && MemVT.isVector()) {
+ // Get load source type if scalarized.
+ MemVT = MemVT.getVectorElementType();
+ }
- if (ShaderType != ShaderType::COMPUTE) {
+ if (MFI->getShaderType() != ShaderType::COMPUTE) {
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
InVals.push_back(Register);
@@ -1687,7 +1660,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUAS::CONSTANT_BUFFER_0);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1696,18 +1669,33 @@ SDValue R600TargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
+ ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
+ if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
+ // FIXME: This should really check the extload type, but the handling of
+ // extload vector parameters seems to be broken.
+
+ // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ Ext = ISD::SEXTLOAD;
+ }
+
+ // Compute the offset from the value.
+ // XXX - I think PartOffset should give you this, but it seems to give the
+ // size of the register which isn't useful.
+
+ unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
+ unsigned PartOffset = VA.getLocMemOffset();
+ unsigned Offset = 36 + VA.getLocMemOffset();
- // FIXME: This should really check the extload type, but the handling of
- // extload vecto parameters seems to be broken.
- //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
- ISD::LoadExtType Ext = ISD::SEXTLOAD;
- SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
- DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
- MachinePointerInfo(UndefValue::get(PtrTy)),
- MemVT, false, false, 4);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
+ SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
+ DAG.getConstant(Offset, MVT::i32),
+ DAG.getUNDEF(MVT::i32),
+ PtrInfo,
+ MemVT, false, true, true, 4);
// 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
+ MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
}
return Chain;
}
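The rewritten argument lowering above reads each formal argument from the constant buffer at 36 bytes (the implicit thread-group/global-size header mentioned in the comment) plus its assigned location offset, and records the running end in ABIArgOffset, which the AMDGPU_read_workdim lowering earlier in this file then divides by 4. A hedged sketch of that bookkeeping, assuming sequentially packed arguments with made-up sizes:

    #include <cassert>
    #include <vector>

    // Sizes are illustrative; the real values come from CCValAssign and the
    // in-memory store size of each lowered argument type.
    static unsigned lastABIArgOffset(const std::vector<unsigned> &StoreSizes) {
      const unsigned ImplicitHeader = 36; // thread group / global sizes
      unsigned LocOffset = 0;
      unsigned ABIArgOffset = ImplicitHeader;
      for (unsigned Size : StoreSizes) {
        unsigned Offset = ImplicitHeader + LocOffset;
        ABIArgOffset = Offset + Size; // MFI->ABIArgOffset after this argument
        LocOffset += Size;
      }
      return ABIArgOffset;
    }

    int main() {
      // Two i32 arguments followed by a v4i32.
      assert(lastABIArgOffset({4, 4, 16}) == 36 + 4 + 4 + 16);
      return 0;
    }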
@@ -2053,7 +2041,7 @@ static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+ static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
if (!Src.isMachineOpcode())
return false;
switch (Src.getMachineOpcode()) {
@@ -2178,7 +2166,7 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+ static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
if (!Node->isMachineOpcode())
return Node;
unsigned Opcode = Node->getMachineOpcode();
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index d22c8c9..10ebc10 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef R600ISELLOWERING_H
-#define R600ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_R600_R600ISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_R600ISELLOWERING_H
#include "AMDGPUISelLowering.h"
@@ -74,4 +74,4 @@ private:
} // End namespace llvm;
-#endif // R600ISELLOWERING_H
+#endif
diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td
index 9428bab..0ffd485 100644
--- a/lib/Target/R600/R600InstrFormats.td
+++ b/lib/Target/R600/R600InstrFormats.td
@@ -38,6 +38,9 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
let Pattern = pattern;
let Itinerary = itin;
+ // No AsmMatcher support.
+ let isCodeGenOnly = 1;
+
let TSFlags{4} = Trig;
let TSFlags{5} = Op3;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 3972e2f..653fd0d 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -92,10 +92,6 @@ bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
return true;
}
-unsigned R600InstrInfo::getIEQOpcode() const {
- return AMDGPU::SETE_INT;
-}
-
bool R600InstrInfo::isMov(unsigned Opcode) const {
@@ -209,8 +205,10 @@ bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
}
bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
- const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
- return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode());
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ return MFI->getShaderType() != ShaderType::COMPUTE &&
+ usesVertexCache(MI->getOpcode());
}
bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
@@ -218,9 +216,11 @@ bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
}
bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
- const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
- return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) ||
- usesTextureCache(MI->getOpcode());
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ return (MFI->getShaderType() == ShaderType::COMPUTE &&
+ usesVertexCache(MI->getOpcode())) ||
+ usesTextureCache(MI->getOpcode());
}
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
@@ -319,7 +319,7 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
continue;
}
-
+
}
return Result;
}
@@ -571,7 +571,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
if (!isLastAluTrans)
return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
- TransOps = IGSrcs.back();
+ TransOps = std::move(IGSrcs.back());
IGSrcs.pop_back();
ValidSwizzle.pop_back();
@@ -654,10 +654,10 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
return fitsConstReadLimitations(Consts);
}
-DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- const InstrItineraryData *II = TM->getInstrItineraryData();
- return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
+DFAPacketizer *
+R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II);
}
static bool
@@ -1082,9 +1082,8 @@ bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(
- MF.getTarget().getFrameLowering());
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index 45a57d3..d3dc0e5 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef R600INSTRUCTIONINFO_H_
-#define R600INSTRUCTIONINFO_H_
+#ifndef LLVM_LIB_TARGET_R600_R600INSTRINFO_H
+#define LLVM_LIB_TARGET_R600_R600INSTRINFO_H
#include "AMDGPUInstrInfo.h"
#include "R600Defines.h"
@@ -152,11 +152,10 @@ namespace llvm {
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
- unsigned getIEQOpcode() const override;
bool isMov(unsigned Opcode) const override;
- DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const override;
+ DFAPacketizer *
+ CreateTargetScheduleState(const TargetSubtargetInfo &) const override;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
@@ -207,7 +206,7 @@ namespace llvm {
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override { return 1;}
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
/// \brief Reserve the registers that may be accesed using indirect addressing.
void reserveIndirectRegisters(BitVector &Reserved,
@@ -299,4 +298,4 @@ int getLDSNoRetOp(uint16_t Opcode);
} // End llvm namespace
-#endif // R600INSTRINFO_H_
+#endif
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 73fa345..b6c00f8 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -216,7 +216,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
def TEX_SHADOW : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
- return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
+ return (TType >= 6 && TType <= 8) || TType == 13;
}]
>;
@@ -475,13 +475,13 @@ class ExportBufWord1 {
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0),
0, 61, 0, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0),
0, 61, 7, 0, 7, 7, cf_inst, 0)
>;
@@ -513,17 +513,17 @@ multiclass SteamOutputExportPattern<Instruction ExportInst,
// Stream1
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ (ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf1inst, 0)>;
// Stream2
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ (ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf2inst, 0)>;
// Stream3
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ (ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf3inst, 0)>;
}
@@ -674,8 +674,9 @@ def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
-def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
-def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+// TODO: Do these actually match the regular fmin/fmax behavior?
+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
@@ -915,6 +916,11 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
[(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
>;
+class FMA_Common <bits<5> inst> : R600_3OP <
+ inst, "FMA",
+ [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
+>;
+
class CNDE_Common <bits<5> inst> : R600_3OP <
inst, "CNDE",
[(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
@@ -1068,7 +1074,7 @@ class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
}
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
+ inst, "RECIP_IEEE", [(set f32:$dst, (AMDGPUrcp f32:$src0))]
> {
let Itinerary = TransALU;
}
@@ -1114,6 +1120,7 @@ def FNEG_R600 : FNEG<R600_Reg32>;
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//
+// FIXME: Should be predicated on unsafe fp math.
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
(int_AMDGPU_div f32:$src0, f32:$src1),
@@ -1124,6 +1131,8 @@ def : Pat<
(fdiv f32:$src0, f32:$src1),
(MUL_IEEE $src0, (recip_ieee $src1))
>;
+
+def : RcpPat<recip_ieee, f32>;
}
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
@@ -1133,9 +1142,12 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
>;
// FROUND pattern
-class FROUNDPat<Instruction CNDGE> : Pat <
+class FROUNDPat<Instruction CNDGE, Instruction CNDGT> : Pat <
(AMDGPUround f32:$x),
- (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+ (CNDGE $x,
+ (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)),
+ (CNDGT (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+ )
>;
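The reworked FROUNDPat above adds a CNDGT variant so that the half-way comparison is strict for negative inputs. Read as plain arithmetic (CNDGE and CNDGT select on >= 0 and > 0 respectively, and FRACT is x - floor(x)), the pattern appears to compute the following, sketched here as host code:

    #include <cassert>
    #include <cmath>

    // fract(x) is compared against 0.5 to pick ceil or floor, using >= for
    // x >= 0 and > for x < 0, which rounds halfway cases away from zero.
    static float r600_round(float X) {
      float Fract = X - std::floor(X);
      if (X >= 0.0f)
        return (Fract - 0.5f >= 0.0f) ? std::ceil(X) : std::floor(X);
      return (Fract - 0.5f > 0.0f) ? std::ceil(X) : std::floor(X);
    }

    int main() {
      assert(r600_round(2.5f) == 3.0f);
      assert(r600_round(-2.5f) == -3.0f); // rounds away from zero at .5
      assert(r600_round(1.25f) == 1.0f);
      return 0;
    }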
@@ -1180,7 +1192,9 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
- def : FROUNDPat <CNDGE_r600>;
+ defm : RsqPat<RECIPSQRT_IEEE_r600, f32>;
+
+ def : FROUNDPat <CNDGE_r600, CNDGT_r600>;
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
@@ -1482,6 +1496,7 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 1;
}
multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index b0ae22e..263561e 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -10,8 +10,8 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef R600MACHINEFUNCTIONINFO_H
-#define R600MACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_R600_R600MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_R600_R600MACHINEFUNCTIONINFO_H
#include "AMDGPUMachineFunction.h"
#include "llvm/ADT/BitVector.h"
@@ -31,4 +31,4 @@ public:
} // End llvm namespace
-#endif //R600MACHINEFUNCTIONINFO_H
+#endif
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 7ea654c..d782713 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -14,7 +14,6 @@
#include "R600MachineScheduler.h"
#include "AMDGPUSubtarget.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
@@ -76,21 +75,25 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
float ALUFetchRationEstimate =
(AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
(FetchInstCount + Available[IDFetch].size());
- unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
- DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
- // We assume the local GPR requirements to be "dominated" by the requirement
- // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and
- // after TEX are indeed likely to consume or generate values from/for the
- // TEX clause.
- // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
- // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
- // one GPR) or TmXYZW = TnXYZW (need 2 GPR).
- // (TODO : use RegisterPressure)
- // If we are going too use too many GPR, we flush Fetch instruction to lower
- // register pressure on 128 bits regs.
- unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
- if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
+ if (ALUFetchRationEstimate == 0) {
AllowSwitchFromAlu = true;
+ } else {
+ unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
+ DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
+ // We assume the local GPR requirements to be "dominated" by the requirement
+ // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and
+ // after TEX are indeed likely to consume or generate values from/for the
+ // TEX clause.
+ // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
+ // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
+ // one GPR) or TmXYZW = TnXYZW (need 2 GPR).
+ // (TODO : use RegisterPressure)
+ // If we are going to use too many GPRs, we flush Fetch instructions to lower
+ // register pressure on 128-bit regs.
+ unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
+ if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
+ AllowSwitchFromAlu = true;
+ }
}
if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
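The restructured block above guards the wavefront estimate against a zero ALU/fetch ratio before dividing. A standalone sketch of the heuristic decision (the constant 62.5 comes from the code above; the GPR-limited wavefront function below is a placeholder, not the pass's real model):

    #include <cassert>

    // Placeholder for getWFCountLimitedByGPR(): more 128-bit GPRs in flight
    // means fewer wavefronts fit.
    static unsigned wfCountLimitedByGPR(unsigned GPRs) {
      return GPRs ? 248 / GPRs : 248;
    }

    static bool shouldSwitchFromAlu(float ALUFetchRatio, unsigned FetchAvailable) {
      if (ALUFetchRatio == 0)
        return true;                              // nothing fetch-bound to hide behind
      unsigned NeededWF = 62.5f / ALUFetchRatio;  // approx. wavefronts required
      unsigned NearRegisterRequirement = 2 * FetchAvailable;
      return NeededWF > wfCountLimitedByGPR(NearRegisterRequirement);
    }

    int main() {
      assert(shouldSwitchFromAlu(0.0f, 4));
      assert(!shouldSwitchFromAlu(100.0f, 1));
      return 0;
    }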
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index fd475af..fc5b95c 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef R600MACHINESCHEDULER_H_
-#define R600MACHINESCHEDULER_H_
+#ifndef LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_R600_R600MACHINESCHEDULER_H
#include "R600InstrInfo.h"
#include "llvm/ADT/PriorityQueue.h"
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index 2314136..742c0e0 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "R600InstrInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -279,9 +280,8 @@ bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
continue;
if (PreviousRegSeqByReg[MOp->getReg()].empty())
continue;
- std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()];
- for (unsigned i = 0, e = MIs.size(); i < e; i++) {
- CompatibleRSI = PreviousRegSeq[MIs[i]];
+ for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
+ CompatibleRSI = PreviousRegSeq[MI];
if (RSI == CompatibleRSI)
continue;
if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
@@ -314,7 +314,7 @@ void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
}
bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const R600InstrInfo *>(Fn.getTarget().getInstrInfo());
+ TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo());
MRI = &(Fn.getRegInfo());
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 74cf309..ddf68c9 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -148,11 +148,11 @@ private:
}
public:
// Ctor.
- R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT)
- : VLIWPacketizerList(MF, MLI, MDT, true),
- TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
- TRI(TII->getRegisterInfo()) {
+ R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
+ : VLIWPacketizerList(MF, MLI, true),
+ TII(static_cast<const R600InstrInfo *>(
+ MF.getSubtarget().getInstrInfo())),
+ TRI(TII->getRegisterInfo()) {
VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
}
@@ -328,12 +328,11 @@ public:
};
bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
// Instantiate the packetizer.
- R600PacketizerList Packetizer(Fn, MLI, MDT);
+ R600PacketizerList Packetizer(Fn, MLI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index 247808b..f1a8a41 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef R600REGISTERINFO_H_
-#define R600REGISTERINFO_H_
+#ifndef LLVM_LIB_TARGET_R600_R600REGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_R600REGISTERINFO_H
#include "AMDGPURegisterInfo.h"
@@ -46,4 +46,4 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
} // End namespace llvm
-#endif // AMDIDSAREGISTERINFO_H_
+#endif
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index 4d31a11..2e7dab6 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -8,10 +8,11 @@
/// \file
//===----------------------------------------------------------------------===//
-#ifndef SIDEFINES_H_
-#define SIDEFINES_H_
+#ifndef LLVM_LIB_TARGET_R600_SIDEFINES_H
+#define LLVM_LIB_TARGET_R600_SIDEFINES_H
namespace SIInstrFlags {
+// This needs to be kept in sync with the field bits in InstSI.
enum {
MIMG = 1 << 3,
SMRD = 1 << 4,
@@ -19,10 +20,38 @@ enum {
VOP2 = 1 << 6,
VOP3 = 1 << 7,
VOPC = 1 << 8,
- SALU = 1 << 9
+ SALU = 1 << 9,
+ MUBUF = 1 << 10,
+ MTBUF = 1 << 11,
+ FLAT = 1 << 12
};
}
+namespace SIInstrFlags {
+ enum Flags {
+ // First 4 bits are the instruction encoding
+ VM_CNT = 1 << 0,
+ EXP_CNT = 1 << 1,
+ LGKM_CNT = 1 << 2
+ };
+}
+
+namespace SISrcMods {
+ enum {
+ NEG = 1 << 0,
+ ABS = 1 << 1
+ };
+}
+
+namespace SIOutMods {
+ enum {
+ NONE = 0,
+ MUL2 = 1,
+ MUL4 = 2,
+ DIV2 = 3
+ };
+}
+
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
@@ -32,6 +61,7 @@ enum {
#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
+#define S_00B02C_SCRATCH_EN(x) (((x) & 0x1) << 0)
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
@@ -85,4 +115,7 @@ enum {
#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
-#endif // SIDEFINES_H_
+#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
+#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+
+#endif
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index 5f71453..d6f4b4c 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -66,6 +66,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -195,10 +196,10 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF.getTarget().getRegisterInfo());
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- MF.getTarget().getInstrInfo());
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -237,14 +238,66 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// If a PHI node defines an SGPR and any of its operands are VGPRs,
// then we need to move it to the VALU.
+ //
+ // Also, if a PHI node defines an SGPR and has all SGPR operands
+ // we must move it to the VALU, because the SGPR operands will
+ // all end up being assigned the same register, which means
+ // there is a potential for a conflict if different threads take
+ // different control flow paths.
+ //
+ // For Example:
+ //
+ // sgpr0 = def;
+ // ...
+ // sgpr1 = def;
+ // ...
+ // sgpr2 = PHI sgpr0, sgpr1
+ // use sgpr2;
+ //
+ // Will Become:
+ //
+ // sgpr2 = def;
+ // ...
+ // sgpr2 = def;
+ // ...
+ // use sgpr2
+ //
+ // FIXME: This is OK if the branching decision is made based on an
+ // SGPR value.
+ bool SGPRBranch = false;
+
+ // The one exception to this rule is when one of the operands
+ // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+ // instruction. In this case, there we know the program will
+ // never enter the second block (the loop) without entering
+ // the first block (where the condition is computed), so there
+ // is no chance for values to be over-written.
+
+ bool HasBreakDef = false;
for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
unsigned Reg = MI.getOperand(i).getReg();
if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
TII->moveToVALU(MI);
break;
}
+ MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
+ assert(DefInstr);
+ switch(DefInstr->getOpcode()) {
+
+ case AMDGPU::SI_BREAK:
+ case AMDGPU::SI_IF_BREAK:
+ case AMDGPU::SI_ELSE_BREAK:
+ // If we see a PHI instruction that defines an SGPR, then that PHI
+ // instruction has already been considered and should have
+ // a *_BREAK as an operand.
+ case AMDGPU::PHI:
+ HasBreakDef = true;
+ break;
+ }
}
+ if (!SGPRBranch && !HasBreakDef)
+ TII->moveToVALU(MI);
break;
}
case AMDGPU::REG_SEQUENCE: {
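Per the comments above, an SGPR-defining PHI may stay on the SALU only when its incoming values come from *_BREAK pseudos (or from other PHIs that were already checked), since those guarantee the defining block runs before the use. A small sketch of that operand scan over plain opcode tags (not the real MachineInstr API):

    #include <cassert>
    #include <vector>

    enum DefKind { SI_BREAK, SI_IF_BREAK, SI_ELSE_BREAK, PHI, OTHER_SALU };

    // Mirrors the HasBreakDef flag above: one break-like incoming def is
    // enough to keep the PHI out of the VALU.
    static bool hasBreakDef(const std::vector<DefKind> &IncomingDefs) {
      for (DefKind K : IncomingDefs) {
        switch (K) {
        case SI_BREAK:
        case SI_IF_BREAK:
        case SI_ELSE_BREAK:
        case PHI:
          return true;
        default:
          break;
        }
      }
      return false;
    }

    int main() {
      assert(hasBreakDef({OTHER_SALU, SI_IF_BREAK}));
      assert(!hasBreakDef({OTHER_SALU, OTHER_SALU})); // would be moved to VALU
      return 0;
    }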
@@ -252,8 +305,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
!hasVGPROperands(MI, TRI))
continue;
- DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n");
- DEBUG(MI.print(dbgs()));
+ DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
TII->moveToVALU(MI);
break;
@@ -265,8 +317,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
- DEBUG(dbgs() << " Fixing INSERT_SUBREG:\n");
- DEBUG(MI.print(dbgs()));
+ DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
TII->moveToVALU(MI);
}
break;
@@ -274,5 +325,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
}
}
- return false;
+
+ return true;
}
diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
index 7d116ee..f34c375 100644
--- a/lib/Target/R600/SIFixSGPRLiveRanges.cpp
+++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -9,18 +9,49 @@
//
/// \file
/// SALU instructions ignore control flow, so we need to modify the live ranges
-/// of the registers they define.
+/// of the registers they define in some cases.
///
-/// The strategy is to view the entire program as if it were a single basic
-/// block and calculate the intervals accordingly. We implement this
-/// by walking this list of segments for each LiveRange and setting the
-/// end of each segment equal to the start of the segment that immediately
-/// follows it.
+/// The main case we need to handle is when a def is used in one side of a
+/// branch and not another. For example:
+///
+/// %def
+/// IF
+/// ...
+/// ...
+/// ELSE
+/// %use
+/// ...
+/// ENDIF
+///
+/// Here we need the register allocator to avoid assigning any of the defs
+/// inside of the IF to the same register as %def. In traditional live
+/// interval analysis %def is not live inside the IF branch, however, since
+/// SALU instructions inside of IF will be executed even if the branch is not
+/// taken, there is the chance that one of the instructions will overwrite the
+/// value of %def, so the use in ELSE will see the wrong value.
+///
+/// The strategy we use for solving this is to add an extra use after the ENDIF:
+///
+/// %def
+/// IF
+/// ...
+/// ...
+/// ELSE
+/// %use
+/// ...
+/// ENDIF
+/// %use
+///
+/// Adding this use will make the def live throughout the IF branch, which is
+/// what we want.
#include "AMDGPU.h"
+#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
@@ -40,16 +71,15 @@ public:
initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const override {
+ const char *getPassName() const override {
return "SI Fix SGPR live ranges";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachinePostDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -60,6 +90,7 @@ public:
INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
"SI Fix SGPR Live Ranges", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
"SI Fix SGPR Live Ranges", false, false)
@@ -73,36 +104,86 @@ FunctionPass *llvm::createSIFixSGPRLiveRangesPass() {
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF.getTarget().getRegisterInfo());
+ MF.getSubtarget().getRegisterInfo());
LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+ MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
+ std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;
+
+ // First pass, collect all live intervals for SGPRs
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.defs()) {
+ if (MO.isImplicit())
+ continue;
+ unsigned Def = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Def)) {
+ if (TRI->isSGPRClass(MRI.getRegClass(Def)))
+ SGPRLiveRanges.push_back(
+ std::make_pair(Def, &LIS->getInterval(Def)));
+ } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
+ SGPRLiveRanges.push_back(
+ std::make_pair(Def, &LIS->getRegUnit(Def)));
+ }
+ }
+ }
+ }
+ // Second pass: fix the intervals
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
-
MachineBasicBlock &MBB = *BI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- MachineOperand *ExecUse = MI.findRegisterUseOperand(AMDGPU::EXEC);
- if (ExecUse)
+ if (MBB.succ_size() < 2)
+ continue;
+
+ // We have structured control flow, so the number of successors should be two.
+ assert(MBB.succ_size() == 2);
+ MachineBasicBlock *SuccA = *MBB.succ_begin();
+ MachineBasicBlock *SuccB = *(++MBB.succ_begin());
+ MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);
+
+ if (!NCD)
+ continue;
+
+ MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();
+
+ if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
+ assert(NCD->succ_size() == 2);
+ // We want to make sure we insert the Use after the ENDIF, not after
+ // the ELSE.
+ NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
+ *(++NCD->succ_begin()));
+ }
+ assert(SuccA && SuccB);
+ for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
+ unsigned Reg = RegLR.first;
+ LiveRange *LR = RegLR.second;
+
+ // FIXME: We could be smarter here. If the register is Live-In to
+ // one block, but the other doesn't have any SGPR defs, then there
+ // won't be a conflict. Also, if the branch decision is based on
+ // a value in an SGPR, then there will be no conflict.
+ bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
+ bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);
+
+ if ((!LiveInToA && !LiveInToB) ||
+ (LiveInToA && LiveInToB))
continue;
- for (const MachineOperand &Def : MI.operands()) {
- if (!Def.isReg() || !Def.isDef() ||!TargetRegisterInfo::isVirtualRegister(Def.getReg()))
- continue;
-
- const TargetRegisterClass *RC = MRI.getRegClass(Def.getReg());
-
- if (!TRI->isSGPRClass(RC))
- continue;
- LiveInterval &LI = LIS->getInterval(Def.getReg());
- for (unsigned i = 0, e = LI.size() - 1; i != e; ++i) {
- LiveRange::Segment &Seg = LI.segments[i];
- LiveRange::Segment &Next = LI.segments[i + 1];
- Seg.end = Next.start;
- }
- }
+ // This interval is live in to one successor, but not the other, so
+ // we need to update its range so it is live in to both.
+ DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR <<
+ " BB#" << SuccA->getNumber() << ", BB#" <<
+ SuccB->getNumber() <<
+ " with NCD = " << NCD->getNumber() << '\n');
+
+ // FIXME: Need to figure out how to update LiveRange here so this pass
+ // will be able to preserve LiveInterval analysis.
+ BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
+ TII->get(AMDGPU::SGPR_USE))
+ .addReg(Reg, RegState::Implicit);
+ DEBUG(NCD->getFirstNonPHI()->dump());
}
}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index b13c3b8..8d4164a 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -12,6 +12,12 @@
//
//===----------------------------------------------------------------------===//
+#ifdef _MSC_VER
+// Provide M_PI.
+#define _USE_MATH_DEFINES
+#include <cmath>
+#endif
+
#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
@@ -19,6 +25,7 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -46,10 +53,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
+ addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
- addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
+ addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
computeRegisterProperties();
@@ -80,8 +87,15 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SUBC, MVT::i32, Legal);
setOperationAction(ISD::SUBE, MVT::i32, Legal);
+ setOperationAction(ISD::FSIN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOS, MVT::f32, Custom);
+
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+
// We need to custom lower vector stores from local memory
- setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
@@ -89,12 +103,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
- // We need to custom lower loads/stores from private memory
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
- setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
-
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
@@ -114,6 +122,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
@@ -126,8 +136,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom);
-
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -179,6 +188,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
};
+ setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT, MVT::i1, Promote);
+
for (MVT VT : VecTypes) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
@@ -188,10 +200,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
case ISD::BITCAST:
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
- case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
case ISD::EXTRACT_SUBVECTOR:
break;
+ case ISD::CONCAT_VECTORS:
+ setOperationAction(Op, VT, Custom);
+ break;
default:
setOperationAction(Op, VT, Expand);
break;
@@ -213,16 +227,37 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
- // FIXME: These should be removed and handled the same was as f32 fneg. Source
- // modifiers also work for the double instructions.
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::f32, Custom);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FMINNUM);
+ setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::UINT_TO_FP);
+ // All memory operations. Some folding on the pointer operand is done to help
+ // matching the constant offsets in the addressing modes.
+ setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD);
+ setTargetDAGCombine(ISD::ATOMIC_STORE);
+ setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP);
+ setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ setTargetDAGCombine(ISD::ATOMIC_SWAP);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_ADD);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_AND);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_OR);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_XOR);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_NAND);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_MIN);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
+
setSchedulingPreference(Sched::RegPressure);
}
@@ -230,15 +265,63 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
// TargetLowering queries
//===----------------------------------------------------------------------===//
-bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- bool *IsFast) const {
+bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
+ EVT) const {
+ // SI has some legal vector types, but no legal vector operations. Say no
+ // shuffles are legal in order to prefer scalarizing some vector operations.
+ return false;
+}
+
+// FIXME: This really needs an address space argument. The immediate offset
+// size is different for different memory instruction sets.
+
+// The single offset DS instructions have a 16-bit unsigned byte offset.
+//
+// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r +
+// r + i with addr64. 32-bit has more addressing mode options. Depending on the
+// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
+//
+// SMRD instructions have an 8-bit, dword offset.
+//
+bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Allow a 16-bit unsigned immediate field, since this is what DS instructions
+ // use.
+ if (!isUInt<16>(AM.BaseOffs))
+ return false;
+
+ // Only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default: // Don't allow n * r
+ return false;
+ }
+
+ return true;
+}
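The comment above lists the immediate-offset encodings this check will eventually need to distinguish: DS takes a 16-bit unsigned byte offset, MUBUF/MTBUF a 12-bit unsigned byte offset, and SMRD an 8-bit dword offset. Expressed as standalone range checks (illustrative helpers, not LLVM API):

    #include <cassert>
    #include <cstdint>

    static bool fitsDSOffset(uint64_t ByteOff)    { return ByteOff < (1u << 16); }
    static bool fitsMUBUFOffset(uint64_t ByteOff) { return ByteOff < (1u << 12); }
    static bool fitsSMRDOffset(uint64_t ByteOff)  {
      // Dword-scaled: must be 4-byte aligned and fit in 8 bits after scaling.
      return (ByteOff & 3) == 0 && (ByteOff >> 2) < (1u << 8);
    }

    int main() {
      assert(fitsDSOffset(65535) && !fitsDSOffset(65536));
      assert(fitsMUBUFOffset(4095) && !fitsMUBUFOffset(4096));
      assert(fitsSMRDOffset(1020) && !fitsSMRDOffset(1024));
      return 0;
    }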
+
+bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align,
+ bool *IsFast) const {
if (IsFast)
*IsFast = false;
- // XXX: This depends on the address space and also we may want to revist
- // the alignment values we specify in the DataLayout.
-
// TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
// which isn't a simple VT.
if (!VT.isSimple() || VT == MVT::Other)
@@ -248,28 +331,44 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
// see what for specifically. The wording everywhere else seems to be the
// same.
- // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have
- // no alignment restrictions.
- if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
- // Using any pair of GPRs should be the same as any other pair.
- if (IsFast)
- *IsFast = true;
- return VT.bitsGE(MVT::i64);
- }
-
// XXX - The only mention I see of this in the ISA manual is for LDS direct
// reads the "byte address and must be dword aligned". Is it also true for the
// normal loads and stores?
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS)
- return false;
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) {
+ // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
+ // aligned, 8 byte access in a single operation using ds_read2/write2_b32
+ // with adjacent offsets.
+ return Align % 4 == 0;
+ }
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.
+ // This applies to private, global, and constant memory.
if (IsFast)
*IsFast = true;
return VT.bitsGT(MVT::i32);
}
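The rewritten check above allows misaligned LDS accesses only at 4-byte alignment (so an 8-byte access can be split into a ds_read2/write2_b32 pair) and otherwise treats anything wider than a dword as fast, since the two low address bits are ignored anyway. As a standalone predicate (the address-space tags are placeholders, not the AMDGPUAS values):

    #include <cassert>

    enum AddrSpace { Local, Global, Private, Constant };

    static bool allowsMisaligned(unsigned BitWidth, AddrSpace AS, unsigned Align) {
      if (AS == Local)
        return Align % 4 == 0;   // splittable into two dword-aligned halves
      return BitWidth > 32;      // dword alignment is forced anyway
    }

    int main() {
      assert(allowsMisaligned(64, Local, 4));
      assert(!allowsMisaligned(64, Local, 2));
      assert(allowsMisaligned(64, Global, 1));
      assert(!allowsMisaligned(32, Global, 1));
      return 0;
    }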
+EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ // FIXME: Should account for address space here.
+
+ // The default fallback uses the private pointer size as a guess for a type to
+ // use. Make sure we switch these to 64-bit accesses.
+
+ if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global
+ return MVT::v4i32;
+
+ if (Size >= 8 && DstAlign >= 4)
+ return MVT::v2i32;
+
+ // Use the default.
+ return MVT::Other;
+}
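getOptimalMemOpType above steers memset/memcpy lowering toward wide dword vectors instead of the default private-pointer-sized type. A sketch of the selection, with the MVT names merely echoed as strings:

    #include <cassert>
    #include <string>

    static std::string optimalMemOpType(unsigned Size, unsigned DstAlign) {
      if (Size >= 16 && DstAlign >= 4)
        return "v4i32";          // 16-byte chunks
      if (Size >= 8 && DstAlign >= 4)
        return "v2i32";          // 8-byte chunks
      return "Other";            // fall back to the generic choice
    }

    int main() {
      assert(optimalMemOpType(64, 4) == "v4i32");
      assert(optimalMemOpType(8, 4) == "v2i32");
      assert(optimalMemOpType(8, 1) == "Other");
      return 0;
    }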
+
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
@@ -280,25 +379,37 @@ SITargetLowering::getPreferredVectorAction(EVT VT) const {
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
return TII->isInlineConstant(Imm);
}
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
- SDLoc DL, SDValue Chain,
+ SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
+ const DataLayout *DL = getDataLayout();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
+ unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
+
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_ADDRESS);
- SDValue BasePtr = DAG.getCopyFromReg(Chain, DL,
- MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
- SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
+ MRI.getLiveInVirtReg(InputPtrReg), MVT::i64);
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, BasePtr,
DAG.getConstant(Offset, MVT::i64));
- return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr,
- MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
- false, false, MemVT.getSizeInBits() >> 3);
-
+ SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+
+ return DAG.getLoad(ISD::UNINDEXED, Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD,
+ VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
+ false, // isVolatile
+ true, // isNonTemporal
+ true, // isInvariant
+ DL->getABITypeAlignment(Ty)); // Alignment
}
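LowerParameter above now reads kernel arguments through the preloaded input-pointer SGPR pair and marks the load invariant, extending sub-dword values according to the Signed flag. A host-side sketch of what the SEXTLOAD/ZEXTLOAD distinction means for an i8 argument widened to 32 bits (names are illustrative):

    #include <cassert>
    #include <cstdint>

    static int32_t widenByte(int8_t V, bool Signed) {
      return Signed ? static_cast<int32_t>(V)                        // SEXTLOAD
                    : static_cast<int32_t>(static_cast<uint8_t>(V)); // ZEXTLOAD
    }

    int main() {
      assert(widenByte(int8_t(-1), true) == -1);
      assert(widenByte(int8_t(-1), false) == 255);
      return 0;
    }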
SDValue SITargetLowering::LowerFormalArguments(
@@ -309,7 +420,9 @@ SDValue SITargetLowering::LowerFormalArguments(
SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetMachine &TM = getTargetMachine();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(TM.getSubtargetImpl()->getRegisterInfo());
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
@@ -318,20 +431,20 @@ SDValue SITargetLowering::LowerFormalArguments(
assert(CallConv == CallingConv::C);
SmallVector<ISD::InputArg, 16> Splits;
- uint32_t Skipped = 0;
+ BitVector Skipped(Ins.size());
for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
// First check if it's a PS input addr
- if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
+ if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
!Arg.Flags.isByVal()) {
assert((PSInputNum <= 15) && "Too many PS inputs!");
if (!Arg.Used) {
// We can savely skip PS inputs
- Skipped |= 1 << i;
+ Skipped.set(i);
++PSInputNum;
continue;
}
@@ -340,7 +453,7 @@ SDValue SITargetLowering::LowerFormalArguments(
}
// Second split vertices into their elements
- if (Info->ShaderType != ShaderType::COMPUTE && Arg.VT.isVector()) {
+ if (Info->getShaderType() != ShaderType::COMPUTE && Arg.VT.isVector()) {
ISD::InputArg NewArg = Arg;
NewArg.Flags.setSplit();
NewArg.VT = Arg.VT.getVectorElementType();
@@ -356,30 +469,51 @@ SDValue SITargetLowering::LowerFormalArguments(
NewArg.PartOffset += NewArg.VT.getStoreSize();
}
- } else if (Info->ShaderType != ShaderType::COMPUTE) {
+ } else if (Info->getShaderType() != ShaderType::COMPUTE) {
Splits.push_back(Arg);
}
}
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// At least one interpolation mode must be enabled or else the GPU will hang.
- if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
+ if (Info->getShaderType() == ShaderType::PIXEL &&
+ (Info->PSInputAddr & 0x7F) == 0) {
Info->PSInputAddr |= 1;
CCInfo.AllocateReg(AMDGPU::VGPR0);
CCInfo.AllocateReg(AMDGPU::VGPR1);
}
// The pointer to the list of arguments is stored in SGPR0, SGPR1
- if (Info->ShaderType == ShaderType::COMPUTE) {
- CCInfo.AllocateReg(AMDGPU::SGPR0);
- CCInfo.AllocateReg(AMDGPU::SGPR1);
- MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
+ // The pointer to the scratch buffer is stored in SGPR2, SGPR3
+ if (Info->getShaderType() == ShaderType::COMPUTE) {
+ Info->NumUserSGPRs = 4;
+
+ unsigned InputPtrReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
+ unsigned InputPtrRegLo =
+ TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0);
+ unsigned InputPtrRegHi =
+ TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1);
+
+ unsigned ScratchPtrReg =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+ unsigned ScratchPtrRegLo =
+ TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0);
+ unsigned ScratchPtrRegHi =
+ TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1);
+
+ CCInfo.AllocateReg(InputPtrRegLo);
+ CCInfo.AllocateReg(InputPtrRegHi);
+ CCInfo.AllocateReg(ScratchPtrRegLo);
+ CCInfo.AllocateReg(ScratchPtrRegHi);
+ MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
+ MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
}
- if (Info->ShaderType == ShaderType::COMPUTE) {
+ if (Info->getShaderType() == ShaderType::COMPUTE) {
getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
Splits);
}
@@ -389,23 +523,36 @@ SDValue SITargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
- if (Skipped & (1 << i)) {
+ if (Skipped[i]) {
InVals.push_back(DAG.getUNDEF(Arg.VT));
continue;
}
CCValAssign &VA = ArgLocs[ArgIdx++];
- EVT VT = VA.getLocVT();
+ MVT VT = VA.getLocVT();
if (VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = Splits[i].VT;
+ const unsigned Offset = 36 + VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
- 36 + VA.getLocMemOffset(),
- Ins[i].Flags.isSExt());
+ Offset, Ins[i].Flags.isSExt());
+
+ const PointerType *ParamTy =
+ dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ // On SI local pointers are just offsets into LDS, so they are always
+ // less than 16-bits. On CI and newer they could potentially be
+ // real pointers, so we can't guarantee their size.
+ Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
+ DAG.getValueType(MVT::i16));
+ }
+
InVals.push_back(Arg);
+ Info->ABIArgOffset = Offset + MemVT.getStoreSize();
continue;
}
assert(VA.isRegLoc() && "Parameter must be in a register!");
@@ -458,39 +605,13 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
- case AMDGPU::SI_ADDR64_RSRC: {
- unsigned SuperReg = MI->getOperand(0).getReg();
- unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
- unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
- unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
- .addOperand(MI->getOperand(1));
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
- .addImm(0);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
- .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
- .addReg(SubRegHiLo)
- .addImm(AMDGPU::sub0)
- .addReg(SubRegHiHi)
- .addImm(AMDGPU::sub1);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SuperReg)
- .addReg(SubRegLo)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SubRegHi)
- .addImm(AMDGPU::sub2_sub3);
- MI->eraseFromParent();
- break;
- }
case AMDGPU::V_SUB_F64: {
unsigned DestReg = MI->getOperand(0).getReg();
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
@@ -498,8 +619,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
.addReg(MI->getOperand(1).getReg())
.addImm(1) // SRC1 modifiers
.addReg(MI->getOperand(2).getReg())
- .addImm(0) // SRC2 modifiers
- .addImm(0) // src2
.addImm(0) // CLAMP
.addImm(0); // OMOD
MI->eraseFromParent();
@@ -517,49 +636,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
- case AMDGPU::FABS_SI: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
- Reg)
- .addImm(0x7fffffff);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addReg(Reg);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::FNEG_SI: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
- Reg)
- .addImm(0x80000000);
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addReg(Reg);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::FCLAMP_SI: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64),
- MI->getOperand(0).getReg())
- .addImm(0) // SRC0 modifiers
- .addOperand(MI->getOperand(1))
- .addImm(0) // SRC1 modifiers
- .addImm(0) // SRC1
- .addImm(1) // CLAMP
- .addImm(0); // OMOD
- MI->eraseFromParent();
- }
}
return BB;
}
@@ -598,148 +674,31 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
- LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
- EVT VT = Op.getValueType();
-
- // These loads are legal.
- if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
- VT.isVector() && VT.getVectorNumElements() == 2 &&
- VT.getVectorElementType() == MVT::i32)
- return SDValue();
-
- if (Op.getValueType().isVector() &&
- (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
- (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
- Op.getValueType().getVectorNumElements() > 4))) {
- return SplitVectorLoad(Op, DAG);
- } else {
- SDValue Result = LowerLOAD(Op, DAG);
- assert((!Result.getNode() ||
- Result.getNode()->getNumValues() == 2) &&
- "Load should return a value and a chain");
- return Result;
- }
+ SDValue Result = LowerLOAD(Op, DAG);
+ assert((!Result.getNode() ||
+ Result.getNode()->getNumValues() == 2) &&
+ "Load should return a value and a chain");
+ return Result;
}
+ case ISD::FSIN:
+ case ISD::FCOS:
+ return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::FDIV: return LowerFDIV(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- //XXX: Hardcoded we only use two to store the pointer to the parameters.
- unsigned NumUserSGPRs = 2;
- switch (IntrinsicID) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case Intrinsic::r600_read_ngroups_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false);
- case Intrinsic::r600_read_ngroups_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false);
- case Intrinsic::r600_read_ngroups_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false);
- case Intrinsic::r600_read_global_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false);
- case Intrinsic::r600_read_global_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false);
- case Intrinsic::r600_read_global_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false);
- case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false);
- case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false);
- case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false);
- case Intrinsic::r600_read_tgid_x:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
- case Intrinsic::r600_read_tgid_y:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 1), VT);
- case Intrinsic::r600_read_tgid_z:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 2), VT);
- case Intrinsic::r600_read_tidig_x:
- return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
- AMDGPU::VGPR0, VT);
- case Intrinsic::r600_read_tidig_y:
- return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
- AMDGPU::VGPR1, VT);
- case Intrinsic::r600_read_tidig_z:
- return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
- AMDGPU::VGPR2, VT);
- case AMDGPUIntrinsic::SI_load_const: {
- SDValue Ops [] = {
- Op.getOperand(1),
- Op.getOperand(2)
- };
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
- VT.getSizeInBits() / 8, 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, VT, MMO);
- }
- case AMDGPUIntrinsic::SI_sample:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
- case AMDGPUIntrinsic::SI_sampleb:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
- case AMDGPUIntrinsic::SI_sampled:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
- case AMDGPUIntrinsic::SI_samplel:
- return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
- case AMDGPUIntrinsic::SI_vs_load_input:
- return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
- }
+ case ISD::GlobalAddress: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ return LowerGlobalAddress(MFI, Op, DAG);
}
-
- case ISD::INTRINSIC_VOID:
- SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-
- switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_tbuffer_store: {
- SDLoc DL(Op);
- SDValue Ops [] = {
- Chain,
- Op.getOperand(2),
- Op.getOperand(3),
- Op.getOperand(4),
- Op.getOperand(5),
- Op.getOperand(6),
- Op.getOperand(7),
- Op.getOperand(8),
- Op.getOperand(9),
- Op.getOperand(10),
- Op.getOperand(11),
- Op.getOperand(12),
- Op.getOperand(13),
- Op.getOperand(14)
- };
- EVT VT = Op.getOperand(3).getValueType();
-
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getSizeInBits() / 8, 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops, VT, MMO);
- }
- default:
- break;
- }
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
}
return SDValue();
}
@@ -760,6 +719,14 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
return nullptr;
}
+SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+
+ FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
+ unsigned FrameIndex = FINode->getIndex();
+
+ return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
+}
+
/// This transforms the control flow intrinsics to get the branch destination as
/// last parameter, also switches branch target with BR if the need arise
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
@@ -810,7 +777,9 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
BR->getOperand(0),
BRCOND.getOperand(2)
};
- DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops);
+ SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
+ DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
+ BR = NewBR.getNode();
}
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
@@ -838,56 +807,190 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}
-SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- LoadSDNode *Load = cast<LoadSDNode>(Op);
- SDValue Lowered = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
- if (Lowered.getNode())
- return Lowered;
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+ SDValue Op,
+ SelectionDAG &DAG) const {
+ GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
- return SDValue();
- }
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
- EVT MemVT = Load->getMemoryVT();
+ SDLoc DL(GSD);
+ const GlobalValue *GV = GSD->getGlobal();
+ MVT PtrVT = getPointerTy(GSD->getAddressSpace());
- assert(!MemVT.isVector() && "Private loads should be scalarized");
- assert(!MemVT.isFloatingPoint() && "FP loads should be promoted to int");
+ SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
- SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
- DAG.getConstant(2, MVT::i32));
+ SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
+ DAG.getConstant(0, MVT::i32));
+ SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
+ PtrLo, GA);
+ SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
+ PtrHi, DAG.getConstant(0, MVT::i32),
+ SDValue(Lo.getNode(), 1));
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
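// In other words, the constant-address GlobalAddress is computed as a 64-bit
// add built from 32-bit halves with an explicit carry chain:
//   Lo   = lo32(CONST_DATA_PTR) + GA          (ADDC, produces the carry)
//   Hi   = hi32(CONST_DATA_PTR) + 0 + carry   (ADDE, consumes the carry)
//   Addr = build_pair(Lo, Hi)                 (reassembled as an i64 pointer)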
- // FIXME: REGISTER_LOAD should probably have a chain result.
- SDValue Chain = Load->getChain();
- SDValue LoLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Chain, Ptr,
- DAG.getTargetConstant(0, MVT::i32),
- Op.getOperand(2));
+SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(MF.getSubtarget().getRegisterInfo());
- SDValue Ret = LoLoad.getValue(0);
- if (MemVT.getSizeInBits() == 64) {
- // TODO: This needs a test to make sure the right thing is happening with
- // the chain. That is hard without general function support.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ switch (IntrinsicID) {
+ case Intrinsic::r600_read_ngroups_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_X, false);
+ case Intrinsic::r600_read_ngroups_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Y, false);
+ case Intrinsic::r600_read_ngroups_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Z, false);
+ case Intrinsic::r600_read_global_size_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
+ case Intrinsic::r600_read_global_size_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
+ case Intrinsic::r600_read_global_size_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
+ case Intrinsic::r600_read_local_size_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_X, false);
+ case Intrinsic::r600_read_local_size_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
+ case Intrinsic::r600_read_local_size_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
+
+ case Intrinsic::AMDGPU_read_workdim:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
+ false);
+
+ case Intrinsic::r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
+ case Intrinsic::r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
+ case Intrinsic::r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
+ case Intrinsic::r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
+ case Intrinsic::r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
+ case Intrinsic::r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
+ case AMDGPUIntrinsic::SI_load_const: {
+ SDValue Ops[] = {
+ Op.getOperand(1),
+ Op.getOperand(2)
+ };
- SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
- DAG.getConstant(1, MVT::i32));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
+ Op->getVTList(), Ops, VT, MMO);
+ }
+ case AMDGPUIntrinsic::SI_sample:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampleb:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampled:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
+ case AMDGPUIntrinsic::SI_samplel:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
+ case AMDGPUIntrinsic::SI_vs_load_input:
+ return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3));
+ default:
+ return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ }
+}
- SDValue HiLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Chain, IncPtr,
- DAG.getTargetConstant(0, MVT::i32),
- Op.getOperand(2));
+SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoLoad, HiLoad);
- // Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- // LoLoad.getValue(1), HiLoad.getValue(1));
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_tbuffer_store: {
+ SDLoc DL(Op);
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(2),
+ Op.getOperand(3),
+ Op.getOperand(4),
+ Op.getOperand(5),
+ Op.getOperand(6),
+ Op.getOperand(7),
+ Op.getOperand(8),
+ Op.getOperand(9),
+ Op.getOperand(10),
+ Op.getOperand(11),
+ Op.getOperand(12),
+ Op.getOperand(13),
+ Op.getOperand(14)
+ };
+
+ EVT VT = Op.getOperand(3).getValueType();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getStoreSize(), 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
+ Op->getVTList(), Ops, VT, MMO);
}
+ default:
+ return SDValue();
+ }
+}
- SDValue Ops[] = {
- Ret,
- Chain
- };
+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+
+ if (Op.getValueType().isVector()) {
+ assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
+ "Custom lowering for non-i32 vectors hasn't been implemented.");
+ unsigned NumElements = Op.getValueType().getVectorNumElements();
+ assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
+ switch (Load->getAddressSpace()) {
+ default: break;
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // v4 loads are supported for private and global memory.
+ if (NumElements <= 4)
+ break;
+ // fall-through
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return ScalarizeVectorLoad(Op, DAG);
+ }
+ }
- return DAG.getMergeValues(Ops, DL);
+ return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
}
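// Summary of the vector-load dispatch above (i32 element types only, and v2
// never reaches this point per the assert):
//   LOCAL                          -> ScalarizeVectorLoad
//   GLOBAL / PRIVATE, <= 4 elems   -> fall through to the common lowering
//   GLOBAL / PRIVATE, >  4 elems   -> ScalarizeVectorLoad
//   other address spaces           -> AMDGPUTargetLowering::LowerLOAD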
SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
@@ -926,6 +1029,100 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
}
+// Catch division cases where we can use shortcuts with rcp and rsq
+// instructions.
+SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
+
+ if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
+ if ((Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals())) &&
+ CLHS->isExactlyValue(1.0)) {
+ // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
+      // the CI documentation have a worst case error of 1 ulp.
+ // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
+ // use it as long as we aren't trying to use denormals.
+
+ // 1.0 / sqrt(x) -> rsq(x)
+ //
+ // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
+ // error seems really high at 2^29 ULP.
+ if (RHS.getOpcode() == ISD::FSQRT)
+ return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
+
+ // 1.0 / x -> rcp(x)
+ return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
+ }
+ }
+
+ if (Unsafe) {
+ // Turn into multiply by the reciprocal.
+ // x / y -> x * (1.0 / y)
+ SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
+ return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip);
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
+ SDValue FastLowered = LowerFastFDIV(Op, DAG);
+ if (FastLowered.getNode())
+ return FastLowered;
+
+ // This uses v_rcp_f32 which does not handle denormals. Let this hit a
+ // selection error for now rather than do something incorrect.
+ if (Subtarget->hasFP32Denormals())
+ return SDValue();
+
+ SDLoc SL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
+
+ const APFloat K0Val(BitsToFloat(0x6f800000));
+ const SDValue K0 = DAG.getConstantFP(K0Val, MVT::f32);
+
+ const APFloat K1Val(BitsToFloat(0x2f800000));
+ const SDValue K1 = DAG.getConstantFP(K1Val, MVT::f32);
+
+ const SDValue One = DAG.getTargetConstantFP(1.0, MVT::f32);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
+
+ SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
+
+ SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+
+ r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
+
+ SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
+
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
+
+ return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
+}
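// Rough algebra for the scaling above (0x6f800000 and 0x2f800000 are the
// single-precision bit patterns of 2^96 and 2^-32 respectively):
//   r3     = (|RHS| > 2^96) ? 2^-32 : 1.0
//   result = r3 * (LHS * rcp(RHS * r3)) ~= r3 * LHS / (RHS * r3) = LHS / RHS
// so very large denominators are scaled down before v_rcp_f32 and the quotient
// is rescaled afterwards; the scale factor cancels out of the final value.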
+
+SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue();
+}
+
+SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (VT == MVT::f32)
+ return LowerFDIV32(Op, DAG);
+
+ if (VT == MVT::f64)
+ return LowerFDIV64(Op, DAG);
+
+ llvm_unreachable("Unexpected type for fdiv");
+}
+
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -937,79 +1134,42 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
VT.getVectorElementType() == MVT::i32)
return SDValue();
+ if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (VT.isVector() && VT.getVectorNumElements() > 4)
+ return ScalarizeVectorStore(Op, DAG);
+ return SDValue();
+ }
+
SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
if (Ret.getNode())
return Ret;
if (VT.isVector() && VT.getVectorNumElements() >= 8)
- return SplitVectorStore(Op, DAG);
+ return ScalarizeVectorStore(Op, DAG);
if (VT == MVT::i1)
return DAG.getTruncStore(Store->getChain(), DL,
DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
Store->getBasePtr(), MVT::i1, Store->getMemOperand());
- if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
- return SDValue();
+ return SDValue();
+}
- SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Store->getBasePtr(),
- DAG.getConstant(2, MVT::i32));
- SDValue Chain = Store->getChain();
- SmallVector<SDValue, 8> Values;
-
- if (Store->isTruncatingStore()) {
- unsigned Mask = 0;
- if (Store->getMemoryVT() == MVT::i8) {
- Mask = 0xff;
- } else if (Store->getMemoryVT() == MVT::i16) {
- Mask = 0xffff;
- }
- SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Chain, Store->getBasePtr(),
- DAG.getConstant(0, MVT::i32));
- SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(),
- DAG.getConstant(0x3, MVT::i32));
- SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
- DAG.getConstant(3, MVT::i32));
- SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(),
- DAG.getConstant(Mask, MVT::i32));
- SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
- MaskedValue, ShiftAmt);
- SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32,
- DAG.getConstant(32, MVT::i32), ShiftAmt);
- SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32,
- DAG.getConstant(Mask, MVT::i32),
- RotrAmt);
- Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
- Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
-
- Values.push_back(Dst);
- } else if (VT == MVT::i64) {
- for (unsigned i = 0; i < 2; ++i) {
- Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Store->getValue(), DAG.getConstant(i, MVT::i32)));
- }
- } else if (VT == MVT::i128) {
- for (unsigned i = 0; i < 2; ++i) {
- for (unsigned j = 0; j < 2; ++j) {
- Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
- Store->getValue(), DAG.getConstant(i, MVT::i32)),
- DAG.getConstant(j, MVT::i32)));
- }
- }
- } else {
- Values.push_back(Store->getValue());
- }
+SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDValue Arg = Op.getOperand(0);
+ SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
+ DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
+ DAG.getConstantFP(0.5 / M_PI, VT)));
- for (unsigned i = 0; i < Values.size(); ++i) {
- SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
- Ptr, DAG.getConstant(i, MVT::i32));
- Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
- Chain, Values[i], PartPtr,
- DAG.getTargetConstant(0, MVT::i32));
+ switch (Op.getOpcode()) {
+ case ISD::FCOS:
+ return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
+ case ISD::FSIN:
+ return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
+ default:
+ llvm_unreachable("Wrong trig opcode");
}
- return Chain;
}
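// In effect this lowers sin(x) to SIN_HW(fract(x * (1 / (2 * pi)))) and cos(x)
// to COS_HW(fract(x * (1 / (2 * pi)))): the hardware opcodes are handed the
// angle as a fraction of a full turn in [0, 1) rather than in radians.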
//===----------------------------------------------------------------------===//
@@ -1106,6 +1266,111 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
return SDValue();
}
+// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
+
+// This is a variant of
+// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
+//
+// The normal DAG combiner will do this, but only if the add has one use since
+// that would increase the number of instructions.
+//
+// This prevents us from seeing a constant offset that can be folded into a
+// memory instruction's addressing mode. If we know the resulting add offset of
+// a pointer can be folded into an addressing offset, we can replace the pointer
+// operand with the add of the new constant offset. This eliminates one of the uses,
+// and may allow the remaining use to also be simplified.
+//
+SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
+ unsigned AddrSpace,
+ DAGCombinerInfo &DCI) const {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
+ if (!CN1)
+ return SDValue();
+
+ const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!CAdd)
+ return SDValue();
+
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
+
+ // If the resulting offset is too large, we can't fold it into the addressing
+ // mode offset.
+ APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
+ if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
+ SDValue COffset = DAG.getConstant(Offset, MVT::i32);
+
+ return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
+}
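// Worked example with made-up constants: a DS access whose address is
//   (shl (add x, 16), 2)
// is rewritten to
//   (add (shl x, 2), 64)
// and, provided TII->canFoldOffset(64, AddrSpace) accepts it, the 64 can then
// be folded into the instruction's immediate offset field instead of needing a
// separate add, even though the original add has other users.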
+
+static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FMAXNUM:
+ return AMDGPUISD::FMAX3;
+ case AMDGPUISD::SMAX:
+ return AMDGPUISD::SMAX3;
+ case AMDGPUISD::UMAX:
+ return AMDGPUISD::UMAX3;
+ case ISD::FMINNUM:
+ return AMDGPUISD::FMIN3;
+ case AMDGPUISD::SMIN:
+ return AMDGPUISD::SMIN3;
+ case AMDGPUISD::UMIN:
+ return AMDGPUISD::UMIN3;
+ default:
+ llvm_unreachable("Not a min/max opcode");
+ }
+}
+
+SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ unsigned Opc = N->getOpcode();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+  // Only do this if the inner op has one use since this will just increase
+  // register pressure for no benefit.
+
+ // max(max(a, b), c)
+ if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0.getOperand(0),
+ Op0.getOperand(1),
+ Op1);
+ }
+
+ // max(a, max(b, c))
+ if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0,
+ Op1.getOperand(0),
+ Op1.getOperand(1));
+ }
+
+ return SDValue();
+}
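// Example: when the inner node has a single use,
//   fmaxnum(fmaxnum(a, b), c)
// collapses to one FMAX3 node (v_max3_f32 after selection) instead of two
// chained max operations; the integer and min variants map through
// minMaxOpcToMin3Max3Opc in the same way.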
+
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -1114,20 +1379,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
- case ISD::SELECT_CC: {
- ConstantSDNode *True, *False;
- // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
- if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
- && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
- && True->isAllOnesValue()
- && False->isNullValue()
- && VT == MVT::i1) {
- return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
- N->getOperand(1), N->getOperand(4));
-
- }
- break;
- }
case ISD::SETCC: {
SDValue Arg0 = N->getOperand(0);
SDValue Arg1 = N->getOperand(1);
@@ -1147,6 +1398,17 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::FMAXNUM: // TODO: What about fmax_legacy?
+ case ISD::FMINNUM:
+ case AMDGPUISD::SMAX:
+ case AMDGPUISD::SMIN:
+ case AMDGPUISD::UMAX:
+ case AMDGPUISD::UMIN: {
+ if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ getTargetMachine().getOptLevel() > CodeGenOpt::None)
+ return performMin3Max3Combine(N, DCI);
+ break;
+ }
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
@@ -1171,16 +1433,151 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP: {
return performUCharToFloatCombine(N, DCI);
+
+ case ISD::FADD: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::f32)
+ break;
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // These should really be instruction patterns, but writing patterns with
+    // source modifiers is a pain.
+
+ // fadd (fadd (a, a), b) -> mad 2.0, a, b
+ if (LHS.getOpcode() == ISD::FADD) {
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ const SDValue Two = DAG.getTargetConstantFP(2.0, MVT::f32);
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, RHS);
+ }
+ }
+
+ // fadd (b, fadd (a, a)) -> mad 2.0, a, b
+ if (RHS.getOpcode() == ISD::FADD) {
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ const SDValue Two = DAG.getTargetConstantFP(2.0, MVT::f32);
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, LHS);
+ }
+ }
+
+ break;
+ }
+ case ISD::FSUB: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ EVT VT = N->getValueType(0);
+
+ // Try to get the fneg to fold into the source modifier. This undoes generic
+ // DAG combines and folds them into the mad.
+ if (VT == MVT::f32) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (LHS.getOpcode() == ISD::FMUL) {
+ // (fsub (fmul a, b), c) -> mad a, b, (fneg c)
+
+ SDValue A = LHS.getOperand(0);
+ SDValue B = LHS.getOperand(1);
+ SDValue C = DAG.getNode(ISD::FNEG, DL, VT, RHS);
+
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
+ }
+
+ if (RHS.getOpcode() == ISD::FMUL) {
+ // (fsub c, (fmul a, b)) -> mad (fneg a), b, c
+
+ SDValue A = DAG.getNode(ISD::FNEG, DL, VT, RHS.getOperand(0));
+ SDValue B = RHS.getOperand(1);
+ SDValue C = LHS;
+
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, A, B, C);
+ }
+
+ if (LHS.getOpcode() == ISD::FADD) {
+ // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
+
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ const SDValue Two = DAG.getTargetConstantFP(2.0, MVT::f32);
+ SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
+
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Two, A, NegRHS);
+ }
+ }
+
+ if (RHS.getOpcode() == ISD::FADD) {
+ // (fsub c, (fadd a, a)) -> mad -2.0, a, c
+
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ const SDValue NegTwo = DAG.getTargetConstantFP(-2.0, MVT::f32);
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, NegTwo, A, LHS);
+ }
+ }
+ }
+
+ break;
}
}
+ case ISD::LOAD:
+ case ISD::STORE:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
+ if (DCI.isBeforeLegalize())
+ break;
+
+ MemSDNode *MemNode = cast<MemSDNode>(N);
+ SDValue Ptr = MemNode->getBasePtr();
+ // TODO: We could also do this for multiplies.
+ unsigned AS = MemNode->getAddressSpace();
+ if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+ SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
+ if (NewPtr) {
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0, E = MemNode->getNumOperands(); I != E; ++I)
+ NewOps.push_back(MemNode->getOperand(I));
+
+ NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
+ return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
+ }
+ }
+ break;
+ }
+ }
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
/// \brief Test if RegClass is one of the VSrc classes
static bool isVSrc(unsigned RegClass) {
- return AMDGPU::VSrc_32RegClassID == RegClass ||
- AMDGPU::VSrc_64RegClassID == RegClass;
+ switch(RegClass) {
+ default: return false;
+ case AMDGPU::VSrc_32RegClassID:
+ case AMDGPU::VCSrc_32RegClassID:
+ case AMDGPU::VSrc_64RegClassID:
+ case AMDGPU::VCSrc_64RegClassID:
+ return true;
+ }
}
/// \brief Test if RegClass is one of the SSrc classes
@@ -1227,8 +1624,8 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
bool &ScalarSlotUsed) const {
MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
return false;
@@ -1262,8 +1659,8 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
const TargetRegisterClass *SITargetLowering::getRegClassForNode(
SelectionDAG &DAG, const SDValue &Op) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
const SIRegisterInfo &TRI = TII->getRegisterInfo();
if (!Op->isMachineOpcode()) {
@@ -1292,10 +1689,9 @@ const TargetRegisterClass *SITargetLowering::getRegClassForNode(
// If the COPY_TO_REGCLASS instruction is copying to a VSrc register
// class, then the register class for the value could be either a
// VReg or and SReg. In order to get a more accurate
- if (OpClassID == AMDGPU::VSrc_32RegClassID ||
- OpClassID == AMDGPU::VSrc_64RegClassID) {
+ if (isVSrc(OpClassID))
return getRegClassForNode(DAG, Op.getOperand(0));
- }
+
return TRI.getRegClass(OpClassID);
case AMDGPU::EXTRACT_SUBREG: {
int SubIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -1315,7 +1711,8 @@ const TargetRegisterClass *SITargetLowering::getRegClassForNode(
/// \brief Does "Op" fit into register class "RegClass" ?
bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const TargetRegisterClass *RC = getRegClassForNode(DAG, Op);
if (!RC) {
return false;
@@ -1323,37 +1720,6 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
}
-/// \brief Make sure that we don't exeed the number of allowed scalars
-void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
- unsigned RegClass,
- bool &ScalarSlotUsed) const {
-
- // First map the operands register class to a destination class
- if (RegClass == AMDGPU::VSrc_32RegClassID)
- RegClass = AMDGPU::VReg_32RegClassID;
- else if (RegClass == AMDGPU::VSrc_64RegClassID)
- RegClass = AMDGPU::VReg_64RegClassID;
- else
- return;
-
- // Nothing to do if they fit naturally
- if (fitsRegClass(DAG, Operand, RegClass))
- return;
-
- // If the scalar slot isn't used yet use it now
- if (!ScalarSlotUsed) {
- ScalarSlotUsed = true;
- return;
- }
-
- // This is a conservative aproach. It is possible that we can't determine the
- // correct register class and copy too often, but better safe than sorry.
- SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
- SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
- Operand.getValueType(), Operand, RC);
- Operand = SDValue(Node, 0);
-}
-
/// \returns true if \p Node's operands are different from the SDValue list
/// \p Ops
static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
@@ -1365,14 +1731,15 @@ static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
return false;
}
-/// \brief Try to fold the Nodes operands into the Node
-SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
- SelectionDAG &DAG) const {
-
+/// TODO: This needs to be removed. Its current primary purpose is to fold
+/// immediates into operands when legal. The legalization parts are redundant
+/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
+SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
// Original encoding (either e32 or e64)
int Opcode = Node->getMachineOpcode();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
const MCInstrDesc *Desc = &TII->get(Opcode);
unsigned NumDefs = Desc->getNumDefs();
@@ -1385,13 +1752,6 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
assert(!DescRev || DescRev->getNumDefs() == NumDefs);
assert(!DescRev || DescRev->getNumOperands() == NumOps);
- // e64 version if available, -1 otherwise
- int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
- const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
- int InputModifiers[3] = {0};
-
- assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
-
int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
bool HaveVSrc = false, HaveSSrc = false;
@@ -1421,9 +1781,17 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
// No scalar allowed when we have both VSrc and SSrc
bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
+ // If this instruction has an implicit use of VCC, then it can't use the
+ // constant bus.
+ for (unsigned i = 0, e = Desc->getNumImplicitUses(); i != e; ++i) {
+ if (Desc->ImplicitUses[i] == AMDGPU::VCC) {
+ ScalarSlotUsed = true;
+ break;
+ }
+ }
+
// Second go over the operands and try to fold them
std::vector<SDValue> Ops;
- bool Promote2e64 = false;
for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
i != e && Op < NumOps; ++i, ++Op) {
@@ -1438,11 +1806,9 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
// Is this a VSrc or SSrc operand?
unsigned RegClass = Desc->OpInfo[Op].RegClass;
if (isVSrc(RegClass) || isSSrc(RegClass)) {
- // Try to fold the immediates
- if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
- // Folding didn't work, make sure we don't hit the SReg limit.
- ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
- }
+ // Try to fold the immediates. If this ends up with multiple constant bus
+ // uses, it will be legalized later.
+ foldImm(Ops[i], Immediate, ScalarSlotUsed);
continue;
}
@@ -1464,66 +1830,6 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
continue;
}
}
-
- if (Immediate)
- continue;
-
- if (DescE64) {
- // Test if it makes sense to switch to e64 encoding
- unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
- if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
- continue;
-
- int32_t TmpImm = -1;
- if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
- (!fitsRegClass(DAG, Ops[i], RegClass) &&
- fitsRegClass(DAG, Ops[1], OtherRegClass))) {
-
- // Switch to e64 encoding
- Immediate = -1;
- Promote2e64 = true;
- Desc = DescE64;
- DescE64 = nullptr;
- }
- }
-
- if (!DescE64 && !Promote2e64)
- continue;
- if (!Operand.isMachineOpcode())
- continue;
- if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
- Ops.pop_back();
- Ops.push_back(Operand.getOperand(0));
- InputModifiers[i] = 1;
- Promote2e64 = true;
- if (!DescE64)
- continue;
- Desc = DescE64;
- DescE64 = nullptr;
- }
- else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
- Ops.pop_back();
- Ops.push_back(Operand.getOperand(0));
- InputModifiers[i] = 2;
- Promote2e64 = true;
- if (!DescE64)
- continue;
- Desc = DescE64;
- DescE64 = nullptr;
- }
- }
-
- if (Promote2e64) {
- std::vector<SDValue> OldOps(Ops);
- Ops.clear();
- for (unsigned i = 0; i < OldOps.size(); ++i) {
- // src_modifier
- Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
- Ops.push_back(OldOps[i]);
- }
- // Add the modifier flags while promoting
- for (unsigned i = 0; i < 2; ++i)
- Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
}
// Add optional chain and glue
@@ -1632,46 +1938,182 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
}
+/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
+/// with frame index operands.
+/// LLVM assumes that inputs to these instructions are registers.
+void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
+ SelectionDAG &DAG) const {
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
+ if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
+ Ops.push_back(Node->getOperand(i));
+ continue;
+ }
+
+ SDLoc DL(Node);
+ Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
+ Node->getOperand(i).getValueType(),
+ Node->getOperand(i)), 0));
+ }
+
+ DAG.UpdateNodeOperands(Node, Ops);
+}
+
/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
Node = AdjustRegClass(Node, DAG);
if (TII->isMIMG(Node->getMachineOpcode()))
adjustWritemask(Node, DAG);
- return foldOperands(Node, DAG);
+ if (Node->getMachineOpcode() == AMDGPU::INSERT_SUBREG ||
+ Node->getMachineOpcode() == AMDGPU::REG_SEQUENCE) {
+ legalizeTargetIndependentNode(Node, DAG);
+ return Node;
+ }
+
+ return legalizeOperands(Node, DAG);
}
/// \brief Assign the register class depending on the number of
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- if (!TII->isMIMG(MI->getOpcode()))
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ getTargetMachine().getSubtargetImpl()->getInstrInfo());
+
+ TII->legalizeOperands(MI);
+
+ if (TII->isMIMG(MI->getOpcode())) {
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned Writemask = MI->getOperand(1).getImm();
+ unsigned BitsSet = 0;
+ for (unsigned i = 0; i < 4; ++i)
+ BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+ const TargetRegisterClass *RC;
+ switch (BitsSet) {
+ default: return;
+ case 1: RC = &AMDGPU::VReg_32RegClass; break;
+ case 2: RC = &AMDGPU::VReg_64RegClass; break;
+ case 3: RC = &AMDGPU::VReg_96RegClass; break;
+ }
+
+ unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
+ MI->setDesc(TII->get(NewOpcode));
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MRI.setRegClass(VReg, RC);
+ return;
+ }
+
+ // Replace unused atomics with the no return version.
+ int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI->getOpcode());
+ if (NoRetAtomicOp != -1) {
+ if (!Node->hasAnyUseOfValue(0)) {
+ MI->setDesc(TII->get(NoRetAtomicOp));
+ MI->RemoveOperand(0);
+ }
+
return;
+ }
+}
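// Example of the write-mask adjustment: a MIMG instruction whose dmask is
// 0b0101 writes two components, so BitsSet == 2, the destination register is
// re-classed to VReg_64 and the opcode is swapped for the two-channel variant
// returned by getMaskedMIMGOp.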
+
+static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
+ SDValue K = DAG.getTargetConstant(Val, MVT::i32);
+ return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
+}
- unsigned VReg = MI->getOperand(0).getReg();
- unsigned Writemask = MI->getOperand(1).getImm();
- unsigned BitsSet = 0;
- for (unsigned i = 0; i < 4; ++i)
- BitsSet += Writemask & (1 << i) ? 1 : 0;
+MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const {
+#if 1
+ // XXX - Workaround for moveToVALU not handling different register class
+ // inserts for REG_SEQUENCE.
+
+ // Build the half of the subregister with the constants.
+ const SDValue Ops0[] = {
+ DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32),
+ buildSMovImm32(DAG, DL, 0),
+ DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
+ buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32),
+ DAG.getTargetConstant(AMDGPU::sub1, MVT::i32)
+ };
+
+ SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::v2i32, Ops0), 0);
+
+ // Combine the constants and the pointer.
+ const SDValue Ops1[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ Ptr,
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
+ SubRegHi,
+ DAG.getTargetConstant(AMDGPU::sub2_sub3, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
+#else
+ const SDValue Ops[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ Ptr,
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
+ buildSMovImm32(DAG, DL, 0),
+ DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
+ buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32),
+ DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
- const TargetRegisterClass *RC;
- switch (BitsSet) {
- default: return;
- case 1: RC = &AMDGPU::VReg_32RegClass; break;
- case 2: RC = &AMDGPU::VReg_64RegClass; break;
- case 3: RC = &AMDGPU::VReg_96RegClass; break;
+#endif
+}
+
+/// \brief Return a resource descriptor with the 'Add TID' bit enabled
+/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
+/// of the resource descriptor) to create an offset, which is added to the
+/// resource pointer.
+MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr,
+ uint32_t RsrcDword1,
+ uint64_t RsrcDword2And3) const {
+ SDValue PtrLo = DAG.getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
+ SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
+ if (RsrcDword1) {
+ PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
+ DAG.getConstant(RsrcDword1, MVT::i32)), 0);
}
- unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
- MI->setDesc(TII->get(NewOpcode));
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- MRI.setRegClass(VReg, RC);
+ SDValue DataLo = buildSMovImm32(DAG, DL,
+ RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
+ SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
+
+ const SDValue Ops[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
+ PtrLo,
+ DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
+ PtrHi,
+ DAG.getTargetConstant(AMDGPU::sub1, MVT::i32),
+ DataLo,
+ DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
+ DataHi,
+ DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
+}
+
+MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const {
+ uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
}
MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
@@ -1699,12 +2141,21 @@ MachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
return N;
}
ConstantSDNode *Offset = cast<ConstantSDNode>(N->getOperand(1));
- SDValue Ops[] = {
- SDValue(DAG.getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::i128,
- DAG.getConstant(0, MVT::i64)), 0),
- N->getOperand(0),
- DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32)
- };
+
+ const SDValue Zero64 = DAG.getTargetConstant(0, MVT::i64);
+ SDValue Ptr(DAG.getMachineNode(AMDGPU::S_MOV_B64, DL, MVT::i64, Zero64), 0);
+ MachineSDNode *RSrc = wrapAddr64Rsrc(DAG, DL, Ptr);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(SDValue(RSrc, 0));
+ Ops.push_back(N->getOperand(0));
+ Ops.push_back(DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32));
+
+ // Copy remaining operands so we keep any chain and glue nodes that follow
+ // the normal operands.
+ for (unsigned I = 2, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
return DAG.getMachineNode(NewOpcode, DL, N->getVTList(), Ops);
}
}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index e25323a..7bf406e 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SIISELLOWERING_H
-#define SIISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_R600_SIISELLOWERING_H
+#define LLVM_LIB_TARGET_R600_SIISELLOWERING_H
#include "AMDGPUISelLowering.h"
#include "SIInstrInfo.h"
@@ -25,9 +25,21 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue Chain, unsigned Offset, bool Signed) const;
SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+ SelectionDAG &DAG) const override;
+
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
@@ -36,20 +48,37 @@ class SITargetLowering : public AMDGPUTargetLowering {
const SDValue &Op) const;
bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const;
- void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
- unsigned RegClass, bool &ScalarSlotUsed) const;
- SDNode *foldOperands(MachineSDNode *N, SelectionDAG &DAG) const;
+ SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
static SDValue performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI);
+ SDValue performSHLPtrCombine(SDNode *N,
+ unsigned AS,
+ DAGCombinerInfo &DCI) const;
+
+ SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
public:
SITargetLowering(TargetMachine &tm);
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
- bool *IsFast) const override;
+
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+ EVT /*VT*/) const override;
+
+ bool isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const override;
+
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+ unsigned Align,
+ bool *IsFast) const override;
+
+ EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
@@ -77,8 +106,19 @@ public:
int32_t analyzeImmediate(const SDNode *N) const;
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const override;
+ void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
+
+ MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const;
+ MachineSDNode *buildRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr,
+ uint32_t RsrcDword1,
+ uint64_t RsrcDword2And3) const;
+ MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
+ SDLoc DL,
+ SDValue Ptr) const;
};
} // End namespace llvm
-#endif //SIISELLOWERING_H
+#endif
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 1733326..712d97d 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -17,6 +17,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -273,17 +275,17 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
continue;
NeedWait = true;
-
+
if (Ordered[i]) {
unsigned Value = LastIssued.Array[i] - Required.Array[i];
- // adjust the value to the real hardware posibilities
+ // Adjust the value to the real hardware possibilities.
Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
} else
Counts.Array[i] = 0;
- // Remember on what we have waited on
+    // Remember what we have waited on.
WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
}
@@ -346,8 +348,9 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
- TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
- TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
MRI = &MF.getRegInfo();
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 7cae9fc..10e0a3f 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -24,7 +24,11 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> VOP3 = 0;
field bits<1> VOPC = 0;
field bits<1> SALU = 0;
+ field bits<1> MUBUF = 0;
+ field bits<1> MTBUF = 0;
+ field bits<1> FLAT = 0;
+ // These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
let TSFlags{2} = LGKM_CNT;
@@ -35,38 +39,60 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{7} = VOP3;
let TSFlags{8} = VOPC;
let TSFlags{9} = SALU;
+ let TSFlags{10} = MUBUF;
+ let TSFlags{11} = MTBUF;
+ let TSFlags{12} = FLAT;
+
+ // Most instructions require adjustments after selection to satisfy
+ // operand requirements.
+ let hasPostISelHook = 1;
}
-class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
+class Enc32 {
field bits<32> Inst;
- let Size = 4;
+ int Size = 4;
}
-class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
+class Enc64 {
field bits<64> Inst;
- let Size = 8;
+ int Size = 8;
+}
+
+class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP1 = 1;
}
class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
+ InstSI <outs, ins, asm, pattern> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
+ // Using complex patterns gives VOP3 patterns a very high complexity rating,
+  // but standalone patterns are almost always preferred, so we need to adjust the
+ // priority lower. The goal is to use a high number to reduce complexity to
+ // zero (or less than zero).
+ let AddedComplexity = -1000;
+
let VOP3 = 1;
+
+ int Size = 8;
+ let Uses = [EXEC];
}
//===----------------------------------------------------------------------===//
// Scalar operations
//===----------------------------------------------------------------------===//
-class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32<outs, ins, asm, pattern> {
+class SOP1e <bits<8> op> : Enc32 {
bits<7> SDST;
bits<8> SSRC0;
@@ -75,16 +101,10 @@ class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{15-8} = op;
let Inst{22-16} = SDST;
let Inst{31-23} = 0x17d; //encoding;
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
}
-class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
+class SOP2e <bits<7> op> : Enc32 {
+
bits<7> SDST;
bits<8> SSRC0;
bits<8> SSRC1;
@@ -94,15 +114,9 @@ class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{22-16} = SDST;
let Inst{29-23} = op;
let Inst{31-30} = 0x2; // encoding
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
}
-class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32<outs, ins, asm, pattern> {
+class SOPCe <bits<7> op> : Enc32 {
bits<8> SSRC0;
bits<8> SSRC1;
@@ -111,113 +125,137 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{15-8} = SSRC1;
let Inst{22-16} = op;
let Inst{31-23} = 0x17e;
-
- let DisableEncoding = "$dst";
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
}
-class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins , asm, pattern> {
+class SOPKe <bits<5> op> : Enc32 {
bits <7> SDST;
bits <16> SIMM16;
-
+
let Inst{15-0} = SIMM16;
let Inst{22-16} = SDST;
let Inst{27-23} = op;
let Inst{31-28} = 0xb; //encoding
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
}
-class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
- (outs),
- ins,
- asm,
- pattern > {
+class SOPPe <bits<7> op> : Enc32 {
- bits <16> SIMM16;
+ bits <16> simm16;
- let Inst{15-0} = SIMM16;
+ let Inst{15-0} = simm16;
let Inst{22-16} = op;
let Inst{31-23} = 0x17f; // encoding
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
}
-class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
- list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
+class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
bits<7> SDST;
bits<7> SBASE;
bits<8> OFFSET;
-
+
let Inst{7-0} = OFFSET;
let Inst{8} = imm;
let Inst{14-9} = SBASE{6-1};
let Inst{21-15} = SDST;
let Inst{26-22} = op;
let Inst{31-27} = 0x18; //encoding
+}
+
+class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern>, SOP1e <op> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+}
+
+class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern>, SOP2e<op> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+
+ let UseNamedOperandTable = 1;
+}
+
+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern>, SOPCe <op> {
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+
+ let UseNamedOperandTable = 1;
+}
+
+class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins , asm, pattern>, SOPKe<op> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+
+ let UseNamedOperandTable = 1;
+}
+
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
+ InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
+ let SALU = 1;
+
+ let UseNamedOperandTable = 1;
+}
+
+class SMRD <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
let LGKM_CNT = 1;
let SMRD = 1;
+ let mayStore = 0;
+ let mayLoad = 1;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
}
//===----------------------------------------------------------------------===//
// Vector ALU operations
//===----------------------------------------------------------------------===//
-
-let Uses = [EXEC] in {
-class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
+class VOP1e <bits<8> op> : Enc32 {
bits<8> VDST;
bits<9> SRC0;
-
+
let Inst{8-0} = SRC0;
let Inst{16-9} = op;
let Inst{24-17} = VDST;
let Inst{31-25} = 0x3f; //encoding
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOP1 = 1;
}
-class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
+class VOP2e <bits<6> op> : Enc32 {
bits<8> VDST;
bits<9> SRC0;
bits<8> VSRC1;
-
+
let Inst{8-0} = SRC0;
let Inst{16-9} = VSRC1;
let Inst{24-17} = VDST;
let Inst{30-25} = op;
let Inst{31} = 0x0; //encoding
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOP2 = 1;
}
-class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP3Common <outs, ins, asm, pattern> {
+class VOP3e <bits<9> op> : Enc64 {
bits<8> dst;
bits<2> src0_modifiers;
@@ -243,11 +281,9 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{61} = src0_modifiers{0};
let Inst{62} = src1_modifiers{0};
let Inst{63} = src2_modifiers{0};
-
}
-class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP3Common <outs, ins, asm, pattern> {
+class VOP3be <bits<9> op> : Enc64 {
bits<8> dst;
bits<2> src0_modifiers;
@@ -270,11 +306,9 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{61} = src0_modifiers{0};
let Inst{62} = src1_modifiers{0};
let Inst{63} = src2_modifiers{0};
-
}
-class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
- Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+class VOPCe <bits<8> op> : Enc32 {
bits<9> SRC0;
bits<8> VSRC1;
@@ -283,16 +317,9 @@ class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
let Inst{16-9} = VSRC1;
let Inst{24-17} = op;
let Inst{31-25} = 0x3e;
-
- let DisableEncoding = "$dst";
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let VOPC = 1;
}
-class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
+class VINTRPe <bits<2> op> : Enc32 {
bits<8> VDST;
bits<8> VSRC;
@@ -305,22 +332,9 @@ class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{17-16} = op;
let Inst{25-18} = VDST;
let Inst{31-26} = 0x32; // encoding
-
- let neverHasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 0;
}
-} // End Uses = [EXEC]
-
-//===----------------------------------------------------------------------===//
-// Vector I/O operations
-//===----------------------------------------------------------------------===//
-
-let Uses = [EXEC] in {
-
-class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
+class DSe <bits<8> op> : Enc64 {
bits<8> vdst;
bits<1> gds;
@@ -339,12 +353,9 @@ class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{47-40} = data0;
let Inst{55-48} = data1;
let Inst{63-56} = vdst;
-
- let LGKM_CNT = 1;
}
-class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64<outs, ins, asm, pattern> {
+class MUBUFe <bits<7> op> : Enc64 {
bits<12> offset;
bits<1> offen;
@@ -373,16 +384,9 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{54} = slc;
let Inst{55} = tfe;
let Inst{63-56} = soffset;
-
- let VM_CNT = 1;
- let EXP_CNT = 1;
-
- let neverHasSideEffects = 1;
- let UseNamedOperandTable = 1;
}
-class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64<outs, ins, asm, pattern> {
+class MTBUFe <bits<3> op> : Enc64 {
bits<8> VDATA;
bits<12> OFFSET;
@@ -413,15 +417,9 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{54} = SLC;
let Inst{55} = TFE;
let Inst{63-56} = SOFFSET;
-
- let VM_CNT = 1;
- let EXP_CNT = 1;
-
- let neverHasSideEffects = 1;
}
-class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
+class MIMGe <bits<7> op> : Enc64 {
bits<8> VDATA;
bits<4> DMASK;
@@ -434,7 +432,7 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
bits<1> SLC;
bits<8> VADDR;
bits<7> SRSRC;
- bits<7> SSAMP;
+ bits<7> SSAMP;
let Inst{11-8} = DMASK;
let Inst{12} = UNORM;
@@ -450,19 +448,29 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{47-40} = VDATA;
let Inst{52-48} = SRSRC{6-2};
let Inst{57-53} = SSAMP{6-2};
-
- let VM_CNT = 1;
- let EXP_CNT = 1;
- let MIMG = 1;
}
-def EXP : Enc64<
- (outs),
- (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
- VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
- "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
- [] > {
+class FLATe<bits<7> op> : Enc64 {
+ bits<8> addr;
+ bits<8> data;
+ bits<8> vdst;
+ bits<1> slc;
+ bits<1> glc;
+ bits<1> tfe;
+
+ // 15-0 is reserved.
+ let Inst{16} = glc;
+ let Inst{17} = slc;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x37; // Encoding.
+ let Inst{39-32} = addr;
+ let Inst{47-40} = data;
+ // 54-48 is reserved.
+ let Inst{55} = tfe;
+ let Inst{63-56} = vdst;
+}
+class EXPe : Enc64 {
bits<4> EN;
bits<6> TGT;
bits<1> COMPR;
@@ -483,8 +491,110 @@ def EXP : Enc64<
let Inst{47-40} = VSRC1;
let Inst{55-48} = VSRC2;
let Inst{63-56} = VSRC3;
+}
+
+let Uses = [EXEC] in {
+
+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ VOP1Common <outs, ins, asm, pattern>,
+ VOP1e<op>;
+
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern>, VOP2e<op> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP2 = 1;
+}
+
+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ VOP3Common <outs, ins, asm, pattern>, VOP3e<op>;
+
+class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ VOP3Common <outs, ins, asm, pattern>, VOP3be<op>;
+
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+ InstSI <(outs VCCReg:$dst), ins, asm, pattern>, VOPCe <op> {
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOPC = 1;
+}
+
+class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern>, VINTRPe<op> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+} // End Uses = [EXEC]
+
+//===----------------------------------------------------------------------===//
+// Vector I/O operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> , DSe<op> {
+
+ let LGKM_CNT = 1;
+ let UseNamedOperandTable = 1;
+}
+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern>, MUBUFe <op> {
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+ let MUBUF = 1;
+
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+}
+
+class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
+
+ let VM_CNT = 1;
let EXP_CNT = 1;
+ let MTBUF = 1;
+
+ let neverHasSideEffects = 1;
+ let UseNamedOperandTable = 1;
}
+class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern>, FLATe <op> {
+ let FLAT = 1;
+  // Internally, FLAT instructions are executed as both an LDS and a
+  // Buffer instruction, so they increment both VM_CNT and LGKM_CNT
+ // and are not considered done until both have been decremented.
+ let VM_CNT = 1;
+ let LGKM_CNT = 1;
+
+ let Uses = [EXEC, FLAT_SCR]; // M0
+
+ let UseNamedOperandTable = 1;
+ let hasSideEffects = 0;
+}
+
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern>, MIMGe <op> {
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+ let MIMG = 1;
+
+ let hasSideEffects = 0; // XXX ????
+}
+
+
+
} // End Uses = [EXEC]
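
The new MUBUF, MTBUF and FLAT fields above land in TSFlags bits 10-12, and the comment in InstSI notes that these positions must stay in sync with the SIInstrFlags enum. A hedged sketch of what that correspondence looks like (mask values inferred from the TSFlags assignments above, not copied from SIDefines.h):

    // Illustrative only: each TSFlags bit assigned in SIInstrFormats.td needs a
    // matching mask so a predicate can test it with a simple AND.
    namespace SIInstrFlags {
      enum {
        // ... bits 0-9 cover the existing flags (VM_CNT through SALU) ...
        MUBUF = 1 << 10,  // let TSFlags{10} = MUBUF
        MTBUF = 1 << 11,  // let TSFlags{11} = MTBUF
        FLAT  = 1 << 12   // let TSFlags{12} = FLAT
      };
    }

    // The predicates added in SIInstrInfo.cpp below then reduce to a bit test:
    //   bool SIInstrInfo::isMUBUF(uint16_t Opcode) const {
    //     return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
    //   }
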
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 455c890..8343362 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -17,10 +17,13 @@
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -32,6 +35,259 @@ SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//
+static unsigned getNumOperandsNoGlue(SDNode *Node) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ return N;
+}
+
+static SDValue findChainOperand(SDNode *Load) {
+ SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
+ assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
+ return LastOp;
+}
+
+/// \brief Returns true if both nodes have the same value for the given
+/// operand \p Op, or if both nodes do not have this operand.
+static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
+ unsigned Opc0 = N0->getMachineOpcode();
+ unsigned Opc1 = N1->getMachineOpcode();
+
+ int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
+ int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
+
+ if (Op0Idx == -1 && Op1Idx == -1)
+ return true;
+
+
+ if ((Op0Idx == -1 && Op1Idx != -1) ||
+ (Op1Idx == -1 && Op0Idx != -1))
+ return false;
+
+ // getNamedOperandIdx returns the index for the MachineInstr's operands,
+ // which includes the result as the first operand. We are indexing into the
+ // MachineSDNode's operands, so we need to skip the result operand to get
+ // the real index.
+ --Op0Idx;
+ --Op1Idx;
+
+ return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
+}
+
+bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
+ int64_t &Offset0,
+ int64_t &Offset1) const {
+ if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode())
+ return false;
+
+ unsigned Opc0 = Load0->getMachineOpcode();
+ unsigned Opc1 = Load1->getMachineOpcode();
+
+ // Make sure both are actually loads.
+ if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
+ return false;
+
+ if (isDS(Opc0) && isDS(Opc1)) {
+
+ // FIXME: Handle this case:
+ if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1))
+ return false;
+
+ // Check base reg.
+ if (Load0->getOperand(1) != Load1->getOperand(1))
+ return false;
+
+ // Check chain.
+ if (findChainOperand(Load0) != findChainOperand(Load1))
+ return false;
+
+ // Skip read2 / write2 variants for simplicity.
+    // TODO: We should report true if the used offsets are adjacent (excluding
+ // st64 versions).
+ if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
+ AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
+ return false;
+
+ Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
+ return true;
+ }
+
+ if (isSMRD(Opc0) && isSMRD(Opc1)) {
+ assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
+
+ // Check base reg.
+ if (Load0->getOperand(0) != Load1->getOperand(0))
+ return false;
+
+ // Check chain.
+ if (findChainOperand(Load0) != findChainOperand(Load1))
+ return false;
+
+ Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
+ return true;
+ }
+
+ // MUBUF and MTBUF can access the same addresses.
+ if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
+
+ // MUBUF and MTBUF have vaddr at different indices.
+ if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
+ findChainOperand(Load0) != findChainOperand(Load1) ||
+ !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
+ !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
+ return false;
+
+ int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
+ int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
+
+ if (OffIdx0 == -1 || OffIdx1 == -1)
+ return false;
+
+ // getNamedOperandIdx returns the index for MachineInstrs. Since they
+    // include the output in the operand list and SDNodes don't, we need to
+    // subtract one from the index.
+ --OffIdx0;
+ --OffIdx1;
+
+ SDValue Off0 = Load0->getOperand(OffIdx0);
+ SDValue Off1 = Load1->getOperand(OffIdx1);
+
+ // The offset might be a FrameIndexSDNode.
+ if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
+ return false;
+
+ Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+static bool isStride64(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::DS_READ2ST64_B32:
+ case AMDGPU::DS_READ2ST64_B64:
+ case AMDGPU::DS_WRITE2ST64_B32:
+ case AMDGPU::DS_WRITE2ST64_B64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SIInstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt,
+ unsigned &BaseReg, unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ unsigned Opc = LdSt->getOpcode();
+ if (isDS(Opc)) {
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ if (OffsetImm) {
+ // Normal, single offset LDS instruction.
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::addr);
+
+ BaseReg = AddrReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ // The 2 offset instructions use offset0 and offset1 instead. We can treat
+ // these as a load with a single offset if the 2 offsets are consecutive. We
+ // will use this for some partially aligned loads.
+ const MachineOperand *Offset0Imm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset0);
+ const MachineOperand *Offset1Imm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset1);
+
+ uint8_t Offset0 = Offset0Imm->getImm();
+ uint8_t Offset1 = Offset1Imm->getImm();
+ assert(Offset1 > Offset0);
+
+ if (Offset1 - Offset0 == 1) {
+ // Each of these offsets is in element sized units, so we need to convert
+ // to bytes of the individual reads.
+
+ unsigned EltSize;
+ if (LdSt->mayLoad())
+ EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2;
+ else {
+ assert(LdSt->mayStore());
+ int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
+ EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize();
+ }
+
+ if (isStride64(Opc))
+ EltSize *= 64;
+
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::addr);
+ BaseReg = AddrReg->getReg();
+ Offset = EltSize * Offset0;
+ return true;
+ }
+
+ return false;
+ }
+
+ if (isMUBUF(Opc) || isMTBUF(Opc)) {
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
+ return false;
+
+ const MachineOperand *AddrReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::vaddr);
+ if (!AddrReg)
+ return false;
+
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ BaseReg = AddrReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ if (isSMRD(Opc)) {
+ const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
+ AMDGPU::OpName::offset);
+ if (!OffsetImm)
+ return false;
+
+ const MachineOperand *SBaseReg = getNamedOperand(*LdSt,
+ AMDGPU::OpName::sbase);
+ BaseReg = SBaseReg->getReg();
+ Offset = OffsetImm->getImm();
+ return true;
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
+ unsigned Opc0 = FirstLdSt->getOpcode();
+ unsigned Opc1 = SecondLdSt->getOpcode();
+
+ // TODO: This needs finer tuning
+ if (NumLoads > 4)
+ return false;
+
+ if (isDS(Opc0) && isDS(Opc1))
+ return true;
+
+ if (isSMRD(Opc0) && isSMRD(Opc1))
+ return true;
+
+ if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
+ return true;
+
+ return false;
+}
+
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -182,6 +438,19 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
return Opcode;
}
+static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
+
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const TargetMachine &TM = MF->getTarget();
+
+ // FIXME: Even though it can cause problems, we need to enable
+ // spilling at -O0, since the fast register allocator always
+ // spills registers that are live at the end of blocks.
+ return MFI->getShaderType() == ShaderType::COMPUTE &&
+ TM.getOptLevel() == CodeGenOpt::None;
+
+}
+
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
@@ -189,50 +458,43 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- unsigned KillFlag = isKill ? RegState::Kill : 0;
+ int Opcode = -1;
- if (RI.hasVGPRs(RC)) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
- .addReg(SrcReg);
- } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
- unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
- unsigned TgtReg = MFI->SpillTracker.LaneVGPR;
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
- .addReg(SrcReg, KillFlag)
- .addImm(Lane);
- MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
- } else if (RI.isSGPRClass(RC)) {
+ if (RI.isSGPRClass(RC)) {
// We are only allowed to create one new instruction when spilling
- // registers, so we need to use pseudo instruction for vector
- // registers.
- //
- // Reserve a spot in the spill tracker for each sub-register of
- // the vector register.
- unsigned NumSubRegs = RC->getSize() / 4;
- unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
- MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
- FirstLane);
-
- unsigned Opcode;
+  // registers, so we need to use a pseudo instruction for spilling
+ // SGPRs.
switch (RC->getSize() * 8) {
- case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
- default: llvm_unreachable("Cannot spill register class");
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
}
+ } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+ switch(RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
+ case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
+ }
+ }
- BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
+ if (Opcode != -1) {
+ FrameInfo->setObjectAlignment(FrameIndex, 4);
+ BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg)
- .addImm(FrameIndex);
+ .addFrameIndex(FrameIndex);
} else {
- llvm_unreachable("VGPR spilling not supported");
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
+ " spill register");
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
+ .addReg(SrcReg);
}
}
@@ -242,55 +504,138 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
+ int Opcode = -1;
- if (RI.hasVGPRs(RC)) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!");
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
- .addImm(0);
- } else if (RI.isSGPRClass(RC)){
- unsigned Opcode;
+ if (RI.isSGPRClass(RC)){
switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
- default: llvm_unreachable("Cannot spill register class");
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
}
+ } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+ switch(RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
+ case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
+ case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
+ }
+ }
- SIMachineFunctionInfo::SpilledReg Spill =
- MFI->SpillTracker.getSpilledReg(FrameIndex);
-
+ if (Opcode != -1) {
+ FrameInfo->setObjectAlignment(FrameIndex, 4);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addReg(Spill.VGPR)
- .addImm(FrameIndex);
+ .addFrameIndex(FrameIndex);
} else {
- llvm_unreachable("VGPR spilling not supported");
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
+ " restore register");
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(AMDGPU::VGPR0);
}
}
-static unsigned getNumSubRegsForSpillOp(unsigned Op) {
-
- switch (Op) {
- case AMDGPU::SI_SPILL_S512_SAVE:
- case AMDGPU::SI_SPILL_S512_RESTORE:
- return 16;
- case AMDGPU::SI_SPILL_S256_SAVE:
- case AMDGPU::SI_SPILL_S256_RESTORE:
- return 8;
- case AMDGPU::SI_SPILL_S128_SAVE:
- case AMDGPU::SI_SPILL_S128_RESTORE:
- return 4;
- case AMDGPU::SI_SPILL_S64_SAVE:
- case AMDGPU::SI_SPILL_S64_RESTORE:
- return 2;
- case AMDGPU::SI_SPILL_S32_RESTORE:
- return 1;
- default: llvm_unreachable("Invalid spill opcode");
+/// \param FrameOffset Offset in bytes of the FrameIndex being spilled
+unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ RegScavenger *RS, unsigned TmpReg,
+ unsigned FrameOffset,
+ unsigned Size) const {
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF->getTarget().getSubtarget<AMDGPUSubtarget>();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
+ unsigned WavefrontSize = ST.getWavefrontSize();
+
+ unsigned TIDReg = MFI->getTIDReg();
+ if (!MFI->hasCalculatedTID()) {
+ MachineBasicBlock &Entry = MBB.getParent()->front();
+ MachineBasicBlock::iterator Insert = Entry.front();
+ DebugLoc DL = Insert->getDebugLoc();
+
+ TIDReg = RI.findUnusedVGPR(MF->getRegInfo());
+ if (TIDReg == AMDGPU::NoRegister)
+ return TIDReg;
+
+
+ if (MFI->getShaderType() == ShaderType::COMPUTE &&
+ WorkGroupSize > WavefrontSize) {
+
+ unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
+ unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
+ unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
+ unsigned InputPtrReg =
+ TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
+ static const unsigned TIDIGRegs[3] = {
+ TIDIGXReg, TIDIGYReg, TIDIGZReg
+ };
+ for (unsigned Reg : TIDIGRegs) {
+ if (!Entry.isLiveIn(Reg))
+ Entry.addLiveIn(Reg);
+ }
+
+ RS->enterBasicBlock(&Entry);
+ unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
+ .addReg(InputPtrReg)
+ .addImm(SI::KernelInputOffsets::NGROUPS_Z);
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
+ .addReg(InputPtrReg)
+ .addImm(SI::KernelInputOffsets::NGROUPS_Y);
+
+ // NGROUPS.X * NGROUPS.Y
+ BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
+ .addReg(STmp1)
+ .addReg(STmp0);
+ // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
+ .addReg(STmp1)
+ .addReg(TIDIGXReg);
+      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
+ .addReg(STmp0)
+ .addReg(TIDIGYReg)
+ .addReg(TIDReg);
+      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
+ .addReg(TIDReg)
+ .addReg(TIDIGZReg);
+ } else {
+ // Get the wave id
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
+ TIDReg)
+ .addImm(-1)
+ .addImm(0);
+
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e32),
+ TIDReg)
+ .addImm(-1)
+ .addReg(TIDReg);
+ }
+
+ BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
+ TIDReg)
+ .addImm(2)
+ .addReg(TIDReg);
+ MFI->setTIDReg(TIDReg);
}
+
+ // Add FrameIndex to LDS offset
+ unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
+ .addImm(LDSOffset)
+ .addReg(TIDReg);
+
+ return TmpReg;
}
void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
@@ -308,95 +653,102 @@ void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
}
bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- SIMachineFunctionInfo *MFI =
- MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
switch (MI->getOpcode()) {
default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
- // SGPR register spill
- case AMDGPU::SI_SPILL_S512_SAVE:
- case AMDGPU::SI_SPILL_S256_SAVE:
- case AMDGPU::SI_SPILL_S128_SAVE:
- case AMDGPU::SI_SPILL_S64_SAVE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
- unsigned FrameIndex = MI->getOperand(2).getImm();
-
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- SIMachineFunctionInfo::SpilledReg Spill;
- unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
- &AMDGPU::SGPR_32RegClass, i);
- Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
- MI->getOperand(0).getReg())
- .addReg(SubReg)
- .addImm(Spill.Lane + i);
- }
+ case AMDGPU::SI_CONSTDATA_PTR: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
+ unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
+
+ // Add 32-bit offset from this instruction to the start of the constant data.
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
+ .addReg(RegLo)
+ .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
+ .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi)
+ .addImm(0)
+ .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
+ .addReg(AMDGPU::SCC, RegState::Implicit);
MI->eraseFromParent();
break;
}
-
- // SGPR register restore
- case AMDGPU::SI_SPILL_S512_RESTORE:
- case AMDGPU::SI_SPILL_S256_RESTORE:
- case AMDGPU::SI_SPILL_S128_RESTORE:
- case AMDGPU::SI_SPILL_S64_RESTORE:
- case AMDGPU::SI_SPILL_S32_RESTORE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- SIMachineFunctionInfo::SpilledReg Spill;
- unsigned FrameIndex = MI->getOperand(2).getImm();
- unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
- &AMDGPU::SGPR_32RegClass, i);
- Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
- .addReg(MI->getOperand(1).getReg())
- .addImm(Spill.Lane + i);
- }
- insertNOPs(MI, 3);
+ case AMDGPU::SGPR_USE:
+ // This is just a placeholder for register allocation.
MI->eraseFromParent();
break;
}
- }
return true;
}
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
+ if (MI->getNumOperands() < 3)
+ return nullptr;
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ assert(Src0Idx != -1 && "Should always have src0 operand");
+
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ if (!Src0.isReg())
+ return nullptr;
+
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ if (Src1Idx == -1)
return nullptr;
- // Cannot commute VOP2 if src0 is SGPR.
- if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
- RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
- return nullptr;
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+
+ // Make sure it's legal to commute operands for VOP2.
+ if (isVOP2(MI->getOpcode()) &&
+ (!isOperandLegal(MI, Src0Idx, &Src1) ||
+ !isOperandLegal(MI, Src1Idx, &Src0)))
+ return nullptr;
- if (!MI->getOperand(2).isReg()) {
- // XXX: Commute instructions with FPImm operands
- if (NewMI || MI->getOperand(2).isFPImm() ||
+ if (!Src1.isReg()) {
+ // Allow commuting instructions with Imm or FPImm operands.
+ if (NewMI || (!Src1.isImm() && !Src1.isFPImm()) ||
(!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
return nullptr;
}
- // XXX: Commute VOP3 instructions with abs and neg set.
- if (isVOP3(MI->getOpcode()) &&
- (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::abs)).getImm() ||
- MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::neg)).getImm()))
- return nullptr;
+ // Be sure to copy the source modifiers to the right place.
+ if (MachineOperand *Src0Mods
+ = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+ MachineOperand *Src1Mods
+ = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers);
- unsigned Reg = MI->getOperand(1).getReg();
- unsigned SubReg = MI->getOperand(1).getSubReg();
- MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
- MI->getOperand(2).ChangeToRegister(Reg, false);
- MI->getOperand(2).setSubReg(SubReg);
+ int Src0ModsVal = Src0Mods->getImm();
+ if (!Src1Mods && Src0ModsVal != 0)
+ return nullptr;
+
+ // XXX - This assert might be a lie. It might be useful to have a neg
+ // modifier with 0.0.
+ int Src1ModsVal = Src1Mods->getImm();
+ assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
+
+ Src1Mods->setImm(Src0ModsVal);
+ Src0Mods->setImm(Src1ModsVal);
+ }
+
+ unsigned Reg = Src0.getReg();
+ unsigned SubReg = Src0.getSubReg();
+ if (Src1.isImm())
+ Src0.ChangeToImmediate(Src1.getImm());
+ else if (Src1.isFPImm())
+ Src0.ChangeToFPImmediate(Src1.getFPImm());
+ else
+ llvm_unreachable("Should only have immediates");
+
+ Src1.ChangeToRegister(Reg, false);
+ Src1.setSubReg(SubReg);
} else {
MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
}
@@ -407,6 +759,44 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
return MI;
}
+// This needs to be implemented because the source modifiers may be inserted
+// between the true commutable operands, and the base
+// TargetInstrInfo::commuteInstruction uses it.
+bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ if (Src0Idx == -1)
+ return false;
+
+ // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
+ // immediate.
+ if (!MI->getOperand(Src0Idx).isReg())
+ return false;
+
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ if (Src1Idx == -1)
+ return false;
+
+ if (!MI->getOperand(Src1Idx).isReg())
+ return false;
+
+ // If any source modifiers are set, the generic instruction commuting won't
+ // understand how to copy the source modifiers.
+ if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+ return false;
+
+ SrcOpIdx1 = Src0Idx;
+ SrcOpIdx2 = Src1Idx;
+ return true;
+}
+
MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg,
@@ -443,10 +833,92 @@ SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
}
}
+static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
+ int WidthB, int OffsetB) {
+ int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+ int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ return LowOffset + LowWidth <= HighOffset;
+}
+
+bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
+ MachineInstr *MIb) const {
+ unsigned BaseReg0, Offset0;
+ unsigned BaseReg1, Offset1;
+
+ if (getLdStBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
+ getLdStBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
+ assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
+ "read2 / write2 not expected here yet");
+ unsigned Width0 = (*MIa->memoperands_begin())->getSize();
+ unsigned Width1 = (*MIb->memoperands_begin())->getSize();
+ if (BaseReg0 == BaseReg1 &&
+ offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb,
+ AliasAnalysis *AA) const {
+ unsigned Opc0 = MIa->getOpcode();
+ unsigned Opc1 = MIb->getOpcode();
+
+ assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
+ "MIa must load from or modify a memory location");
+ assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
+ "MIb must load from or modify a memory location");
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects())
+ return false;
+
+ // XXX - Can we relax this between address spaces?
+ if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+ // TODO: Should we check the address space from the MachineMemOperand? That
+ // would allow us to distinguish objects we know don't alias based on the
+  // underlying address space, even if it was lowered to a different one,
+ // e.g. private accesses lowered to use MUBUF instructions on a scratch
+ // buffer.
+ if (isDS(Opc0)) {
+ if (isDS(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return !isFLAT(Opc1);
+ }
+
+ if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
+ if (isMUBUF(Opc1) || isMTBUF(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return !isFLAT(Opc1) && !isSMRD(Opc1);
+ }
+
+ if (isSMRD(Opc0)) {
+ if (isSMRD(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0);
+ }
+
+ if (isFLAT(Opc0)) {
+ if (isFLAT(Opc1))
+ return checkInstOffsetsDoNotOverlap(MIa, MIb);
+
+ return false;
+ }
+
+ return false;
+}
+
namespace llvm {
namespace AMDGPU {
// Helper function generated by tablegen. We are wrapping this with
-// an SIInstrInfo function that reutrns bool rather than int.
+// an SIInstrInfo function that returns bool rather than int.
int isDS(uint16_t Opcode);
}
}
@@ -455,14 +927,26 @@ bool SIInstrInfo::isDS(uint16_t Opcode) const {
return ::AMDGPU::isDS(Opcode) != -1;
}
-int SIInstrInfo::isMIMG(uint16_t Opcode) const {
+bool SIInstrInfo::isMIMG(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}
-int SIInstrInfo::isSMRD(uint16_t Opcode) const {
+bool SIInstrInfo::isSMRD(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}
+bool SIInstrInfo::isMUBUF(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
+}
+
+bool SIInstrInfo::isMTBUF(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
+}
+
+bool SIInstrInfo::isFLAT(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FLAT;
+}
+
bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}
@@ -541,9 +1025,99 @@ static bool compareMachineOp(const MachineOperand &Op0,
}
}
+bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
+ const MachineOperand &MO) const {
+ const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo];
+
+ assert(MO.isImm() || MO.isFPImm() || MO.isTargetIndex() || MO.isFI());
+
+ if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
+ return true;
+
+ if (OpInfo.RegClass < 0)
+ return false;
+
+ if (isLiteralConstant(MO))
+ return RI.regClassCanUseLiteralConstant(OpInfo.RegClass);
+
+ return RI.regClassCanUseInlineConstant(OpInfo.RegClass);
+}
+
+bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) {
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+    // MUBUF instructions have a 12-bit offset in bytes.
+ return isUInt<12>(OffsetSize);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // SMRD instructions have an 8-bit offset in dwords.
+ return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // The single offset versions have a 16-bit offset in bytes.
+ return isUInt<16>(OffsetSize);
+ }
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // Indirect register addressing does not use any offsets.
+ default:
+ return 0;
+ }
+}
+
+bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
+ return AMDGPU::getVOPe32(Opcode) != -1;
+}
+
+bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
+ // The src0_modifier operand is present on all instructions
+ // that have modifiers.
+
+ return AMDGPU::getNamedOperandIdx(Opcode,
+ AMDGPU::OpName::src0_modifiers) != -1;
+}
+
+bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
+ unsigned OpName) const {
+ const MachineOperand *Mods = getNamedOperand(MI, OpName);
+ return Mods && Mods->getImm();
+}
+
+bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
+ const MachineOperand &MO) const {
+ // Literal constants use the constant bus.
+ if (isLiteralConstant(MO))
+ return true;
+
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
+
+ // FLAT_SCR is just an SGPR pair.
+ if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
+ return true;
+
+ // EXEC register uses the constant bus.
+ if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
+ return true;
+
+ // SGPRs use the constant bus
+ if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
+ (!MO.isImplicit() &&
+ (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
+ AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
+ return true;
+ }
+
+ return false;
+}
+
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI->getOpcode();
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
@@ -557,19 +1131,22 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}
// Make sure the register classes are correct
- for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
+ for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER: {
- int RegClass = Desc.OpInfo[i].RegClass;
- if (!RI.regClassCanUseImmediate(RegClass) &&
- (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
- ErrInfo = "Expected register, but got immediate";
- return false;
+ if ((MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) &&
+ !isImmOperandLegal(MI, i, MI->getOperand(i))) {
+ ErrInfo = "Illegal immediate value for operand.";
+ return false;
+ }
}
- }
break;
case MCOI::OPERAND_IMMEDIATE:
- if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
+ // Check if this operand is an immediate.
+ // FrameIndex operands will be replaced by immediates, so they are
+ // allowed.
+ if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm() &&
+ !MI->getOperand(i).isFI()) {
ErrInfo = "Expected immediate, but got non-immediate";
return false;
}
@@ -602,27 +1179,15 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
unsigned SGPRUsed = AMDGPU::NoRegister;
for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isUse() &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-
- // EXEC register uses the constant bus.
- if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
- ++ConstantBusCount;
-
- // SGPRs use the constant bus
- if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
- (!MO.isImplicit() &&
- (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
- AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
- if (SGPRUsed != MO.getReg()) {
+ if (usesConstantBus(MRI, MO)) {
+ if (MO.isReg()) {
+ if (MO.getReg() != SGPRUsed)
++ConstantBusCount;
- SGPRUsed = MO.getReg();
- }
+ SGPRUsed = MO.getReg();
+ } else {
+ ++ConstantBusCount;
}
}
- // Literal constants use the constant bus.
- if (isLiteralConstant(MO))
- ++ConstantBusCount;
}
if (ConstantBusCount > 1) {
ErrInfo = "VOP* instruction uses the constant bus more than once";
@@ -658,11 +1223,9 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Verify misc. restrictions on specific instructions.
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
- MI->dump();
-
- const MachineOperand &Src0 = MI->getOperand(2);
- const MachineOperand &Src1 = MI->getOperand(3);
- const MachineOperand &Src2 = MI->getOperand(4);
+ const MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ const MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ const MachineOperand &Src2 = MI->getOperand(Src2Idx);
if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
if (!compareMachineOp(Src0, Src1) &&
!compareMachineOp(Src0, Src2)) {
@@ -685,10 +1248,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_MOV_B32:
return MI.getOperand(1).isReg() ?
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
- case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
+ case AMDGPU::S_ADD_I32:
+ case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32;
case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
- case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
+ case AMDGPU::S_SUB_I32:
+ case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
+ case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
@@ -757,21 +1323,28 @@ bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
MachineBasicBlock::iterator I = MI;
+ MachineBasicBlock *MBB = MI->getParent();
MachineOperand &MO = MI->getOperand(OpIdx);
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
const TargetRegisterClass *RC = RI.getRegClass(RCID);
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
- if (MO.isReg()) {
+ if (MO.isReg())
Opcode = AMDGPU::COPY;
- } else if (RI.isSGPRClass(RC)) {
+ else if (RI.isSGPRClass(RC))
Opcode = AMDGPU::S_MOV_B32;
- }
+
const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
+ if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
+ VRC = &AMDGPU::VReg_64RegClass;
+ else
+ VRC = &AMDGPU::VReg_32RegClass;
+
unsigned Reg = MRI.createVirtualRegister(VRC);
- BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
- Reg).addOperand(MO);
+ DebugLoc DL = MBB->findDebugLoc(I);
+ BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg)
+ .addOperand(MO);
MO.ChangeToRegister(Reg, false);
}
@@ -791,13 +1364,15 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
// value so we don't need to worry about merging its subreg index with the
// SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
- NewSuperReg)
- .addOperand(SuperReg);
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
+ .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
+
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
+ .addReg(NewSuperReg, 0, SubIdx);
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
- SubReg)
- .addReg(NewSuperReg, 0, SubIdx);
return SubReg;
}
@@ -853,8 +1428,59 @@ unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
return Dst;
}
+bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
+ const MachineOperand *MO) const {
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &InstDesc = get(MI->getOpcode());
+ const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
+ const TargetRegisterClass *DefinedRC =
+ OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
+ if (!MO)
+ MO = &MI->getOperand(OpIdx);
+
+ if (usesConstantBus(MRI, *MO)) {
+ unsigned SGPRUsed =
+ MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (i == OpIdx)
+ continue;
+ if (usesConstantBus(MRI, MI->getOperand(i)) &&
+ MI->getOperand(i).isReg() && MI->getOperand(i).getReg() != SGPRUsed) {
+ return false;
+ }
+ }
+ }
+
+ if (MO->isReg()) {
+ assert(DefinedRC);
+ const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg());
+
+ // In order to be legal, the common sub-class must be equal to the
+ // class of the current operand. For example:
+ //
+ // v_mov_b32 s0 ; Operand defined as vsrc_32
+ // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
+ //
+ // s_sendmsg 0, s0 ; Operand defined as m0reg
+ // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+ return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ }
+
+
+ // Handle non-register types that are treated like immediates.
+ assert(MO->isImm() || MO->isFPImm() || MO->isTargetIndex() || MO->isFI());
+
+ if (!DefinedRC) {
+ // This operand expects an immediate.
+ return true;
+ }
+
+ return isImmOperandLegal(MI, OpIdx, *MO);
+}
+
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
@@ -864,45 +1490,40 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
// Legalize VOP2
if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
- MachineOperand &Src0 = MI->getOperand(Src0Idx);
- MachineOperand &Src1 = MI->getOperand(Src1Idx);
-
- // If the instruction implicitly reads VCC, we can't have any SGPR operands,
- // so move any.
- bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
- if (ReadsVCC && Src0.isReg() &&
- RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
+ // Legalize src0
+ if (!isOperandLegal(MI, Src0Idx))
legalizeOpWithMove(MI, Src0Idx);
- return;
- }
- if (ReadsVCC && Src1.isReg() &&
- RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
- legalizeOpWithMove(MI, Src1Idx);
+ // Legalize src1
+ if (isOperandLegal(MI, Src1Idx))
return;
- }
- // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
- // be the first operand, and there can only be one.
- if (Src1.isImm() || Src1.isFPImm() ||
- (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
- if (MI->isCommutable()) {
- if (commuteInstruction(MI))
- return;
- }
- legalizeOpWithMove(MI, Src1Idx);
+ // Usually src0 of VOP2 instructions allows more types of inputs
+ // than src1, so try to commute the instruction to decrease our
+ // chances of having to insert a MOV instruction to legalize src1.
+ if (MI->isCommutable()) {
+ if (commuteInstruction(MI))
+ // If we are successful in commuting, then we know MI is legal, so
+ // we are done.
+ return;
}
+
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
}
// XXX - Do any VOP3 instructions read VCC?
// Legalize VOP3
if (isVOP3(MI->getOpcode())) {
- int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
- unsigned SGPRReg = AMDGPU::NoRegister;
+ int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
+
+ // Find the one SGPR operand we are allowed to use.
+ unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+
for (unsigned i = 0; i < 3; ++i) {
int Idx = VOP3Idx[i];
if (Idx == -1)
- continue;
+ break;
MachineOperand &MO = MI->getOperand(Idx);
if (MO.isReg()) {
@@ -1002,106 +1623,212 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
// Legalize MUBUF* instructions
// FIXME: If we start using the non-addr64 instructions for compute, we
// may need to legalize them here.
+ int SRsrcIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
+ if (SRsrcIdx != -1) {
+ // We have an MUBUF instruction
+ MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
+ unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
+ if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
+ RI.getRegClass(SRsrcRC))) {
+ // The operands are legal.
+ // FIXME: We may need to legalize operands besides srsrc.
+ return;
+ }
- int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::srsrc);
- int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::vaddr);
- if (SRsrcIdx != -1 && VAddrIdx != -1) {
- const TargetRegisterClass *VAddrRC =
- RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
-
- if(VAddrRC->getSize() == 8 &&
- MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
- // We have a MUBUF instruction that uses a 64-bit vaddr register and
- // srsrc has the incorrect register class. In order to fix this, we
- // need to extract the pointer from the resource descriptor (srsrc),
- // add it to the value of vadd, then store the result in the vaddr
- // operand. Then, we need to set the pointer field of the resource
- // descriptor to zero.
+ MachineBasicBlock &MBB = *MI->getParent();
+ // Extract the pointer from the resource descriptor.
- MachineBasicBlock &MBB = *MI->getParent();
- MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
- MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
- unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
- unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
-
- // SRsrcPtrLo = srsrc:sub0
- SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
-
- // SRsrcPtrHi = srsrc:sub1
- SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
-
- // VAddrLo = vaddr:sub0
- VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
- &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
-
- // VAddrHi = vaddr:sub1
- VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
- &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
-
- // NewVaddrLo = SRsrcPtrLo + VAddrLo
+ // SRsrcPtrLo = srsrc:sub0
+ unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+ // SRsrcPtrHi = srsrc:sub1
+ unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+ // Create an empty resource descriptor
+ unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+ // Zero64 = 0
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+ Zero64)
+ .addImm(0);
+
+ // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatLo)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+ // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ SRsrcFormatHi)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+ // NewSRsrc = {Zero64, SRsrcFormat}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
+
+ MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+ unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ unsigned NewVAddrLo;
+ unsigned NewVAddrHi;
+ if (VAddr) {
+ // This is already an ADDR64 instruction so we need to add the pointer
+ // extracted from the resource descriptor to the current value of VAddr.
+ NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+
+ // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
NewVAddrLo)
.addReg(SRsrcPtrLo)
- .addReg(VAddrLo)
- .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
+ .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
- // NewVaddrHi = SRsrcPtrHi + VAddrHi
+ // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
NewVAddrHi)
.addReg(SRsrcPtrHi)
- .addReg(VAddrHi)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
.addReg(AMDGPU::VCC, RegState::ImplicitDefine)
.addReg(AMDGPU::VCC, RegState::Implicit);
- // NewVaddr = {NewVaddrHi, NewVaddrLo}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewVAddr)
- .addReg(NewVAddrLo)
- .addImm(AMDGPU::sub0)
- .addReg(NewVAddrHi)
- .addImm(AMDGPU::sub1);
+ } else {
+ // This instruction is the _OFFSET variant, so we need to convert it to
+ // ADDR64.
+ MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
+ MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
+ MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
+ assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF "
+ "with non-zero soffset is not implemented");
+ (void)SOffset;
+
+ // Create the new instruction.
+ unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
+ MachineInstr *Addr64 =
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+ .addOperand(*VData)
+ .addOperand(*SRsrc)
+ .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+ // This will be replaced later
+ // with the new value of vaddr.
+ .addOperand(*Offset);
+
+ MI->removeFromParent();
+ MI = Addr64;
+
+ NewVAddrLo = SRsrcPtrLo;
+ NewVAddrHi = SRsrcPtrHi;
+ VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+ SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
+ }
- // Zero64 = 0
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
- Zero64)
- .addImm(0);
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+ NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
- // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
- SRsrcFormatLo)
- .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
- // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
- SRsrcFormatHi)
- .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+ // Update the instruction to use NewVaddr
+ VAddr->setReg(NewVAddr);
+ // Update the instruction to use NewSRsrc
+ SRsrc->setReg(NewSRsrc);
+ }
+}
- // NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
+void SIInstrInfo::splitSMRD(MachineInstr *MI,
+ const TargetRegisterClass *HalfRC,
+ unsigned HalfImmOp, unsigned HalfSGPROp,
+ MachineInstr *&Lo, MachineInstr *&Hi) const {
- // Update the instruction to use NewVaddr
- MI->getOperand(VAddrIdx).setReg(NewVAddr);
- // Update the instruction to use NewSRsrc
- MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned RegLo = MRI.createVirtualRegister(HalfRC);
+ unsigned RegHi = MRI.createVirtualRegister(HalfRC);
+ unsigned HalfSize = HalfRC->getSize();
+ const MachineOperand *OffOp =
+ getNamedOperand(*MI, AMDGPU::OpName::offset);
+ const MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase);
+
+ if (OffOp) {
+ // Handle the _IMM variant
+ unsigned LoOffset = OffOp->getImm();
+ unsigned HiOffset = LoOffset + (HalfSize / 4);
+ Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
+ .addOperand(*SBase)
+ .addImm(LoOffset);
+
+ if (!isUInt<8>(HiOffset)) {
+ unsigned OffsetSGPR =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
+ .addImm(HiOffset << 2); // The immediate offset is in dwords,
+ // but the offset in a register is in bytes.
+ Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
+ .addOperand(*SBase)
+ .addReg(OffsetSGPR);
+ } else {
+ Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
+ .addOperand(*SBase)
+ .addImm(HiOffset);
}
+ } else {
+ // Handle the _SGPR variant
+ MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
+ Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
+ .addOperand(*SBase)
+ .addOperand(*SOff);
+ unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
+ .addOperand(*SOff)
+ .addImm(HalfSize);
+ Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
+ .addOperand(*SBase)
+ .addReg(OffsetSGPR);
+ }
+
+ unsigned SubLo, SubHi;
+ switch (HalfSize) {
+ case 4:
+ SubLo = AMDGPU::sub0;
+ SubHi = AMDGPU::sub1;
+ break;
+ case 8:
+ SubLo = AMDGPU::sub0_sub1;
+ SubHi = AMDGPU::sub2_sub3;
+ break;
+ case 16:
+ SubLo = AMDGPU::sub0_sub1_sub2_sub3;
+ SubHi = AMDGPU::sub4_sub5_sub6_sub7;
+ break;
+ case 32:
+ SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
+ SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
+ break;
+ default:
+ llvm_unreachable("Unhandled HalfSize");
}
+
+ BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
+ .addOperand(MI->getOperand(0))
+ .addReg(RegLo)
+ .addImm(SubLo)
+ .addReg(RegHi)
+ .addImm(SubHi);
}
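+
+// Illustrative sketch of the split above: an
+//   S_LOAD_DWORDX8_IMM %sbase, N
+// becomes, roughly,
+//   %lo:sreg_128 = S_LOAD_DWORDX4_IMM %sbase, N
+//   %hi:sreg_128 = S_LOAD_DWORDX4_IMM %sbase, N + 4   ; offsets are in dwords
+//   %dst = REG_SEQUENCE %lo, sub0_sub1_sub2_sub3, %hi, sub4_sub5_sub6_sub7
+// The _SGPR form is used for the hi half instead when N + 4 does not fit in
+// the 8-bit immediate offset field.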
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
@@ -1112,7 +1839,7 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
case AMDGPU::S_LOAD_DWORDX2_IMM:
case AMDGPU::S_LOAD_DWORDX2_SGPR:
case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR:
+ case AMDGPU::S_LOAD_DWORDX4_SGPR: {
unsigned NewOpcode = getVALUOp(*MI);
unsigned RegOffset;
unsigned ImmOffset;
@@ -1159,14 +1886,44 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
.addImm(AMDGPU::sub2)
.addReg(DWord3)
.addImm(AMDGPU::sub3);
- MI->setDesc(get(NewOpcode));
- if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(MI->getOperand(1).getReg());
- } else {
- MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
- }
- MI->getOperand(1).setReg(SRsrc);
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ MI->setDesc(get(NewOpcode));
+ if (MI->getOperand(2).isReg()) {
+ MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ } else {
+ MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ }
+ MI->getOperand(1).setReg(SRsrc);
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+
+ const TargetRegisterClass *NewDstRC =
+ RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+ break;
+ }
+ case AMDGPU::S_LOAD_DWORDX8_IMM:
+ case AMDGPU::S_LOAD_DWORDX8_SGPR: {
+ MachineInstr *Lo, *Hi;
+ splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
+ AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
+ MI->eraseFromParent();
+ moveSMRDToVALU(Lo, MRI);
+ moveSMRDToVALU(Hi, MRI);
+ break;
+ }
+
+ case AMDGPU::S_LOAD_DWORDX16_IMM:
+ case AMDGPU::S_LOAD_DWORDX16_SGPR: {
+ MachineInstr *Lo, *Hi;
+ splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
+ AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
+ MI->eraseFromParent();
+ moveSMRDToVALU(Lo, MRI);
+ moveSMRDToVALU(Hi, MRI);
+ break;
+ }
}
}
@@ -1238,8 +1995,13 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Inst->eraseFromParent();
continue;
+ case AMDGPU::S_BFE_I64: {
+ splitScalar64BitBFE(Worklist, Inst);
+ Inst->eraseFromParent();
+ continue;
+ }
+
case AMDGPU::S_BFE_U64:
- case AMDGPU::S_BFE_I64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
}
@@ -1268,17 +2030,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// We are converting these to a BFE, so we need to add the missing
// operands for the size and offset.
unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
- Inst->addOperand(Inst->getOperand(1));
- Inst->getOperand(1).ChangeToImmediate(0);
- Inst->addOperand(MachineOperand::CreateImm(0));
- Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(Size));
- // XXX - Other pointless operands. There are 4, but it seems you only need
- // 3 to not hit an assertion later in MCInstLower.
- Inst->addOperand(MachineOperand::CreateImm(0));
- Inst->addOperand(MachineOperand::CreateImm(0));
} else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
// The VALU version adds the second operand to the result, so insert an
// extra 0 operand.
@@ -1297,16 +2051,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
-
Inst->RemoveOperand(2); // Remove old immediate.
- Inst->addOperand(Inst->getOperand(1));
- Inst->getOperand(1).ChangeToImmediate(0);
- Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(Offset));
- Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(BitWidth));
- Inst->addOperand(MachineOperand::CreateImm(0));
- Inst->addOperand(MachineOperand::CreateImm(0));
}
// Update the destination register class.
@@ -1519,6 +2266,67 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
Worklist.push_back(Second);
}
+void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ uint32_t Imm = Inst->getOperand(2).getImm();
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+
+ (void) Offset;
+
+ // Only sext_inreg cases are handled.
+ assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 &&
+ BitWidth <= 32 &&
+ Offset == 0 &&
+ "Not implemented");
+
+ if (BitWidth < 32) {
+ unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
+ .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0)
+ .addImm(0)
+ .addImm(BitWidth);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
+ .addImm(31)
+ .addReg(MidRegLo);
+
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
+ .addReg(MidRegLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(MidRegHi)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ return;
+ }
+
+ MachineOperand &Src = Inst->getOperand(1);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
+ .addImm(31)
+ .addReg(Src.getReg(), 0, AMDGPU::sub0);
+
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
+ .addReg(Src.getReg(), 0, AMDGPU::sub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(TmpReg)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+}
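+
+// Illustrative sketch of the lowering above for a 64-bit sign extension from
+// 8 bits (S_BFE_I64 with offset 0, width 8):
+//   %lo = V_BFE_I32 %src:sub0, 0, 8      ; sign-extend the low 8 bits
+//   %hi = V_ASHRREV_I32_e32 31, %lo      ; replicate the sign bit
+//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1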
+
void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
MachineInstr *Inst) const {
// Add the implicit and explicit register definitions.
@@ -1537,6 +2345,74 @@ void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
}
}
+unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
+ int OpIndices[3]) const {
+ const MCInstrDesc &Desc = get(MI->getOpcode());
+
+ // Find the one SGPR operand we are allowed to use.
+ unsigned SGPRReg = AMDGPU::NoRegister;
+
+ // First we need to consider the instruction's operand requirements before
+ // legalizing. Some operands are required to be SGPRs, such as implicit uses
+ // of VCC, but we are still bound by the constant bus requirement to only use
+ // one.
+ //
+ // If the operand's class is an SGPR, we can never move it.
+
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ // We only care about reads.
+ if (MO.isDef())
+ continue;
+
+ if (MO.getReg() == AMDGPU::VCC)
+ return AMDGPU::VCC;
+
+ if (MO.getReg() == AMDGPU::FLAT_SCR)
+ return AMDGPU::FLAT_SCR;
+ }
+
+ unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = OpIndices[i];
+ if (Idx == -1)
+ break;
+
+ const MachineOperand &MO = MI->getOperand(Idx);
+ if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
+ SGPRReg = MO.getReg();
+
+ if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ UsedSGPRs[i] = MO.getReg();
+ }
+
+ if (SGPRReg != AMDGPU::NoRegister)
+ return SGPRReg;
+
+ // We don't have a required SGPR operand, so we have a bit more freedom in
+ // selecting operands to move.
+
+ // Try to select the most used SGPR. If an SGPR is equal to one of the
+ // others, we choose that.
+ //
+ // e.g.
+ // V_FMA_F32 v0, s0, s0, s0 -> No moves
+ // V_FMA_F32 v0, s0, s1, s0 -> Move s1
+
+ if (UsedSGPRs[0] != AMDGPU::NoRegister) {
+ if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
+ SGPRReg = UsedSGPRs[0];
+ }
+
+ if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
+ if (UsedSGPRs[1] == UsedSGPRs[2])
+ SGPRReg = UsedSGPRs[1];
+ }
+
+ return SGPRReg;
+}
+
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
@@ -1600,3 +2476,12 @@ void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
+
+MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
+ unsigned OperandName) const {
+ int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
+ if (Idx == -1)
+ return nullptr;
+
+ return &MI.getOperand(Idx);
+}
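+
+// Illustrative usage:
+//   const MachineOperand *Off = getNamedOperand(*MI, AMDGPU::OpName::offset);
+//   if (Off && Off->isImm()) {
+//     // ... use Off->getImm() ...
+//   }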
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 4c204d8..3bdbc9b 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
-#ifndef SIINSTRINFO_H
-#define SIINSTRINFO_H
+#ifndef LLVM_LIB_TARGET_R600_SIINSTRINFO_H
+#define LLVM_LIB_TARGET_R600_SIINSTRINFO_H
#include "AMDGPUInstrInfo.h"
#include "SIRegisterInfo.h"
@@ -52,9 +52,16 @@ private:
void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst) const;
+ void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const;
void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
+ bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
+ MachineInstr *MIb) const;
+
+ unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const;
+
public:
explicit SIInstrInfo(const AMDGPUSubtarget &st);
@@ -62,11 +69,30 @@ public:
return RI;
}
+ bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const override;
+
+ bool getLdStBaseRegImmOfs(MachineInstr *LdSt,
+ unsigned &BaseReg, unsigned &Offset,
+ const TargetRegisterInfo *TRI) const final;
+
+ bool shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const final;
+
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
+ unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ RegScavenger *RS,
+ unsigned TmpReg,
+ unsigned Offset,
+ unsigned Size) const;
+
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -79,19 +105,22 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
unsigned commuteOpcode(unsigned Opcode) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI=false) const override;
+ bool NewMI = false) const override;
+ bool findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
bool isTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA = nullptr) const;
- unsigned getIEQOpcode() const override {
- llvm_unreachable("Unimplemented");
- }
+ bool areMemAccessesTriviallyDisjoint(
+ MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr) const override;
MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
@@ -100,16 +129,42 @@ public:
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
bool isDS(uint16_t Opcode) const;
- int isMIMG(uint16_t Opcode) const;
- int isSMRD(uint16_t Opcode) const;
+ bool isMIMG(uint16_t Opcode) const;
+ bool isSMRD(uint16_t Opcode) const;
+ bool isMUBUF(uint16_t Opcode) const;
+ bool isMTBUF(uint16_t Opcode) const;
+ bool isFLAT(uint16_t Opcode) const;
bool isVOP1(uint16_t Opcode) const;
bool isVOP2(uint16_t Opcode) const;
bool isVOP3(uint16_t Opcode) const;
bool isVOPC(uint16_t Opcode) const;
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO) const;
bool isLiteralConstant(const MachineOperand &MO) const;
+ bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
+ const MachineOperand &MO) const;
+
+ /// \brief Return true if the given offset Size in bytes can be folded into
+ /// the immediate offsets of a memory instruction for the given address space.
+ static bool canFoldOffset(unsigned OffsetSize, unsigned AS) LLVM_READNONE;
+
+ /// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
+ /// This function will return false if you pass it a 32-bit instruction.
+ bool hasVALU32BitEncoding(unsigned Opcode) const;
+
+ /// \brief Returns true if this operand uses the constant bus.
+ bool usesConstantBus(const MachineRegisterInfo &MRI,
+ const MachineOperand &MO) const;
+
+ /// \brief Return true if this instruction has any modifiers.
+ /// e.g. src[012]_mod, omod, clamp.
+ bool hasModifiers(unsigned Opcode) const;
+
+ bool hasModifiersSet(const MachineInstr &MI,
+ unsigned OpName) const;
+
bool verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const override;
@@ -141,10 +196,21 @@ public:
/// instead of MOV.
void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const;
+ /// \brief Check if \p MO would be legal if it were the operand at index
+ /// \p OpIdx of \p MI.
+ bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
+ const MachineOperand *MO = nullptr) const;
+
/// \brief Legalize all operands in this instruction. This function may
/// create new instruction and insert them before \p MI.
void legalizeOperands(MachineInstr *MI) const;
+ /// \brief Split an SMRD instruction into two smaller loads of half the
+ /// size, storing the results in \p Lo and \p Hi.
+ void splitSMRD(MachineInstr *MI, const TargetRegisterClass *HalfRC,
+ unsigned HalfImmOp, unsigned HalfSGPROp,
+ MachineInstr *&Lo, MachineInstr *&Hi) const;
+
void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
/// \brief Replace this instruction's opcode with the equivalent VALU
@@ -175,29 +241,52 @@ public:
unsigned SavReg, unsigned IndexReg) const;
void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
+
+ /// \brief Returns the operand named \p OperandName. If \p MI does not have
+ /// an operand with that name, this function returns nullptr.
+ MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
+
+ const MachineOperand *getNamedOperand(const MachineInstr &MI,
+ unsigned OpName) const {
+ return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
+ }
};
namespace AMDGPU {
int getVOPe64(uint16_t Opcode);
+ int getVOPe32(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
int getMCOpcode(uint16_t Opcode, unsigned Gen);
+ int getAddr64Inst(uint16_t Opcode);
+ int getAtomicRetOp(uint16_t Opcode);
+ int getAtomicNoRetOp(uint16_t Opcode);
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
-
+ const uint64_t RSRC_TID_ENABLE = 1LL << 55;
} // End namespace AMDGPU
-} // End namespace llvm
+namespace SI {
+namespace KernelInputOffsets {
+
+/// Offsets in bytes from the start of the input buffer
+enum Offsets {
+ NGROUPS_X = 0,
+ NGROUPS_Y = 4,
+ NGROUPS_Z = 8,
+ GLOBAL_SIZE_X = 12,
+ GLOBAL_SIZE_Y = 16,
+ GLOBAL_SIZE_Z = 20,
+ LOCAL_SIZE_X = 24,
+ LOCAL_SIZE_Y = 28,
+ LOCAL_SIZE_Z = 32
+};
+
+} // End namespace KernelInputOffsets
+} // End namespace SI
-namespace SIInstrFlags {
- enum Flags {
- // First 4 bits are the instruction encoding
- VM_CNT = 1 << 0,
- EXP_CNT = 1 << 1,
- LGKM_CNT = 1 << 2
- };
-}
+} // End namespace llvm
-#endif //SIINSTRINFO_H
+#endif
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 774c9d1..713e84e 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -7,6 +7,32 @@
//
//===----------------------------------------------------------------------===//
+class vop {
+ field bits<9> SI3;
+}
+
+class vopc <bits<8> si> : vop {
+ field bits<8> SI = si;
+
+ field bits<9> SI3 = {0, si{7-0}};
+}
+
+class vop1 <bits<8> si> : vop {
+ field bits<8> SI = si;
+
+ field bits<9> SI3 = {1, 1, si{6-0}};
+}
+
+class vop2 <bits<6> si> : vop {
+ field bits<6> SI = si;
+
+ field bits<9> SI3 = {1, 0, 0, si{5-0}};
+}
+
+class vop3 <bits<9> si> : vop {
+ field bits<9> SI3 = si;
+}
+
// Except for the NONE field, this must be kept in sync with the SISubtarget enum
// in AMDGPUMCInstLower.h
def SISubtarget {
@@ -57,6 +83,10 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
+def SIconstdata_ptr : SDNode<
+ "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
+>;
+
// Transformation function, extract the lower 32bit of a 64bit immediate
def LO32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
@@ -132,7 +162,7 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{
return false;
}
const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ static_cast<const SIRegisterInfo*>(TM.getSubtargetImpl()->getRegisterInfo());
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
@@ -142,15 +172,81 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{
return false;
}]>;
+//===----------------------------------------------------------------------===//
+// Custom Operands
+//===----------------------------------------------------------------------===//
+
def FRAMEri32 : Operand<iPTR> {
let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
}
+def sopp_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getSOPPBrEncoding";
+ let OperandType = "OPERAND_PCREL";
+}
+
+include "SIInstrFormats.td"
+
+let OperandType = "OPERAND_IMMEDIATE" in {
+
+def offen : Operand<i1> {
+ let PrintMethod = "printOffen";
+}
+def idxen : Operand<i1> {
+ let PrintMethod = "printIdxen";
+}
+def addr64 : Operand<i1> {
+ let PrintMethod = "printAddr64";
+}
+def mbuf_offset : Operand<i16> {
+ let PrintMethod = "printMBUFOffset";
+}
+def ds_offset : Operand<i16> {
+ let PrintMethod = "printDSOffset";
+}
+def ds_offset0 : Operand<i8> {
+ let PrintMethod = "printDSOffset0";
+}
+def ds_offset1 : Operand<i8> {
+ let PrintMethod = "printDSOffset1";
+}
+def glc : Operand <i1> {
+ let PrintMethod = "printGLC";
+}
+def slc : Operand <i1> {
+ let PrintMethod = "printSLC";
+}
+def tfe : Operand <i1> {
+ let PrintMethod = "printTFE";
+}
+
+def omod : Operand <i32> {
+ let PrintMethod = "printOModSI";
+}
+
+def ClampMod : Operand <i1> {
+ let PrintMethod = "printClampSI";
+}
+
+} // End OperandType = "OPERAND_IMMEDIATE"
+
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
+def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
+def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
+
+def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
+def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+
+def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
+def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
+def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
//===----------------------------------------------------------------------===//
// SI assembler operands
@@ -159,9 +255,20 @@ def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
def SIOperand {
int ZERO = 0x80;
int VCC = 0x6A;
+ int FLAT_SCR = 0x68;
}
-include "SIInstrFormats.td"
+def SRCMODS {
+ int NONE = 0;
+}
+
+def DSTCLAMP {
+ int NONE = 0;
+}
+
+def DSTOMOD {
+ int NONE = 0;
+}
//===----------------------------------------------------------------------===//
//
@@ -179,6 +286,35 @@ include "SIInstrFormats.td"
//
//===----------------------------------------------------------------------===//
+class SIMCInstr <string pseudo, int subtarget> {
+ string PseudoInstr = pseudo;
+ int Subtarget = subtarget;
+}
+
+//===----------------------------------------------------------------------===//
+// EXP classes
+//===----------------------------------------------------------------------===//
+
+class EXPCommon : InstSI<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ let EXP_CNT = 1;
+ let Uses = [EXEC];
+}
+
+multiclass EXP_m {
+
+ let isPseudo = 1 in {
+ def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
+ }
+
+ def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe;
+}
+
//===----------------------------------------------------------------------===//
// Scalar classes
//===----------------------------------------------------------------------===//
@@ -204,11 +340,21 @@ class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
opName#" $dst, $src0, $src1", pattern
>;
+class SOP2_SELECT_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
+ opName#" $dst, $src0, $src1 [$scc]", pattern
+>;
+
class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
opName#" $dst, $src0, $src1", pattern
>;
+class SOP2_64_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
@@ -227,27 +373,52 @@ class SOPC_64<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_64, i64, opName, cond>;
class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
- op, (outs SReg_32:$dst), (ins i16imm:$src0),
+ op, (outs SReg_32:$dst), (ins u16imm:$src0),
opName#" $dst, $src0", pattern
>;
class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
- op, (outs SReg_64:$dst), (ins i16imm:$src0),
+ op, (outs SReg_64:$dst), (ins u16imm:$src0),
opName#" $dst, $src0", pattern
>;
-multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+//===----------------------------------------------------------------------===//
+// SMRD classes
+//===----------------------------------------------------------------------===//
+
+class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ SMRD <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
+ string asm> :
+ SMRD <outs, ins, asm, []>,
+ SMRDe <op, imm>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
+ string asm, list<dag> pattern> {
+
+ def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
+
+ def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
+
+}
+
+multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
RegisterClass dstClass> {
- def _IMM : SMRD <
- op, 1, (outs dstClass:$dst),
+ defm _IMM : SMRD_m <
+ op, opName#"_IMM", 1, (outs dstClass:$dst),
(ins baseClass:$sbase, u32imm:$offset),
- asm#" $dst, $sbase, $offset", []
+ opName#" $dst, $sbase, $offset", []
>;
- def _SGPR : SMRD <
- op, 0, (outs dstClass:$dst),
+ defm _SGPR : SMRD_m <
+ op, opName#"_SGPR", 0, (outs dstClass:$dst),
(ins baseClass:$sbase, SReg_32:$soff),
- asm#" $dst, $sbase, $soff", []
+ opName#" $dst, $sbase, $soff", []
>;
}
@@ -255,6 +426,197 @@ multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
// Vector ALU classes
//===----------------------------------------------------------------------===//
+// This must always be placed right before the input operand it modifies.
+def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
+ let PrintMethod = "printOperandAndMods";
+}
+def InputModsNoDefault : Operand <i32> {
+ let PrintMethod = "printOperandAndMods";
+}
+
+class getNumSrcArgs<ValueType Src1, ValueType Src2> {
+ int ret =
+ !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
+ !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
+ 3)); // VOP3
+}
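+
+// For example: getNumSrcArgs<untyped, untyped>.ret = 1 (a VOP1 profile),
+// getNumSrcArgs<f32, untyped>.ret = 2 (VOP2) and
+// getNumSrcArgs<f32, f32>.ret = 3 (VOP3).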
+
+// Returns the register class to use for the destination of VOP[123C]
+// instructions for the given VT.
+class getVALUDstForVT<ValueType VT> {
+ RegisterClass ret = !if(!eq(VT.Size, 32), VReg_32, VReg_64);
+}
+
+// Returns the register class to use for source 0 of VOP[12C]
+// instructions for the given VT.
+class getVOPSrc0ForVT<ValueType VT> {
+ RegisterClass ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
+}
+
+// Returns the register class to use for source 1 of VOP[12C] for the
+// given VT.
+class getVOPSrc1ForVT<ValueType VT> {
+ RegisterClass ret = !if(!eq(VT.Size, 32), VReg_32, VReg_64);
+}
+
+// Returns the register classes for the source arguments of a VOP[12C]
+// instruction for the given SrcVTs.
+class getInRC32 <list<ValueType> SrcVT> {
+ list<RegisterClass> ret = [
+ getVOPSrc0ForVT<SrcVT[0]>.ret,
+ getVOPSrc1ForVT<SrcVT[1]>.ret
+ ];
+}
+
+// Returns the register class to use for sources of VOP3 instructions for the
+// given VT.
+class getVOP3SrcForVT<ValueType VT> {
+ RegisterClass ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64);
+}
+
+// Returns the register classes for the source arguments of a VOP3
+// instruction for the given SrcVTs.
+class getInRC64 <list<ValueType> SrcVT> {
+ list<RegisterClass> ret = [
+ getVOP3SrcForVT<SrcVT[0]>.ret,
+ getVOP3SrcForVT<SrcVT[1]>.ret,
+ getVOP3SrcForVT<SrcVT[2]>.ret
+ ];
+}
+
+// Returns 1 if the source arguments have modifiers, 0 if they do not.
+class hasModifiers<ValueType SrcVT> {
+ bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
+ !if(!eq(SrcVT.Value, f64.Value), 1, 0));
+}
+
+// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
+class getIns32 <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
+ dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
+ !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
+ (ins)));
+}
+
+// Returns the input arguments for VOP3 instructions for the given SrcVT.
+class getIns64 <RegisterClass Src0RC, RegisterClass Src1RC,
+ RegisterClass Src2RC, int NumSrcArgs,
+ bit HasModifiers> {
+
+ dag ret =
+ !if (!eq(NumSrcArgs, 1),
+ !if (!eq(HasModifiers, 1),
+ // VOP1 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP1 without modifiers
+ (ins Src0RC:$src0)
+ /* endif */ ),
+ !if (!eq(NumSrcArgs, 2),
+ !if (!eq(HasModifiers, 1),
+ // VOP 2 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP2 without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1)
+ /* endif */ )
+ /* NumSrcArgs == 3 */,
+ !if (!eq(HasModifiers, 1),
+ // VOP3 with modifiers
+ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+ InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+ InputModsNoDefault:$src2_modifiers, Src2RC:$src2,
+ ClampMod:$clamp, omod:$omod)
+ /* else */,
+ // VOP3 without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
+ /* endif */ )));
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP[12C]
+// instruction. This does not add the _e32 suffix, so it can be reused
+// by getAsm64.
+class getAsm32 <int NumSrcArgs> {
+ string src1 = ", $src1";
+ string src2 = ", $src2";
+ string ret = " $dst, $src0"#
+ !if(!eq(NumSrcArgs, 1), "", src1)#
+ !if(!eq(NumSrcArgs, 3), src2, "");
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP3
+// instruction.
+class getAsm64 <int NumSrcArgs, bit HasModifiers> {
+ string src0 = "$src0_modifiers,";
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+ string ret =
+ !if(!eq(HasModifiers, 0),
+ getAsm32<NumSrcArgs>.ret,
+ " $dst, "#src0#src1#src2#"$clamp"#"$omod");
+}
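+
+// For example, a two-source instruction with modifiers gets the VOP3 string
+//   " $dst, $src0_modifiers, $src1_modifiers$clamp$omod"
+// while one without modifiers falls back to the getAsm32 form
+//   " $dst, $src0, $src1".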
+
+
+class VOPProfile <list<ValueType> _ArgVT> {
+
+ field list<ValueType> ArgVT = _ArgVT;
+
+ field ValueType DstVT = ArgVT[0];
+ field ValueType Src0VT = ArgVT[1];
+ field ValueType Src1VT = ArgVT[2];
+ field ValueType Src2VT = ArgVT[3];
+ field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterClass Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
+ field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
+ field RegisterClass Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
+ field RegisterClass Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
+ field RegisterClass Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
+
+ field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
+ field bit HasModifiers = hasModifiers<Src0VT>.ret;
+
+ field dag Outs = (outs DstRC:$dst);
+
+ field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
+ field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+ HasModifiers>.ret;
+
+ field string Asm32 = "_e32"#getAsm32<NumSrcArgs>.ret;
+ field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
+}
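+
+// For example, VOP_F32_F32_F32 below describes a two-source 32-bit float op:
+// DstRC = VReg_32, Src0RC32 = VSrc_32, Src1RC32 = VReg_32, the VOP3 source
+// classes are all VCSrc_32, NumSrcArgs = 2 and HasModifiers = 1.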
+
+def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
+def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
+def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
+def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
+def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
+def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
+def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
+def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
+def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
+
+def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
+def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
+def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
+def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
+def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
+ let Src0RC32 = VCSrc_32;
+}
+def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
+def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+
+def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
+def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
+def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
+def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
+
+
class VOP <string opName> {
string OpName = opName;
}
@@ -264,197 +626,310 @@ class VOP2_REV <string revOp, bit isOrig> {
bit IsOrig = isOrig;
}
-class SIMCInstr <string pseudo, int subtarget> {
- string PseudoInstr = pseudo;
- int Subtarget = subtarget;
+class AtomicNoRet <string noRetOp, bit isRet> {
+ string NoRetOp = noRetOp;
+ bit IsRet = isRet;
+}
+
+class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOP1Common <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
}
-multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
+multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
+ def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def "" : VOP3Common <outs, ins, "", pattern>, VOP <opName>,
- SIMCInstr<OpName, SISubtarget.NONE> {
- let isPseudo = 1;
- }
+ def _si : VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName, SISubtarget.SI>;
+}
- def _si : VOP3 <op, outs, ins, asm, []>, SIMCInstr<opName, SISubtarget.SI>;
+class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
+ bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
+ bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
+ bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ? ,0) ,0);
+ bits<2> omod = !if(HasModifiers, ?, 0);
+ bits<1> clamp = !if(HasModifiers, ?, 0);
+ bits<9> src1 = !if(HasSrc1, ?, 0);
+ bits<9> src2 = !if(HasSrc2, ?, 0);
}
-// This must always be right before the operand being input modified.
-def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
- let PrintMethod = "printOperandAndMods";
+class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+ VOP3Common <outs, ins, "", pattern>,
+ VOP <opName>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
}
-multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
- string opName, list<dag> pattern> {
+class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
+ VOP3 <op, outs, ins, asm, []>,
+ SIMCInstr<opName, SISubtarget.SI>;
- def _e32 : VOP1 <
- op, (outs drc:$dst), (ins src:$src0),
- opName#"_e32 $dst, $src0", pattern
- >, VOP <opName>;
+multiclass VOP3_m <vop3 op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName, int NumSrcArgs, bit HasMods = 1> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
+ !if(!eq(NumSrcArgs, 2), 0, 1),
+ HasMods>;
- def _e64 : VOP3 <
- {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- (outs drc:$dst),
- (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
- opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", []
- >, VOP <opName> {
- let src1 = SIOperand.ZERO;
- let src2 = SIOperand.ZERO;
- }
}
-multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
- : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, bit HasMods = 1> {
-multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
- : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-multiclass VOP1_32_64 <bits<8> op, string opName, list<dag> pattern>
- : VOP1_Helper <op, VReg_32, VSrc_64, opName, pattern>;
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<0, 0, HasMods>;
+}
-multiclass VOP1_64_32 <bits<8> op, string opName, list<dag> pattern>
- : VOP1_Helper <op, VReg_64, VSrc_32, opName, pattern>;
+multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
-multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern, string revOp> {
- def _e32 : VOP2 <
- op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
- opName#"_e32 $dst, $src0, $src1", pattern
- >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
- def _e64 : VOP3 <
- {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- (outs vrc:$dst),
- (ins InputMods:$src0_modifiers, arc:$src0,
- InputMods:$src1_modifiers, arc:$src1,
- i32imm:$clamp, i32imm:$omod),
- opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
- >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
- let src2 = SIOperand.ZERO;
- }
+ def _si : VOP3_Real_si <op.SI3,
+ outs, ins, asm, opName>,
+ VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>,
+ VOP3DisableFields<1, 0, HasMods>;
}
-multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
- string revOp = opName>
- : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
-
-multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
- string revOp = opName>
- : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
-
-multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
- RegisterClass src0_rc, string revOp = opName> {
-
- def _e32 : VOP2 <
- op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
- opName#"_e32 $dst, $src0, $src1", pattern
- >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _e64 : VOP3b <
- {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- (outs VReg_32:$dst),
- (ins InputMods: $src0_modifiers, VSrc_32:$src0,
- InputMods:$src1_modifiers, VSrc_32:$src1,
- i32imm:$clamp, i32imm:$omod),
- opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
- >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
- let src2 = SIOperand.ZERO;
- /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
- can write it into any SGPR. We currently don't use the carry out,
- so for now hardcode it to VCC as well */
- let sdst = SIOperand.VCC;
- }
+multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit UseFullOp = 0> {
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+ // The VOP2 variant puts the carry out into VCC, the VOP3 variant
+ // can write it into any SGPR. We currently don't use the carry out,
+ // so for now hardcode it to VCC as well.
+ let sdst = SIOperand.VCC, Defs = [VCC] in {
+ def _si : VOP3b <op.SI3, outs, ins, asm, pattern>,
+ VOP3DisableFields<1, 0, HasMods>,
+ SIMCInstr<opName, SISubtarget.SI>,
+ VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>;
+ } // End sdst = SIOperand.VCC, Defs = [VCC]
}
-multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
- string opName, ValueType vt, PatLeaf cond, bit defExec = 0> {
- def _e32 : VOPC <
- op, (ins arc:$src0, vrc:$src1),
- opName#"_e32 $dst, $src0, $src1", []
- >, VOP <opName> {
- let Defs = !if(defExec, [VCC, EXEC], [VCC]);
- }
+multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName,
+ bit HasMods, bit defExec> {
+
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
- def _e64 : VOP3 <
- {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- (outs SReg_64:$dst),
- (ins InputMods:$src0_modifiers, arc:$src0,
- InputMods:$src1_modifiers, arc:$src1,
- InstFlag:$clamp, InstFlag:$omod),
- opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod",
- !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
- [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
- )
- >, VOP <opName> {
+ def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+ VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
- let src2 = SIOperand.ZERO;
- let src2_modifiers = 0;
}
}
-multiclass VOPC_32 <bits<8> op, string opName,
- ValueType vt = untyped, PatLeaf cond = COND_NULL>
- : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
+multiclass VOP1_Helper <vop1 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ bit HasMods> {
-multiclass VOPC_64 <bits<8> op, string opName,
- ValueType vt = untyped, PatLeaf cond = COND_NULL>
- : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
+ def _e32 : VOP1 <op.SI, outs, ins32, opName#asm32, pat32>, VOP<opName>;
-multiclass VOPCX_32 <bits<8> op, string opName,
- ValueType vt = untyped, PatLeaf cond = COND_NULL>
- : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond, 1>;
+ defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
+}
+
+multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP1_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
+ P.HasModifiers
+>;
+
+class VOP2_e32 <bits<6> op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern, string revOp> :
+ VOP2 <op, outs, ins, opName#asm, pattern>,
+ VOP <opName>,
+ VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-multiclass VOPCX_64 <bits<8> op, string opName,
- ValueType vt = untyped, PatLeaf cond = COND_NULL>
- : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond, 1>;
+multiclass VOP2_Helper <vop2 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ string revOp, bit HasMods> {
+ def _e32 : VOP2_e32 <op.SI, opName, outs, ins32, asm32, pat32, revOp>;
-multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
- op, (outs VReg_32:$dst),
- (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
- VSrc_32:$src1, InputMods:$src2_modifiers, VSrc_32:$src2,
- InstFlag:$clamp, InstFlag:$omod),
- opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName
+ defm _e64 : VOP3_2_m <op,
+ outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ >;
+}
+
+multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> : VOP2_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ revOp, P.HasModifiers
>;
-class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
- op, (outs VReg_64:$dst),
- (ins VSrc_64:$src0, VSrc_32:$src1),
- opName#" $dst, $src0, $src1", pattern
->, VOP <opName> {
+multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
+ dag ins32, string asm32, list<dag> pat32,
+ dag ins64, string asm64, list<dag> pat64,
+ string revOp, bit HasMods> {
- let src2 = SIOperand.ZERO;
- let src0_modifiers = 0;
- let clamp = 0;
- let omod = 0;
+ def _e32 : VOP2_e32 <op.SI, opName, outs, ins32, asm32, pat32, revOp>;
+
+ defm _e64 : VOP3b_2_m <op,
+ outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ >;
}
-class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
- op, (outs VReg_64:$dst),
- (ins InputMods:$src0_modifiers, VSrc_64:$src0,
- InputMods:$src1_modifiers, VSrc_64:$src1,
- InputMods:$src2_modifiers, VSrc_64:$src2,
- InstFlag:$clamp, InstFlag:$omod),
- opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
->, VOP <opName>;
+multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> : VOP2b_Helper <
+ op, opName, P.Outs,
+ P.Ins32, P.Asm32, [],
+ P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+ revOp, P.HasModifiers
+>;
+
+multiclass VOPC_Helper <vopc op, string opName,
+ dag ins32, string asm32, list<dag> pat32,
+ dag out64, dag ins64, string asm64, list<dag> pat64,
+ bit HasMods, bit DefExec> {
+ def _e32 : VOPC <op.SI, ins32, opName#asm32, pat32>, VOP <opName> {
+ let Defs = !if(DefExec, [EXEC], []);
+ }
+
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64, opName,
+ HasMods, DefExec>;
+}
+
+multiclass VOPCInst <vopc op, string opName,
+ VOPProfile P, PatLeaf cond = COND_NULL,
+ bit DefExec = 0> : VOPC_Helper <
+ op, opName,
+ P.Ins32, P.Asm32, [],
+ (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ !if(P.HasModifiers,
+ [(set i1:$dst,
+ (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ cond))],
+ [(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
+ P.HasModifiers, DefExec
+>;
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
-class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> : VOP3 <
- op, (outs vrc:$dst0, SReg_64:$dst1),
- (ins arc:$src0, arc:$src1, arc:$src2,
- InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
- opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
->, VOP <opName>;
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
-class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
+
+
+multiclass VOPCX <vopc op, string opName, VOPProfile P,
+ PatLeaf cond = COND_NULL>
+ : VOPCInst <op, opName, P, cond, 1>;
+
+multiclass VOPCX_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+
+multiclass VOPCX_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCX <op, opName, VOP_F64_F64_F64, cond>;
+
+multiclass VOPCX_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+
+multiclass VOPCX_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
+ VOPCX <op, opName, VOP_I64_I64_I64, cond>;
+
+multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
+ list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
+ op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+>;
+
+multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> : VOP3_Helper <
+ op, opName, P.Outs, P.Ins64, P.Asm64,
+ !if(!eq(P.NumSrcArgs, 3),
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
+ P.Src2VT:$src2))]),
+ !if(!eq(P.NumSrcArgs, 2),
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
+ /* P.NumSrcArgs == 1 */,
+ !if(P.HasModifiers,
+ [(set P.DstVT:$dst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
+ P.NumSrcArgs, P.HasModifiers
+>;
+
+multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> :
+ VOP3b_2_m <
+ op, (outs vrc:$vdst, SReg_64:$sdst),
+ (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
+ InputModsNoDefault:$src1_modifiers, arc:$src1,
+ InputModsNoDefault:$src2_modifiers, arc:$src2,
+ ClampMod:$clamp, omod:$omod),
+ opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
+ opName, opName, 1, 1
+>;
+
+multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :
VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
-class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
+multiclass VOP3b_32 <vop3 op, string opName, list<dag> pattern> :
VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
+class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
+ (Inst i32:$src0_modifiers, P.Src0VT:$src0,
+ i32:$src1_modifiers, P.Src1VT:$src1,
+ i32:$src2_modifiers, P.Src2VT:$src2,
+ i1:$clamp,
+ i32:$omod)>;
+
//===----------------------------------------------------------------------===//
// Vector I/O classes
//===----------------------------------------------------------------------===//
@@ -466,13 +941,15 @@ class DS_1A <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> :
// Single loads interpret the 2 i8imm operands as a single i16 offset.
let offset0 = offset{7-0};
let offset1 = offset{15-8};
+
+ let hasSideEffects = 0;
}
class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
- asm#" $vdst, $addr, $offset, [M0]",
+ (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset),
+ asm#" $vdst, $addr"#"$offset"#" [M0]",
[]> {
let data0 = 0;
let data1 = 0;
@@ -483,20 +960,21 @@ class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, u8imm:$offset0, u8imm:$offset1),
- asm#" $gds, $vdst, $addr, $offset0, $offset1, [M0]",
+ (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1),
+ asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
[]> {
let data0 = 0;
let data1 = 0;
let mayLoad = 1;
let mayStore = 0;
+ let hasSideEffects = 0;
}
class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u16imm:$offset),
- asm#" $addr, $data0, $offset [M0]",
+ (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset),
+ asm#" $addr, $data0"#"$offset"#" [M0]",
[]> {
let data1 = 0;
let mayStore = 1;
@@ -504,76 +982,204 @@ class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
let vdst = 0;
}
-class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
+class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u8imm:$offset0, u8imm:$offset1),
- asm#" $addr, $data0, $data1, $offset0, $offset1 [M0]",
+ (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, regClass:$data1,
+ ds_offset0:$offset0, ds_offset1:$offset1),
+ asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
[]> {
let mayStore = 1;
let mayLoad = 0;
+ let hasSideEffects = 0;
let vdst = 0;
}
// 1 address, 1 data.
-class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
- asm#" $vdst, $addr, $data0, $offset, [M0]",
- []> {
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
+ asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>,
+ AtomicNoRet<noRetOp, 1> {
let data1 = 0;
let mayStore = 1;
let mayLoad = 1;
+
+  let hasPostISelHook = 1; // Adjusted to the no-return version.
}
// 1 address, 2 data.
-class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
- asm#" $vdst, $addr, $data0, $data1, $offset, [M0]",
- []> {
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
+ asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
+ []>,
+ AtomicNoRet<noRetOp, 1> {
let mayStore = 1;
let mayLoad = 1;
+  let hasPostISelHook = 1; // Adjusted to the no-return version.
}
// 1 address, 2 data.
-class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
- asm#" $addr, $data0, $data1, $offset, [M0]",
- []> {
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
+ asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
+ []>,
+ AtomicNoRet<noRetOp, 0> {
let mayStore = 1;
let mayLoad = 1;
}
// 1 address, 1 data.
-class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
- asm#" $addr, $data0, $offset, [M0]",
- []> {
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
+ asm#" $addr, $data0"#"$offset"#" [M0]",
+ []>,
+ AtomicNoRet<noRetOp, 0> {
let data1 = 0;
let mayStore = 1;
let mayLoad = 1;
}
-class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
- op,
- (outs),
+//===----------------------------------------------------------------------===//
+// MTBUF classes
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ MTBUF <outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+}
+
+class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,
+ string asm> :
+ MTBUF <outs, ins, asm, []>,
+ MTBUFe <op>,
+ SIMCInstr<opName, SISubtarget.SI>;
+
+multiclass MTBUF_m <bits<3> op, string opName, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+
+ def "" : MTBUF_Pseudo <opName, outs, ins, pattern>;
+
+ def _si : MTBUF_Real_si <op, opName, outs, ins, asm>;
+
+}
+
+let mayStore = 1, mayLoad = 0 in {
+
+multiclass MTBUF_Store_Helper <bits<3> op, string opName,
+ RegisterClass regClass> : MTBUF_m <
+ op, opName, (outs),
(ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
- asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
- #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
- []> {
- let mayStore = 1;
- let mayLoad = 0;
+ opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
+>;
+
+} // mayStore = 1, mayLoad = 0
+
+let mayLoad = 1, mayStore = 0 in {
+
+multiclass MTBUF_Load_Helper <bits<3> op, string opName,
+ RegisterClass regClass> : MTBUF_m <
+ op, opName, (outs regClass:$dst),
+ (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
+>;
+
+} // mayLoad = 1, mayStore = 0
+
+class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
+
+ bit IsAddr64 = is_addr64;
+ string OpName = NAME # suffix;
+}
+
+class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
+ : MUBUF <op, outs, ins, asm, pattern> {
+
+ let offen = 0;
+ let idxen = 0;
+ let addr64 = 1;
+ let tfe = 0;
+ let lds = 0;
+ let soffset = 128;
+}
+
+class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
+ : MUBUF <op, outs, ins, asm, pattern> {
+
+ let offen = 0;
+ let idxen = 0;
+ let addr64 = 0;
+ let tfe = 0;
+ let lds = 0;
+ let vaddr = 0;
+}
+
+multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
+ ValueType vt, SDPatternOperator atomic> {
+
+ let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in {
+
+ // No return variants
+ let glc = 0 in {
+
+ def _ADDR64 : MUBUFAtomicAddr64 <
+ op, (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", []
+ >, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>;
+
+ def _OFFSET : MUBUFAtomicOffset <
+ op, (outs),
+ (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+ SSrc_32:$soffset, slc:$slc),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", []
+ >, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>;
+ } // glc = 0
+
+  // Variants that return values
+ let glc = 1, Constraints = "$vdata = $vdata_in",
+ DisableEncoding = "$vdata_in" in {
+
+ def _RTN_ADDR64 : MUBUFAtomicAddr64 <
+ op, (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
+ [(set vt:$vdata,
+ (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
+ i1:$slc), vt:$vdata_in))]
+ >, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>;
+
+ def _RTN_OFFSET : MUBUFAtomicOffset <
+ op, (outs rc:$vdata),
+ (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
+ SSrc_32:$soffset, slc:$slc),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
+ [(set vt:$vdata,
+ (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
+ i1:$slc), vt:$vdata_in))]
+ >, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>;
+
+ } // glc = 1
+
+ } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
}
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
@@ -584,81 +1190,137 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
let addr64 = 0 in {
- let offen = 0, idxen = 0 in {
+ let offen = 0, idxen = 0, vaddr = 0 in {
def _OFFSET : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_32:$vaddr,
- u16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
- i1imm:$slc, i1imm:$tfe),
- asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ (ins SReg_128:$srsrc,
+ mbuf_offset:$offset, SSrc_32:$soffset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MUBUFAddr64Table<0>;
}
- let offen = 1, idxen = 0, offset = 0 in {
+ let offen = 1, idxen = 0 in {
def _OFFEN : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_32:$vaddr,
- SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
- i1imm:$tfe),
- asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ SSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 0, idxen = 1 in {
def _IDXEN : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_32:$vaddr,
- u16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
- i1imm:$slc, i1imm:$tfe),
- asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ mbuf_offset:$offset, SSrc_32:$soffset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_64:$vaddr,
- SSrc_32:$soffset, i1imm:$glc,
- i1imm:$slc, i1imm:$tfe),
- asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
+ SSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
}
}
let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
- asm#" $vdata, $srsrc + $vaddr + $offset",
+ (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
+ asm#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
[(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
- i64:$vaddr, u16imm:$offset)))]>;
+ i64:$vaddr, i16:$offset)))]>, MUBUFAddr64Table<1>;
}
}
}
-class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
- ValueType store_vt, SDPatternOperator st> :
- MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
- u16imm:$offset),
- name#" $vdata, $srsrc + $vaddr + $offset",
- [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, u16imm:$offset))]> {
+multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
+ ValueType store_vt, SDPatternOperator st> {
+
+ let addr64 = 0, lds = 0 in {
+
+ def "" : MUBUF <
+ op, (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SSrc_32:$soffset,
+ mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
+ []
+ >;
+
+ let offen = 0, idxen = 0, vaddr = 0 in {
+ def _OFFSET : MUBUF <
+ op, (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+ SSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+ [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc,
+ i1:$tfe))]
+ >, MUBUFAddr64Table<0>;
+ } // offen = 0, idxen = 0, vaddr = 0
+
+ let offen = 1, idxen = 0 in {
+ def _OFFEN : MUBUF <
+ op, (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
+ []
+ >;
+ } // end offen = 1, idxen = 0
+
+ } // End addr64 = 0, lds = 0
+
+ def _ADDR64 : MUBUF <
+ op, (outs),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
+ name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
+ [(st store_vt:$vdata,
+ (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>, MUBUFAddr64Table<1>
+ {
+
+ let mayLoad = 0;
+ let mayStore = 1;
+
+ // Encoding
+ let offen = 0;
+ let idxen = 0;
+ let glc = 0;
+ let addr64 = 1;
+ let lds = 0;
+ let slc = 0;
+ let tfe = 0;
+ let soffset = 128; // ZERO
+ }
+}
+
+class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
+ FLAT <op, (outs regClass:$data),
+ (ins VReg_64:$addr),
+ asm#" $data, $addr, [M0, FLAT_SCRATCH]", []> {
+ let glc = 0;
+ let slc = 0;
+ let tfe = 0;
+ let mayLoad = 1;
+}
+
+class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
+ FLAT <op, (outs), (ins vdataClass:$data, VReg_64:$addr),
+ name#" $data, $addr, [M0, FLAT_SCRATCH]",
+ []> {
let mayLoad = 0;
let mayStore = 1;
// Encoding
- let offen = 0;
- let idxen = 0;
let glc = 0;
- let addr64 = 1;
- let lds = 0;
let slc = 0;
let tfe = 0;
- let soffset = 128; // ZERO
-}
-
-class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
- op,
- (outs regClass:$dst),
- (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
- i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
- asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
- #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
- []> {
- let mayLoad = 1;
- let mayStore = 0;
}
class MIMG_Mask <string op, int channels> {
@@ -799,6 +1461,15 @@ def getVOPe64 : InstrMapping {
let ValueCols = [["8"]];
}
+// Maps an opcode in e64 form to its e32 equivalent
+def getVOPe32 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["Size"];
+ let KeyCol = ["8"];
+ let ValueCols = [["4"]];
+}
+
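(Editor's note: the getVOPe32 mapping added here is the inverse of the existing getVOPe64 table, letting the backend walk from an 8-byte e64 opcode back to its 4-byte e32 form. The short C++ sketch below shows roughly how such a generated table is queried; the AMDGPU::getVOPe32 declaration and the helper are assumptions for illustration, mirroring how the other instruction mappings are used, not code from this patch.)

// Sketch only, not the in-tree implementation. Assumes TableGen exposes the
// mapping as AMDGPU::getVOPe32(uint16_t) and returns -1 when no e32 form
// exists for the given opcode.
#include "llvm/CodeGen/MachineInstr.h"
#include <cstdint>

namespace llvm {
namespace AMDGPU {
int getVOPe32(uint16_t Opcode); // assumed to be generated from getVOPe32
}

// Map an e64 opcode back to its 4-byte e32 twin if one exists. A caller
// would still have to verify that no source modifiers, clamp or omod are
// in use before actually shrinking the instruction.
static unsigned pickSmallerEncoding(const MachineInstr &MI) {
  int E32 = AMDGPU::getVOPe32(MI.getOpcode());
  return E32 == -1 ? MI.getOpcode() : unsigned(E32);
}
} // namespace llvm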
// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
let FilterClass = "VOP2_REV";
@@ -841,4 +1512,30 @@ def getMCOpcode : InstrMapping {
let ValueCols = [[!cast<string>(SISubtarget.SI)]];
}
+def getAddr64Inst : InstrMapping {
+ let FilterClass = "MUBUFAddr64Table";
+ let RowFields = ["OpName"];
+ let ColFields = ["IsAddr64"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an atomic opcode to its version with a return value.
+def getAtomicRetOp : InstrMapping {
+ let FilterClass = "AtomicNoRet";
+ let RowFields = ["NoRetOp"];
+ let ColFields = ["IsRet"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an atomic opcode to its returnless version.
+def getAtomicNoRetOp : InstrMapping {
+ let FilterClass = "AtomicNoRet";
+ let RowFields = ["NoRetOp"];
+ let ColFields = ["IsRet"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index b3b44e2..90da7a9 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -31,13 +31,25 @@ def isSI : Predicate<"Subtarget.getGeneration() "
def isCI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
+def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
-def isCFDepth0 : Predicate<"isCFDepth0()">;
+def SWaitMatchClass : AsmOperandClass {
+ let Name = "SWaitCnt";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseSWaitCntOps";
+}
-def WAIT_FLAG : InstFlag<"printWaitFlag">;
+def WAIT_FLAG : InstFlag<"printWaitFlag"> {
+ let ParserMatchClass = SWaitMatchClass;
+}
let SubtargetPredicate = isSI in {
-let OtherPredicates = [isCFDepth0] in {
+
+//===----------------------------------------------------------------------===//
+// EXP Instructions
+//===----------------------------------------------------------------------===//
+
+defm EXP : EXP_m;
//===----------------------------------------------------------------------===//
// SMRD Instructions
@@ -48,125 +60,126 @@ let mayLoad = 1 in {
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
+ 0x08, "s_buffer_load_dword", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+ 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64
>;
defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+ 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128
>;
defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+ 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256
>;
defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+ 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
} // mayLoad = 1
-//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>;
//===----------------------------------------------------------------------===//
// SOP1 Instructions
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
-
let isMoveImm = 1 in {
-def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
-def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
-def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
-def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+def S_MOV_B32 : SOP1_32 <0x00000003, "s_mov_b32", []>;
+def S_MOV_B64 : SOP1_64 <0x00000004, "s_mov_b64", []>;
+def S_CMOV_B32 : SOP1_32 <0x00000005, "s_cmov_b32", []>;
+def S_CMOV_B64 : SOP1_64 <0x00000006, "s_cmov_b64", []>;
} // End isMoveImm = 1
-def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32",
+def S_NOT_B32 : SOP1_32 <0x00000007, "s_not_b32",
[(set i32:$dst, (not i32:$src0))]
>;
-def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64",
+def S_NOT_B64 : SOP1_64 <0x00000008, "s_not_b64",
[(set i64:$dst, (not i64:$src0))]
>;
-def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
-def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
-def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32",
+def S_WQM_B32 : SOP1_32 <0x00000009, "s_wqm_b32", []>;
+def S_WQM_B64 : SOP1_64 <0x0000000a, "s_wqm_b64", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "s_brev_b32",
[(set i32:$dst, (AMDGPUbrev i32:$src0))]
>;
-def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
-} // End neverHasSideEffects = 1
+def S_BREV_B64 : SOP1_64 <0x0000000c, "s_brev_b64", []>;
-////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
-////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
-def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32",
+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "s_bcnt0_i32_b32", []>;
+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "s_bcnt0_i32_b64", []>;
+def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "s_bcnt1_i32_b32",
[(set i32:$dst, (ctpop i32:$src0))]
>;
-def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>;
+def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "s_bcnt1_i32_b64", []>;
-////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>;
-////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
-def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32",
+////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "s_ff0_i32_b32", []>;
+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "s_ff0_i32_b64", []>;
+def S_FF1_I32_B32 : SOP1_32 <0x00000013, "s_ff1_i32_b32",
[(set i32:$dst, (cttz_zero_undef i32:$src0))]
>;
-////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "s_ff1_i32_b64", []>;
-def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32",
+def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "s_flbit_i32_b32",
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
>;
-//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
-def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
-//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
-def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8",
+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "s_flbit_i32_b64", []>;
+def S_FLBIT_I32 : SOP1_32 <0x00000017, "s_flbit_i32", []>;
+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "s_flbit_i32_i64", []>;
+def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
>;
-def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
+def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "s_sext_i32_i16",
[(set i32:$dst, (sext_inreg i32:$src0, i16))]
>;
-////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
-////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
-////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
-////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
-def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
-def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
-def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
-def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "s_bitset0_b32", []>;
+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "s_bitset0_b64", []>;
+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "s_bitset1_b32", []>;
+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "s_bitset1_b64", []>;
+def S_GETPC_B64 : SOP1 <
+ 0x0000001f, (outs SReg_64:$dst), (ins), "s_getpc_b64 $dst", []
+> {
+ let SSRC0 = 0;
+}
+def S_SETPC_B64 : SOP1_64 <0x00000020, "s_setpc_b64", []>;
+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "s_swappc_b64", []>;
+def S_RFE_B64 : SOP1_64 <0x00000022, "s_rfe_b64", []>;
let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
-def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
-def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
-def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
-def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
-def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
-def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
-def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
-def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "s_and_saveexec_b64", []>;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "s_or_saveexec_b64", []>;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "s_xor_saveexec_b64", []>;
+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "s_andn2_saveexec_b64", []>;
+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "s_orn2_saveexec_b64", []>;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "s_nand_saveexec_b64", []>;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "s_nor_saveexec_b64", []>;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "s_xnor_saveexec_b64", []>;
} // End hasSideEffects = 1
-def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
-def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
-def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
-def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
-def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
-def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
-//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
-def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
-def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
-def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "s_quadmask_b32", []>;
+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "s_quadmask_b64", []>;
+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "s_movrels_b32", []>;
+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "s_movrels_b64", []>;
+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "s_movreld_b32", []>;
+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "s_movreld_b64", []>;
+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "s_cbranch_join", []>;
+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "s_mov_regrd_b32", []>;
+def S_ABS_I32 : SOP1_32 <0x00000034, "s_abs_i32", []>;
+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "s_mov_fed_b32", []>;
//===----------------------------------------------------------------------===//
// SOP2 Instructions
@@ -174,145 +187,150 @@ def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
-def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
-def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32",
+def S_ADD_U32 : SOP2_32 <0x00000000, "s_add_u32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "s_add_i32",
[(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
>;
} // End isCommutable = 1
-def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
-def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32",
+def S_SUB_U32 : SOP2_32 <0x00000001, "s_sub_u32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "s_sub_i32",
[(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
>;
let Uses = [SCC] in { // Carry in comes from SCC
let isCommutable = 1 in {
-def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32",
+def S_ADDC_U32 : SOP2_32 <0x00000004, "s_addc_u32",
[(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
} // End isCommutable = 1
-def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
+def S_SUBB_U32 : SOP2_32 <0x00000005, "s_subb_u32",
[(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
} // End Uses = [SCC]
} // End Defs = [SCC]
-def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32",
+def S_MIN_I32 : SOP2_32 <0x00000006, "s_min_i32",
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
>;
-def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32",
+def S_MIN_U32 : SOP2_32 <0x00000007, "s_min_u32",
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
>;
-def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32",
+def S_MAX_I32 : SOP2_32 <0x00000008, "s_max_i32",
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
>;
-def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32",
+def S_MAX_U32 : SOP2_32 <0x00000009, "s_max_u32",
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
>;
-def S_CSELECT_B32 : SOP2 <
- 0x0000000a, (outs SReg_32:$dst),
- (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+def S_CSELECT_B32 : SOP2_SELECT_32 <
+ 0x0000000a, "s_cselect_b32",
[]
>;
-def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "s_cselect_b64", []>;
-def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32",
+def S_AND_B32 : SOP2_32 <0x0000000e, "s_and_b32",
[(set i32:$dst, (and i32:$src0, i32:$src1))]
>;
-def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+def S_AND_B64 : SOP2_64 <0x0000000f, "s_and_b64",
[(set i64:$dst, (and i64:$src0, i64:$src1))]
>;
-def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
+def S_OR_B32 : SOP2_32 <0x00000010, "s_or_b32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
-def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
+def S_OR_B64 : SOP2_64 <0x00000011, "s_or_b64",
[(set i64:$dst, (or i64:$src0, i64:$src1))]
>;
-def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
+def S_XOR_B32 : SOP2_32 <0x00000012, "s_xor_b32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
-def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
+def S_XOR_B64 : SOP2_64 <0x00000013, "s_xor_b64",
[(set i64:$dst, (xor i64:$src0, i64:$src1))]
>;
-def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
-def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
-def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
-def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
-def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
-def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
-def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
-def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
-def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
-def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+def S_ANDN2_B32 : SOP2_32 <0x00000014, "s_andn2_b32", []>;
+def S_ANDN2_B64 : SOP2_64 <0x00000015, "s_andn2_b64", []>;
+def S_ORN2_B32 : SOP2_32 <0x00000016, "s_orn2_b32", []>;
+def S_ORN2_B64 : SOP2_64 <0x00000017, "s_orn2_b64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "s_nand_b32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "s_nand_b64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "s_nor_b32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "s_nor_b64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "s_xnor_b32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "s_xnor_b64", []>;
// Use added complexity so these patterns are preferred to the VALU patterns.
let AddedComplexity = 1 in {
-def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "s_lshl_b32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
-def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
+def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "s_lshl_b64",
[(set i64:$dst, (shl i64:$src0, i32:$src1))]
>;
-def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
+def S_LSHR_B32 : SOP2_32 <0x00000020, "s_lshr_b32",
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
>;
-def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
+def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "s_lshr_b64",
[(set i64:$dst, (srl i64:$src0, i32:$src1))]
>;
-def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
+def S_ASHR_I32 : SOP2_32 <0x00000022, "s_ashr_i32",
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
>;
-def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
+def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "s_ashr_i64",
[(set i64:$dst, (sra i64:$src0, i32:$src1))]
>;
+
+def S_BFM_B32 : SOP2_32 <0x00000024, "s_bfm_b32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "s_bfm_b64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "s_mul_i32",
+ [(set i32:$dst, (mul i32:$src0, i32:$src1))]
+>;
+
} // End AddedComplexity = 1
-def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
-def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
-def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
-def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
-def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
-def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
-def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
-//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
-def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "s_bfe_u32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "s_bfe_i32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "s_bfe_u64", []>;
+def S_BFE_I64 : SOP2_64_32 <0x0000002a, "s_bfe_i64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "s_cbranch_g_fork", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "s_absdiff_i32", []>;
//===----------------------------------------------------------------------===//
// SOPC Instructions
//===----------------------------------------------------------------------===//
-def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">;
-def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">;
-def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">;
-def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32">;
-def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32">;
-def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32">;
-def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32">;
-def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32">;
-def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32">;
-def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32">;
-def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32">;
-def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">;
-////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
-////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
-////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
-////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
-//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "s_cmp_eq_i32">;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "s_cmp_lg_i32">;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "s_cmp_gt_i32">;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "s_cmp_ge_i32">;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "s_cmp_lt_i32">;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "s_cmp_le_i32">;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "s_cmp_eq_u32">;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "s_cmp_lg_u32">;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "s_cmp_gt_u32">;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "s_cmp_ge_u32">;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "s_cmp_lt_u32">;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "s_bitcmp0_b32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "s_bitcmp1_b32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "s_bitcmp0_b64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "s_bitcmp1_b64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "s_setvskip", []>;
//===----------------------------------------------------------------------===//
// SOPK Instructions
//===----------------------------------------------------------------------===//
-def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
-def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+let isReMaterializable = 1 in {
+def S_MOVK_I32 : SOPK_32 <0x00000000, "s_movk_i32", []>;
+} // End isReMaterializable = 1
+def S_CMOVK_I32 : SOPK_32 <0x00000002, "s_cmovk_i32", []>;
/*
This instruction is disabled for now until we can figure out how to teach
@@ -328,94 +346,87 @@ VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
def S_CMPK_EQ_I32 : SOPK <
0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
- "S_CMPK_EQ_I32",
+ "s_cmpk_eq_i32",
[(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
>;
*/
let isCompare = 1, Defs = [SCC] in {
-def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
-def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
-def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
-def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
-def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
-def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
-def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
-def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
-def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
-def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
-def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "s_cmpk_lg_i32", []>;
+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "s_cmpk_gt_i32", []>;
+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "s_cmpk_ge_i32", []>;
+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "s_cmpk_lt_i32", []>;
+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "s_cmpk_le_i32", []>;
+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "s_cmpk_eq_u32", []>;
+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "s_cmpk_lg_u32", []>;
+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "s_cmpk_gt_u32", []>;
+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "s_cmpk_ge_u32", []>;
+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "s_cmpk_lt_u32", []>;
+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "s_cmpk_le_u32", []>;
} // End isCompare = 1, Defs = [SCC]
let Defs = [SCC], isCommutable = 1 in {
- def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
- def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+ def S_ADDK_I32 : SOPK_32 <0x0000000f, "s_addk_i32", []>;
+ def S_MULK_I32 : SOPK_32 <0x00000010, "s_mulk_i32", []>;
}
-//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
-def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
-def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
-def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
-//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
-//def EXP : EXP_ <0x00000000, "EXP", []>;
-
-} // End let OtherPredicates = [isCFDepth0]
+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "s_cbranch_i_fork", []>;
+def S_GETREG_B32 : SOPK_32 <0x00000012, "s_getreg_b32", []>;
+def S_SETREG_B32 : SOPK_32 <0x00000013, "s_setreg_b32", []>;
+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "s_getreg_regrd_b32", []>;
+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "s_setreg_imm32_b32", []>;
+//def EXP : EXP_ <0x00000000, "exp", []>;
//===----------------------------------------------------------------------===//
// SOPP Instructions
//===----------------------------------------------------------------------===//
-def S_NOP : SOPP <0x00000000, (ins i16imm:$SIMM16), "S_NOP $SIMM16", []>;
+def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
let isTerminator = 1 in {
-def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
[(IL_retflag)]> {
- let SIMM16 = 0;
+ let simm16 = 0;
let isBarrier = 1;
let hasCtrlDep = 1;
}
let isBranch = 1 in {
def S_BRANCH : SOPP <
- 0x00000002, (ins brtarget:$target), "S_BRANCH $target",
- [(br bb:$target)]> {
+ 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
+ [(br bb:$simm16)]> {
let isBarrier = 1;
}
let DisableEncoding = "$scc" in {
def S_CBRANCH_SCC0 : SOPP <
- 0x00000004, (ins brtarget:$target, SCCReg:$scc),
- "S_CBRANCH_SCC0 $target", []
+ 0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc),
+ "s_cbranch_scc0 $simm16"
>;
def S_CBRANCH_SCC1 : SOPP <
- 0x00000005, (ins brtarget:$target, SCCReg:$scc),
- "S_CBRANCH_SCC1 $target",
- []
+ 0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc),
+ "s_cbranch_scc1 $simm16"
>;
} // End DisableEncoding = "$scc"
def S_CBRANCH_VCCZ : SOPP <
- 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
- "S_CBRANCH_VCCZ $target",
- []
+ 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ "s_cbranch_vccz $simm16"
>;
def S_CBRANCH_VCCNZ : SOPP <
- 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
- "S_CBRANCH_VCCNZ $target",
- []
+ 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ "s_cbranch_vccnz $simm16"
>;
let DisableEncoding = "$exec" in {
def S_CBRANCH_EXECZ : SOPP <
- 0x00000008, (ins brtarget:$target, EXECReg:$exec),
- "S_CBRANCH_EXECZ $target",
- []
+ 0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec),
+ "s_cbranch_execz $simm16"
>;
def S_CBRANCH_EXECNZ : SOPP <
- 0x00000009, (ins brtarget:$target, EXECReg:$exec),
- "S_CBRANCH_EXECNZ $target",
- []
+ 0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec),
+ "s_cbranch_execnz $simm16"
>;
} // End DisableEncoding = "$exec"
@@ -424,37 +435,39 @@ def S_CBRANCH_EXECNZ : SOPP <
} // End isTerminator = 1
let hasSideEffects = 1 in {
-def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
+def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
[(int_AMDGPU_barrier_local)]
> {
- let SIMM16 = 0;
+ let simm16 = 0;
let isBarrier = 1;
let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
}
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
- []
->;
-//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
-//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
-//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
+def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
+def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">;
+def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
let Uses = [EXEC] in {
- def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "s_sendmsg $simm16",
[(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
> {
let DisableEncoding = "$m0";
}
} // End Uses = [EXEC]
-//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
-//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
-//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
-//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
-//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
-//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">;
+def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
+def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
+ let simm16 = 0;
+}
+def S_INCPERFLEVEL : SOPP <0x00000014, (ins i16imm:$simm16), "s_incperflevel $simm16">;
+def S_DECPERFLEVEL : SOPP <0x00000015, (ins i16imm:$simm16), "s_decperflevel $simm16">;
+def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
+ let simm16 = 0;
+}
} // End hasSideEffects
//===----------------------------------------------------------------------===//
@@ -463,256 +476,256 @@ let Uses = [EXEC] in {
let isCompare = 1 in {
-defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
-defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_OLT>;
-defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_OLE>;
-defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_OGT>;
-defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32">;
-defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_OGE>;
-defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", f32, COND_O>;
-defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", f32, COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
-defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
-defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
-defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
-defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>;
-defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
-defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
+defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0>, "v_cmp_f_f32">;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2>, "v_cmp_eq_f32", COND_OEQ>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4>, "v_cmp_gt_f32", COND_OGT>;
+defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5>, "v_cmp_lg_f32">;
+defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6>, "v_cmp_ge_f32", COND_OGE>;
+defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7>, "v_cmp_o_f32", COND_O>;
+defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8>, "v_cmp_u_f32", COND_UO>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9>, "v_cmp_nge_f32">;
+defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa>, "v_cmp_nlg_f32">;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb>, "v_cmp_ngt_f32">;
+defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc>, "v_cmp_nle_f32">;
+defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd>, "v_cmp_neq_f32", COND_UNE>;
+defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe>, "v_cmp_nlt_f32">;
+defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf>, "v_cmp_tru_f32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_F32 : VOPCX_32 <0x00000010, "V_CMPX_F_F32">;
-defm V_CMPX_LT_F32 : VOPCX_32 <0x00000011, "V_CMPX_LT_F32">;
-defm V_CMPX_EQ_F32 : VOPCX_32 <0x00000012, "V_CMPX_EQ_F32">;
-defm V_CMPX_LE_F32 : VOPCX_32 <0x00000013, "V_CMPX_LE_F32">;
-defm V_CMPX_GT_F32 : VOPCX_32 <0x00000014, "V_CMPX_GT_F32">;
-defm V_CMPX_LG_F32 : VOPCX_32 <0x00000015, "V_CMPX_LG_F32">;
-defm V_CMPX_GE_F32 : VOPCX_32 <0x00000016, "V_CMPX_GE_F32">;
-defm V_CMPX_O_F32 : VOPCX_32 <0x00000017, "V_CMPX_O_F32">;
-defm V_CMPX_U_F32 : VOPCX_32 <0x00000018, "V_CMPX_U_F32">;
-defm V_CMPX_NGE_F32 : VOPCX_32 <0x00000019, "V_CMPX_NGE_F32">;
-defm V_CMPX_NLG_F32 : VOPCX_32 <0x0000001a, "V_CMPX_NLG_F32">;
-defm V_CMPX_NGT_F32 : VOPCX_32 <0x0000001b, "V_CMPX_NGT_F32">;
-defm V_CMPX_NLE_F32 : VOPCX_32 <0x0000001c, "V_CMPX_NLE_F32">;
-defm V_CMPX_NEQ_F32 : VOPCX_32 <0x0000001d, "V_CMPX_NEQ_F32">;
-defm V_CMPX_NLT_F32 : VOPCX_32 <0x0000001e, "V_CMPX_NLT_F32">;
-defm V_CMPX_TRU_F32 : VOPCX_32 <0x0000001f, "V_CMPX_TRU_F32">;
+defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10>, "v_cmpx_f_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11>, "v_cmpx_lt_f32">;
+defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12>, "v_cmpx_eq_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13>, "v_cmpx_le_f32">;
+defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14>, "v_cmpx_gt_f32">;
+defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15>, "v_cmpx_lg_f32">;
+defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16>, "v_cmpx_ge_f32">;
+defm V_CMPX_O_F32 : VOPCX_F32 <vopc<0x17>, "v_cmpx_o_f32">;
+defm V_CMPX_U_F32 : VOPCX_F32 <vopc<0x18>, "v_cmpx_u_f32">;
+defm V_CMPX_NGE_F32 : VOPCX_F32 <vopc<0x19>, "v_cmpx_nge_f32">;
+defm V_CMPX_NLG_F32 : VOPCX_F32 <vopc<0x1a>, "v_cmpx_nlg_f32">;
+defm V_CMPX_NGT_F32 : VOPCX_F32 <vopc<0x1b>, "v_cmpx_ngt_f32">;
+defm V_CMPX_NLE_F32 : VOPCX_F32 <vopc<0x1c>, "v_cmpx_nle_f32">;
+defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d>, "v_cmpx_neq_f32">;
+defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e>, "v_cmpx_nlt_f32">;
+defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f>, "v_cmpx_tru_f32">;
} // End hasSideEffects = 1
-defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
-defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>;
-defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_OLE>;
-defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_OGT>;
-defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
-defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_OGE>;
-defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", f64, COND_O>;
-defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", f64, COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
-defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
-defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
-defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
-defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>;
-defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
-defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
+defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20>, "v_cmp_f_f64">;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22>, "v_cmp_eq_f64", COND_OEQ>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24>, "v_cmp_gt_f64", COND_OGT>;
+defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25>, "v_cmp_lg_f64">;
+defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26>, "v_cmp_ge_f64", COND_OGE>;
+defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27>, "v_cmp_o_f64", COND_O>;
+defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28>, "v_cmp_u_f64", COND_UO>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29>, "v_cmp_nge_f64">;
+defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a>, "v_cmp_nlg_f64">;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b>, "v_cmp_ngt_f64">;
+defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c>, "v_cmp_nle_f64">;
+defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d>, "v_cmp_neq_f64", COND_UNE>;
+defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e>, "v_cmp_nlt_f64">;
+defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f>, "v_cmp_tru_f64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_F64 : VOPCX_64 <0x00000030, "V_CMPX_F_F64">;
-defm V_CMPX_LT_F64 : VOPCX_64 <0x00000031, "V_CMPX_LT_F64">;
-defm V_CMPX_EQ_F64 : VOPCX_64 <0x00000032, "V_CMPX_EQ_F64">;
-defm V_CMPX_LE_F64 : VOPCX_64 <0x00000033, "V_CMPX_LE_F64">;
-defm V_CMPX_GT_F64 : VOPCX_64 <0x00000034, "V_CMPX_GT_F64">;
-defm V_CMPX_LG_F64 : VOPCX_64 <0x00000035, "V_CMPX_LG_F64">;
-defm V_CMPX_GE_F64 : VOPCX_64 <0x00000036, "V_CMPX_GE_F64">;
-defm V_CMPX_O_F64 : VOPCX_64 <0x00000037, "V_CMPX_O_F64">;
-defm V_CMPX_U_F64 : VOPCX_64 <0x00000038, "V_CMPX_U_F64">;
-defm V_CMPX_NGE_F64 : VOPCX_64 <0x00000039, "V_CMPX_NGE_F64">;
-defm V_CMPX_NLG_F64 : VOPCX_64 <0x0000003a, "V_CMPX_NLG_F64">;
-defm V_CMPX_NGT_F64 : VOPCX_64 <0x0000003b, "V_CMPX_NGT_F64">;
-defm V_CMPX_NLE_F64 : VOPCX_64 <0x0000003c, "V_CMPX_NLE_F64">;
-defm V_CMPX_NEQ_F64 : VOPCX_64 <0x0000003d, "V_CMPX_NEQ_F64">;
-defm V_CMPX_NLT_F64 : VOPCX_64 <0x0000003e, "V_CMPX_NLT_F64">;
-defm V_CMPX_TRU_F64 : VOPCX_64 <0x0000003f, "V_CMPX_TRU_F64">;
+defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30>, "v_cmpx_f_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31>, "v_cmpx_lt_f64">;
+defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32>, "v_cmpx_eq_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33>, "v_cmpx_le_f64">;
+defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34>, "v_cmpx_gt_f64">;
+defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35>, "v_cmpx_lg_f64">;
+defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36>, "v_cmpx_ge_f64">;
+defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37>, "v_cmpx_o_f64">;
+defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38>, "v_cmpx_u_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39>, "v_cmpx_nge_f64">;
+defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a>, "v_cmpx_nlg_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c>, "v_cmpx_nle_f64">;
+defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d>, "v_cmpx_neq_f64">;
+defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e>, "v_cmpx_nlt_f64">;
+defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f>, "v_cmpx_tru_f64">;
} // End hasSideEffects = 1
-defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
-defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
-defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">;
-defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32">;
-defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">;
-defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">;
-defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">;
-defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">;
-defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">;
-defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">;
-defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">;
-defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">;
-defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">;
-defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
-defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
-defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
+defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
+defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
+defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
+defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
+defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
+defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
+defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
+defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
let hasSideEffects = 1 in {
-defm V_CMPSX_F_F32 : VOPCX_32 <0x00000050, "V_CMPSX_F_F32">;
-defm V_CMPSX_LT_F32 : VOPCX_32 <0x00000051, "V_CMPSX_LT_F32">;
-defm V_CMPSX_EQ_F32 : VOPCX_32 <0x00000052, "V_CMPSX_EQ_F32">;
-defm V_CMPSX_LE_F32 : VOPCX_32 <0x00000053, "V_CMPSX_LE_F32">;
-defm V_CMPSX_GT_F32 : VOPCX_32 <0x00000054, "V_CMPSX_GT_F32">;
-defm V_CMPSX_LG_F32 : VOPCX_32 <0x00000055, "V_CMPSX_LG_F32">;
-defm V_CMPSX_GE_F32 : VOPCX_32 <0x00000056, "V_CMPSX_GE_F32">;
-defm V_CMPSX_O_F32 : VOPCX_32 <0x00000057, "V_CMPSX_O_F32">;
-defm V_CMPSX_U_F32 : VOPCX_32 <0x00000058, "V_CMPSX_U_F32">;
-defm V_CMPSX_NGE_F32 : VOPCX_32 <0x00000059, "V_CMPSX_NGE_F32">;
-defm V_CMPSX_NLG_F32 : VOPCX_32 <0x0000005a, "V_CMPSX_NLG_F32">;
-defm V_CMPSX_NGT_F32 : VOPCX_32 <0x0000005b, "V_CMPSX_NGT_F32">;
-defm V_CMPSX_NLE_F32 : VOPCX_32 <0x0000005c, "V_CMPSX_NLE_F32">;
-defm V_CMPSX_NEQ_F32 : VOPCX_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
-defm V_CMPSX_NLT_F32 : VOPCX_32 <0x0000005e, "V_CMPSX_NLT_F32">;
-defm V_CMPSX_TRU_F32 : VOPCX_32 <0x0000005f, "V_CMPSX_TRU_F32">;
+defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
+defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
+defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
+defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
+defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
+defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
+defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
+defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
} // End hasSideEffects = 1
-defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
-defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
-defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">;
-defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">;
-defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">;
-defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">;
-defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">;
-defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">;
-defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">;
-defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">;
-defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64">;
-defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">;
-defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">;
-defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">;
-defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">;
-defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">;
+defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64">;
+defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">;
+defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
+defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
+defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
+defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
+defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">;
+defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">;
+defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
+defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
+defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
+defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
let hasSideEffects = 1, Defs = [EXEC] in {
-defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">;
-defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">;
-defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">;
-defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">;
-defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">;
-defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">;
-defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">;
-defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">;
-defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">;
-defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">;
-defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">;
-defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">;
-defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">;
-defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">;
-defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">;
-defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
+defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">;
+defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">;
+defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">;
+defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">;
+defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
} // End hasSideEffects = 1, Defs = [EXEC]
-defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
-defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_SLT>;
-defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_SLE>;
-defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_SGT>;
-defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
-defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>;
-defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
+defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80>, "v_cmp_f_i32">;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82>, "v_cmp_eq_i32", COND_EQ>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84>, "v_cmp_gt_i32", COND_SGT>;
+defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85>, "v_cmp_ne_i32", COND_NE>;
+defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86>, "v_cmp_ge_i32", COND_SGE>;
+defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87>, "v_cmp_t_i32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_I32 : VOPCX_32 <0x00000090, "V_CMPX_F_I32">;
-defm V_CMPX_LT_I32 : VOPCX_32 <0x00000091, "V_CMPX_LT_I32">;
-defm V_CMPX_EQ_I32 : VOPCX_32 <0x00000092, "V_CMPX_EQ_I32">;
-defm V_CMPX_LE_I32 : VOPCX_32 <0x00000093, "V_CMPX_LE_I32">;
-defm V_CMPX_GT_I32 : VOPCX_32 <0x00000094, "V_CMPX_GT_I32">;
-defm V_CMPX_NE_I32 : VOPCX_32 <0x00000095, "V_CMPX_NE_I32">;
-defm V_CMPX_GE_I32 : VOPCX_32 <0x00000096, "V_CMPX_GE_I32">;
-defm V_CMPX_T_I32 : VOPCX_32 <0x00000097, "V_CMPX_T_I32">;
+defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90>, "v_cmpx_f_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91>, "v_cmpx_lt_i32">;
+defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92>, "v_cmpx_eq_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93>, "v_cmpx_le_i32">;
+defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94>, "v_cmpx_gt_i32">;
+defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95>, "v_cmpx_ne_i32">;
+defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96>, "v_cmpx_ge_i32">;
+defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97>, "v_cmpx_t_i32">;
} // End hasSideEffects = 1
-defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
-defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>;
-defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", i64, COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", i64, COND_SLE>;
-defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", i64, COND_SGT>;
-defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>;
-defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>;
-defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
+defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0>, "v_cmp_f_i64">;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2>, "v_cmp_eq_i64", COND_EQ>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4>, "v_cmp_gt_i64", COND_SGT>;
+defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5>, "v_cmp_ne_i64", COND_NE>;
+defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6>, "v_cmp_ge_i64", COND_SGE>;
+defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7>, "v_cmp_t_i64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_I64 : VOPCX_64 <0x000000b0, "V_CMPX_F_I64">;
-defm V_CMPX_LT_I64 : VOPCX_64 <0x000000b1, "V_CMPX_LT_I64">;
-defm V_CMPX_EQ_I64 : VOPCX_64 <0x000000b2, "V_CMPX_EQ_I64">;
-defm V_CMPX_LE_I64 : VOPCX_64 <0x000000b3, "V_CMPX_LE_I64">;
-defm V_CMPX_GT_I64 : VOPCX_64 <0x000000b4, "V_CMPX_GT_I64">;
-defm V_CMPX_NE_I64 : VOPCX_64 <0x000000b5, "V_CMPX_NE_I64">;
-defm V_CMPX_GE_I64 : VOPCX_64 <0x000000b6, "V_CMPX_GE_I64">;
-defm V_CMPX_T_I64 : VOPCX_64 <0x000000b7, "V_CMPX_T_I64">;
+defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0>, "v_cmpx_f_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1>, "v_cmpx_lt_i64">;
+defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2>, "v_cmpx_eq_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3>, "v_cmpx_le_i64">;
+defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4>, "v_cmpx_gt_i64">;
+defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5>, "v_cmpx_ne_i64">;
+defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6>, "v_cmpx_ge_i64">;
+defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7>, "v_cmpx_t_i64">;
} // End hasSideEffects = 1
-defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
-defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>;
-defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", i32, COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", i32, COND_ULE>;
-defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", i32, COND_UGT>;
-defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>;
-defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>;
-defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
+defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0>, "v_cmp_f_u32">;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2>, "v_cmp_eq_u32", COND_EQ>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4>, "v_cmp_gt_u32", COND_UGT>;
+defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5>, "v_cmp_ne_u32", COND_NE>;
+defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6>, "v_cmp_ge_u32", COND_UGE>;
+defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7>, "v_cmp_t_u32">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_U32 : VOPCX_32 <0x000000d0, "V_CMPX_F_U32">;
-defm V_CMPX_LT_U32 : VOPCX_32 <0x000000d1, "V_CMPX_LT_U32">;
-defm V_CMPX_EQ_U32 : VOPCX_32 <0x000000d2, "V_CMPX_EQ_U32">;
-defm V_CMPX_LE_U32 : VOPCX_32 <0x000000d3, "V_CMPX_LE_U32">;
-defm V_CMPX_GT_U32 : VOPCX_32 <0x000000d4, "V_CMPX_GT_U32">;
-defm V_CMPX_NE_U32 : VOPCX_32 <0x000000d5, "V_CMPX_NE_U32">;
-defm V_CMPX_GE_U32 : VOPCX_32 <0x000000d6, "V_CMPX_GE_U32">;
-defm V_CMPX_T_U32 : VOPCX_32 <0x000000d7, "V_CMPX_T_U32">;
+defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0>, "v_cmpx_f_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1>, "v_cmpx_lt_u32">;
+defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2>, "v_cmpx_eq_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3>, "v_cmpx_le_u32">;
+defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4>, "v_cmpx_gt_u32">;
+defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5>, "v_cmpx_ne_u32">;
+defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6>, "v_cmpx_ge_u32">;
+defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7>, "v_cmpx_t_u32">;
} // End hasSideEffects = 1
-defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
-defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>;
-defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", i64, COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", i64, COND_ULE>;
-defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", i64, COND_UGT>;
-defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>;
-defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>;
-defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
+defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0>, "v_cmp_f_u64">;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2>, "v_cmp_eq_u64", COND_EQ>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4>, "v_cmp_gt_u64", COND_UGT>;
+defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5>, "v_cmp_ne_u64", COND_NE>;
+defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6>, "v_cmp_ge_u64", COND_UGE>;
+defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7>, "v_cmp_t_u64">;
let hasSideEffects = 1 in {
-defm V_CMPX_F_U64 : VOPCX_64 <0x000000f0, "V_CMPX_F_U64">;
-defm V_CMPX_LT_U64 : VOPCX_64 <0x000000f1, "V_CMPX_LT_U64">;
-defm V_CMPX_EQ_U64 : VOPCX_64 <0x000000f2, "V_CMPX_EQ_U64">;
-defm V_CMPX_LE_U64 : VOPCX_64 <0x000000f3, "V_CMPX_LE_U64">;
-defm V_CMPX_GT_U64 : VOPCX_64 <0x000000f4, "V_CMPX_GT_U64">;
-defm V_CMPX_NE_U64 : VOPCX_64 <0x000000f5, "V_CMPX_NE_U64">;
-defm V_CMPX_GE_U64 : VOPCX_64 <0x000000f6, "V_CMPX_GE_U64">;
-defm V_CMPX_T_U64 : VOPCX_64 <0x000000f7, "V_CMPX_T_U64">;
+defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0>, "v_cmpx_f_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1>, "v_cmpx_lt_u64">;
+defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2>, "v_cmpx_eq_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3>, "v_cmpx_le_u64">;
+defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4>, "v_cmpx_gt_u64">;
+defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5>, "v_cmpx_ne_u64">;
+defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6>, "v_cmpx_ge_u64">;
+defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7>, "v_cmpx_t_u64">;
} // End hasSideEffects = 1
-defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
+defm V_CMP_CLASS_F32 : VOPC_F32 <vopc<0x88>, "v_cmp_class_f32">;
let hasSideEffects = 1 in {
-defm V_CMPX_CLASS_F32 : VOPCX_32 <0x00000098, "V_CMPX_CLASS_F32">;
+defm V_CMPX_CLASS_F32 : VOPCX_F32 <vopc<0x98>, "v_cmpx_class_f32">;
} // End hasSideEffects = 1
-defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
+defm V_CMP_CLASS_F64 : VOPC_F64 <vopc<0xa8>, "v_cmp_class_f64">;
let hasSideEffects = 1 in {
-defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">;
+defm V_CMPX_CLASS_F64 : VOPCX_F64 <vopc<0xb8>, "v_cmpx_class_f64">;
} // End hasSideEffects = 1
} // End isCompare = 1
@@ -722,88 +735,88 @@ defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">;
//===----------------------------------------------------------------------===//
-def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>;
-def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>;
-def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>;
-def DS_INC_U32 : DS_1A1D_NORET <0x3, "DS_INC_U32", VReg_32>;
-def DS_DEC_U32 : DS_1A1D_NORET <0x4, "DS_DEC_U32", VReg_32>;
-def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>;
-def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>;
-def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>;
-def DS_MAX_U32 : DS_1A1D_NORET <0x8, "DS_MAX_U32", VReg_32>;
-def DS_AND_B32 : DS_1A1D_NORET <0x9, "DS_AND_B32", VReg_32>;
-def DS_OR_B32 : DS_1A1D_NORET <0xa, "DS_OR_B32", VReg_32>;
-def DS_XOR_B32 : DS_1A1D_NORET <0xb, "DS_XOR_B32", VReg_32>;
-def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "DS_MSKOR_B32", VReg_32>;
-def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "DS_CMPST_B32", VReg_32>;
-def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "DS_CMPST_F32", VReg_32>;
-def DS_MIN_F32 : DS_1A1D_NORET <0x12, "DS_MIN_F32", VReg_32>;
-def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>;
-
-def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32>;
-def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32>;
-def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32>;
-def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "DS_INC_RTN_U32", VReg_32>;
-def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>;
-def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32>;
-def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32>;
-def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32>;
-def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "DS_MAX_RTN_U32", VReg_32>;
-def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "DS_AND_RTN_B32", VReg_32>;
-def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "DS_OR_RTN_B32", VReg_32>;
-def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "DS_XOR_RTN_B32", VReg_32>;
-def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "DS_MSKOR_RTN_B32", VReg_32>;
-def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "DS_WRXCHG_RTN_B32", VReg_32>;
-//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "DS_WRXCHG2_RTN_B32", VReg_32>;
-//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "DS_WRXCHG2_RTN_B32", VReg_32>;
-def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "DS_CMPST_RTN_B32", VReg_32>;
-def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "DS_CMPST_RTN_F32", VReg_32>;
-def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "DS_MIN_RTN_F32", VReg_32>;
-def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "DS_MAX_RTN_F32", VReg_32>;
+def DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VReg_32>;
+def DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VReg_32>;
+def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VReg_32>;
+def DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VReg_32>;
+def DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VReg_32>;
+def DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VReg_32>;
+def DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VReg_32>;
+def DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VReg_32>;
+def DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VReg_32>;
+def DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VReg_32>;
+def DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VReg_32>;
+def DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VReg_32>;
+def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VReg_32>;
+def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VReg_32>;
+def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VReg_32>;
+def DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VReg_32>;
+def DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VReg_32>;
+
+def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VReg_32, "ds_add_u32">;
+def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VReg_32, "ds_sub_u32">;
+def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VReg_32, "ds_rsub_u32">;
+def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VReg_32, "ds_inc_u32">;
+def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VReg_32, "ds_dec_u32">;
+def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VReg_32, "ds_min_i32">;
+def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VReg_32, "ds_max_i32">;
+def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VReg_32, "ds_min_u32">;
+def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VReg_32, "ds_max_u32">;
+def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VReg_32, "ds_and_b32">;
+def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VReg_32, "ds_or_b32">;
+def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VReg_32, "ds_xor_b32">;
+def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VReg_32, "ds_mskor_b32">;
+def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VReg_32>;
+//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VReg_32, "ds_wrxchg2_b32">;
+//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VReg_32, "ds_wrxchg2st64_b32">;
+def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VReg_32, "ds_cmpst_b32">;
+def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VReg_32, "ds_cmpst_f32">;
+def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VReg_32, "ds_min_f32">;
+def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VReg_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
-def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>;
+def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VReg_32, "ds_wrap_f32">;
} // End isCI
-def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_32>;
-def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_32>;
-def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_32>;
-def DS_INC_U64 : DS_1A1D_NORET <0x43, "DS_INC_U64", VReg_32>;
-def DS_DEC_U64 : DS_1A1D_NORET <0x44, "DS_DEC_U64", VReg_32>;
-def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>;
-def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>;
-def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>;
-def DS_MAX_U64 : DS_1A1D_NORET <0x48, "DS_MAX_U64", VReg_64>;
-def DS_AND_B64 : DS_1A1D_NORET <0x49, "DS_AND_B64", VReg_64>;
-def DS_OR_B64 : DS_1A1D_NORET <0x4a, "DS_OR_B64", VReg_64>;
-def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "DS_XOR_B64", VReg_64>;
-def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "DS_MSKOR_B64", VReg_64>;
-def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "DS_CMPST_B64", VReg_64>;
-def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "DS_CMPST_F64", VReg_64>;
-def DS_MIN_F64 : DS_1A1D_NORET <0x52, "DS_MIN_F64", VReg_64>;
-def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>;
-
-def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64>;
-def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64>;
-def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64>;
-def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "DS_INC_RTN_U64", VReg_64>;
-def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>;
-def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64>;
-def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64>;
-def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64>;
-def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "DS_MAX_RTN_U64", VReg_64>;
-def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "DS_AND_RTN_B64", VReg_64>;
-def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "DS_OR_RTN_B64", VReg_64>;
-def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "DS_XOR_RTN_B64", VReg_64>;
-def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "DS_MSKOR_RTN_B64", VReg_64>;
-def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "DS_WRXCHG_RTN_B64", VReg_64>;
-//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "DS_WRXCHG2_RTN_B64", VReg_64>;
-//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "DS_WRXCHG2_RTN_B64", VReg_64>;
-def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "DS_CMPST_RTN_B64", VReg_64>;
-def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "DS_CMPST_RTN_F64", VReg_64>;
-def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "DS_MIN_F64", VReg_64>;
-def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64>;
+def DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
+def DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
+def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
+def DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
+def DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
+def DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
+def DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
+def DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
+def DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
+def DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
+def DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
+def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
+def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
+def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
+def DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
+def DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
+
+def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
+def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
+def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
+def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
+def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
+def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
+def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
+def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
+def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
+def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
+def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
+def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
+def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
+//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
+//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
+def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
+def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
+def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_f64", VReg_64, "ds_min_f64">;
+def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_f64", VReg_64, "ds_max_f64">;
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
@@ -812,240 +825,336 @@ def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64>;
// TODO: _SRC2_* forms
-def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
-def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
-def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
-def DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "DS_WRITE_B64", VReg_64>;
+def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VReg_32>;
+def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VReg_32>;
+def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VReg_32>;
+def DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>;
-def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
-def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
-def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
-def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
-def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
-def DS_READ_B64 : DS_Load_Helper <0x00000076, "DS_READ_B64", VReg_64>;
+def DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VReg_32>;
+def DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VReg_32>;
+def DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VReg_32>;
+def DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VReg_32>;
+def DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VReg_32>;
+def DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>;
// 2 forms.
-def DS_WRITE2_B32 : DS_Load2_Helper <0x0000000E, "DS_WRITE2_B32", VReg_64>;
-def DS_WRITE2_B64 : DS_Load2_Helper <0x0000004E, "DS_WRITE2_B64", VReg_128>;
+def DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VReg_32>;
+def DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VReg_32>;
+def DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
+def DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-def DS_READ2_B32 : DS_Load2_Helper <0x00000037, "DS_READ2_B32", VReg_64>;
-def DS_READ2_B64 : DS_Load2_Helper <0x00000075, "DS_READ2_B64", VReg_128>;
-
-// TODO: DS_READ2ST64_B32, DS_READ2ST64_B64,
-// DS_WRITE2ST64_B32, DS_WRITE2ST64_B64
+def DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
+def DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
+def DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
+def DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "buffer_load_format_x", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "buffer_load_format_xy", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "buffer_load_format_xyz", []>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "buffer_load_format_xyzw", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "buffer_store_format_x", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "buffer_store_format_xy", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "buffer_store_format_xyz", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "buffer_store_format_xyzw", []>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
- 0x00000008, "BUFFER_LOAD_UBYTE", VReg_32, i32, az_extloadi8_global
+ 0x00000008, "buffer_load_ubyte", VReg_32, i32, az_extloadi8_global
>;
defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
- 0x00000009, "BUFFER_LOAD_SBYTE", VReg_32, i32, sextloadi8_global
+ 0x00000009, "buffer_load_sbyte", VReg_32, i32, sextloadi8_global
>;
defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
- 0x0000000a, "BUFFER_LOAD_USHORT", VReg_32, i32, az_extloadi16_global
+ 0x0000000a, "buffer_load_ushort", VReg_32, i32, az_extloadi16_global
>;
defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
- 0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32, i32, sextloadi16_global
+ 0x0000000b, "buffer_load_sshort", VReg_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- 0x0000000c, "BUFFER_LOAD_DWORD", VReg_32, i32, global_load
+ 0x0000000c, "buffer_load_dword", VReg_32, i32, global_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- 0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64, v2i32, global_load
+ 0x0000000d, "buffer_load_dwordx2", VReg_64, v2i32, global_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- 0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128, v4i32, global_load
->;
-
-def BUFFER_STORE_BYTE : MUBUF_Store_Helper <
- 0x00000018, "BUFFER_STORE_BYTE", VReg_32, i32, truncstorei8_global
->;
-
-def BUFFER_STORE_SHORT : MUBUF_Store_Helper <
- 0x0000001a, "BUFFER_STORE_SHORT", VReg_32, i32, truncstorei16_global
->;
-
-def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32, global_store
->;
-
-def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, v2i32, global_store
->;
-
-def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32, global_store
->;
-//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
-//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
-//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
-//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
-//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
-//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
-//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
-//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
-//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
-//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
-//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
-//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
-//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
-//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
-//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
-//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
-//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
-//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
-//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
-//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
-//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
-//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
-//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
-//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
-//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
-//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
-//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
-//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
-//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
-//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
-//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
-//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
-//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
-//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+ 0x0000000e, "buffer_load_dwordx4", VReg_128, v4i32, global_load
+>;
+
+defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
+ 0x00000018, "buffer_store_byte", VReg_32, i32, truncstorei8_global
+>;
+
+defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
+ 0x0000001a, "buffer_store_short", VReg_32, i32, truncstorei16_global
+>;
+
+defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
+ 0x0000001c, "buffer_store_dword", VReg_32, i32, global_store
+>;
+
+defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
+ 0x0000001d, "buffer_store_dwordx2", VReg_64, v2i32, global_store
+>;
+
+defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
+ 0x0000001e, "buffer_store_dwordx4", VReg_128, v4i32, global_store
+>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "buffer_atomic_swap", []>;
+defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
+ 0x00000030, "buffer_atomic_swap", VReg_32, i32, atomic_swap_global
+>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "buffer_atomic_cmpswap", []>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
+ 0x00000032, "buffer_atomic_add", VReg_32, i32, atomic_add_global
+>;
+defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
+ 0x00000033, "buffer_atomic_sub", VReg_32, i32, atomic_sub_global
+>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "buffer_atomic_rsub", []>;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
+ 0x00000035, "buffer_atomic_smin", VReg_32, i32, atomic_min_global
+>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
+ 0x00000036, "buffer_atomic_umin", VReg_32, i32, atomic_umin_global
+>;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
+ 0x00000037, "buffer_atomic_smax", VReg_32, i32, atomic_max_global
+>;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
+ 0x00000038, "buffer_atomic_umax", VReg_32, i32, atomic_umax_global
+>;
+defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
+ 0x00000039, "buffer_atomic_and", VReg_32, i32, atomic_and_global
+>;
+defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
+ 0x0000003a, "buffer_atomic_or", VReg_32, i32, atomic_or_global
+>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
+ 0x0000003b, "buffer_atomic_xor", VReg_32, i32, atomic_xor_global
+>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "buffer_atomic_inc", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "buffer_atomic_dec", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "buffer_atomic_fcmpswap", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "buffer_atomic_fmin", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "buffer_atomic_fmax", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "buffer_atomic_swap_x2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "buffer_atomic_cmpswap_x2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "buffer_atomic_add_x2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "buffer_atomic_sub_x2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "buffer_atomic_rsub_x2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "buffer_atomic_smin_x2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "buffer_atomic_umin_x2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "buffer_atomic_smax_x2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "buffer_atomic_umax_x2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "buffer_atomic_and_x2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "buffer_atomic_or_x2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "buffer_atomic_xor_x2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "buffer_atomic_inc_x2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "buffer_atomic_dec_x2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "buffer_atomic_fcmpswap_x2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "buffer_atomic_fmin_x2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "buffer_atomic_fmax_x2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "buffer_wbinvl1_sc", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "buffer_wbinvl1", []>;
//===----------------------------------------------------------------------===//
// MTBUF Instructions
//===----------------------------------------------------------------------===//
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
-def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
-def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
-def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
-def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "tbuffer_load_format_x", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "tbuffer_load_format_xy", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "tbuffer_load_format_xyz", []>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "tbuffer_load_format_xyzw", VReg_128>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "tbuffer_store_format_x", VReg_32>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "tbuffer_store_format_xy", VReg_64>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "tbuffer_store_format_xyz", VReg_128>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "tbuffer_store_format_xyzw", VReg_128>;
//===----------------------------------------------------------------------===//
// MIMG Instructions
//===----------------------------------------------------------------------===//
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">;
-//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
-//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
-//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
-//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
-//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
-//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
-//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
-//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "IMAGE_GET_RESINFO">;
-//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
-//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
-//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
-//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
-//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
-//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
-//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
-//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
-//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
-//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
-//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
-//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
-//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
-//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
-//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
-//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
-//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">;
-//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">;
-//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">;
-defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">;
-//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
-//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
-defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">;
-//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">;
-//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
-//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
-//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
-//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
-//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
-//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
-//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
-//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
-//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
-//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
-//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
-//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
-//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
-//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
-//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
-//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
-//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
-//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
-//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
-defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
-defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
-defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
-defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
-defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
-defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
-defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
-defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
-defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
-defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">;
-//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
-//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
-//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
-//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
-//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
-//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
-//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
-//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
-//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
-//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"image_load_pck", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"image_load_pck_sgn", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"image_load_mip_pck", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"image_load_mip_pck_sgn", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"image_store", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"image_store_mip", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"image_atomic_swap", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"image_atomic_cmpswap", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"image_atomic_add", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"image_atomic_sub", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"image_atomic_smin", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"image_atomic_umin", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"image_atomic_smax", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"image_atomic_umax", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"image_atomic_and", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"image_atomic_or", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"image_atomic_xor", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"image_atomic_inc", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"image_atomic_dec", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
+defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "image_sample">;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "image_sample_cl">;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
+defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "image_sample_b">;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "image_sample_b_cl">;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
+defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "image_sample_c">;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "image_sample_c_cl">;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "image_sample_c_b">;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
+defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "image_sample_o">;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "image_sample_cl_o">;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "image_sample_b_o">;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "image_sample_c_o">;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
+defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "image_gather4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "image_gather4_cl">;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
+defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "image_gather4_b">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "image_gather4_b_cl">;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
+defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "image_gather4_c">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "image_gather4_c_cl">;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "image_gather4_c_b">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
+defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "image_gather4_o">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "image_gather4_cl_o">;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "image_gather4_b_o">;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "image_gather4_c_o">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod">;
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "image_sample_c_cd_cl">;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "image_sample_cd_o">;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "image_sample_cd_cl_o">;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "image_sample_c_cd_o">;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o">;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
//===----------------------------------------------------------------------===//
+// Flat Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasFlatAddressSpace] in {
+def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x00000008, "flat_load_ubyte", VReg_32>;
+def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x00000009, "flat_load_sbyte", VReg_32>;
+def FLAT_LOAD_USHORT : FLAT_Load_Helper <0x0000000a, "flat_load_ushort", VReg_32>;
+def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0x0000000b, "flat_load_sshort", VReg_32>;
+def FLAT_LOAD_DWORD : FLAT_Load_Helper <0x0000000c, "flat_load_dword", VReg_32>;
+def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0x0000000d, "flat_load_dwordx2", VReg_64>;
+def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0x0000000e, "flat_load_dwordx4", VReg_128>;
+def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0x00000010, "flat_load_dwordx3", VReg_96>;
+
+def FLAT_STORE_BYTE : FLAT_Store_Helper <
+ 0x00000018, "flat_store_byte", VReg_32
+>;
+
+def FLAT_STORE_SHORT : FLAT_Store_Helper <
+ 0x0000001a, "flat_store_short", VReg_32
+>;
+
+def FLAT_STORE_DWORD : FLAT_Store_Helper <
+ 0x0000001c, "flat_store_dword", VReg_32
+>;
+
+def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
+ 0x0000001d, "flat_store_dwordx2", VReg_64
+>;
+
+def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
+ 0x0000001e, "flat_store_dwordx4", VReg_128
+>;
+
+def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
+ 0x0000001e, "flat_store_dwordx3", VReg_96
+>;
+
+//def FLAT_ATOMIC_SWAP : FLAT_ <0x00000030, "flat_atomic_swap", []>;
+//def FLAT_ATOMIC_CMPSWAP : FLAT_ <0x00000031, "flat_atomic_cmpswap", []>;
+//def FLAT_ATOMIC_ADD : FLAT_ <0x00000032, "flat_atomic_add", []>;
+//def FLAT_ATOMIC_SUB : FLAT_ <0x00000033, "flat_atomic_sub", []>;
+//def FLAT_ATOMIC_RSUB : FLAT_ <0x00000034, "flat_atomic_rsub", []>;
+//def FLAT_ATOMIC_SMIN : FLAT_ <0x00000035, "flat_atomic_smin", []>;
+//def FLAT_ATOMIC_UMIN : FLAT_ <0x00000036, "flat_atomic_umin", []>;
+//def FLAT_ATOMIC_SMAX : FLAT_ <0x00000037, "flat_atomic_smax", []>;
+//def FLAT_ATOMIC_UMAX : FLAT_ <0x00000038, "flat_atomic_umax", []>;
+//def FLAT_ATOMIC_AND : FLAT_ <0x00000039, "flat_atomic_and", []>;
+//def FLAT_ATOMIC_OR : FLAT_ <0x0000003a, "flat_atomic_or", []>;
+//def FLAT_ATOMIC_XOR : FLAT_ <0x0000003b, "flat_atomic_xor", []>;
+//def FLAT_ATOMIC_INC : FLAT_ <0x0000003c, "flat_atomic_inc", []>;
+//def FLAT_ATOMIC_DEC : FLAT_ <0x0000003d, "flat_atomic_dec", []>;
+//def FLAT_ATOMIC_FCMPSWAP : FLAT_ <0x0000003e, "flat_atomic_fcmpswap", []>;
+//def FLAT_ATOMIC_FMIN : FLAT_ <0x0000003f, "flat_atomic_fmin", []>;
+//def FLAT_ATOMIC_FMAX : FLAT_ <0x00000040, "flat_atomic_fmax", []>;
+//def FLAT_ATOMIC_SWAP_X2 : FLAT_X2 <0x00000050, "flat_atomic_swap_x2", []>;
+//def FLAT_ATOMIC_CMPSWAP_X2 : FLAT_X2 <0x00000051, "flat_atomic_cmpswap_x2", []>;
+//def FLAT_ATOMIC_ADD_X2 : FLAT_X2 <0x00000052, "flat_atomic_add_x2", []>;
+//def FLAT_ATOMIC_SUB_X2 : FLAT_X2 <0x00000053, "flat_atomic_sub_x2", []>;
+//def FLAT_ATOMIC_RSUB_X2 : FLAT_X2 <0x00000054, "flat_atomic_rsub_x2", []>;
+//def FLAT_ATOMIC_SMIN_X2 : FLAT_X2 <0x00000055, "flat_atomic_smin_x2", []>;
+//def FLAT_ATOMIC_UMIN_X2 : FLAT_X2 <0x00000056, "flat_atomic_umin_x2", []>;
+//def FLAT_ATOMIC_SMAX_X2 : FLAT_X2 <0x00000057, "flat_atomic_smax_x2", []>;
+//def FLAT_ATOMIC_UMAX_X2 : FLAT_X2 <0x00000058, "flat_atomic_umax_x2", []>;
+//def FLAT_ATOMIC_AND_X2 : FLAT_X2 <0x00000059, "flat_atomic_and_x2", []>;
+//def FLAT_ATOMIC_OR_X2 : FLAT_X2 <0x0000005a, "flat_atomic_or_x2", []>;
+//def FLAT_ATOMIC_XOR_X2 : FLAT_X2 <0x0000005b, "flat_atomic_xor_x2", []>;
+//def FLAT_ATOMIC_INC_X2 : FLAT_X2 <0x0000005c, "flat_atomic_inc_x2", []>;
+//def FLAT_ATOMIC_DEC_X2 : FLAT_X2 <0x0000005d, "flat_atomic_dec_x2", []>;
+//def FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_X2 <0x0000005e, "flat_atomic_fcmpswap_x2", []>;
+//def FLAT_ATOMIC_FMIN_X2 : FLAT_X2 <0x0000005f, "flat_atomic_fmin_x2", []>;
+//def FLAT_ATOMIC_FMAX_X2 : FLAT_X2 <0x00000060, "flat_atomic_fmax_x2", []>;
+
+} // End HasFlatAddressSpace predicate
+//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
-//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+//def V_NOP : VOP1_ <0x00000000, "v_nop", []>;
-let neverHasSideEffects = 1, isMoveImm = 1 in {
-defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
-} // End neverHasSideEffects = 1, isMoveImm = 1
+let isMoveImm = 1 in {
+defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
+} // End isMoveImm = 1
let Uses = [EXEC] in {
@@ -1053,136 +1162,139 @@ def V_READFIRSTLANE_B32 : VOP1 <
0x00000002,
(outs SReg_32:$vdst),
(ins VReg_32:$src0),
- "V_READFIRSTLANE_B32 $vdst, $src0",
+ "v_readfirstlane_b32 $vdst, $src0",
[]
>;
}
-defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
- [(set i32:$dst, (fp_to_sint f64:$src0))]
+defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64",
+ VOP_I32_F64, fp_to_sint
>;
-defm V_CVT_F64_I32 : VOP1_64_32 <0x00000004, "V_CVT_F64_I32",
- [(set f64:$dst, (sint_to_fp i32:$src0))]
+defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32",
+ VOP_F64_I32, sint_to_fp
>;
-defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
- [(set f32:$dst, (sint_to_fp i32:$src0))]
+defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32",
+ VOP_F32_I32, sint_to_fp
>;
-defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32",
- [(set f32:$dst, (uint_to_fp i32:$src0))]
+defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32",
+ VOP_F32_I32, uint_to_fp
>;
-defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32",
- [(set i32:$dst, (fp_to_uint f32:$src0))]
+defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
+ VOP_I32_F32, fp_to_uint
>;
-defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
- [(set i32:$dst, (fp_to_sint f32:$src0))]
+defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
+ VOP_I32_F32, fp_to_sint
>;
-defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
-defm V_CVT_F16_F32 : VOP1_32 <0x0000000a, "V_CVT_F16_F32",
- [(set i32:$dst, (f32_to_f16 f32:$src0))]
+defm V_MOV_FED_B32 : VOP1Inst <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
+defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
+ VOP_I32_F32, fp_to_f16
>;
-defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16",
- [(set f32:$dst, (f16_to_f32 i32:$src0))]
+defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
+ VOP_F32_I32, f16_to_fp
>;
-//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
-//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
-//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
-defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64",
- [(set f32:$dst, (fround f64:$src0))]
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "v_cvt_rpi_i32_f32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "v_cvt_flr_i32_f32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "v_cvt_off_f32_i4", []>;
+defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
+ VOP_F32_F64, fround
>;
-defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
- [(set f64:$dst, (fextend f32:$src0))]
+defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32",
+ VOP_F64_F32, fextend
>;
-defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0",
- [(set f32:$dst, (AMDGPUcvt_f32_ubyte0 i32:$src0))]
+defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte0
>;
-defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1",
- [(set f32:$dst, (AMDGPUcvt_f32_ubyte1 i32:$src0))]
+defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte1
>;
-defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2",
- [(set f32:$dst, (AMDGPUcvt_f32_ubyte2 i32:$src0))]
+defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte2
>;
-defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3",
- [(set f32:$dst, (AMDGPUcvt_f32_ubyte3 i32:$src0))]
+defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3",
+ VOP_F32_I32, AMDGPUcvt_f32_ubyte3
>;
-defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64",
- [(set i32:$dst, (fp_to_uint f64:$src0))]
+defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64",
+ VOP_I32_F64, fp_to_uint
>;
-defm V_CVT_F64_U32 : VOP1_64_32 <0x00000016, "V_CVT_F64_U32",
- [(set f64:$dst, (uint_to_fp i32:$src0))]
+defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
+ VOP_F64_I32, uint_to_fp
>;
-defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
- [(set f32:$dst, (AMDGPUfract f32:$src0))]
+defm V_FRACT_F32 : VOP1Inst <vop1<0x20>, "v_fract_f32",
+ VOP_F32_F32, AMDGPUfract
>;
-defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
- [(set f32:$dst, (ftrunc f32:$src0))]
+defm V_TRUNC_F32 : VOP1Inst <vop1<0x21>, "v_trunc_f32",
+ VOP_F32_F32, ftrunc
>;
-defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
- [(set f32:$dst, (fceil f32:$src0))]
+defm V_CEIL_F32 : VOP1Inst <vop1<0x22>, "v_ceil_f32",
+ VOP_F32_F32, fceil
>;
-defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
- [(set f32:$dst, (frint f32:$src0))]
+defm V_RNDNE_F32 : VOP1Inst <vop1<0x23>, "v_rndne_f32",
+ VOP_F32_F32, frint
>;
-defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
- [(set f32:$dst, (ffloor f32:$src0))]
+defm V_FLOOR_F32 : VOP1Inst <vop1<0x24>, "v_floor_f32",
+ VOP_F32_F32, ffloor
>;
-defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
- [(set f32:$dst, (fexp2 f32:$src0))]
+defm V_EXP_F32 : VOP1Inst <vop1<0x25>, "v_exp_f32",
+ VOP_F32_F32, fexp2
>;
-defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
-defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
- [(set f32:$dst, (flog2 f32:$src0))]
+defm V_LOG_CLAMP_F32 : VOP1Inst <vop1<0x26>, "v_log_clamp_f32", VOP_F32_F32>;
+defm V_LOG_F32 : VOP1Inst <vop1<0x27>, "v_log_f32",
+ VOP_F32_F32, flog2
>;
-defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
-defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
-defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
- [(set f32:$dst, (AMDGPUrcp f32:$src0))]
+defm V_RCP_CLAMP_F32 : VOP1Inst <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
+defm V_RCP_LEGACY_F32 : VOP1Inst <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
+defm V_RCP_F32 : VOP1Inst <vop1<0x2a>, "v_rcp_f32",
+ VOP_F32_F32, AMDGPUrcp
+>;
+defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b>, "v_rcp_iflag_f32", VOP_F32_F32>;
+defm V_RSQ_CLAMP_F32 : VOP1Inst <vop1<0x2c>, "v_rsq_clamp_f32",
+ VOP_F32_F32, AMDGPUrsq_clamped
+>;
+defm V_RSQ_LEGACY_F32 : VOP1Inst <vop1<0x2d>, "v_rsq_legacy_f32",
+ VOP_F32_F32, AMDGPUrsq_legacy
>;
-defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
-defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32",
- [(set f32:$dst, (AMDGPUrsq_clamped f32:$src0))]
+defm V_RSQ_F32 : VOP1Inst <vop1<0x2e>, "v_rsq_f32",
+ VOP_F32_F32, AMDGPUrsq
>;
-defm V_RSQ_LEGACY_F32 : VOP1_32 <
- 0x0000002d, "V_RSQ_LEGACY_F32",
- [(set f32:$dst, (AMDGPUrsq_legacy f32:$src0))]
+defm V_RCP_F64 : VOP1Inst <vop1<0x2f>, "v_rcp_f64",
+ VOP_F64_F64, AMDGPUrcp
>;
-defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32",
- [(set f32:$dst, (AMDGPUrsq f32:$src0))]
+defm V_RCP_CLAMP_F64 : VOP1Inst <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>;
+defm V_RSQ_F64 : VOP1Inst <vop1<0x31>, "v_rsq_f64",
+ VOP_F64_F64, AMDGPUrsq
>;
-defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
- [(set f64:$dst, (AMDGPUrcp f64:$src0))]
+defm V_RSQ_CLAMP_F64 : VOP1Inst <vop1<0x32>, "v_rsq_clamp_f64",
+ VOP_F64_F64, AMDGPUrsq_clamped
>;
-defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
-defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64",
- [(set f64:$dst, (AMDGPUrsq f64:$src0))]
+defm V_SQRT_F32 : VOP1Inst <vop1<0x33>, "v_sqrt_f32",
+ VOP_F32_F32, fsqrt
>;
-defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64",
- [(set f64:$dst, (AMDGPUrsq_clamped f64:$src0))]
+defm V_SQRT_F64 : VOP1Inst <vop1<0x34>, "v_sqrt_f64",
+ VOP_F64_F64, fsqrt
>;
-defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
- [(set f32:$dst, (fsqrt f32:$src0))]
+defm V_SIN_F32 : VOP1Inst <vop1<0x35>, "v_sin_f32",
+ VOP_F32_F32, AMDGPUsin
>;
-defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
- [(set f64:$dst, (fsqrt f64:$src0))]
+defm V_COS_F32 : VOP1Inst <vop1<0x36>, "v_cos_f32",
+ VOP_F32_F32, AMDGPUcos
>;
-defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
-defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
-defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
-defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
-defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
-defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
-defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
-//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
-defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
-defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
-//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
-defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
-//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
-defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
-defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
-defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+defm V_NOT_B32 : VOP1Inst <vop1<0x37>, "v_not_b32", VOP_I32_I32>;
+defm V_BFREV_B32 : VOP1Inst <vop1<0x38>, "v_bfrev_b32", VOP_I32_I32>;
+defm V_FFBH_U32 : VOP1Inst <vop1<0x39>, "v_ffbh_u32", VOP_I32_I32>;
+defm V_FFBL_B32 : VOP1Inst <vop1<0x3a>, "v_ffbl_b32", VOP_I32_I32>;
+defm V_FFBH_I32 : VOP1Inst <vop1<0x3b>, "v_ffbh_i32", VOP_I32_I32>;
+//defm V_FREXP_EXP_I32_F64 : VOPInst <0x0000003c, "v_frexp_exp_i32_f64", VOP_I32_F32>;
+defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d>, "v_frexp_mant_f64", VOP_F64_F64>;
+defm V_FRACT_F64 : VOP1Inst <vop1<0x3e>, "v_fract_f64", VOP_F64_F64>;
+//defm V_FREXP_EXP_I32_F32 : VOPInst <0x0000003f, "v_frexp_exp_i32_f32", VOP_I32_F32>;
+defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40>, "v_frexp_mant_f32", VOP_F32_F32>;
+//def V_CLREXCP : VOP1_ <0x00000041, "v_clrexcp", []>;
+defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42>, "v_movreld_b32", VOP_I32_I32>;
+defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43>, "v_movrels_b32", VOP_I32_I32>;
+defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44>, "v_movrelsd_b32", VOP_I32_I32>;
//===----------------------------------------------------------------------===//
@@ -1193,7 +1305,7 @@ def V_INTERP_P1_F32 : VINTRP <
0x00000000,
(outs VReg_32:$dst),
(ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
+ "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
[]> {
let DisableEncoding = "$m0";
}
@@ -1202,7 +1314,7 @@ def V_INTERP_P2_F32 : VINTRP <
0x00000001,
(outs VReg_32:$dst),
(ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
+ "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
[]> {
let Constraints = "$src0 = $dst";
@@ -1214,7 +1326,7 @@ def V_INTERP_MOV_F32 : VINTRP <
0x00000002,
(outs VReg_32:$dst),
(ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
+ "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]",
[]> {
let DisableEncoding = "$m0";
}
@@ -1225,16 +1337,15 @@ def V_INTERP_MOV_F32 : VINTRP <
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
- "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
+ "v_cndmask_b32_e32 $dst, $src0, $src1, [$vcc]",
[]
>{
let DisableEncoding = "$vcc";
}
def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
- InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
- "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
+ (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
+ "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
[(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
> {
let src0_modifiers = 0;
@@ -1246,7 +1357,7 @@ def V_READLANE_B32 : VOP2 <
0x00000001,
(outs SReg_32:$vdst),
(ins VReg_32:$src0, SSrc_32:$vsrc1),
- "V_READLANE_B32 $vdst, $src0, $vsrc1",
+ "v_readlane_b32 $vdst, $src0, $vsrc1",
[]
>;
@@ -1254,245 +1365,320 @@ def V_WRITELANE_B32 : VOP2 <
0x00000002,
(outs VReg_32:$vdst),
(ins SReg_32:$src0, SSrc_32:$vsrc1),
- "V_WRITELANE_B32 $vdst, $src0, $vsrc1",
+ "v_writelane_b32 $vdst, $src0, $vsrc1",
[]
>;
let isCommutable = 1 in {
-defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
- [(set f32:$dst, (fadd f32:$src0, f32:$src1))]
+defm V_ADD_F32 : VOP2Inst <vop2<0x3>, "v_add_f32",
+ VOP_F32_F32_F32, fadd
>;
-defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
- [(set f32:$dst, (fsub f32:$src0, f32:$src1))]
+defm V_SUB_F32 : VOP2Inst <vop2<0x4>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
+defm V_SUBREV_F32 : VOP2Inst <vop2<0x5>, "v_subrev_f32",
+ VOP_F32_F32_F32, null_frag, "v_sub_f32"
>;
-defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
} // End isCommutable = 1
-defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
-
let isCommutable = 1 in {
-defm V_MUL_LEGACY_F32 : VOP2_32 <
- 0x00000007, "V_MUL_LEGACY_F32",
- [(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))]
+defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
+ VOP_F32_F32_F32
>;
-defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
- [(set f32:$dst, (fmul f32:$src0, f32:$src1))]
+defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7>, "v_mul_legacy_f32",
+ VOP_F32_F32_F32, int_AMDGPU_mul
>;
+defm V_MUL_F32 : VOP2Inst <vop2<0x8>, "v_mul_f32",
+ VOP_F32_F32_F32, fmul
+>;
-defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24",
- [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))]
+defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9>, "v_mul_i32_i24",
+ VOP_I32_I32_I32, AMDGPUmul_i24
>;
-//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
-defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24",
- [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))]
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "v_mul_hi_i32_i24", []>;
+defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb>, "v_mul_u32_u24",
+ VOP_I32_I32_I32, AMDGPUmul_u24
>;
-//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "v_mul_hi_u32_u24", []>;
-defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
- [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))]
+defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
+ VOP_F32_F32_F32, AMDGPUfmin_legacy
>;
-defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
- [(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))]
+defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
+ VOP_F32_F32_F32, AMDGPUfmax_legacy
>;
-defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
-defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
- [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>;
-defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
- [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>;
-defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
- [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>;
-defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
- [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>;
+defm V_MIN_F32 : VOP2Inst <vop2<0xf>, "v_min_f32", VOP_F32_F32_F32, fminnum>;
+defm V_MAX_F32 : VOP2Inst <vop2<0x10>, "v_max_f32", VOP_F32_F32_F32, fmaxnum>;
+defm V_MIN_I32 : VOP2Inst <vop2<0x11>, "v_min_i32", VOP_I32_I32_I32, AMDGPUsmin>;
+defm V_MAX_I32 : VOP2Inst <vop2<0x12>, "v_max_i32", VOP_I32_I32_I32, AMDGPUsmax>;
+defm V_MIN_U32 : VOP2Inst <vop2<0x13>, "v_min_u32", VOP_I32_I32_I32, AMDGPUumin>;
+defm V_MAX_U32 : VOP2Inst <vop2<0x14>, "v_max_u32", VOP_I32_I32_I32, AMDGPUumax>;
-defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
- [(set i32:$dst, (srl i32:$src0, i32:$src1))]
->;
+defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
-defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
+defm V_LSHRREV_B32 : VOP2Inst <
+ vop2<0x16>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32"
+>;
-defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
- [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
+ VOP_I32_I32_I32, sra
+>;
+defm V_ASHRREV_I32 : VOP2Inst <
+ vop2<0x18>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32"
>;
-defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
let hasPostISelHook = 1 in {
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
- [(set i32:$dst, (shl i32:$src0, i32:$src1))]
->;
+defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
}
-defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
+defm V_LSHLREV_B32 : VOP2Inst <
+ vop2<0x1a>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32"
+>;
-defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
- [(set i32:$dst, (and i32:$src0, i32:$src1))]>;
-defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
- [(set i32:$dst, (or i32:$src0, i32:$src1))]
+defm V_AND_B32 : VOP2Inst <vop2<0x1b>, "v_and_b32",
+ VOP_I32_I32_I32, and>;
+defm V_OR_B32 : VOP2Inst <vop2<0x1c>, "v_or_b32",
+ VOP_I32_I32_I32, or
>;
-defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
- [(set i32:$dst, (xor i32:$src0, i32:$src1))]
+defm V_XOR_B32 : VOP2Inst <vop2<0x1d>, "v_xor_b32",
+ VOP_I32_I32_I32, xor
>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
- [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
-defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
-defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
-defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
-defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
-defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
-defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32",
+ VOP_I32_I32_I32, AMDGPUbfm>;
+
+let isCommutable = 1 in {
+defm V_MAC_F32 : VOP2Inst <vop2<0x1f>, "v_mac_f32", VOP_F32_F32_F32>;
+} // End isCommutable = 1
+
+defm V_MADMK_F32 : VOP2Inst <vop2<0x20>, "v_madmk_f32", VOP_F32_F32_F32>;
+
+let isCommutable = 1 in {
+defm V_MADAK_F32 : VOP2Inst <vop2<0x21>, "v_madak_f32", VOP_F32_F32_F32>;
+} // End isCommutable = 1
+
+
+defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>;
+defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32",
+  VOP_I32_I32_I32
+>;
+defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32",
+ VOP_I32_I32_I32
+>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
- [(set i32:$dst, (add i32:$src0, i32:$src1))], VSrc_32>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
- [(set i32:$dst, (sub i32:$src0, i32:$src1))], VSrc_32>;
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
- "V_SUB_I32">;
+defm V_ADD_I32 : VOP2bInst <vop2<0x25>, "v_add_i32",
+ VOP_I32_I32_I32, add
+>;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26>, "v_sub_i32",
+ VOP_I32_I32_I32, sub
+>;
+defm V_SUBREV_I32 : VOP2bInst <vop2<0x27>, "v_subrev_i32",
+ VOP_I32_I32_I32, null_frag, "v_sub_i32"
+>;
let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32",
- [(set i32:$dst, (adde i32:$src0, i32:$src1))], VReg_32>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32",
- [(set i32:$dst, (sube i32:$src0, i32:$src1))], VReg_32>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
- "V_SUBB_U32">;
+defm V_ADDC_U32 : VOP2bInst <vop2<0x28>, "v_addc_u32",
+ VOP_I32_I32_I32_VCC, adde
+>;
+defm V_SUBB_U32 : VOP2bInst <vop2<0x29>, "v_subb_u32",
+ VOP_I32_I32_I32_VCC, sube
+>;
+defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a>, "v_subbrev_u32",
+ VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
+>;
+
} // End Uses = [VCC]
} // End isCommutable = 1, Defs = [VCC]
-defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
-////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
-////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
-////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
- [(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))]
+defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32",
+ VOP_F32_F32_I32, AMDGPUldexp
+>;
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32",
+ VOP_I32_F32_F32, int_SI_packf16
>;
-////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
-////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+defm V_MAD_LEGACY_F32 : VOP3Inst <vop3<0x140>, "v_mad_legacy_f32",
+ VOP_F32_F32_F32_F32
+>;
-defm V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
-defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32",
- [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
+defm V_MAD_F32 : VOP3Inst <vop3<0x141>, "v_mad_f32",
+ VOP_F32_F32_F32_F32, fmad
>;
-defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
- [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))]
+
+defm V_MAD_I32_I24 : VOP3Inst <vop3<0x142>, "v_mad_i32_i24",
+ VOP_I32_I32_I32_I32, AMDGPUmad_i24
>;
-defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
- [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))]
+defm V_MAD_U32_U24 : VOP3Inst <vop3<0x143>, "v_mad_u32_u24",
+ VOP_I32_I32_I32_I32, AMDGPUmad_u24
>;
+} // End isCommutable = 1
-} // End neverHasSideEffects
+defm V_CUBEID_F32 : VOP3Inst <vop3<0x144>, "v_cubeid_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBESC_F32 : VOP3Inst <vop3<0x145>, "v_cubesc_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBETC_F32 : VOP3Inst <vop3<0x146>, "v_cubetc_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147>, "v_cubema_f32",
+ VOP_F32_F32_F32_F32
+>;
+defm V_BFE_U32 : VOP3Inst <vop3<0x148>, "v_bfe_u32",
+ VOP_I32_I32_I32_I32, AMDGPUbfe_u32
+>;
+defm V_BFE_I32 : VOP3Inst <vop3<0x149>, "v_bfe_i32",
+ VOP_I32_I32_I32_I32, AMDGPUbfe_i32
+>;
+defm V_BFI_B32 : VOP3Inst <vop3<0x14a>, "v_bfi_b32",
+ VOP_I32_I32_I32_I32, AMDGPUbfi
+>;
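
The bitfield operations referenced above (AMDGPUbfe_u32, AMDGPUbfe_i32, AMDGPUbfi) follow the usual GCN definitions: extract `width` bits of the source starting at `offset`, either zero- or sign-extended, and bitwise-select under a mask. A minimal C sketch of those semantics (my reading of the ISA behaviour, not part of this patch; function names are made up for illustration):

    #include <stdint.h>

    /* v_bfe_u32: extract 'width' bits of 'src' starting at 'offset', zero-extended. */
    static uint32_t bfe_u32(uint32_t src, uint32_t offset, uint32_t width) {
        offset &= 31; width &= 31;
        if (width == 0) return 0;
        return (src >> offset) & ((1u << width) - 1u);
    }

    /* v_bfe_i32: same extraction, but the field is sign-extended. */
    static int32_t bfe_i32(uint32_t src, uint32_t offset, uint32_t width) {
        uint32_t field = bfe_u32(src, offset, width);
        width &= 31;
        if (width == 0) return 0;
        uint32_t sign = 1u << (width - 1);
        return (int32_t)((field ^ sign) - sign);   /* sign-extend the low 'width' bits */
    }

    /* v_bfi_b32: take bits of 'a' where 'mask' is set, bits of 'b' elsewhere. */
    static uint32_t bfi_b32(uint32_t mask, uint32_t a, uint32_t b) {
        return (a & mask) | (b & ~mask);
    }
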
-defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
-defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
-defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
-defm V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+let isCommutable = 1 in {
+defm V_FMA_F32 : VOP3Inst <vop3<0x14b>, "v_fma_f32",
+ VOP_F32_F32_F32_F32, fma
+>;
+defm V_FMA_F64 : VOP3Inst <vop3<0x14c>, "v_fma_f64",
+ VOP_F64_F64_F64_F64, fma
+>;
+} // End isCommutable = 1
-let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
-defm V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32",
- [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>;
-defm V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32",
- [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
-}
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>;
+defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e>, "v_alignbit_b32",
+ VOP_I32_I32_I32_I32
+>;
+defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f>, "v_alignbyte_b32",
+ VOP_I32_I32_I32_I32
+>;
+defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
+ VOP_F32_F32_F32_F32>;
+defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
+ VOP_F32_F32_F32_F32, AMDGPUfmin3>;
-defm V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
- [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>;
-defm V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
- [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
->;
-def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64",
- [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
->;
-//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
-defm V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
-
-defm V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
-defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
-////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
-////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
-////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
-////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
-////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
-////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
-////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
-////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
-////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
-//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
-//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
-//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
-defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
-////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
-defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32",
- [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))]
->;
-def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64",
- [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))]
->;
-
-def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64",
- [(set i64:$dst, (shl i64:$src0, i32:$src1))]
+defm V_MIN3_I32 : VOP3Inst <vop3<0x152>, "v_min3_i32",
+ VOP_I32_I32_I32_I32, AMDGPUsmin3
>;
-def V_LSHR_B64 : VOP3_64_32 <0x00000162, "V_LSHR_B64",
- [(set i64:$dst, (srl i64:$src0, i32:$src1))]
+defm V_MIN3_U32 : VOP3Inst <vop3<0x153>, "v_min3_u32",
+ VOP_I32_I32_I32_I32, AMDGPUumin3
>;
-def V_ASHR_I64 : VOP3_64_32 <0x00000163, "V_ASHR_I64",
- [(set i64:$dst, (sra i64:$src0, i32:$src1))]
+defm V_MAX3_F32 : VOP3Inst <vop3<0x154>, "v_max3_f32",
+ VOP_F32_F32_F32_F32, AMDGPUfmax3
+>;
+defm V_MAX3_I32 : VOP3Inst <vop3<0x155>, "v_max3_i32",
+ VOP_I32_I32_I32_I32, AMDGPUsmax3
+>;
+defm V_MAX3_U32 : VOP3Inst <vop3<0x156>, "v_max3_u32",
+ VOP_I32_I32_I32_I32, AMDGPUumax3
+>;
+//def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
+//def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
+//def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
+defm V_SAD_U32 : VOP3Inst <vop3<0x15d>, "v_sad_u32",
+ VOP_I32_I32_I32_I32
+>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
+defm V_DIV_FIXUP_F32 : VOP3Inst <
+ vop3<0x15f>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
+>;
+defm V_DIV_FIXUP_F64 : VOP3Inst <
+ vop3<0x160>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
+>;
+
+defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
+ VOP_I64_I64_I32, shl
+>;
+defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
+ VOP_I64_I64_I32, srl
+>;
+defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
+ VOP_I64_I64_I32, sra
>;
let isCommutable = 1 in {
-def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
-def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
-def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
-def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+defm V_ADD_F64 : VOP3Inst <vop3<0x164>, "v_add_f64",
+ VOP_F64_F64_F64, fadd
+>;
+defm V_MUL_F64 : VOP3Inst <vop3<0x165>, "v_mul_f64",
+ VOP_F64_F64_F64, fmul
+>;
+
+defm V_MIN_F64 : VOP3Inst <vop3<0x166>, "v_min_f64",
+ VOP_F64_F64_F64, fminnum
+>;
+defm V_MAX_F64 : VOP3Inst <vop3<0x167>, "v_max_f64",
+ VOP_F64_F64_F64, fmaxnum
+>;
} // isCommutable = 1
-def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+defm V_LDEXP_F64 : VOP3Inst <vop3<0x168>, "v_ldexp_f64",
+ VOP_F64_F64_I32, AMDGPUldexp
+>;
let isCommutable = 1 in {
-defm V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
-defm V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
-defm V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
-defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169>, "v_mul_lo_u32",
+ VOP_I32_I32_I32
+>;
+defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a>, "v_mul_hi_u32",
+ VOP_I32_I32_I32
+>;
+defm V_MUL_LO_I32 : VOP3Inst <vop3<0x16b>, "v_mul_lo_i32",
+ VOP_I32_I32_I32
+>;
+defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c>, "v_mul_hi_i32",
+ VOP_I32_I32_I32
+>;
} // isCommutable = 1
-def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d>, "v_div_scale_f32", []>;
// Double precision division pre-scale.
-def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e>, "v_div_scale_f64", []>;
-defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32",
- [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))]
+let isCommutable = 1 in {
+defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f>, "v_div_fmas_f32",
+ VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
-def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64",
- [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))]
+defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170>, "v_div_fmas_f64",
+ VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
>;
-//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
-//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
-//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
-def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64",
- [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))]
+} // End isCommutable = 1
+
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>;
+
+defm V_TRIG_PREOP_F64 : VOP3Inst <
+ vop3<0x174>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
>;
//===----------------------------------------------------------------------===//
@@ -1517,6 +1703,15 @@ def V_OR_I1 : InstSI <
[(set i1:$dst, (or i1:$src0, i1:$src1))]
>;
+def V_XOR_I1 : InstSI <
+ (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
+ [(set i1:$dst, (xor i1:$src0, i1:$src1))]
+>;
+
+let hasSideEffects = 1 in {
+def SGPR_USE : InstSI <(outs),(ins), "", []>;
+}
+
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
@@ -1544,7 +1739,7 @@ def SI_ELSE : InstSI <
def SI_LOOP : InstSI <
(outs),
(ins SReg_64:$saved, brtarget:$target),
- "SI_LOOP $saved, $target",
+ "si_loop $saved, $target",
[(int_SI_loop i64:$saved, bb:$target)]
>;
@@ -1553,35 +1748,35 @@ def SI_LOOP : InstSI <
def SI_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src),
- "SI_ELSE $dst, $src",
+ "si_else $dst, $src",
[(set i64:$dst, (int_SI_break i64:$src))]
>;
def SI_IF_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, SReg_64:$src),
- "SI_IF_BREAK $dst, $vcc, $src",
+ "si_if_break $dst, $vcc, $src",
[(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
>;
def SI_ELSE_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src0, SReg_64:$src1),
- "SI_ELSE_BREAK $dst, $src0, $src1",
+ "si_else_break $dst, $src0, $src1",
[(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
>;
def SI_END_CF : InstSI <
(outs),
(ins SReg_64:$saved),
- "SI_END_CF $saved",
+ "si_end_cf $saved",
[(int_SI_end_cf i64:$saved)]
>;
def SI_KILL : InstSI <
(outs),
(ins VSrc_32:$src),
- "SI_KILL $src",
+ "si_kill $src",
[(int_AMDGPU_kill f32:$src)]
>;
@@ -1623,14 +1818,14 @@ def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
def SI_INDIRECT_SRC : InstSI <
(outs VReg_32:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off),
- "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ "si_indirect_src $dst, $temp, $src, $idx, $off",
[]
>;
class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
(outs rc:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
- "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ "si_indirect_dst $dst, $temp, $src, $idx, $off, $val",
[]
> {
let Constraints = "$src = $dst";
@@ -1646,18 +1841,10 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
let usesCustomInserter = 1 in {
-// This pseudo instruction takes a pointer as input and outputs a resource
-// constant that can be used with the ADDR64 MUBUF instructions.
-def SI_ADDR64_RSRC : InstSI <
- (outs SReg_128:$srsrc),
- (ins SSrc_64:$ptr),
- "", []
->;
-
def V_SUB_F64 : InstSI <
(outs VReg_64:$dst),
(ins VReg_64:$src0, VReg_64:$src1),
- "V_SUB_F64 $dst, $src0, $src1",
+ "v_sub_f64 $dst, $src0, $src1",
[(set f64:$dst, (fsub f64:$src0, f64:$src1))]
>;
@@ -1666,14 +1853,14 @@ def V_SUB_F64 : InstSI <
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
def _SAVE : InstSI <
- (outs VReg_32:$dst),
+ (outs),
(ins sgpr_class:$src, i32imm:$frame_idx),
"", []
>;
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
- (ins VReg_32:$src, i32imm:$frame_idx),
+ (ins i32imm:$frame_idx),
"", []
>;
@@ -1685,6 +1872,37 @@ defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
+ def _SAVE : InstSI <
+ (outs),
+ (ins vgpr_class:$src, i32imm:$frame_idx),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs vgpr_class:$dst),
+ (ins i32imm:$frame_idx),
+ "", []
+ >;
+}
+
+defm SI_SPILL_V32 : SI_SPILL_VGPR <VReg_32>;
+defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
+defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
+defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
+defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
+
+let Defs = [SCC] in {
+
+def SI_CONSTDATA_PTR : InstSI <
+ (outs SReg_64:$dst),
+ (ins),
+ "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))]
+>;
+
+} // End Defs = [SCC]
+
} // end IsCodeGenOnly, isPseudo
} // end SubtargetPredicate = SI
@@ -1693,7 +1911,9 @@ let Predicates = [isSI] in {
def : Pat<
(int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
- (V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0))
+ (V_CNDMASK_B32_e64 $src2, $src1,
+ (V_CMP_GT_F32_e64 SRCMODS.NONE, 0, SRCMODS.NONE, $src0,
+ DSTCLAMP.NONE, DSTOMOD.NONE))
>;
def : Pat <
@@ -1766,27 +1986,26 @@ def : Pat <
// SOP1 Patterns
//===----------------------------------------------------------------------===//
-let Predicates = [isSI, isCFDepth0] in {
-
def : Pat <
(i64 (ctpop i64:$src)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_BCNT1_I32_B64 $src), sub0),
- (S_MOV_B32 0), sub1)
+ (i64 (REG_SEQUENCE SReg_64,
+ (S_BCNT1_I32_B64 $src), sub0,
+ (S_MOV_B32 0), sub1))
>;
-} // Predicates = [isSI, isCFDepth0]
-
-let Predicates = [isSI] in {
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
+// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector
+// case, the sgpr-copies pass will fix this to use the vector version.
def : Pat <
- (i1 (xor i1:$src0, i1:$src1)),
- (S_XOR_B64 $src0, $src1)
+ (i32 (addc i32:$src0, i32:$src1)),
+ (S_ADD_U32 $src0, $src1)
>;
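
The comment above is the key to this pattern: `addc` produces a 32-bit sum plus a carry bit, and S_ADD_U32 records that carry in SCC (the vector V_ADD_I32_e32 records it in VCC). A small C sketch of the carry semantics being modelled, including the chained add that the adde/V_ADDC_U32 form performs (illustrative only, not from the patch):

    #include <stdint.h>

    /* addc: 32-bit add that also reports the carry-out (what SCC/VCC holds). */
    static uint32_t addc(uint32_t a, uint32_t b, uint32_t *carry_out) {
        uint32_t sum = a + b;
        *carry_out = (sum < a);          /* unsigned wrap-around => carry */
        return sum;
    }

    /* adde: 32-bit add with carry-in, e.g. the high half of a 64-bit add. */
    static uint32_t adde(uint32_t a, uint32_t b, uint32_t carry_in, uint32_t *carry_out) {
        uint64_t wide = (uint64_t)a + b + carry_in;
        *carry_out = (uint32_t)(wide >> 32);
        return (uint32_t)wide;
    }
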
+let Predicates = [isSI] in {
+
//===----------------------------------------------------------------------===//
// SOPP Patterns
//===----------------------------------------------------------------------===//
@@ -1800,67 +2019,106 @@ def : Pat <
// VOP1 Patterns
//===----------------------------------------------------------------------===//
-def : RcpPat<V_RCP_F32_e32, f32>;
+let Predicates = [UnsafeFPMath] in {
def : RcpPat<V_RCP_F64_e32, f64>;
-defm : RsqPat<V_RSQ_F32_e32, f32>;
defm : RsqPat<V_RSQ_F64_e32, f64>;
+defm : RsqPat<V_RSQ_F32_e32, f32>;
+}
//===----------------------------------------------------------------------===//
// VOP2 Patterns
//===----------------------------------------------------------------------===//
-class BinOp64Pat <SDNode node, Instruction inst> : Pat <
- (node i64:$src0, i64:$src1),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (inst (EXTRACT_SUBREG i64:$src0, sub0),
- (EXTRACT_SUBREG i64:$src1, sub0)), sub0),
- (inst (EXTRACT_SUBREG i64:$src0, sub1),
- (EXTRACT_SUBREG i64:$src1, sub1)), sub1)
->;
-
-def : BinOp64Pat <or, V_OR_B32_e32>;
-def : BinOp64Pat <xor, V_XOR_B32_e32>;
-
-class SextInReg <ValueType vt, int ShiftAmt> : Pat <
- (sext_inreg i32:$src0, vt),
- (V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0))
->;
-
-def : SextInReg <i8, 24>;
-def : SextInReg <i16, 16>;
-
def : Pat <
(i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
- (V_BCNT_U32_B32_e32 $popcnt, $val)
->;
-
-def : Pat <
- (i32 (ctpop i32:$popcnt)),
- (V_BCNT_U32_B32_e64 $popcnt, 0, 0, 0)
->;
-
-def : Pat <
- (i64 (ctpop i64:$src)),
- (INSERT_SUBREG
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (V_BCNT_U32_B32_e32 (EXTRACT_SUBREG $src, sub1),
- (V_BCNT_U32_B32_e64 (EXTRACT_SUBREG $src, sub0), 0, 0, 0)),
- sub0),
- (V_MOV_B32_e32 0), sub1)
+ (V_BCNT_U32_B32_e64 $popcnt, $val)
>;
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
+// Image + sampler
class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, i32:$dmask, i32:$unorm,
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
(opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
(as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
$addr, $rsrc, $sampler)
>;
+multiclass SampleRawPatterns<SDPatternOperator name, string opcode> {
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
+}
+
+// Image only
+class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, i32:$dmask, i32:$unorm,
+ i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
+ (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
+ $addr, $rsrc)
+>;
+
+multiclass ImagePatterns<SDPatternOperator name, string opcode> {
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
+}
+
+// Basic sample
+defm : SampleRawPatterns<int_SI_image_sample, "IMAGE_SAMPLE">;
+defm : SampleRawPatterns<int_SI_image_sample_cl, "IMAGE_SAMPLE_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_d, "IMAGE_SAMPLE_D">;
+defm : SampleRawPatterns<int_SI_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_l, "IMAGE_SAMPLE_L">;
+defm : SampleRawPatterns<int_SI_image_sample_b, "IMAGE_SAMPLE_B">;
+defm : SampleRawPatterns<int_SI_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_lz, "IMAGE_SAMPLE_LZ">;
+defm : SampleRawPatterns<int_SI_image_sample_cd, "IMAGE_SAMPLE_CD">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
+
+// Sample with comparison
+defm : SampleRawPatterns<int_SI_image_sample_c, "IMAGE_SAMPLE_C">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
+
+// Sample with offsets
+defm : SampleRawPatterns<int_SI_image_sample_o, "IMAGE_SAMPLE_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
+defm : SampleRawPatterns<int_SI_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
+defm : SampleRawPatterns<int_SI_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
+defm : SampleRawPatterns<int_SI_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
+
+// Sample with comparison and offsets
+defm : SampleRawPatterns<int_SI_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
+
+// Gather opcodes
// Only the variants which make sense are defined.
def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
@@ -1905,6 +2163,10 @@ def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
+def : ImagePattern<int_SI_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
+defm : ImagePatterns<int_SI_image_load, "IMAGE_LOAD">;
+defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;
+
/* SIsample for simple 1D texture lookup */
def : Pat <
(SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
@@ -2143,62 +2405,63 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
/********** Src & Dst modifiers **********/
/********** =================== **********/
-def FCLAMP_SI : AMDGPUShaderInst <
- (outs VReg_32:$dst),
- (ins VSrc_32:$src0),
- "FCLAMP_SI $dst, $src0",
- []
-> {
- let usesCustomInserter = 1;
-}
-
def : Pat <
- (AMDGPUclamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
- (FCLAMP_SI f32:$src)
+ (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
+ (f32 FP_ZERO), (f32 FP_ONE)),
+ (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
>;
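
The replacement pattern relies on the VOP3 clamp bit: adding 0.0 with clamp set leaves the value unchanged except that the result is saturated to [0.0, 1.0], which is exactly what AMDGPUclamp with FP_ZERO/FP_ONE bounds asks for. Roughly, in C (a sketch of the intended semantics, ignoring NaN and denormal-flushing details):

    /* What "v_add_f32_e64 dst, src, 0.0 clamp" computes for ordinary inputs. */
    static float clamp01(float x) {
        float y = x + 0.0f;              /* the add itself is a no-op here      */
        if (y < 0.0f) y = 0.0f;          /* clamp bit: saturate to [0.0, 1.0]   */
        if (y > 1.0f) y = 1.0f;
        return y;
    }
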
/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/
-// Manipulate the sign bit directly, as e.g. using the source negation modifier
-// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
-// breaking the piglit *s-floatBitsToInt-neg* tests
-
-// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
-// removing these patterns
+// Prevent expanding both fneg and fabs.
+// FIXME: Should use S_OR_B32
def : Pat <
(fneg (fabs f32:$src)),
(V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
>;
-def FABS_SI : AMDGPUShaderInst <
- (outs VReg_32:$dst),
- (ins VSrc_32:$src0),
- "FABS_SI $dst, $src0",
- []
-> {
- let usesCustomInserter = 1;
-}
+// FIXME: Should use S_OR_B32
+def : Pat <
+ (fneg (fabs f64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+ (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x80000000)), // Set sign bit.
+ sub1)
+>;
def : Pat <
(fabs f32:$src),
- (FABS_SI f32:$src)
+ (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff))
>;
-def FNEG_SI : AMDGPUShaderInst <
- (outs VReg_32:$dst),
- (ins VSrc_32:$src0),
- "FNEG_SI $dst, $src0",
- []
-> {
- let usesCustomInserter = 1;
-}
-
def : Pat <
(fneg f32:$src),
- (FNEG_SI f32:$src)
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))
+>;
+
+def : Pat <
+ (fabs f64:$src),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+ (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x7fffffff)), // Clear sign bit.
+ sub1)
+>;
+
+def : Pat <
+ (fneg f64:$src),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ sub0,
+ (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x80000000)),
+ sub1)
>;
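
These patterns implement fabs/fneg by editing the IEEE-754 sign bit directly (bit 31 of an f32, and bit 31 of the high dword of an f64), which, as the removed comment notes, also gets -0.0 right where `0 - x` would not. A standalone C illustration of the three bit tricks used above (a sketch, not part of the patch; helper names are invented):

    #include <stdint.h>
    #include <string.h>

    static uint32_t f32_bits(float f)    { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
    static float    bits_f32(uint32_t u) { float f;    memcpy(&f, &u, sizeof f); return f; }

    static float fabs_bits(float x)  { return bits_f32(f32_bits(x) & 0x7fffffffu); } /* clear sign */
    static float fneg_bits(float x)  { return bits_f32(f32_bits(x) ^ 0x80000000u); } /* flip sign  */
    static float fnabs_bits(float x) { return bits_f32(f32_bits(x) | 0x80000000u); } /* force sign */

    /* fneg_bits(+0.0f) yields -0.0f, whereas 0.0f - 0.0f yields +0.0f
       under round-to-nearest, which is why the sign bit is edited directly. */
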
/********** ================== **********/
@@ -2260,44 +2523,31 @@ def : Pat <
>;
def : Pat<
- (fdiv f32:$src0, f32:$src1),
- (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1))
->;
-
-def : Pat<
(fdiv f64:$src0, f64:$src1),
- (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0))
->;
-
-def : Pat <
- (fcos f32:$src0),
- (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
->;
-
-def : Pat <
- (fsin f32:$src0),
- (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+ (V_MUL_F64 0 /* src0_modifiers */, $src0,
+ 0 /* src1_modifiers */, (V_RCP_F64_e32 $src1),
+ 0 /* clamp */, 0 /* omod */)
>;
def : Pat <
(int_AMDGPU_cube v4f32:$src),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
- (EXTRACT_SUBREG $src, sub1),
- (EXTRACT_SUBREG $src, sub2)),
- sub0),
- (V_CUBESC_F32 (EXTRACT_SUBREG $src, sub0),
- (EXTRACT_SUBREG $src, sub1),
- (EXTRACT_SUBREG $src, sub2)),
- sub1),
- (V_CUBEMA_F32 (EXTRACT_SUBREG $src, sub0),
- (EXTRACT_SUBREG $src, sub1),
- (EXTRACT_SUBREG $src, sub2)),
- sub2),
- (V_CUBEID_F32 (EXTRACT_SUBREG $src, sub0),
- (EXTRACT_SUBREG $src, sub1),
- (EXTRACT_SUBREG $src, sub2)),
- sub3)
+ (REG_SEQUENCE VReg_128,
+ (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub0,
+ (V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub1,
+ (V_CUBEMA_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub2,
+ (V_CUBEID_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
+ 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
+ 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ 0 /* clamp */, 0 /* omod */), sub3)
>;
def : Pat <
@@ -2316,7 +2566,7 @@ def : Ext32Pat <anyext>;
// Offset in an 32Bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
+ (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -2330,7 +2580,7 @@ def : Pat <
def : Pat <
(int_SI_tid),
(V_MBCNT_HI_U32_B32_e32 0xffffffff,
- (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0))
+ (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
>;
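
The int_SI_tid lowering chains v_mbcnt_lo/v_mbcnt_hi with an all-ones mask; each mbcnt counts the set bits of its mask that belong to lanes below the current one, so with mask = ~0 the result is simply the lane's index within the 64-lane wavefront. A rough C model of that computation (my understanding of the mbcnt semantics, offered as a sketch only):

    #include <stdint.h>

    /* Count the mask bits strictly below 'lane' - what the mbcnt pair computes. */
    static uint32_t mbcnt(uint64_t mask, uint32_t lane) {
        uint64_t below = (lane == 0) ? 0 : (mask & ((1ULL << lane) - 1));
        uint32_t count = 0;
        while (below) { count += (uint32_t)(below & 1); below >>= 1; }
        return count;
    }

    /* With mask = ~0ULL, mbcnt(mask, lane) == lane, i.e. the thread id in the wave. */
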
//===----------------------------------------------------------------------===//
@@ -2341,84 +2591,73 @@ def : IMad24Pat<V_MAD_I32_I24>;
def : UMad24Pat<V_MAD_U32_U24>;
def : Pat <
- (fadd f64:$src0, f64:$src1),
- (V_ADD_F64 $src0, $src1, (i64 0))
->;
-
-def : Pat <
- (fmul f64:$src0, f64:$src1),
- (V_MUL_F64 $src0, $src1, (i64 0))
->;
-
-def : Pat <
- (mul i32:$src0, i32:$src1),
- (V_MUL_LO_I32 $src0, $src1, (i32 0))
->;
-
-def : Pat <
(mulhu i32:$src0, i32:$src1),
- (V_MUL_HI_U32 $src0, $src1, (i32 0))
+ (V_MUL_HI_U32 $src0, $src1)
>;
def : Pat <
(mulhs i32:$src0, i32:$src1),
- (V_MUL_HI_I32 $src0, $src1, (i32 0))
+ (V_MUL_HI_I32 $src0, $src1)
>;
-defm : BFIPatterns <V_BFI_B32, S_MOV_B32>;
+def : Vop3ModPat<V_MAD_F32, VOP_F32_F32_F32_F32, AMDGPUmad>;
+
+
+defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
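
ROTRPattern maps a 32-bit rotate-right onto v_alignbit_b32, which extracts 32 contiguous bits from the 64-bit concatenation of its first two operands at a bit offset given by the third; feeding the same register as both halves turns that extraction into a rotate. A small C sketch of why that works (illustrative, assuming the usual alignbit definition):

    #include <stdint.h>

    /* alignbit(hi, lo, shift): bits [shift+31 : shift] of the 64-bit value (hi:lo). */
    static uint32_t alignbit_b32(uint32_t hi, uint32_t lo, uint32_t shift) {
        uint64_t wide = ((uint64_t)hi << 32) | lo;
        return (uint32_t)(wide >> (shift & 31));
    }

    /* rotr32(x, s) == alignbit(x, x, s): the wrapped-around bits come from the high copy. */
    static uint32_t rotr32(uint32_t x, uint32_t s) {
        return alignbit_b32(x, x, s);
    }
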
/********** ======================= **********/
/********** Load/Store Patterns **********/
/********** ======================= **********/
-multiclass DSReadPat <DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (vt (frag (add i32:$ptr, (i32 IMM16bit:$offset)))),
- (inst (i1 0), $ptr, (as_i16imm $offset))
- >;
+class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
+ (inst (i1 0), $ptr, (as_i16imm $offset))
+>;
- def : Pat <
- (frag i32:$src0),
- (vt (inst 0, $src0, 0))
- >;
-}
+def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
+def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
+def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
+def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
+def : DSReadPat <DS_READ_B32, i32, local_load>;
-defm : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
-defm : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
-defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
-defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
-defm : DSReadPat <DS_READ_B32, i32, local_load>;
-defm : DSReadPat <DS_READ_B64, v2i32, local_load>;
+let AddedComplexity = 100 in {
-multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (frag vt:$value, (add i32:$ptr, (i32 IMM16bit:$offset))),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
- >;
+def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
- def : Pat <
- (frag vt:$val, i32:$ptr),
- (inst 0, $ptr, $val, 0)
- >;
-}
+} // End AddedComplexity = 100
-defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
-defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
-defm : DSWritePat <DS_WRITE_B32, i32, local_store>;
-defm : DSWritePat <DS_WRITE_B64, v2i32, local_store>;
+def : Pat <
+ (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1))),
+ (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1)
+>;
-multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
- >;
+class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+>;
- def : Pat <
- (frag i32:$ptr, vt:$val),
- (inst 0, $ptr, $val, 0)
- >;
-}
+def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
+def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
+def : DSWritePat <DS_WRITE_B32, i32, local_store>;
+
+let AddedComplexity = 100 in {
+
+def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
+} // End AddedComplexity = 100
+
+def : Pat <
+ (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1)),
+ (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
+>;
+
+class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
//
@@ -2430,69 +2669,56 @@ multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
// needs to be a VGPR. The SGPR copy pass will fix this, and it's
// easier since there is no v_mov_b64.
-multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
- Instruction LoadImm, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
- >;
-
- def : Pat <
- (frag i32:$ptr, (vt 1)),
- (inst 0, $ptr, (LoadImm (vt -1)), 0)
- >;
-}
+class DSAtomicIncRetPat<DS inst, ValueType vt,
+ Instruction LoadImm, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
+ (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
+>;
-multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
- def : Pat <
- (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
- >;
- def : Pat <
- (frag i32:$ptr, vt:$cmp, vt:$swap),
- (inst 0, $ptr, $cmp, $swap, 0)
- >;
-}
+class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
+ (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
+>;
// 32-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, atomic_load_sub_local>;
-
-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
-
-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_sub_local>;
+
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
// 64-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, atomic_load_sub_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
//===----------------------------------------------------------------------===//
@@ -2502,43 +2728,50 @@ defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> {
def : Pat <
- (vt (constant_ld (add i64:$ptr, i64:$offset))),
- (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0)
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))),
+ (Instr_ADDR64 $srsrc, $vaddr, $offset)
>;
}
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32,
- sextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32,
- az_extloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32,
- sextloadi16_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32,
- az_extloadi16_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32,
- constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32,
- constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
- constant_load>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, constant_load>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, constant_load>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
+
+class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
+ (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
+ i32:$soffset, u16imm:$offset))),
+ (Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+>;
+
+def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
// BUFFER_LOAD_DWORD*, addr64=0
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
MUBUF bothen> {
def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (offset $rsrc, (as_i16imm $offset), $soffset, (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
def : Pat <
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm, 1, 0, imm:$glc, imm:$slc,
+ imm:$offset, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
+ (offen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
@@ -2566,6 +2799,32 @@ defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
+ (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
+ u16imm:$offset)),
+ (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+>;
+
+def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
+
+/*
+class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
+ (st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)),
+ (Instr $value, $srsrc, $vaddr, $offset)
+>;
+
+def : MUBUFStore_Pattern <BUFFER_STORE_BYTE_ADDR64, i32, truncstorei8_private>;
+def : MUBUFStore_Pattern <BUFFER_STORE_SHORT_ADDR64, i32, truncstorei16_private>;
+def : MUBUFStore_Pattern <BUFFER_STORE_DWORD_ADDR64, i32, store_private>;
+def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2_ADDR64, v2i32, store_private>;
+def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4_ADDR64, v4i32, store_private>;
+
+*/
+
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//
@@ -2590,28 +2849,39 @@ def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
let SubtargetPredicate = isCI in {
// Sea Islands new arithmetic instructions
-let neverHasSideEffects = 1 in {
-defm V_TRUNC_F64 : VOP1_64 <0x00000017, "V_TRUNC_F64",
- [(set f64:$dst, (ftrunc f64:$src0))]
+defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
+ VOP_F64_F64, ftrunc
>;
-defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64",
- [(set f64:$dst, (fceil f64:$src0))]
+defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
+ VOP_F64_F64, fceil
>;
-defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64",
- [(set f64:$dst, (ffloor f64:$src0))]
+defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
+ VOP_F64_F64, ffloor
>;
-defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64",
- [(set f64:$dst, (frint f64:$src0))]
+defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
+ VOP_F64_F64, frint
>;
-defm V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>;
-defm V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>;
-defm V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>;
-def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>;
+defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
+ VOP_I32_I32_I32
+>;
+defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
+ VOP_I32_I32_I32
+>;
+defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
+ VOP_I32_I32_I32
+>;
+
+let isCommutable = 1 in {
+defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
+ VOP_I64_I32_I32_I64
+>;
// XXX - Does this set VCC?
-def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
-} // End neverHasSideEffects = 1
+defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
+ VOP_I64_I32_I32_I64
+>;
+} // End isCommutable = 1
// Remaining instructions:
// FLAT_*
@@ -2636,6 +2906,37 @@ def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
} // End isCI
+//===----------------------------------------------------------------------===//
+// Flat Patterns
+//===----------------------------------------------------------------------===//
+
+class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
+ PatFrag flat_ld> :
+ Pat <(vt (flat_ld i64:$ptr)),
+ (Instr_ADDR64 $ptr)
+>;
+
+def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
+
+class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
+ Pat <(st vt:$value, i64:$ptr),
+ (Instr $value, $ptr)
+ >;
+
+def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
+def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
+def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
/********** ====================== **********/
/**********  Indirect addressing  **********/
@@ -2685,44 +2986,37 @@ defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
def : Pat<(i32 (sext_inreg i32:$src, i1)),
(S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
-// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
-// might not be worth the effort, and will need to expand to shifts when
-// fixing SGPR copies.
-
// Handle sext_inreg in i64
def : Pat <
(i64 (sext_inreg i64:$src, i1)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0), // 0 | 1 << 16
- (S_MOV_B32 -1), sub1)
+ (S_BFE_I64 i64:$src, 0x10000) // 0 | 1 << 16
>;
def : Pat <
(i64 (sext_inreg i64:$src, i8)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
- (S_MOV_B32 -1), sub1)
+ (S_BFE_I64 i64:$src, 0x80000) // 0 | 8 << 16
>;
def : Pat <
(i64 (sext_inreg i64:$src, i16)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
- (S_MOV_B32 -1), sub1)
+ (S_BFE_I64 i64:$src, 0x100000) // 0 | 16 << 16
+>;
+
+def : Pat <
+ (i64 (sext_inreg i64:$src, i32)),
+ (S_BFE_I64 i64:$src, 0x200000) // 0 | 32 << 16
>;
class ZExt_i64_i32_Pat <SDNode ext> : Pat <
(i64 (ext i32:$src)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
- (S_MOV_B32 0), sub1)
+ (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)
>;
class ZExt_i64_i1_Pat <SDNode ext> : Pat <
(i64 (ext i1:$src)),
- (INSERT_SUBREG
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0),
- (S_MOV_B32 0), sub1)
+ (REG_SEQUENCE VReg_64,
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
+ (S_MOV_B32 0), sub1)
>;
@@ -2733,17 +3027,14 @@ def : ZExt_i64_i1_Pat<anyext>;
def : Pat <
(i64 (sext i32:$src)),
- (INSERT_SUBREG
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
- (S_ASHR_I32 $src, 31), sub1)
+ (REG_SEQUENCE SReg_64, $src, sub0,
+ (S_ASHR_I32 $src, 31), sub1)
>;
def : Pat <
(i64 (sext i1:$src)),
- (INSERT_SUBREG
- (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)),
- (V_CNDMASK_B32_e64 0, -1, $src), sub0),
+ (REG_SEQUENCE VReg_64,
+ (V_CNDMASK_B32_e64 0, -1, $src), sub0,
(V_CNDMASK_B32_e64 0, -1, $src), sub1)
>;
@@ -2778,20 +3069,20 @@ def : Pat <
def : Pat <
(i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
>;
-// V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector
-// case, the sgpr-copies pass will fix this to use the vector version.
def : Pat <
- (i32 (addc i32:$src0, i32:$src1)),
- (S_ADD_I32 $src0, $src1)
+ (i32 (bswap i32:$a)),
+ (V_BFI_B32 (S_MOV_B32 0x00ff00ff),
+ (V_ALIGNBIT_B32 $a, $a, 24),
+ (V_ALIGNBIT_B32 $a, $a, 8))
>;
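+// With both sources equal, V_ALIGNBIT_B32 acts as a rotate right, so for
+// $a = 0xAABBCCDD the two rotates give 0xBBCCDDAA and 0xDDAABBCC; selecting
+// bytes with the 0x00ff00ff mask then yields 0xDDCCBBAA, i.e. bswap($a).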
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
-def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
} // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index df690a4..027a0a2 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -54,14 +54,12 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
-
// Fully-flexible SAMPLE instruction.
class SampleRaw : Intrinsic <
[llvm_v4f32_ty], // vdata(VGPR)
[llvm_anyint_ty, // vaddr(VGPR)
- llvm_v32i8_ty, // rsrc(SGPR)
- llvm_v16i8_ty, // sampler(SGPR)
+ llvm_v8i32_ty, // rsrc(SGPR)
+ llvm_v4i32_ty, // sampler(SGPR)
llvm_i32_ty, // dmask(imm)
llvm_i32_ty, // unorm(imm)
llvm_i32_ty, // r128(imm)
@@ -72,10 +70,68 @@ let TargetPrefix = "SI", isTarget = 1 in {
llvm_i32_ty], // lwe(imm)
[IntrNoMem]>;
- def int_SI_sample : Sample;
- def int_SI_sampleb : Sample;
- def int_SI_sampled : Sample;
- def int_SI_samplel : Sample;
+ // Image instruction without a sampler.
+ class Image : Intrinsic <
+ [llvm_v4f32_ty], // vdata(VGPR)
+ [llvm_anyint_ty, // vaddr(VGPR)
+ llvm_v8i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // dmask(imm)
+ llvm_i32_ty, // unorm(imm)
+ llvm_i32_ty, // r128(imm)
+ llvm_i32_ty, // da(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty, // tfe(imm)
+ llvm_i32_ty], // lwe(imm)
+ [IntrNoMem]>;
+
+ // Basic sample
+ def int_SI_image_sample : SampleRaw;
+ def int_SI_image_sample_cl : SampleRaw;
+ def int_SI_image_sample_d : SampleRaw;
+ def int_SI_image_sample_d_cl : SampleRaw;
+ def int_SI_image_sample_l : SampleRaw;
+ def int_SI_image_sample_b : SampleRaw;
+ def int_SI_image_sample_b_cl : SampleRaw;
+ def int_SI_image_sample_lz : SampleRaw;
+ def int_SI_image_sample_cd : SampleRaw;
+ def int_SI_image_sample_cd_cl : SampleRaw;
+
+ // Sample with comparison
+ def int_SI_image_sample_c : SampleRaw;
+ def int_SI_image_sample_c_cl : SampleRaw;
+ def int_SI_image_sample_c_d : SampleRaw;
+ def int_SI_image_sample_c_d_cl : SampleRaw;
+ def int_SI_image_sample_c_l : SampleRaw;
+ def int_SI_image_sample_c_b : SampleRaw;
+ def int_SI_image_sample_c_b_cl : SampleRaw;
+ def int_SI_image_sample_c_lz : SampleRaw;
+ def int_SI_image_sample_c_cd : SampleRaw;
+ def int_SI_image_sample_c_cd_cl : SampleRaw;
+
+ // Sample with offsets
+ def int_SI_image_sample_o : SampleRaw;
+ def int_SI_image_sample_cl_o : SampleRaw;
+ def int_SI_image_sample_d_o : SampleRaw;
+ def int_SI_image_sample_d_cl_o : SampleRaw;
+ def int_SI_image_sample_l_o : SampleRaw;
+ def int_SI_image_sample_b_o : SampleRaw;
+ def int_SI_image_sample_b_cl_o : SampleRaw;
+ def int_SI_image_sample_lz_o : SampleRaw;
+ def int_SI_image_sample_cd_o : SampleRaw;
+ def int_SI_image_sample_cd_cl_o : SampleRaw;
+
+ // Sample with comparison and offsets
+ def int_SI_image_sample_c_o : SampleRaw;
+ def int_SI_image_sample_c_cl_o : SampleRaw;
+ def int_SI_image_sample_c_d_o : SampleRaw;
+ def int_SI_image_sample_c_d_cl_o : SampleRaw;
+ def int_SI_image_sample_c_l_o : SampleRaw;
+ def int_SI_image_sample_c_b_o : SampleRaw;
+ def int_SI_image_sample_c_b_cl_o : SampleRaw;
+ def int_SI_image_sample_c_lz_o : SampleRaw;
+ def int_SI_image_sample_c_cd_o : SampleRaw;
+ def int_SI_image_sample_c_cd_cl_o : SampleRaw;
// Basic gather4
def int_SI_gather4 : SampleRaw;
@@ -111,8 +167,19 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_getlod : SampleRaw;
- def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  // Image intrinsics.
+ def int_SI_image_load : Image;
+ def int_SI_image_load_mip : Image;
+ def int_SI_getresinfo : Image;
+ // Deprecated image and sample intrinsics.
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_SI_sample : Sample;
+ def int_SI_sampleb : Sample;
+ def int_SI_sampled : Sample;
+ def int_SI_samplel : Sample;
+ def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
/* Interpolation Intrinsics */
diff --git a/lib/Target/R600/SILoadStoreOptimizer.cpp b/lib/Target/R600/SILoadStoreOptimizer.cpp
new file mode 100644
index 0000000..4140196
--- /dev/null
+++ b/lib/Target/R600/SILoadStoreOptimizer.cpp
@@ -0,0 +1,417 @@
+//===-- SILoadStoreOptimizer.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to fuse DS instructions with nearby immediate offsets.
+// This will fuse operations such as
+// ds_read_b32 v0, v2 offset:16
+// ds_read_b32 v1, v2 offset:32
+// ==>
+// ds_read2_b32 v[0:1], v2, offset0:4 offset1:8
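+// (read2 offsets are expressed in units of the 4-byte element size, so byte
+// offsets 16 and 32 become offset0:4 and offset1:8)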
+//
+//
+// Future improvements:
+//
+// - This currently relies on the scheduler to place loads and stores next to
+// each other, and then only merges adjacent pairs of instructions. It would
+// be good to be more flexible with interleaved instructions, and possibly run
+//   before scheduling. It currently misses stores of constants because loading
+// the constant into the data register is placed between the stores, although
+// this is arguably a scheduling problem.
+//
+// - Live interval recomputing seems inefficient. This currently only matches
+//   one pair, recomputes live intervals, and moves on to the next pair. It
+//   would be better to compute a list of all merges that need to occur.
+//
+// - With a list of instructions to process, we can also merge more. If a
+//   cluster of loads has offsets that are too large to fit in the 8-bit
+//   offset fields but are close enough to each other, we can add to the base
+//   pointer and use the new, reduced offsets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-load-store-opt"
+
+namespace {
+
+class SILoadStoreOptimizer : public MachineFunctionPass {
+private:
+ const TargetMachine *TM;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+
+ static bool offsetsCanBeCombined(unsigned Offset0,
+ unsigned Offset1,
+ unsigned EltSize);
+
+ MachineBasicBlock::iterator findMatchingDSInst(MachineBasicBlock::iterator I,
+ unsigned EltSize);
+
+ void updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx);
+
+ MachineBasicBlock::iterator mergeRead2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize);
+
+ MachineBasicBlock::iterator mergeWrite2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize);
+
+public:
+ static char ID;
+
+ SILoadStoreOptimizer() :
+ MachineFunctionPass(ID),
+ TM(nullptr),
+ TII(nullptr),
+ TRI(nullptr),
+ MRI(nullptr),
+ LIS(nullptr) {
+
+ }
+
+ SILoadStoreOptimizer(const TargetMachine &TM_) :
+ MachineFunctionPass(ID),
+ TM(&TM_),
+ TII(static_cast<const SIInstrInfo*>(TM->getSubtargetImpl()->getInstrInfo())) {
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool optimizeBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Load / Store Optimizer";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<LiveIntervals>();
+
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
+ "SI Load / Store Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
+ "SI Load / Store Optimizer", false, false)
+
+char SILoadStoreOptimizer::ID = 0;
+
+char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID;
+
+FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) {
+ return new SILoadStoreOptimizer(TM);
+}
+
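+// For example, Offset0 = 8 and Offset1 = 24 with Size = 4 give element offsets
+// 2 and 6, which both fit in 8 bits. Offset0 = 1024 and Offset1 = 2048 give
+// element offsets 256 and 512, which do not fit, but both are multiples of 64,
+// so the st64 forms can encode them as 4 and 8.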
+bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
+ unsigned Offset1,
+ unsigned Size) {
+ // XXX - Would the same offset be OK? Is there any reason this would happen or
+ // be useful?
+ if (Offset0 == Offset1)
+ return false;
+
+ // This won't be valid if the offset isn't aligned.
+ if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
+ return false;
+
+ unsigned EltOffset0 = Offset0 / Size;
+ unsigned EltOffset1 = Offset1 / Size;
+
+ // Check if the new offsets fit in the reduced 8-bit range.
+ if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1))
+ return true;
+
+ // If the offset in elements doesn't fit in 8-bits, we might be able to use
+ // the stride 64 versions.
+ if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64) != 0)
+ return false;
+
+ return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64);
+}
+
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
+ unsigned EltSize){
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator MBBI = I;
+ ++MBBI;
+
+ if (MBBI->getOpcode() != I->getOpcode())
+ return E;
+
+ // Don't merge volatiles.
+ if (MBBI->hasOrderedMemoryRef())
+ return E;
+
+ int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
+ const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
+
+ // Check same base pointer. Be careful of subregisters, which can occur with
+ // vectors of pointers.
+ if (AddrReg0.getReg() == AddrReg1.getReg() &&
+ AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
+ int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
+ AMDGPU::OpName::offset);
+ unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
+ unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
+
+ // Check both offsets fit in the reduced range.
+ if (offsetsCanBeCombined(Offset0, Offset1, EltSize))
+ return MBBI;
+ }
+
+ return E;
+}
+
+void SILoadStoreOptimizer::updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx) {
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg),
+ E = MRI->reg_end(); I != E; ) {
+ MachineOperand &O = *I;
+ ++I;
+ O.substVirtReg(DstReg, SubIdx, *TRI);
+ }
+}
+
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize) {
+ MachineBasicBlock *MBB = I->getParent();
+
+ // Be careful, since the addresses could be subregisters themselves in weird
+ // cases, like vectors of pointers.
+ const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+
+ unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
+ unsigned DestReg1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg();
+
+ unsigned Offset0
+ = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
+ unsigned Offset1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
+
+ unsigned NewOffset0 = Offset0 / EltSize;
+ unsigned NewOffset1 = Offset1 / EltSize;
+ unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
+
+ // Prefer the st64 form if we can use it, even if we can fit the offset in the
+  // non-st64 version. I'm not sure if there's any real reason to do this.
+ bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
+ if (UseST64) {
+ NewOffset0 /= 64;
+ NewOffset1 /= 64;
+ Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
+ }
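+  // E.g. byte offsets 0 and 256 with EltSize == 4 become 0 and 64; both are
+  // multiples of 64, so the st64 form is chosen with offsets 0 and 1.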
+
+ assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
+ (NewOffset0 != NewOffset1) &&
+ "Computed offset doesn't fit");
+
+ const MCInstrDesc &Read2Desc = TII->get(Opc);
+
+ const TargetRegisterClass *SuperRC
+ = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
+ unsigned DestReg = MRI->createVirtualRegister(SuperRC);
+
+ DebugLoc DL = I->getDebugLoc();
+ MachineInstrBuilder Read2
+ = BuildMI(*MBB, I, DL, Read2Desc, DestReg)
+ .addImm(0) // gds
+ .addOperand(*AddrReg) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addMemOperand(*I->memoperands_begin())
+ .addMemOperand(*Paired->memoperands_begin());
+
+ LIS->InsertMachineInstrInMaps(Read2);
+
+ unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+ unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+ updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
+ updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);
+
+ LIS->RemoveMachineInstrFromMaps(I);
+ LIS->RemoveMachineInstrFromMaps(Paired);
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+
+ LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
+ LIS->shrinkToUses(&AddrRegLI);
+
+ LIS->getInterval(DestReg); // Create new LI
+
+ DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
+ return Read2.getInstr();
+}
+
+MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned EltSize) {
+ MachineBasicBlock *MBB = I->getParent();
+
+ // Be sure to use .addOperand(), and not .addReg() with these. We want to be
+ // sure we preserve the subregister index and any register flags set on them.
+ const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+ const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
+ const MachineOperand *Data1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
+
+
+ unsigned Offset0
+ = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
+ unsigned Offset1
+ = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
+
+ unsigned NewOffset0 = Offset0 / EltSize;
+ unsigned NewOffset1 = Offset1 / EltSize;
+ unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
+
+ // Prefer the st64 form if we can use it, even if we can fit the offset in the
+  // non-st64 version. I'm not sure if there's any real reason to do this.
+ bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
+ if (UseST64) {
+ NewOffset0 /= 64;
+ NewOffset1 /= 64;
+ Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
+ }
+
+ assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
+ (NewOffset0 != NewOffset1) &&
+ "Computed offset doesn't fit");
+
+ const MCInstrDesc &Write2Desc = TII->get(Opc);
+ DebugLoc DL = I->getDebugLoc();
+
+ MachineInstrBuilder Write2
+ = BuildMI(*MBB, I, DL, Write2Desc)
+ .addImm(0) // gds
+ .addOperand(*Addr) // addr
+ .addOperand(*Data0) // data0
+ .addOperand(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addMemOperand(*I->memoperands_begin())
+ .addMemOperand(*Paired->memoperands_begin());
+
+ // XXX - How do we express subregisters here?
+ unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
+
+ LIS->RemoveMachineInstrFromMaps(I);
+ LIS->RemoveMachineInstrFromMaps(Paired);
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+
+ LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
+
+ DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
+ return Write2.getInstr();
+}
+
+// Scan through looking for adjacent LDS operations with constant offsets from
+// the same base register. We rely on the scheduler to do the hard work of
+// clustering nearby loads, and assume these are all adjacent.
+bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
+ MachineInstr &MI = *I;
+
+ // Don't combine if volatile.
+ if (MI.hasOrderedMemoryRef()) {
+ ++I;
+ continue;
+ }
+
+ unsigned Opc = MI.getOpcode();
+ if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
+ unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
+ MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
+ if (Match != E) {
+ Modified = true;
+ I = mergeRead2Pair(I, Match, Size);
+ } else {
+ ++I;
+ }
+
+ continue;
+ } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
+ unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
+ MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
+ if (Match != E) {
+ Modified = true;
+ I = mergeWrite2Pair(I, Match, Size);
+ } else {
+ ++I;
+ }
+
+ continue;
+ }
+
+ ++I;
+ }
+
+ return Modified;
+}
+
+bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo *STM = MF.getTarget().getSubtargetImpl();
+ TRI = static_cast<const SIRegisterInfo*>(STM->getRegisterInfo());
+ TII = static_cast<const SIInstrInfo*>(STM->getInstrInfo());
+ MRI = &MF.getRegInfo();
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
+
+ assert(!MRI->isSSA());
+
+ bool Modified = false;
+
+ for (MachineBasicBlock &MBB : MF)
+ Modified |= optimizeBlock(MBB);
+
+ return Modified;
+}
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 9f5ff29..9702565 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -49,8 +49,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -147,7 +149,7 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
- if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
+ if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->getShaderType() !=
ShaderType::PIXEL ||
!shouldSkip(&MBB, &MBB.getParent()->back()))
return;
@@ -298,11 +300,13 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
DebugLoc DL = MI.getDebugLoc();
const MachineOperand &Op = MI.getOperand(0);
- // Kill is only allowed in pixel / geometry shaders
- assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
- ShaderType::PIXEL ||
- MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
- ShaderType::GEOMETRY);
+#ifndef NDEBUG
+ const SIMachineFunctionInfo *MFI
+ = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ // Kill is only allowed in pixel / geometry shaders.
+ assert(MFI->getShaderType() == ShaderType::PIXEL ||
+ MFI->getShaderType() == ShaderType::GEOMETRY);
+#endif
// Clear this thread from the exec mask if the operand is negative
if ((Op.isImm() || Op.isFPImm())) {
@@ -440,13 +444,15 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
- TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI =
+ static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
bool NeedM0 = false;
bool NeedWQM = false;
+ bool NeedFlat = false;
unsigned Depth = 0;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -463,6 +469,12 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
NeedWQM = true;
}
+ // Flat uses m0 in case it needs to access LDS.
+ if (TII->isFLAT(MI.getOpcode())) {
+ NeedM0 = true;
+ NeedFlat = true;
+ }
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
@@ -528,7 +540,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::V_INTERP_MOV_F32:
NeedWQM = true;
break;
-
}
}
}
@@ -540,11 +551,50 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
InitM0ForLDS(MBB.getFirstNonPHI());
}
- if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
+ if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
}
+ // FIXME: This seems inappropriate to do here.
+ if (NeedFlat && MFI->IsKernel) {
+ // Insert the prologue initializing the SGPRs pointing to the scratch space
+ // for flat accesses.
+ const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+ // TODO: What to use with function calls?
+
+ // FIXME: This is reporting stack size that is used in a scratch buffer
+ // rather than registers as well.
+ uint64_t StackSizeBytes = FrameInfo->getStackSize();
+
+ int IndirectBegin
+ = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF);
+ // Convert register index to 256-byte unit.
+ uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256);
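+  // E.g. IndirectBegin == 64 corresponds to byte offset 256, i.e. StackOffset == 1.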
+
+ assert((StackSizeBytes < 0xffff) && StackOffset < 0xffff &&
+ "Stack limits should be smaller than 16-bits");
+
+ // Initialize the flat scratch register pair.
+ // TODO: Can we use one s_mov_b64 here?
+
+  // Offset is in units of 256 bytes.
+ MachineBasicBlock &MBB = MF.front();
+ DebugLoc NoDL;
+ MachineBasicBlock::iterator Start = MBB.getFirstNonPHI();
+ const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32);
+
+ assert(isInt<16>(StackOffset) && isInt<16>(StackSizeBytes));
+
+ BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO)
+ .addImm(StackOffset);
+
+    // Documentation says size is "per-thread scratch size in bytes".
+ BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI)
+ .addImm(StackSizeBytes);
+ }
+
return true;
}
diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp
index 738c90b..65b892c 100644
--- a/lib/Target/R600/SILowerI1Copies.cpp
+++ b/lib/Target/R600/SILowerI1Copies.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "si-i1-copies"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -39,14 +40,14 @@ public:
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const override {
- return "SI Lower il Copies";
+ const char *getPassName() const override {
+ return "SI Lower i1 Copies";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineDominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -55,10 +56,10 @@ public:
} // End anonymous namespace.
INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower il Copies", false, false)
+ "SI Lower i1 Copies", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
- "SI Lower il Copies", false, false)
+ "SI Lower i1 Copies", false, false)
char SILowerI1Copies::ID = 0;
@@ -70,9 +71,9 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- MF.getTarget().getInstrInfo());
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
std::vector<unsigned> I1Defs;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -102,6 +103,20 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ if (MI.getOpcode() == AMDGPU::V_XOR_I1) {
+ I1Defs.push_back(MI.getOperand(0).getReg());
+ MI.setDesc(TII->get(AMDGPU::V_XOR_B32_e32));
+ continue;
+ }
+
+ if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ unsigned Reg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ if (RC == &AMDGPU::VReg_1RegClass)
+ MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass);
+ continue;
+ }
+
if (MI.getOpcode() != AMDGPU::COPY ||
!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
!TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
@@ -120,21 +135,13 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
.addOperand(MI.getOperand(0))
.addImm(0)
.addImm(-1)
- .addOperand(MI.getOperand(1))
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0);
+ .addOperand(MI.getOperand(1));
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
.addOperand(MI.getOperand(0))
- .addImm(0)
.addOperand(MI.getOperand(1))
- .addImm(0)
- .addImm(0)
- .addImm(0)
.addImm(0);
MI.eraseFromParent();
}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index e2df950..d58f31d 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,8 +10,10 @@
#include "SIMachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -26,71 +28,49 @@ void SIMachineFunctionInfo::anchor() {}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
+ TIDReg(AMDGPU::NoRegister),
PSInputAddr(0),
- SpillTracker() { }
+ NumUserSGPRs(0),
+ LDSWaveSpillSize(0) { }
-static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
- unsigned VGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
+ MachineFunction *MF,
+ unsigned FrameIndex,
+ unsigned SubIdx) {
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo*>(
+ MF->getTarget().getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
+ Offset += SubIdx * 4;
- // We need to add this register as live out for the function, in order to
- // have the live range calculated directly.
- //
- // When register spilling begins, we have already calculated the live
- // live intervals for all the registers. Since we are spilling SGPRs to
- // VGPRs, we need to update the Lane VGPR's live interval every time we
- // spill or restore a register.
- //
- // Unfortunately, there is no good way to update the live interval as
- // the TargetInstrInfo callbacks for spilling and restoring don't give
- // us access to the live interval information.
- //
- // We are lucky, though, because the InlineSpiller calls
- // LiveRangeEdit::calculateRegClassAndHint() which iterates through
- // all the new register that have been created when restoring a register
- // and calls LiveIntervals::getInterval(), which creates and computes
- // the live interval for the newly created register. However, once this
- // live intervals is created, it doesn't change and since we usually reuse
- // the Lane VGPR multiple times, this means any uses after the first aren't
- // added to the live interval.
- //
- // To work around this, we add Lane VGPRs to the functions live out list,
- // so that we can guarantee its live range will cover all of its uses.
+ unsigned LaneVGPRIdx = Offset / (64 * 4);
+ unsigned Lane = (Offset / 4) % 64;
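+  // E.g. a frame object at byte offset 260 read with SubIdx == 1 gives
+  // Offset == 264, so LaneVGPRIdx == 1 and Lane == 2 (lane 2 of the second
+  // lane VGPR).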
- for (MachineBasicBlock &MBB : *MF) {
- if (MBB.back().getOpcode() == AMDGPU::S_ENDPGM) {
- MBB.back().addOperand(*MF, MachineOperand::CreateReg(VGPR, false, true));
- return VGPR;
- }
- }
+ struct SpilledReg Spill;
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Could not find S_ENDPGM instruction.");
+ if (!LaneVGPRs.count(LaneVGPRIdx)) {
+ unsigned LaneVGPR = TRI->findUnusedVGPR(MRI);
+ LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
+ MRI.setPhysRegUsed(LaneVGPR);
- return VGPR;
-}
-
-unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes(
- MachineRegisterInfo &MRI, MachineFunction *MF, unsigned NumRegs) {
- unsigned StartLane = CurrentLane;
- CurrentLane += NumRegs;
- if (!LaneVGPR) {
- LaneVGPR = createLaneVGPR(MRI, MF);
- } else {
- if (CurrentLane >= MAX_LANES) {
- StartLane = CurrentLane = 0;
- LaneVGPR = createLaneVGPR(MRI, MF);
+    // Add this register as live-in to all blocks to avoid machine verifier
+ // complaining about use of an undefined physical register.
+ for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
+ BI != BE; ++BI) {
+ BI->addLiveIn(LaneVGPR);
}
}
- return StartLane;
-}
-void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
- unsigned Reg,
- int Lane) {
- SpilledRegisters[FrameIndex] = SpilledReg(Reg, Lane);
+ Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
+ Spill.Lane = Lane;
+ return Spill;
}
-const SIMachineFunctionInfo::SpilledReg&
-SIMachineFunctionInfo::RegSpillTracker::getSpilledReg(unsigned FrameIndex) {
- return SpilledRegisters[FrameIndex];
+unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
+ const MachineFunction &MF) const {
+ const AMDGPUSubtarget &ST = MF.getTarget().getSubtarget<AMDGPUSubtarget>();
+ // FIXME: We should get this information from kernel attributes if it
+ // is available.
+ return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 96e619b..6bb8f9d 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
-#ifndef SIMACHINEFUNCTIONINFO_H_
-#define SIMACHINEFUNCTIONINFO_H_
+#ifndef LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
#include "AMDGPUMachineFunction.h"
+#include "SIRegisterInfo.h"
#include <map>
namespace llvm {
@@ -26,6 +27,9 @@ class MachineRegisterInfo;
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
void anchor() override;
+
+ unsigned TIDReg;
+
public:
struct SpilledReg {
@@ -36,32 +40,23 @@ public:
bool hasLane() { return Lane != -1;}
};
- struct RegSpillTracker {
- private:
- unsigned CurrentLane;
- std::map<unsigned, SpilledReg> SpilledRegisters;
- public:
- unsigned LaneVGPR;
- RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
- /// \p NumRegs The number of consecutive registers what need to be spilled.
- /// This function will ensure that all registers are stored in
- /// the same VGPR.
- /// \returns The lane to be used for storing the first register.
- unsigned reserveLanes(MachineRegisterInfo &MRI, MachineFunction *MF,
- unsigned NumRegs = 1);
- void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
- const SpilledReg& getSpilledReg(unsigned FrameIndex);
- bool programSpillsRegisters() { return !SpilledRegisters.empty(); }
- };
-
// SIMachineFunctionInfo definition
SIMachineFunctionInfo(const MachineFunction &MF);
+ SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
+ unsigned SubIdx);
unsigned PSInputAddr;
- struct RegSpillTracker SpillTracker;
+ unsigned NumUserSGPRs;
+ std::map<unsigned, unsigned> LaneVGPRs;
+ unsigned LDSWaveSpillSize;
+ bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
+ unsigned getTIDReg() const { return TIDReg; };
+ void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+
+ unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
} // End namespace llvm
-#endif //_SIMACHINEFUNCTIONINFO_H_
+#endif
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index d0b677a..cffea12 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -16,6 +16,12 @@
#include "SIRegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
using namespace llvm;
@@ -26,9 +32,19 @@ SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::EXEC);
+
+  // EXEC_LO and EXEC_HI could be allocated and used as regular registers,
+ // but this seems likely to result in bugs, so I'm marking them as reserved.
+ Reserved.set(AMDGPU::EXEC_LO);
+ Reserved.set(AMDGPU::EXEC_HI);
+
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
- TII->reserveIndirectRegisters(Reserved, MF);
+ Reserved.set(AMDGPU::FLAT_SCR);
+
+ // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
+ Reserved.set(AMDGPU::VGPR255);
+ Reserved.set(AMDGPU::VGPR254);
+
return Reserved;
}
@@ -37,6 +53,213 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return RC->getNumRegs();
}
+bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
+ return Fn.getFrameInfo()->hasStackObjects();
+}
+
+static unsigned getNumSubRegsForSpillOp(unsigned Op) {
+
+ switch (Op) {
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_V512_SAVE:
+ case AMDGPU::SI_SPILL_V512_RESTORE:
+ return 16;
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V256_RESTORE:
+ return 8;
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_V128_SAVE:
+ case AMDGPU::SI_SPILL_V128_RESTORE:
+ return 4;
+ case AMDGPU::SI_SPILL_V96_SAVE:
+ case AMDGPU::SI_SPILL_V96_RESTORE:
+ return 3;
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_V64_SAVE:
+ case AMDGPU::SI_SPILL_V64_RESTORE:
+ return 2;
+ case AMDGPU::SI_SPILL_S32_SAVE:
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ case AMDGPU::SI_SPILL_V32_RESTORE:
+ return 1;
+ default: llvm_unreachable("Invalid spill opcode");
+ }
+}
+
+void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineOperand &FIOp = MI->getOperand(FIOperandNum);
+ int Index = MI->getOperand(FIOperandNum).getIndex();
+
+ switch (MI->getOpcode()) {
+ // SGPR register spill
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S32_SAVE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ struct SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->getSpilledReg(MF, Index, i);
+
+ if (Spill.VGPR == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+ }
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
+ .addReg(SubReg)
+ .addImm(Spill.Lane);
+
+ }
+ MI->eraseFromParent();
+ break;
+ }
+
+ // SGPR register restore
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ bool isM0 = SubReg == AMDGPU::M0;
+ struct SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->getSpilledReg(MF, Index, i);
+
+ if (Spill.VGPR == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+ }
+
+ if (isM0) {
+ SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+ }
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane);
+ if (isM0) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(SubReg);
+ }
+ }
+ TII->insertNOPs(MI, 3);
+ MI->eraseFromParent();
+ break;
+ }
+
+ // VGPR register spill
+ case AMDGPU::SI_SPILL_V512_SAVE:
+ case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V128_SAVE:
+ case AMDGPU::SI_SPILL_V96_SAVE:
+ case AMDGPU::SI_SPILL_V64_SAVE:
+ case AMDGPU::SI_SPILL_V32_SAVE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ int64_t Offset = FrameInfo->getObjectOffset(Index);
+ unsigned Size = NumSubRegs * 4;
+ unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+
+ for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
+ unsigned SubReg = NumSubRegs > 1 ?
+ getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
+ SrcReg;
+ Offset += (i * 4);
+ MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);
+
+ unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
+ Offset, Size);
+
+ if (AddrReg == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
+ AddrReg = AMDGPU::VGPR0;
+ }
+
+ // Store the value in LDS
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
+ .addImm(0) // gds
+ .addReg(AddrReg, RegState::Kill) // addr
+ .addReg(SubReg) // data0
+ .addImm(0); // offset
+ }
+
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::SI_SPILL_V32_RESTORE:
+ case AMDGPU::SI_SPILL_V64_RESTORE:
+ case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_V256_RESTORE:
+ case AMDGPU::SI_SPILL_V512_RESTORE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ unsigned DstReg = MI->getOperand(0).getReg();
+ int64_t Offset = FrameInfo->getObjectOffset(Index);
+ unsigned Size = NumSubRegs * 4;
+ unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+
+ // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
+ for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
+ unsigned SubReg = NumSubRegs > 1 ?
+ getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
+ DstReg;
+
+ Offset += (i * 4);
+ unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
+ Offset, Size);
+ if (AddrReg == AMDGPU::NoRegister) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
+ AddrReg = AMDGPU::VGPR0;
+ }
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
+ .addImm(0) // gds
+ .addReg(AddrReg, RegState::Kill) // addr
+ .addImm(0); //offset
+ }
+ MI->eraseFromParent();
+ break;
+ }
+
+ default: {
+ int64_t Offset = FrameInfo->getObjectOffset(Index);
+ FIOp.ChangeToImmediate(Offset);
+ if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) {
+ unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VReg_32RegClass, MI, SPAdj);
+ BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addImm(Offset);
+ FIOp.ChangeToRegister(TmpReg, false);
+ }
+ }
+ }
+}
+
const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
@@ -52,13 +275,17 @@ unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
- const TargetRegisterClass *BaseClasses[] = {
+ static const TargetRegisterClass *BaseClasses[] = {
&AMDGPU::VReg_32RegClass,
&AMDGPU::SReg_32RegClass,
&AMDGPU::VReg_64RegClass,
&AMDGPU::SReg_64RegClass,
+ &AMDGPU::VReg_96RegClass,
+ &AMDGPU::VReg_128RegClass,
&AMDGPU::SReg_128RegClass,
- &AMDGPU::SReg_256RegClass
+ &AMDGPU::VReg_256RegClass,
+ &AMDGPU::SReg_256RegClass,
+ &AMDGPU::VReg_512RegClass
};
for (const TargetRegisterClass *BaseClass : BaseClasses) {
@@ -69,13 +296,6 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
return nullptr;
}
-bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const {
- if (!RC) {
- return false;
- }
- return !hasVGPRs(RC);
-}
-
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) ||
getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
@@ -122,11 +342,53 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
const TargetRegisterClass *SubRC,
unsigned Channel) const {
+
+ switch (Reg) {
+ case AMDGPU::VCC:
+ switch(Channel) {
+ case 0: return AMDGPU::VCC_LO;
+ case 1: return AMDGPU::VCC_HI;
+ default: llvm_unreachable("Invalid SubIdx for VCC");
+ }
+
+ case AMDGPU::FLAT_SCR:
+ switch (Channel) {
+ case 0:
+ return AMDGPU::FLAT_SCR_LO;
+ case 1:
+ return AMDGPU::FLAT_SCR_HI;
+ default:
+ llvm_unreachable("Invalid SubIdx for FLAT_SCR");
+ }
+ break;
+
+ case AMDGPU::EXEC:
+ switch (Channel) {
+ case 0:
+ return AMDGPU::EXEC_LO;
+ case 1:
+ return AMDGPU::EXEC_HI;
+ default:
+ llvm_unreachable("Invalid SubIdx for EXEC");
+ }
+ break;
+ }
+
+ const TargetRegisterClass *RC = getPhysRegClass(Reg);
+  // 32-bit registers don't have sub-registers, so we can just return Reg.
+  // We need this check here because the calculation below using
+  // getHWRegIndex() will fail for special 32-bit registers like VCC_LO,
+  // VCC_HI, EXEC_LO, EXEC_HI and M0.
+ if (RC->getSize() == 4) {
+ assert(Channel == 0);
+ return Reg;
+ }
+
unsigned Index = getHWRegIndex(Reg);
return SubRC->getRegister(Index + Channel);
}
-bool SIRegisterInfo::regClassCanUseImmediate(int RCID) const {
+bool SIRegisterInfo::regClassCanUseLiteralConstant(int RCID) const {
switch (RCID) {
default: return false;
case AMDGPU::SSrc_32RegClassID:
@@ -137,7 +399,68 @@ bool SIRegisterInfo::regClassCanUseImmediate(int RCID) const {
}
}
-bool SIRegisterInfo::regClassCanUseImmediate(
+bool SIRegisterInfo::regClassCanUseLiteralConstant(
const TargetRegisterClass *RC) const {
- return regClassCanUseImmediate(RC->getID());
+ return regClassCanUseLiteralConstant(RC->getID());
+}
+
+bool SIRegisterInfo::regClassCanUseInlineConstant(int RCID) const {
+ if (regClassCanUseLiteralConstant(RCID))
+ return true;
+
+ switch (RCID) {
+ default: return false;
+ case AMDGPU::VCSrc_32RegClassID:
+ case AMDGPU::VCSrc_64RegClassID:
+ return true;
+ }
+}
+
+bool SIRegisterInfo::regClassCanUseInlineConstant(
+ const TargetRegisterClass *RC) const {
+ return regClassCanUseInlineConstant(RC->getID());
}
+
+
+unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
+ enum PreloadedValue Value) const {
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ switch (Value) {
+ case SIRegisterInfo::TGID_X:
+ return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
+ case SIRegisterInfo::TGID_Y:
+ return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
+ case SIRegisterInfo::TGID_Z:
+ return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
+ case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
+ return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
+ case SIRegisterInfo::SCRATCH_PTR:
+ return AMDGPU::SGPR2_SGPR3;
+ case SIRegisterInfo::INPUT_PTR:
+ return AMDGPU::SGPR0_SGPR1;
+ case SIRegisterInfo::TIDIG_X:
+ return AMDGPU::VGPR0;
+ case SIRegisterInfo::TIDIG_Y:
+ return AMDGPU::VGPR1;
+ case SIRegisterInfo::TIDIG_Z:
+ return AMDGPU::VGPR2;
+ }
+ llvm_unreachable("unexpected preloaded value type");
+}
+
+/// \brief Returns a register that is not used at any point in the function.
+/// If all registers are used, then this function will return
+/// AMDGPU::NoRegister.
+unsigned SIRegisterInfo::findUnusedVGPR(const MachineRegisterInfo &MRI) const {
+
+ const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (!MRI.isPhysRegUsed(*I))
+ return *I;
+ }
+ return AMDGPU::NoRegister;
+}
+
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index c9305fb..c7e54db 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
-#ifndef SIREGISTERINFO_H_
-#define SIREGISTERINFO_H_
+#ifndef LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
+#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
@@ -29,6 +29,12 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
+ bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const override;
+
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
@@ -40,7 +46,20 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
/// \returns true if this class contains only SGPR registers
- bool isSGPRClass(const TargetRegisterClass *RC) const;
+ bool isSGPRClass(const TargetRegisterClass *RC) const {
+ if (!RC)
+ return false;
+
+ return !hasVGPRs(RC);
+ }
+
+ /// \returns true if this class ID contains only SGPR registers
+ bool isSGPRClassID(unsigned RCID) const {
+ if (static_cast<int>(RCID) == -1)
+ return false;
+
+ return isSGPRClass(getRegClass(RCID));
+ }
/// \returns true if this class contains VGPR registers.
bool hasVGPRs(const TargetRegisterClass *RC) const;
@@ -62,14 +81,41 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
unsigned Channel) const;
/// \returns True if operands defined with this register class can accept
- /// inline immediates.
- bool regClassCanUseImmediate(int RCID) const;
+ /// a literal constant (i.e. any 32-bit immediate).
+ bool regClassCanUseLiteralConstant(int RCID) const;
+
+ /// \returns True if operands defined with this register class can accept
+ /// a literal constant (i.e. any 32-bit immediate).
+ bool regClassCanUseLiteralConstant(const TargetRegisterClass *RC) const;
+
+ /// \returns True if operands defined with this register class can accept
+  /// an inline constant, i.e. an integer value in the range (-16, 64) or
+ /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
+ bool regClassCanUseInlineConstant(int RCID) const;
/// \returns True if operands defined with this register class can accept
- /// inline immediates.
- bool regClassCanUseImmediate(const TargetRegisterClass *RC) const;
+  /// an inline constant, i.e. a value in the range (-16, 64).
+ bool regClassCanUseInlineConstant(const TargetRegisterClass *RC) const;
+
+ enum PreloadedValue {
+ TGID_X,
+ TGID_Y,
+ TGID_Z,
+ SCRATCH_WAVE_OFFSET,
+ SCRATCH_PTR,
+ INPUT_PTR,
+ TIDIG_X,
+ TIDIG_Y,
+ TIDIG_Z
+ };
+
+ /// \brief Returns the physical register that \p Value is stored in.
+ unsigned getPreloadedValue(const MachineFunction &MF,
+ enum PreloadedValue Value) const;
+
+ unsigned findUnusedVGPR(const MachineRegisterInfo &MRI) const;
};
} // End namespace llvm
-#endif // SIREGISTERINFO_H_
+#endif
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 8974b63..45c2b41 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -27,10 +27,28 @@ def VCC : RegisterWithSubRegs<"VCC", [VCC_LO, VCC_HI]> {
let HWEncoding = 106;
}
-def EXEC : SIReg<"EXEC", 126>;
+def EXEC_LO : SIReg<"exec_lo", 126>;
+def EXEC_HI : SIReg<"exec_hi", 127>;
+
+def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = 126;
+}
+
def SCC : SIReg<"SCC", 253>;
def M0 : SIReg <"M0", 124>;
+def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256 bytes.
+def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
+
+// Pair to indicate location of scratch space for flat accesses.
+def FLAT_SCR : RegisterWithSubRegs <"FLAT_SCR", [FLAT_SCR_LO, FLAT_SCR_HI]> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1];
+ let HWEncoding = 104;
+}
+
// SGPR registers
foreach Index = 0-101 in {
def SGPR#Index : SIReg <"SGPR"#Index, Index>;
@@ -152,20 +170,24 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
//===----------------------------------------------------------------------===//
// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
+def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
+ let CopyCost = -1; // Theoretically it is possible to read from SCC,
+ // but it should never be necessary.
+}
+
def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add SGPR_32, M0Reg, VCC_LO)
+ (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
- (add SGPR_64Regs, VCCReg, EXECReg)
+ (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
>;
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
@@ -192,18 +214,30 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
//===----------------------------------------------------------------------===//
-// [SV]Src_(32|64) register classes, can have either an immediate or an register
+// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
+//===----------------------------------------------------------------------===//
+// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
+//===----------------------------------------------------------------------===//
+
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
//===----------------------------------------------------------------------===//
+// VCSrc_* Operands with an SGPR, VGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+def VCSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
+
+def VCSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+
+//===----------------------------------------------------------------------===//
// SGPR and VGPR register classes
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
new file mode 100644
index 0000000..45e83f5
--- /dev/null
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -0,0 +1,271 @@
+//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// The pass tries to use the 32-bit encoding for instructions when possible.
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-shrink-instructions"
+
+STATISTIC(NumInstructionsShrunk,
+          "Number of 64-bit instructions reduced to 32-bit.");
+STATISTIC(NumLiteralConstantsFolded,
+ "Number of literal constants folded into 32-bit instructions.");
+
+namespace llvm {
+ void initializeSIShrinkInstructionsPass(PassRegistry&);
+}
+
+using namespace llvm;
+
+namespace {
+
+class SIShrinkInstructions : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SIShrinkInstructions() : MachineFunctionPass(ID) {
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "SI Shrink Instructions";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,
+                      "SI Shrink Instructions", false, false)
+INITIALIZE_PASS_END(SIShrinkInstructions, DEBUG_TYPE,
+                    "SI Shrink Instructions", false, false)
+
+char SIShrinkInstructions::ID = 0;
+
+FunctionPass *llvm::createSIShrinkInstructionsPass() {
+ return new SIShrinkInstructions();
+}
+
+static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+ if (!MO->isReg())
+ return false;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
+ return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
+
+ return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
+}
+
+static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
+ const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+
+ const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+  // Can't shrink instructions with three operands.
+ if (Src2)
+ return false;
+
+ const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ const MachineOperand *Src1Mod =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+
+ if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
+ return false;
+
+  // We don't need to check src0; all input types are legal, so just make sure
+  // src0 isn't using any modifiers.
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
+ return false;
+
+ // Check output modifiers
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return false;
+
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ return false;
+
+ return true;
+}
+
+/// \brief This function checks \p MI for operands defined by a move immediate
+/// instruction and then folds the literal constant into the instruction if it
+/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
+/// and will only fold literal constants if we are still in SSA.
+static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI, bool TryToCommute = true) {
+
+ if (!MRI.isSSA())
+ return;
+
+ assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
+ TII->isVOPC(MI.getOpcode()));
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+
+ // Only one literal constant is allowed per instruction, so if src0 is a
+ // literal constant then we can't do any folding.
+ if ((Src0->isImm() || Src0->isFPImm()) && TII->isLiteralConstant(*Src0))
+ return;
+
+ // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
+ // SGPR, we cannot commute the instruction, so we can't fold any literal
+ // constants.
+ if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+ return;
+
+ // Try to fold Src0
+ if (Src0->isReg()) {
+ unsigned Reg = Src0->getReg();
+ MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+ if (Def && Def->isMoveImmediate()) {
+ MachineOperand &MovSrc = Def->getOperand(1);
+ bool ConstantFolded = false;
+
+ if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+ Src0->ChangeToImmediate(MovSrc.getImm());
+ ConstantFolded = true;
+ } else if (MovSrc.isFPImm()) {
+ const ConstantFP *CFP = MovSrc.getFPImm();
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) {
+ Src0->ChangeToFPImmediate(CFP);
+ ConstantFolded = true;
+ }
+ }
+ if (ConstantFolded) {
+ if (MRI.use_empty(Reg))
+ Def->eraseFromParent();
+ ++NumLiteralConstantsFolded;
+ return;
+ }
+ }
+ }
+
+ // We have failed to fold src0, so commute the instruction and try again.
+ if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
+ foldImmediates(MI, TII, MRI, false);
+
+}
+
+bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ std::vector<unsigned> I1Defs;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+ if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+ const MachineOperand &Src = MI.getOperand(1);
+
+ // TODO: Handle FPImm?
+ if (Src.isImm()) {
+ if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src)) {
+ MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+ continue;
+ }
+ }
+ }
+
+ if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
+ continue;
+
+ if (!canShrink(MI, TII, TRI, MRI)) {
+ // Try commuting the instruction and see if that enables us to shrink
+ // it.
+ if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
+ !canShrink(MI, TII, TRI, MRI))
+ continue;
+ }
+
+ int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
+
+      // Op32 could be -1 here if we started with an instruction that had
+      // a 32-bit encoding and then commuted it to an instruction that did not.
+ if (Op32 == -1)
+ continue;
+
+ if (TII->isVOPC(Op32)) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // VOPC instructions can only write to the VCC register. We can't
+ // force them to use VCC here, because the register allocator has
+ // trouble with sequences like this, which cause the allocator to run
+ // out of registers if vreg0 and vreg1 belong to the VCCReg register
+ // class:
+ // vreg0 = VOPC;
+ // vreg1 = VOPC;
+ // S_AND_B64 vreg0, vreg1
+ //
+ // So, instead of forcing the instruction to write to VCC, we provide
+        // a hint to the register allocator to use VCC and then we will run
+ // this pass again after RA and shrink it if it outputs to VCC.
+ MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
+ continue;
+ }
+ if (DstReg != AMDGPU::VCC)
+ continue;
+ }
+
+ // We can shrink this instruction
+ DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
+
+ MachineInstrBuilder Inst32 =
+ BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
+
+ // dst
+ Inst32.addOperand(MI.getOperand(0));
+
+ Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+
+ const MachineOperand *Src1 =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1)
+ Inst32.addOperand(*Src1);
+
+ ++NumInstructionsShrunk;
+ MI.eraseFromParent();
+
+ foldImmediates(*Inst32, TII, MRI);
+ DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index 367963a..9318dc1 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -87,7 +87,7 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) {
Value *BitCast = Builder.CreateBitCast(Ptr,
PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
LoadInst *Load = Builder.CreateLoad(BitCast);
- SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
I.getAllMetadataOtherThanDebugLoc(MD);
for (unsigned i = 0, e = MD.size(); i != e; ++i) {
Load->setMetadata(MD[i].first, MD[i].second);
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index a9aab86..0fa56e6 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -95,44 +95,6 @@ something that reassoc doesn't think about yet.
//===---------------------------------------------------------------------===//
-This function: (derived from GCC PR19988)
-double foo(double x, double y) {
- return ((x + 0.1234 * y) * (x + -0.1234 * y));
-}
-
-compiles to:
-_foo:
- movapd %xmm1, %xmm2
- mulsd LCPI1_1(%rip), %xmm1
- mulsd LCPI1_0(%rip), %xmm2
- addsd %xmm0, %xmm1
- addsd %xmm0, %xmm2
- movapd %xmm1, %xmm0
- mulsd %xmm2, %xmm0
- ret
-
-Reassociate should be able to turn it into:
-
-double foo(double x, double y) {
- return ((x + 0.1234 * y) * (x - 0.1234 * y));
-}
-
-Which allows the multiply by constant to be CSE'd, producing:
-
-_foo:
- mulsd LCPI1_0(%rip), %xmm1
- movapd %xmm1, %xmm2
- addsd %xmm0, %xmm2
- subsd %xmm1, %xmm0
- mulsd %xmm2, %xmm0
- ret
-
-This doesn't need -ffast-math support at all. This is particularly bad because
-the llvm-gcc frontend is canonicalizing the later into the former, but clang
-doesn't have this problem.
-
-//===---------------------------------------------------------------------===//
-
These two functions should generate the same code on big-endian systems:
int g(int *j,int *l) { return memcmp(j,l,4); }
@@ -771,7 +733,7 @@ f (unsigned long a, unsigned long b, unsigned long c)
return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0);
}
Both should combine to ((a|b) & (c-1)) != 0. Currently not optimized with
-"clang -emit-llvm-bc | opt -std-compile-opts".
+"clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
@@ -784,7 +746,7 @@ void clear_pmd_range(unsigned long start, unsigned long end)
}
The expression should optimize to something like
"!((start|end)&~PMD_MASK). Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
@@ -803,7 +765,7 @@ int f(int x, int y)
return (abs(x)) >= 0;
}
This should optimize to x == INT_MIN. (With -fwrapv.) Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
@@ -841,117 +803,117 @@ rshift_gt (unsigned int a)
All should simplify to a single comparison. All of these are
currently not optimized with "clang -emit-llvm-bc | opt
--std-compile-opts".
+-O3".
//===---------------------------------------------------------------------===//
From GCC Bug 32605:
int c(int* x) {return (char*)x+2 == (char*)x;}
Should combine to 0. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts" (although llc can optimize it).
+-emit-llvm-bc | opt -O3" (although llc can optimize it).
//===---------------------------------------------------------------------===//
int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;}
Should be combined to "((b >> 1) | b) & 1". Currently not optimized
-with "clang -emit-llvm-bc | opt -std-compile-opts".
+with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);}
Should combine to "x | (y & 3)". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);}
Should fold to "(~a & c) | (a & b)". Currently not optimized with
-"clang -emit-llvm-bc | opt -std-compile-opts".
+"clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int a,int b) {return (~(a|b))|a;}
Should fold to "a|~b". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int a, int b) {return (a&&b) || (a&&!b);}
Should fold to "a". Currently not optimized with "clang -emit-llvm-bc
-| opt -std-compile-opts".
+| opt -O3".
//===---------------------------------------------------------------------===//
int a(int a, int b, int c) {return (a&&b) || (!a&&c);}
Should fold to "a ? b : c", or at least something sane. Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);}
Should fold to a && (b || c). Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int x) {return x | ((x & 8) ^ 8);}
Should combine to x | 8. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int x) {return x ^ ((x & 8) ^ 8);}
Should also combine to x | 8. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int a(int x) {return ((x | -9) ^ 8) & x;}
Should combine to x & -9. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;}
Should combine to "a * 0x88888888 >> 31". Currently not optimized
-with "clang -emit-llvm-bc | opt -std-compile-opts".
+with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned a(char* x) {if ((*x & 32) == 0) return b();}
There's an unnecessary zext in the generated code with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned a(unsigned long long x) {return 40 * (x >> 1);}
Should combine to "20 * (((unsigned)x) & -2)". Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int g(int x) { return (x - 10) < 0; }
Should combine to "x <= 9" (the sub has nsw). Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int g(int x) { return (x + 10) < 0; }
Should combine to "x < -10" (the add has nsw). Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
int f(int i, int j) { return i < j + 1; }
int g(int i, int j) { return j > i - 1; }
Should combine to "i <= j" (the add/sub has nsw). Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
unsigned f(unsigned x) { return ((x & 7) + 1) & 15; }
The & 15 part should be optimized away, it doesn't change the result. Currently
-not optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+not optimized with "clang -emit-llvm-bc | opt -O3".
//===---------------------------------------------------------------------===//
@@ -1163,7 +1125,7 @@ There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
GCC testsuite, ones we don't get yet are (checked through loadpre25):
[CRIT EDGE BREAKING]
-loadpre3.c predcom-4.c
+predcom-4.c
[PRE OF READONLY CALL]
loadpre5.c
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 9df0054..d0b362c 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -48,7 +48,7 @@ class SparcAsmParser : public MCTargetAsmParser {
// public interface of the MCTargetAsmParser.
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -386,7 +386,7 @@ public:
bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
SmallVector<MCInst, 8> Instructions;
@@ -408,7 +408,7 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -444,7 +444,7 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc)
return Error(StartLoc, "invalid register name");
}
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
unsigned VariantID);
bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index cebda92..c486411 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -2,9 +2,8 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td)
tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM SparcGenCodeEmitter.inc -gen-emitter)
tablegen(LLVM SparcGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM SparcGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel)
@@ -24,8 +23,6 @@ add_llvm_target(SparcCodeGen
SparcSubtarget.cpp
SparcTargetMachine.cpp
SparcSelectionDAGInfo.cpp
- SparcJITInfo.cpp
- SparcCodeEmitter.cpp
SparcMCInstLower.cpp
SparcTargetObjectFile.cpp
)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index f3441ff..28369fd 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -110,7 +110,7 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
MachineBasicBlock::iterator MI = I;
@@ -187,7 +187,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (J->getOpcode() == SP::RESTORErr
|| J->getOpcode() == SP::RESTOREri) {
// change retl to ret.
- slot->setDesc(TM.getInstrInfo()->get(SP::RET));
+ slot->setDesc(TM.getSubtargetImpl()->getInstrInfo()->get(SP::RET));
return J;
}
}
@@ -329,7 +329,8 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
{
// Check Reg and all aliased Registers.
- for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
+ for (MCRegAliasIterator AI(Reg, TM.getSubtargetImpl()->getRegisterInfo(),
+ true);
AI.isValid(); ++AI)
if (RegSet.count(*AI))
return true;
@@ -482,7 +483,7 @@ bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB,
if (PrevInst->isBundledWithSucc())
return false;
- const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
switch (PrevInst->getOpcode()) {
default: break;
diff --git a/lib/Target/Sparc/Disassembler/LLVMBuild.txt b/lib/Target/Sparc/Disassembler/LLVMBuild.txt
index c27398f..bd5397d 100644
--- a/lib/Target/Sparc/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Sparc/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = SparcDisassembler
parent = Sparc
-required_libraries = MC SparcInfo Support
+required_libraries = MCDisassembler SparcInfo Support
add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 4df0990..8bc4ca9 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -16,7 +16,6 @@
#include "SparcSubtarget.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -27,23 +26,17 @@ typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
-/// SparcDisassembler - a disassembler class for Sparc.
+/// A disassembler class for Sparc.
class SparcDisassembler : public MCDisassembler {
public:
- /// Constructor - Initializes the disassembler.
- ///
- SparcDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
- MCDisassembler(STI, Ctx)
- {}
+ SparcDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
virtual ~SparcDisassembler() {}
- /// getInstruction - See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
}
@@ -213,47 +206,37 @@ static DecodeStatus DecodeSWAP(MCInst &Inst, unsigned insn, uint64_t Address,
#include "SparcGenDisassemblerTables.inc"
-/// readInstruction - read four bytes from the MemoryObject
-/// and return 32 bit word.
-static DecodeStatus readInstruction32(const MemoryObject &region,
- uint64_t address,
- uint64_t &size,
- uint32_t &insn) {
- uint8_t Bytes[4];
-
+/// Read four bytes from the ArrayRef and return a 32-bit word.
+static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn) {
// We want to read exactly 4 Bytes of data.
- if (region.readBytes(address, 4, Bytes) == -1) {
- size = 0;
+ if (Bytes.size() < 4) {
+ Size = 0;
return MCDisassembler::Fail;
}
// Encoded as a big-endian 32-bit word in the stream.
- insn = (Bytes[3] << 0) |
- (Bytes[2] << 8) |
- (Bytes[1] << 16) |
- (Bytes[0] << 24);
+ Insn =
+ (Bytes[3] << 0) | (Bytes[2] << 8) | (Bytes[1] << 16) | (Bytes[0] << 24);
return MCDisassembler::Success;
}
-
-DecodeStatus
-SparcDisassembler::getInstruction(MCInst &instr,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
+DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
uint32_t Insn;
- DecodeStatus Result = readInstruction32(Region, Address, Size, Insn);
+ DecodeStatus Result = readInstruction32(Bytes, Address, Size, Insn);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableSparc32, instr, Insn, Address,
- this, STI);
+ Result =
+ decodeInstruction(DecoderTableSparc32, Instr, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 8fe4075..c96d5ad 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SparcINSTPRINTER_H
-#define SparcINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
+#define LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index d42bcee..8d79396 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SPARC_FIXUPKINDS_H
-#define LLVM_SPARC_FIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCFIXUPKINDS_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 6875fc6..3a9c987 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -35,7 +35,6 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
Data64bitsDirective = (isV9) ? "\t.xword\t" : nullptr;
ZeroDirective = "\t.skip\t";
CommentString = "!";
- HasLEB128 = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
@@ -43,7 +42,8 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
- if (TheTriple.getOS() == llvm::Triple::Solaris)
+ if (TheTriple.getOS() == llvm::Triple::Solaris ||
+ TheTriple.getOS() == llvm::Triple::OpenBSD)
UseIntegratedAssembler = true;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index e126b68..84de551 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCTARGETASMINFO_H
-#define SPARCTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCASMINFO_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index 7f01ab0..d97e3a2 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -161,8 +161,9 @@ Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) {
bool
SparcMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return getSubExpr()->EvaluateAsRelocatable(Res, Layout);
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, Layout, Fixup);
}
static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index f0d0ef3..f72c6c4 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SPARCMCEXPR_H
-#define LLVM_SPARCMCEXPR_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCEXPR_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCEXPR_H
#include "SparcFixupKinds.h"
#include "llvm/MC/MCExpr.h"
@@ -87,7 +87,8 @@ public:
/// @}
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override;
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 571017d..3cc4314 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -125,10 +125,8 @@ static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack) {
- MCStreamer *S =
- createELFStreamer(Context, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ const MCSubtargetInfo &STI, bool RelaxAll) {
+ MCStreamer *S = createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
new SparcTargetELFStreamer(*S);
return S;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index c8029a8..c31943d 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCMCTARGETDESC_H
-#define SPARCMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCTARGETDESC_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
index bcc0291..c2a95b4 100644
--- a/lib/Target/Sparc/Makefile
+++ b/lib/Target/Sparc/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \
SparcGenAsmWriter.inc SparcGenAsmMatcher.inc \
SparcGenDAGISel.inc SparcGenDisassemblerTables.inc \
SparcGenSubtargetInfo.inc SparcGenCallingConv.inc \
- SparcGenCodeEmitter.inc SparcGenMCCodeEmitter.inc
+ SparcGenMCCodeEmitter.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt
index 34e68cf..647c276 100644
--- a/lib/Target/Sparc/README.txt
+++ b/lib/Target/Sparc/README.txt
@@ -56,6 +56,4 @@ int %t1(int %a, int %b) {
leaf fns.
* Fill delay slots
-* Implement JIT support
-
* Use %g0 directly to materialize 0. No instruction is required.
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index de20aaa..96378d5 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_SPARC_H
-#define TARGET_SPARC_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARC_H
+#define LLVM_LIB_TARGET_SPARC_SPARC_H
#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
@@ -29,8 +29,6 @@ namespace llvm {
FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
- FunctionPass *createSparcJITCodeEmitterPass(SparcTargetMachine &TM,
- JITCodeEmitter &JCE);
void LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
MCInst &OutMI,
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 1b7330e..6432003 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -296,7 +296,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() {
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand (opNum);
SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags();
@@ -450,7 +450,8 @@ void SparcAsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataSection());
- unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
+ unsigned PtrSize =
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSize(0);
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), PtrSize);
diff --git a/lib/Target/Sparc/SparcCodeEmitter.cpp b/lib/Target/Sparc/SparcCodeEmitter.cpp
deleted file mode 100644
index 247da2a..0000000
--- a/lib/Target/Sparc/SparcCodeEmitter.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-//===-- Sparc/SparcCodeEmitter.cpp - Convert Sparc Code to Machine Code ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===---------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the Sparc machine instructions
-// into relocatable machine code.
-//
-//===---------------------------------------------------------------------===//
-
-#include "Sparc.h"
-#include "MCTargetDesc/SparcMCExpr.h"
-#include "SparcRelocations.h"
-#include "SparcTargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-STATISTIC(NumEmitted, "Number of machine instructions emitted");
-
-namespace {
-
-class SparcCodeEmitter : public MachineFunctionPass {
- SparcJITInfo *JTI;
- const SparcInstrInfo *II;
- const DataLayout *TD;
- const SparcSubtarget *Subtarget;
- TargetMachine &TM;
- JITCodeEmitter &MCE;
- const std::vector<MachineConstantPoolEntry> *MCPEs;
- bool IsPIC;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo> ();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- static char ID;
-
-public:
- SparcCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr),
- TM(tm), MCE(mce), MCPEs(nullptr),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "Sparc Machine Code Emitter";
- }
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-
- void emitInstruction(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB);
-
-private:
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- unsigned getCallTargetOpValue(const MachineInstr &MI,
- unsigned) const;
- unsigned getBranchTargetOpValue(const MachineInstr &MI,
- unsigned) const;
- unsigned getBranchPredTargetOpValue(const MachineInstr &MI,
- unsigned) const;
- unsigned getBranchOnRegTargetOpValue(const MachineInstr &MI,
- unsigned) const;
-
- void emitWord(unsigned Word);
-
- unsigned getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const;
-
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
-};
-} // end anonymous namespace.
-
-char SparcCodeEmitter::ID = 0;
-
-bool SparcCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- SparcTargetMachine &Target = static_cast<SparcTargetMachine &>(
- const_cast<TargetMachine &>(MF.getTarget()));
-
- JTI = Target.getJITInfo();
- II = Target.getInstrInfo();
- TD = Target.getDataLayout();
- Subtarget = &TM.getSubtarget<SparcSubtarget> ();
- MCPEs = &MF.getConstantPool()->getConstants();
- JTI->Initialize(MF, IsPIC);
- MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
-
- do {
- DEBUG(errs() << "JITTing function '"
- << MF.getName() << "'\n");
- MCE.startFunction(MF);
-
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB){
- MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
- E = MBB->instr_end(); I != E;)
- emitInstruction(*I++, *MBB);
- }
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-void SparcCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
- MachineBasicBlock &MBB) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
-
- MCE.processDebugLoc(MI->getDebugLoc(), true);
-
- ++NumEmitted;
-
- switch (MI->getOpcode()) {
- default: {
- emitWord(getBinaryCodeForInstr(*MI));
- break;
- }
- case TargetOpcode::INLINEASM: {
- // We allow inline assembler nodes with empty bodies - they can
- // implicitly define registers, which is ok for JIT.
- if (MI->getOperand(0).getSymbolName()[0]) {
- report_fatal_error("JIT does not support inline asm!");
- }
- break;
- }
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::EH_LABEL: {
- MCE.emitLabel(MI->getOperand(0).getMCSymbol());
- break;
- }
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL: {
- // Do nothing.
- break;
- }
- case SP::GETPCX: {
- report_fatal_error("JIT does not support pseudo instruction GETPCX yet!");
- break;
- }
- }
-
- MCE.processDebugLoc(MI->getDebugLoc(), false);
-}
-
-void SparcCodeEmitter::emitWord(unsigned Word) {
- DEBUG(errs() << " 0x";
- errs().write_hex(Word) << "\n");
- MCE.emitWordBE(Word);
-}
-
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
-unsigned SparcCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
- if (MO.isReg())
- return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
- else if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO));
- else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
- else if (MO.isMBB())
- emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
- else
- llvm_unreachable("Unable to encode MachineOperand!");
- return 0;
-}
-unsigned SparcCodeEmitter::getCallTargetOpValue(const MachineInstr &MI,
- unsigned opIdx) const {
- const MachineOperand MO = MI.getOperand(opIdx);
- return getMachineOpValue(MI, MO);
-}
-
-unsigned SparcCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
- unsigned opIdx) const {
- const MachineOperand MO = MI.getOperand(opIdx);
- return getMachineOpValue(MI, MO);
-}
-
-unsigned SparcCodeEmitter::getBranchPredTargetOpValue(const MachineInstr &MI,
- unsigned opIdx) const {
- const MachineOperand MO = MI.getOperand(opIdx);
- return getMachineOpValue(MI, MO);
-}
-
-unsigned SparcCodeEmitter::getBranchOnRegTargetOpValue(const MachineInstr &MI,
- unsigned opIdx) const {
- const MachineOperand MO = MI.getOperand(opIdx);
- return getMachineOpValue(MI, MO);
-}
-
-unsigned SparcCodeEmitter::getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const {
-
- unsigned TF = MO.getTargetFlags();
- switch (TF) {
- default:
- case SparcMCExpr::VK_Sparc_None: break;
- case SparcMCExpr::VK_Sparc_LO: return SP::reloc_sparc_lo;
- case SparcMCExpr::VK_Sparc_HI: return SP::reloc_sparc_hi;
- case SparcMCExpr::VK_Sparc_H44: return SP::reloc_sparc_h44;
- case SparcMCExpr::VK_Sparc_M44: return SP::reloc_sparc_m44;
- case SparcMCExpr::VK_Sparc_L44: return SP::reloc_sparc_l44;
- case SparcMCExpr::VK_Sparc_HH: return SP::reloc_sparc_hh;
- case SparcMCExpr::VK_Sparc_HM: return SP::reloc_sparc_hm;
- }
-
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
- default: break;
- case SP::CALL: return SP::reloc_sparc_pc30;
- case SP::BA:
- case SP::BCOND:
- case SP::FBCOND: return SP::reloc_sparc_pc22;
- case SP::BPXCC: return SP::reloc_sparc_pc19;
- }
- llvm_unreachable("unknown reloc!");
-}
-
-void SparcCodeEmitter::emitGlobalAddress(const GlobalValue *GV,
- unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), 0,
- true));
-}
-
-void SparcCodeEmitter::
-emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES, 0, 0));
-}
-
-void SparcCodeEmitter::
-emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, CPI, 0, false));
-}
-
-void SparcCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc) const {
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- Reloc, BB));
-}
-
-
-/// createSparcJITCodeEmitterPass - Return a pass that emits the collected Sparc
-/// code to the specified MCE object.
-FunctionPass *llvm::createSparcJITCodeEmitterPass(SparcTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new SparcCodeEmitter(TM, JCE);
-}
-
-#include "SparcGenCodeEmitter.inc"
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 3cdfda3..1b67b4b 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -46,7 +46,7 @@ void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (NumBytes >= -4096 && NumBytes < 4096) {
BuildMI(MBB, MBBI, dl, TII.get(ADDri), SP::O6)
@@ -88,7 +88,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -153,7 +153,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
assert(MBBI->getOpcode() == SP::RETL &&
"Can only put epilog before 'retl' instruction!");
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index a7d1b89..9e53994 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARC_FRAMEINFO_H
-#define SPARC_FRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCFRAMELOWERING_H
+#define LLVM_LIB_TARGET_SPARC_SPARCFRAMELOWERING_H
#include "Sparc.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 2fade27..b3b029e 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -66,16 +66,15 @@ private:
} // end anonymous namespace
SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg,
- getTargetLowering()->getPointerTy()).getNode();
+ unsigned GlobalBaseReg =
+ TM.getSubtargetImpl()->getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
}
bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
- getTargetLowering()->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy());
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -90,8 +89,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
if (FrameIndexSDNode *FIN =
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
// Constant offset from frame ref.
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
- getTargetLowering()->getPointerTy());
+ Base =
+ CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy());
} else {
Base = Addr.getOperand(0);
}
@@ -135,7 +134,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
}
R1 = Addr;
- R2 = CurDAG->getRegister(SP::G0, getTargetLowering()->getPointerTy());
+ R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy());
return true;
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 990f52a..e6a69d2 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -190,8 +190,8 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
@@ -250,8 +250,8 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc64);
@@ -349,8 +349,8 @@ LowerFormalArguments_32(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
const unsigned StackOffset = 92;
@@ -474,7 +474,7 @@ LowerFormalArguments_32(SDValue Chain,
DAG.getConstant(Offset, MVT::i32));
Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
MachinePointerInfo(),
- VA.getValVT(), false, false,0);
+ VA.getValVT(), false, false, false,0);
Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
}
InVals.push_back(Load);
@@ -549,8 +549,8 @@ LowerFormalArguments_64(SDValue Chain,
// Analyze arguments according to CC_Sparc64.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
// The argument array begins at %fp+BIAS+128, after the register save area.
@@ -698,8 +698,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
// Get the size of the outgoing arguments stack space requirement.
@@ -915,7 +915,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI =
- ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
+ getTargetMachine().getSubtarget<SparcSubtarget>().getRegisterInfo();
const uint32_t *Mask = ((hasReturnsTwice)
? TRI->getRTCallPreservedMask(CallConv)
: TRI->getCallPreservedMask(CallConv));
@@ -934,8 +934,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
@@ -1061,8 +1061,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
// Get the size of the outgoing arguments stack space requirement.
@@ -1228,10 +1228,10 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI =
- ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
- const uint32_t *Mask = ((hasReturnsTwice)
- ? TRI->getRTCallPreservedMask(CLI.CallConv)
- : TRI->getCallPreservedMask(CLI.CallConv));
+ getTargetMachine().getSubtarget<SparcSubtarget>().getRegisterInfo();
+ const uint32_t *Mask =
+ ((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv)
+ : TRI->getCallPreservedMask(CLI.CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1255,8 +1255,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Set inreg flag manually for codegen generated library calls that
// return float.
@@ -1366,7 +1366,7 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
}
SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, new SparcELFTargetObjectFile()) {
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<SparcSubtarget>();
// Set up the register classes.
@@ -1905,7 +1905,9 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
Ops.push_back(Symbol);
Ops.push_back(DAG.getRegister(SP::O0, PtrVT));
const uint32_t *Mask = getTargetMachine()
- .getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ .getSubtargetImpl()
+ ->getRegisterInfo()
+ ->getCallPreservedMask(CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
Ops.push_back(InFlag);
@@ -2754,9 +2756,10 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
ISD::SETNE);
}
// MulResult is a node with an illegal type. Because such things are not
- // generally permitted during this phase of legalization, delete the
- // node. The above EXTRACT_ELEMENT nodes should have been folded.
- DAG.DeleteNode(MulResult.getNode());
+ // generally permitted during this phase of legalization, ensure that
+ // nothing is left using the node. The above EXTRACT_ELEMENT nodes should have
+ // been folded.
+ assert(MulResult->use_empty() && "Illegally typed node still in use!");
SDValue Ops[2] = { BottomHalf, TopHalf } ;
return DAG.getMergeValues(Ops, dl);
@@ -2900,7 +2903,8 @@ MachineBasicBlock*
SparcTargetLowering::expandSelectCC(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned BROpcode) const {
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
@@ -2961,7 +2965,8 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode,
unsigned CondCode) const {
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
DebugLoc DL = MI->getDebugLoc();
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index a24cc82..a62d569 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARC_ISELLOWERING_H
-#define SPARC_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
+#define LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
#include "Sparc.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 3a1472e..fe93ed7 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCINSTRUCTIONINFO_H
-#define SPARCINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCINSTRINFO_H
+#define LLVM_LIB_TARGET_SPARC_SPARCINSTRINFO_H
#include "SparcRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 960261c..c320239 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -331,7 +331,7 @@ let hasSideEffects = 1, mayStore = 1 in {
[(flushw)]>;
}
-let isBarrier = 1, isTerminator = 1, rd = 0b1000, rs1 = 0, simm13 = 5 in
+let isBarrier = 1, isTerminator = 1, rd = 0b01000, rs1 = 0, simm13 = 5 in
def TA5 : F3_2<0b10, 0b111010, (outs), (ins), "ta 5", [(trap)]>;
let rd = 0 in
diff --git a/lib/Target/Sparc/SparcInstrVIS.td b/lib/Target/Sparc/SparcInstrVIS.td
index 3e2b49d..d9adf3e 100644
--- a/lib/Target/Sparc/SparcInstrVIS.td
+++ b/lib/Target/Sparc/SparcInstrVIS.td
@@ -71,13 +71,13 @@ def FPACKFIX : VISInst2<0b000111101, "fpackfix">;
def FEXPAND : VISInst2<0b001001101, "fexpand">;
def FPMERGE : VISInst <0b001001011, "fpmerge">;
-def FMUL8X16 : VISInst<0b00110001, "fmul8x16">;
-def FMUL8X16AU : VISInst<0b00110011, "fmul8x16au">;
-def FMUL8X16AL : VISInst<0b00110101, "fmul8x16al">;
-def FMUL8SUX16 : VISInst<0b00110110, "fmul8sux16">;
-def FMUL8ULX16 : VISInst<0b00110111, "fmul8ulx16">;
-def FMULD8SUX16 : VISInst<0b00111000, "fmuld8sux16">;
-def FMULD8ULX16 : VISInst<0b00111001, "fmuld8ulx16">;
+def FMUL8X16 : VISInst<0b000110001, "fmul8x16">;
+def FMUL8X16AU : VISInst<0b000110011, "fmul8x16au">;
+def FMUL8X16AL : VISInst<0b000110101, "fmul8x16al">;
+def FMUL8SUX16 : VISInst<0b000110110, "fmul8sux16">;
+def FMUL8ULX16 : VISInst<0b000110111, "fmul8ulx16">;
+def FMULD8SUX16 : VISInst<0b000111000, "fmuld8sux16">;
+def FMULD8ULX16 : VISInst<0b000111001, "fmuld8ulx16">;
def ALIGNADDR : VISInst<0b000011000, "alignaddr", I64Regs>;
def ALIGNADDRL : VISInst<0b000011010, "alignaddrl", I64Regs>;
@@ -134,7 +134,7 @@ def EDGE16L : VISInst<0b000000110, "edge16l", I64Regs>;
def EDGE32 : VISInst<0b000001000, "edge32", I64Regs>;
def EDGE32L : VISInst<0b000001010, "edge32l", I64Regs>;
-def PDIST : VISInst<0b00111110, "pdist">;
+def PDIST : VISInst<0b000111110, "pdist">;
def ARRAY8 : VISInst<0b000010000, "array8", I64Regs>;
def ARRAY16 : VISInst<0b000010010, "array16", I64Regs>;
@@ -181,7 +181,7 @@ def CMASK32 : VISInstFormat<0b000011111, (outs), (ins I64Regs:$rs2),
}
-def FCHKSM16 : VISInst<0b01000100, "fchksm16">;
+def FCHKSM16 : VISInst<0b001000100, "fchksm16">;
def FHADDS : F3_3<0b10, 0b110100, 0b001100001,
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
@@ -229,14 +229,14 @@ def FNSMULD : F3_3<0b10, 0b110100, 0b001111001,
def FPADD64 : VISInst<0b001000010, "fpadd64">;
-def FSLL16 : VISInst<0b00100001, "fsll16">;
-def FSRL16 : VISInst<0b00100011, "fsrl16">;
-def FSLL32 : VISInst<0b00100101, "fsll32">;
-def FSRL32 : VISInst<0b00100111, "fsrl32">;
-def FSLAS16 : VISInst<0b00101001, "fslas16">;
-def FSRA16 : VISInst<0b00101011, "fsra16">;
-def FSLAS32 : VISInst<0b00101101, "fslas32">;
-def FSRA32 : VISInst<0b00101111, "fsra32">;
+def FSLL16 : VISInst<0b000100001, "fsll16">;
+def FSRL16 : VISInst<0b000100011, "fsrl16">;
+def FSLL32 : VISInst<0b000100101, "fsll32">;
+def FSRL32 : VISInst<0b000100111, "fsrl32">;
+def FSLAS16 : VISInst<0b000101001, "fslas16">;
+def FSRA16 : VISInst<0b000101011, "fsra16">;
+def FSLAS32 : VISInst<0b000101101, "fslas32">;
+def FSRA32 : VISInst<0b000101111, "fsra32">;
let rs1 = 0 in
def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd),
diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp
deleted file mode 100644
index d0eec98..0000000
--- a/lib/Target/Sparc/SparcJITInfo.cpp
+++ /dev/null
@@ -1,326 +0,0 @@
-//===-- SparcJITInfo.cpp - Implement the Sparc JIT Interface --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the Sparc target.
-//
-//===----------------------------------------------------------------------===//
-#include "SparcJITInfo.h"
-#include "Sparc.h"
-#include "SparcRelocations.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Support/Memory.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-/// JITCompilerFunction - This contains the address of the JIT function used to
-/// compile a function lazily.
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-extern "C" void SparcCompilationCallback();
-
-extern "C" {
-#if defined (__sparc__)
-
-#if defined(__arch64__)
-#define FRAME_PTR(X) #X "+2047"
-#else
-#define FRAME_PTR(X) #X
-#endif
-
- asm(
- ".text\n"
- "\t.align 4\n"
- "\t.global SparcCompilationCallback\n"
- "\t.type SparcCompilationCallback, #function\n"
- "SparcCompilationCallback:\n"
- // Save current register window and create stack.
- // 128 (save area) + 6*8 (for arguments) + 16*8 (for float regfile) = 304
- "\tsave %sp, -304, %sp\n"
- // save float regfile to the stack.
- "\tstd %f0, [" FRAME_PTR(%fp) "-0]\n"
- "\tstd %f2, [" FRAME_PTR(%fp) "-8]\n"
- "\tstd %f4, [" FRAME_PTR(%fp) "-16]\n"
- "\tstd %f6, [" FRAME_PTR(%fp) "-24]\n"
- "\tstd %f8, [" FRAME_PTR(%fp) "-32]\n"
- "\tstd %f10, [" FRAME_PTR(%fp) "-40]\n"
- "\tstd %f12, [" FRAME_PTR(%fp) "-48]\n"
- "\tstd %f14, [" FRAME_PTR(%fp) "-56]\n"
- "\tstd %f16, [" FRAME_PTR(%fp) "-64]\n"
- "\tstd %f18, [" FRAME_PTR(%fp) "-72]\n"
- "\tstd %f20, [" FRAME_PTR(%fp) "-80]\n"
- "\tstd %f22, [" FRAME_PTR(%fp) "-88]\n"
- "\tstd %f24, [" FRAME_PTR(%fp) "-96]\n"
- "\tstd %f26, [" FRAME_PTR(%fp) "-104]\n"
- "\tstd %f28, [" FRAME_PTR(%fp) "-112]\n"
- "\tstd %f30, [" FRAME_PTR(%fp) "-120]\n"
- // stubaddr is in %g1.
- "\tcall SparcCompilationCallbackC\n"
- "\t mov %g1, %o0\n"
- // restore float regfile from the stack.
- "\tldd [" FRAME_PTR(%fp) "-0], %f0\n"
- "\tldd [" FRAME_PTR(%fp) "-8], %f2\n"
- "\tldd [" FRAME_PTR(%fp) "-16], %f4\n"
- "\tldd [" FRAME_PTR(%fp) "-24], %f6\n"
- "\tldd [" FRAME_PTR(%fp) "-32], %f8\n"
- "\tldd [" FRAME_PTR(%fp) "-40], %f10\n"
- "\tldd [" FRAME_PTR(%fp) "-48], %f12\n"
- "\tldd [" FRAME_PTR(%fp) "-56], %f14\n"
- "\tldd [" FRAME_PTR(%fp) "-64], %f16\n"
- "\tldd [" FRAME_PTR(%fp) "-72], %f18\n"
- "\tldd [" FRAME_PTR(%fp) "-80], %f20\n"
- "\tldd [" FRAME_PTR(%fp) "-88], %f22\n"
- "\tldd [" FRAME_PTR(%fp) "-96], %f24\n"
- "\tldd [" FRAME_PTR(%fp) "-104], %f26\n"
- "\tldd [" FRAME_PTR(%fp) "-112], %f28\n"
- "\tldd [" FRAME_PTR(%fp) "-120], %f30\n"
- // restore original register window and
- // copy %o0 to %g1
- "\trestore %o0, 0, %g1\n"
- // call the new stub
- "\tjmp %g1\n"
- "\t nop\n"
- "\t.size SparcCompilationCallback, .-SparcCompilationCallback"
- );
-#else
- void SparcCompilationCallback() {
- llvm_unreachable(
- "Cannot call SparcCompilationCallback() on a non-sparc arch!");
- }
-#endif
-}
-
-
-#define SETHI_INST(imm, rd) (0x01000000 | ((rd) << 25) | ((imm) & 0x3FFFFF))
-#define JMP_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x38 << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define NOP_INST SETHI_INST(0, 0)
-#define OR_INST_I(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x02 << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define OR_INST_R(rs1, rs2, rd) (0x80000000 | ((rd) << 25) | (0x02 << 19) \
- | ((rs1) << 14) | (0 << 13) | ((rs2) & 0x1F))
-#define RDPC_INST(rd) (0x80000000 | ((rd) << 25) | (0x28 << 19) \
- | (5 << 14))
-#define LDX_INST(rs1, imm, rd) (0xC0000000 | ((rd) << 25) | (0x0B << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define SLLX_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x25 << 19) \
- | ((rs1) << 14) | (3 << 12) | ((imm) & 0x3F))
-#define SUB_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x04 << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define XOR_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x03 << 19) \
- | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
-#define BA_INST(tgt) (0x10800000 | ((tgt) & 0x3FFFFF))
-
-// Emit instructions to jump to Addr and store the starting address of
-// the instructions emitted in the scratch register.
-static void emitInstrForIndirectJump(intptr_t Addr,
- unsigned scratch,
- SmallVectorImpl<uint32_t> &Insts) {
-
- if (isInt<13>(Addr)) {
- // Emit: jmpl %g0+Addr, <scratch>
- // nop
- Insts.push_back(JMP_INST(0, LO10(Addr), scratch));
- Insts.push_back(NOP_INST);
- return;
- }
-
- if (isUInt<32>(Addr)) {
- // Emit: sethi %hi(Addr), scratch
- // jmpl scratch+%lo(Addr), scratch
- // sub scratch, 4, scratch
- Insts.push_back(SETHI_INST(HI22(Addr), scratch));
- Insts.push_back(JMP_INST(scratch, LO10(Addr), scratch));
- Insts.push_back(SUB_INST(scratch, 4, scratch));
- return;
- }
-
- if (Addr < 0 && isInt<33>(Addr)) {
- // Emit: sethi %hix(Addr), scratch)
- // xor scratch, %lox(Addr), scratch
- // jmpl scratch+0, scratch
- // sub scratch, 8, scratch
- Insts.push_back(SETHI_INST(HIX22(Addr), scratch));
- Insts.push_back(XOR_INST(scratch, LOX10(Addr), scratch));
- Insts.push_back(JMP_INST(scratch, 0, scratch));
- Insts.push_back(SUB_INST(scratch, 8, scratch));
- return;
- }
-
- // Emit: rd %pc, scratch
- // ldx [scratch+16], scratch
- // jmpl scratch+0, scratch
- // sub scratch, 8, scratch
- // <Addr: 8 byte>
- Insts.push_back(RDPC_INST(scratch));
- Insts.push_back(LDX_INST(scratch, 16, scratch));
- Insts.push_back(JMP_INST(scratch, 0, scratch));
- Insts.push_back(SUB_INST(scratch, 8, scratch));
- Insts.push_back((uint32_t)(((int64_t)Addr) >> 32) & 0xffffffff);
- Insts.push_back((uint32_t)(Addr & 0xffffffff));
-
- // Instruction sequence without rdpc instruction
- // 7 instruction and 2 scratch register
- // Emit: sethi %hh(Addr), scratch
- // or scratch, %hm(Addr), scratch
- // sllx scratch, 32, scratch
- // sethi %hi(Addr), scratch2
- // or scratch, scratch2, scratch
- // jmpl scratch+%lo(Addr), scratch
- // sub scratch, 20, scratch
- // Insts.push_back(SETHI_INST(HH22(Addr), scratch));
- // Insts.push_back(OR_INST_I(scratch, HM10(Addr), scratch));
- // Insts.push_back(SLLX_INST(scratch, 32, scratch));
- // Insts.push_back(SETHI_INST(HI22(Addr), scratch2));
- // Insts.push_back(OR_INST_R(scratch, scratch2, scratch));
- // Insts.push_back(JMP_INST(scratch, LO10(Addr), scratch));
- // Insts.push_back(SUB_INST(scratch, 20, scratch));
-}
-
-extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) {
- // Get the address of the compiled code for this function.
- intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
-
- // Rewrite the function stub so that we don't end up here every time we
- // execute the call. We're replacing the stub instructions with code
- // that jumps to the compiled function:
-
- SmallVector<uint32_t, 8> Insts;
- intptr_t diff = (NewVal - StubAddr) >> 2;
- if (isInt<22>(diff)) {
- // Use branch instruction to jump
- Insts.push_back(BA_INST(diff));
- Insts.push_back(NOP_INST);
- } else {
- // Otherwise, use indirect jump to the compiled function
- emitInstrForIndirectJump(NewVal, 1, Insts);
- }
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i)
- *(uint32_t *)(StubAddr + i*4) = Insts[i];
-
- sys::Memory::InvalidateInstructionCache((void*) StubAddr, Insts.size() * 4);
- return (void*)StubAddr;
-}
-
-
-void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- llvm_unreachable("FIXME: Implement SparcJITInfo::"
- "replaceMachineCodeForFunction");
-}
-
-
-TargetJITInfo::StubLayout SparcJITInfo::getStubLayout() {
- // The stub contains maximum of 4 4-byte instructions and 8 bytes for address,
- // aligned at 32 bytes.
- // See emitFunctionStub and emitInstrForIndirectJump for details.
- StubLayout Result = { 4*4 + 8, 32 };
- return Result;
-}
-
-void *SparcJITInfo::emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE)
-{
- JCE.emitAlignment(32);
- void *Addr = (void*) (JCE.getCurrentPCValue());
-
- intptr_t CurrentAddr = (intptr_t)Addr;
- intptr_t EmittedAddr;
- SmallVector<uint32_t, 8> Insts;
- if (Fn != (void*)(intptr_t)SparcCompilationCallback) {
- EmittedAddr = (intptr_t)Fn;
- intptr_t diff = (EmittedAddr - CurrentAddr) >> 2;
- if (isInt<22>(diff)) {
- Insts.push_back(BA_INST(diff));
- Insts.push_back(NOP_INST);
- }
- } else {
- EmittedAddr = (intptr_t)SparcCompilationCallback;
- }
-
- if (Insts.size() == 0)
- emitInstrForIndirectJump(EmittedAddr, 1, Insts);
-
-
- if (!sys::Memory::setRangeWritable(Addr, 4 * Insts.size()))
- llvm_unreachable("ERROR: Unable to mark stub writable.");
-
- for (unsigned i = 0, e = Insts.size(); i != e; ++i)
- JCE.emitWordBE(Insts[i]);
-
- sys::Memory::InvalidateInstructionCache(Addr, 4 * Insts.size());
- if (!sys::Memory::setRangeExecutable(Addr, 4 * Insts.size()))
- llvm_unreachable("ERROR: Unable to mark stub executable.");
-
- return Addr;
-}
-
-
-TargetJITInfo::LazyResolverFn
-SparcJITInfo::getLazyResolverFunction(JITCompilerFn F) {
- JITCompilerFunction = F;
- return SparcCompilationCallback;
-}
-
-/// relocate - Before the JIT can run a block of code that has been emitted,
-/// it must rewrite the code to contain the actual addresses of any
-/// referenced global symbols.
-void SparcJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
- intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
-
- switch ((SP::RelocationType) MR->getRelocationType()) {
- case SP::reloc_sparc_hi:
- ResultPtr = (ResultPtr >> 10) & 0x3fffff;
- break;
-
- case SP::reloc_sparc_lo:
- ResultPtr = (ResultPtr & 0x3ff);
- break;
-
- case SP::reloc_sparc_pc30:
- ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffffff;
- break;
-
- case SP::reloc_sparc_pc22:
- ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffff;
- break;
-
- case SP::reloc_sparc_pc19:
- ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x7ffff;
- break;
-
- case SP::reloc_sparc_h44:
- ResultPtr = (ResultPtr >> 22) & 0x3fffff;
- break;
-
- case SP::reloc_sparc_m44:
- ResultPtr = (ResultPtr >> 12) & 0x3ff;
- break;
-
- case SP::reloc_sparc_l44:
- ResultPtr = (ResultPtr & 0xfff);
- break;
-
- case SP::reloc_sparc_hh:
- ResultPtr = (((int64_t)ResultPtr) >> 42) & 0x3fffff;
- break;
-
- case SP::reloc_sparc_hm:
- ResultPtr = (((int64_t)ResultPtr) >> 32) & 0x3ff;
- break;
-
- }
- *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
- }
-}
diff --git a/lib/Target/Sparc/SparcJITInfo.h b/lib/Target/Sparc/SparcJITInfo.h
deleted file mode 100644
index ff1b43a..0000000
--- a/lib/Target/Sparc/SparcJITInfo.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//==- SparcJITInfo.h - Sparc Implementation of the JIT Interface -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the SparcJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPARCJITINFO_H
-#define SPARCJITINFO_H
-
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
-class SparcTargetMachine;
-
-class SparcJITInfo : public TargetJITInfo {
-
- bool IsPIC;
-
- public:
- explicit SparcJITInfo()
- : IsPIC(false) {}
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-
- // getStubLayout - Returns the size and alignment of the largest call stub
- // on Sparc.
- StubLayout getStubLayout() override;
-
-
- /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
- /// small native function that simply calls the function at the specified
- /// address.
- void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE) override;
-
- /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-
- /// relocate - Before the JIT can run a block of code that has been emitted,
- /// it must rewrite the code to contain the actual addresses of any
- /// referenced global symbols.
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase) override;
-
- /// Initialize - Initialize internal stage for the function being JITted.
- void Initialize(const MachineFunction &MF, bool isPIC) {
- IsPIC = isPIC;
- }
-
-};
-}
-
-#endif
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 3783c16..1047442 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -10,8 +10,8 @@
// This file declares Sparc specific per-machine-function information.
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCMACHINEFUNCTIONINFO_H
-#define SPARCMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_SPARC_SPARCMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index dc1ec7c..3cca98f 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -108,7 +108,7 @@ static void replaceFI(MachineFunction &MF,
return;
}
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// FIXME: it would be better to scavenge a register here instead of
// reserving G1 all of the time.
@@ -174,7 +174,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
if (MI.getOpcode() == SP::STQFri) {
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned SrcReg = MI.getOperand(2).getReg();
unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
@@ -186,7 +186,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
} else if (MI.getOpcode() == SP::LDQFri) {
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
unsigned DestReg = MI.getOperand(0).getReg();
unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 77f879a..63567b0 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCREGISTERINFO_H
-#define SPARCREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCREGISTERINFO_H
+#define LLVM_LIB_TARGET_SPARC_SPARCREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/Sparc/SparcRelocations.h b/lib/Target/Sparc/SparcRelocations.h
deleted file mode 100644
index c1ff78d..0000000
--- a/lib/Target/Sparc/SparcRelocations.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- SparcRelocations.h - Sparc Code Relocations -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Sparc target-specific relocation types
-// (for relocation-model=static).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPARC_RELOCATIONS_H
-#define SPARC_RELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace SP {
- enum RelocationType {
- // reloc_sparc_hi - upper 22 bits
- reloc_sparc_hi = 1,
-
- // reloc_sparc_lo - lower 10 bits
- reloc_sparc_lo = 2,
-
- // reloc_sparc_pc30 - pc rel. 30 bits for call
- reloc_sparc_pc30 = 3,
-
- // reloc_sparc_pc22 - pc rel. 22 bits for branch
- reloc_sparc_pc22 = 4,
-
- // reloc_sparc_pc22 - pc rel. 19 bits for branch with icc/xcc
- reloc_sparc_pc19 = 5,
-
- // reloc_sparc_h44 - 43-22 bits
- reloc_sparc_h44 = 6,
-
- // reloc_sparc_m44 - 21-12 bits
- reloc_sparc_m44 = 7,
-
- // reloc_sparc_l44 - lower 12 bits
- reloc_sparc_l44 = 8,
-
- // reloc_sparc_hh - 63-42 bits
- reloc_sparc_hh = 9,
-
- // reloc_sparc_hm - 41-32 bits
- reloc_sparc_hm = 10
- };
- }
-}
-
-#endif
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index 2346f41..a3a21d6 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCSELECTIONDAGINFO_H
-#define SPARCSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index a335778..d503b2b 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARC_SUBTARGET_H
-#define SPARC_SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H
+#define LLVM_LIB_TARGET_SPARC_SPARCSUBTARGET_H
#include "SparcFrameLowering.h"
#include "SparcInstrInfo.h"
#include "SparcISelLowering.h"
-#include "SparcJITInfo.h"
#include "SparcSelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -43,21 +42,25 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
- SparcJITInfo JITInfo;
public:
SparcSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, TargetMachine &TM, bool is64bit);
- const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const SparcRegisterInfo *getRegisterInfo() const {
+ const SparcInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const SparcRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const SparcTargetLowering *getTargetLowering() const { return &TLInfo; }
- const SparcSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- SparcJITInfo *getJITInfo() { return &JITInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
+ const SparcTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const SparcSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 0130fac..489bb69 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "SparcTargetMachine.h"
+#include "SparcTargetObjectFile.h"
#include "Sparc.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
@@ -32,10 +33,13 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<SparcELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
+SparcTargetMachine::~SparcTargetMachine() {}
+
namespace {
/// Sparc Code Generator Pass Configuration Options.
class SparcPassConfig : public TargetPassConfig {
@@ -47,6 +51,7 @@ public:
return getTM<SparcTargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
bool addPreEmitPass() override;
};
@@ -56,15 +61,14 @@ TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SparcPassConfig(this, PM);
}
-bool SparcPassConfig::addInstSelector() {
- addPass(createSparcISelDag(getSparcTargetMachine()));
- return false;
+void SparcPassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass(&getSparcTargetMachine()));
+
+ TargetPassConfig::addIRPasses();
}
-bool SparcTargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- // Machine code emitter pass for Sparc.
- PM.add(createSparcJITCodeEmitterPass(*this, JCE));
+bool SparcPassConfig::addInstSelector() {
+ addPass(createSparcISelDag(getSparcTargetMachine()));
return false;
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 03b5137..096e7c8 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCTARGETMACHINE_H
-#define SPARCTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETMACHINE_H
+#define LLVM_LIB_TARGET_SPARC_SPARCTARGETMACHINE_H
#include "SparcInstrInfo.h"
#include "SparcSubtarget.h"
@@ -21,37 +21,22 @@
namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
SparcSubtarget Subtarget;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit);
+ ~SparcTargetMachine() override;
- const SparcInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const SparcRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const SparcTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const SparcSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- SparcJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
/// SparcV8TargetMachine - Sparc 32-bit target machine
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.h b/lib/Target/Sparc/SparcTargetObjectFile.h
index c60675b..76c8cca 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.h
+++ b/lib/Target/Sparc/SparcTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_SPARC_TARGETOBJECTFILE_H
-#define LLVM_TARGET_SPARC_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_SPARC_SPARCTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
diff --git a/lib/Target/Sparc/SparcTargetStreamer.h b/lib/Target/Sparc/SparcTargetStreamer.h
index 3767d8e..3b50350 100644
--- a/lib/Target/Sparc/SparcTargetStreamer.h
+++ b/lib/Target/Sparc/SparcTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPARCTARGETSTREAMER_H
-#define SPARCTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_SPARC_SPARCTARGETSTREAMER_H
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 758be41..0955f4a 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -345,7 +345,7 @@ public:
SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
// Used by the TableGen code to parse particular operand types.
@@ -677,7 +677,7 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
@@ -696,7 +696,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Special case the error message for the very common case where only
// a single subtarget feature is missing
std::string Msg = "instruction requires:";
- unsigned Mask = 1;
+ uint64_t Mask = 1;
for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) {
if (ErrorInfo & Mask) {
Msg += " ";
@@ -709,7 +709,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 4da2d0f..41a614d 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -5,7 +5,7 @@ tablegen(LLVM SystemZGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM SystemZGenCallingConv.inc -gen-callingconv)
tablegen(LLVM SystemZGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget)
diff --git a/lib/Target/SystemZ/Disassembler/LLVMBuild.txt b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt
index c3081f5..fd78269 100644
--- a/lib/Target/SystemZ/Disassembler/LLVMBuild.txt
+++ b/lib/Target/SystemZ/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = SystemZDisassembler
parent = SystemZ
-required_libraries = MC Support SystemZDesc SystemZInfo
+required_libraries = MC MCDisassembler Support SystemZDesc SystemZInfo
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 2350776..23173bf 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -12,7 +12,6 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -28,11 +27,10 @@ public:
: MCDisassembler(STI, Ctx) {}
virtual ~SystemZDisassembler() {}
- // Override MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region, uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
+ DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
} // end anonymous namespace
@@ -288,14 +286,13 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
#include "SystemZGenDisassemblerTables.inc"
DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
+ ArrayRef<uint8_t> Bytes,
uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
+ raw_ostream &OS,
+ raw_ostream &CS) const {
// Get the first two bytes of the instruction.
- uint8_t Bytes[6];
Size = 0;
- if (Region.readBytes(Address, 2, Bytes) == -1)
+ if (Bytes.size() < 2)
return MCDisassembler::Fail;
// The top 2 bits of the first byte specify the size.
@@ -312,7 +309,7 @@ DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
// Read any remaining bytes.
- if (Size > 2 && Region.readBytes(Address + 2, Size - 2, Bytes + 2) == -1)
+ if (Bytes.size() < Size)
return MCDisassembler::Fail;
// Construct the instruction.
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index dce482b..753903c 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEMZINSTPRINTER_H
-#define LLVM_SYSTEMZINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
index 7781318..542aaee 100644
--- a/lib/Target/SystemZ/LLVMBuild.txt
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = SystemZCodeGen
parent = SystemZ
-required_libraries = AsmPrinter CodeGen Core MC Scalar SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
+required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index c46a36b..35887fa 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -23,12 +23,5 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) {
Data64bitsDirective = "\t.quad\t";
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
- HasLEB128 = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
}
-
-const MCSection *
-SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
- return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata());
-}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index 1de97af..19b5b4b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SystemZTARGETASMINFO_H
-#define SystemZTARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/Support/Compiler.h"
@@ -19,9 +19,6 @@ class StringRef;
class SystemZMCAsmInfo : public MCAsmInfoELF {
public:
explicit SystemZMCAsmInfo(StringRef TT);
-
- // Override MCAsmInfo;
- const MCSection *getNonexecutableStackSection(MCContext &Ctx) const override;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index a3aab71..52a8d1d 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEMZMCFIXUPS_H
-#define LLVM_SYSTEMZMCFIXUPS_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index cc94869..6e82b6d 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -181,15 +181,12 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
return new SystemZInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT,
- MCContext &Ctx,
- MCAsmBackend &MAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+static MCStreamer *
+createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCContext &Ctx,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
}
extern "C" void LLVMInitializeSystemZTargetMC() {
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index cbaf9a8..5eb6526 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZMCTARGETDESC_H
-#define SYSTEMZMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
index 445725b..732c317 100644
--- a/lib/Target/SystemZ/Makefile
+++ b/lib/Target/SystemZ/Makefile
@@ -15,7 +15,6 @@ TARGET = SystemZ
BUILT_SOURCES = SystemZGenRegisterInfo.inc \
SystemZGenAsmWriter.inc \
SystemZGenAsmMatcher.inc \
- SystemZGenCodeEmitter.inc \
SystemZGenDisassemblerTables.inc \
SystemZGenInstrInfo.inc \
SystemZGenDAGISel.inc \
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index 1579249..c8b95b2 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZ_H
-#define SYSTEMZ_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZ_H
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/Support/CodeGen.h"
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 8b18bc1..f4f3ec7 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -185,7 +185,8 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
+ uint64_t Size =
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(ZCPV->getType());
OutStreamer.EmitValue(Expr, Size);
}
@@ -229,7 +230,7 @@ void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
index 20093bc..6467279 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZASMPRINTER_H
-#define SYSTEMZASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/AsmPrinter.h"
diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h
index 4b1569d..71605ac 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZCALLINGCONV_H
-#define SYSTEMZCALLINGCONV_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
namespace llvm {
namespace SystemZ {
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h
index 699718f..0bd8c20 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZCONSTANTPOOLVALUE_H
-#define SYSTEMZCONSTANTPOOLVALUE_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index dc210d6..ce99ee5 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -47,7 +47,7 @@ struct Reference {
return *this;
}
- operator bool() const { return Def || Use; }
+ LLVM_EXPLICIT operator bool() const { return Def || Use; }
// True if the register is defined or used in some form, either directly or
// via a sub- or super-register.
@@ -458,7 +458,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
}
bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
TRI = &TII->getRegisterInfo();
bool Changed = false;
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 055dbe9..eff4ae3 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -13,6 +13,7 @@
#include "SystemZInstrInfo.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -65,7 +66,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFFrame = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
bool HasFP = hasFP(MF);
SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
bool IsVarArg = MF.getFunction()->isVarArg();
@@ -108,7 +109,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// and end registers.
static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
unsigned GPR64, bool IsImplicit) {
- const TargetRegisterInfo *RI = MBB.getParent()->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
if (!IsLive || !IsImplicit) {
@@ -127,7 +129,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
bool IsVarArg = MF.getFunction()->isVarArg();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -216,7 +218,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
bool HasFP = hasFP(MF);
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -292,7 +294,7 @@ static void emitIncrement(MachineBasicBlock &MBB,
else {
Opcode = SystemZ::AGFI;
// Make sure we maintain 8-byte stack alignment.
- int64_t MinVal = -int64_t(1) << 31;
+ int64_t MinVal = -uint64_t(1) << 31;
int64_t MaxVal = (int64_t(1) << 31) - 8;
if (ThisVal < MinVal)
ThisVal = MinVal;
@@ -311,7 +313,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFFrame = MF.getFrameInfo();
auto *ZII =
- static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineModuleInfo &MMI = MF.getMMI();
@@ -408,7 +410,7 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
auto *ZII =
- static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
// Skip the return instruction.
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 4d5fe6d..cefa56f 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZFRAMELOWERING_H
-#define SYSTEMZFRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
#include "llvm/ADT/IndexedMap.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 24f7584..5f84624 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -140,7 +140,7 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
}
const SystemZInstrInfo *getInstrInfo() const {
- return getTargetMachine().getInstrInfo();
+ return getTargetMachine().getSubtargetImpl()->getInstrInfo();
}
// Try to fold more of the base or index of AM into AM, where IsBase
@@ -315,9 +315,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
public:
SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) { }
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getSubtargetImpl()->getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) {}
// Override MachineFunctionPass.
const char *getPassName() const override {
@@ -1000,8 +1000,8 @@ bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
if (V1 == V2 && End1 == End2)
return false;
- return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()),
- AliasAnalysis::Location(V2, End2, Store->getTBAAInfo()));
+ return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getAAInfo()),
+ AliasAnalysis::Location(V2, End2, Store->getAAInfo()));
}
bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 00c65f5..b282fca 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -81,7 +81,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
}
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
- : TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ : TargetLowering(tm),
Subtarget(tm.getSubtarget<SystemZSubtarget>()) {
MVT PtrVT = getPointerTy();
@@ -339,9 +339,10 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isZero() || Imm.isNegZero();
}
-bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
// Unaligned accesses should never be slower than the expanded version.
// We check specifically for aligned accesses in the few cases where
// they are required.
@@ -674,12 +675,11 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
auto *TFL = static_cast<const SystemZFrameLowering *>(
- DAG.getTarget().getFrameLowering());
+ DAG.getSubtarget().getFrameLowering());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
unsigned NumFixedGPRs = 0;
@@ -782,7 +782,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
return Chain;
}
-static bool canUseSiblingCall(CCState ArgCCInfo,
+static bool canUseSiblingCall(const CCState &ArgCCInfo,
SmallVectorImpl<CCValAssign> &ArgLocs) {
// Punt if there are any indirect or stack arguments, or if the call
// needs the call-saved argument register R6.
@@ -817,8 +817,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState ArgCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs,
- *DAG.getContext());
+ CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
// We don't support GuaranteedTailCallOpt, only automatically-detected
@@ -915,7 +914,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
RegsToPass[I].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -940,8 +940,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs,
- *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
// Copy all of the result registers out of their specified physreg.
@@ -972,8 +971,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs,
- *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
// Quick exit for void returns
@@ -1191,7 +1189,7 @@ static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) {
Load->getChain(), Load->getBasePtr(),
Load->getPointerInfo(), Load->getMemoryVT(),
Load->isVolatile(), Load->isNonTemporal(),
- Load->getAlignment());
+ Load->isInvariant(), Load->getAlignment());
// Make sure that the second operand is an i32 with the right value.
if (C.Op1.getValueType() != MVT::i32 ||
@@ -2614,7 +2612,7 @@ MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
MachineBasicBlock *MBB) const {
const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
- MBB->getParent()->getTarget().getInstrInfo());
+ MBB->getParent()->getSubtarget().getInstrInfo());
unsigned DestReg = MI->getOperand(0).getReg();
unsigned TrueReg = MI->getOperand(1).getReg();
@@ -2663,7 +2661,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const {
const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
- MBB->getParent()->getTarget().getInstrInfo());
+ MBB->getParent()->getSubtarget().getInstrInfo());
unsigned SrcReg = MI->getOperand(0).getReg();
MachineOperand Base = MI->getOperand(1);
@@ -2732,7 +2730,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
bool Invert) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2802,14 +2800,10 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
unsigned Tmp = MRI.createVirtualRegister(RC);
BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
.addReg(RotatedOldVal).addOperand(Src2);
- if (BitSize < 32)
+ if (BitSize <= 32)
// XILF with the upper BitSize bits set.
BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
- .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
- else if (BitSize == 32)
- // XILF with every bit set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
- .addReg(Tmp).addImm(~uint32_t(0));
+ .addReg(Tmp).addImm(-1U << (32 - BitSize));
else {
// Use LCGR and add -1 to the result, which is more compact than
// an XILF, XILH pair.
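A quick hedged check of the merged immediate in the hunk above (illustrative, not part of the patch; xilfMask is just a stand-in helper name): -1U << (32 - BitSize) yields a mask with the upper BitSize bits set, so the former BitSize == 32 "every bit set" branch falls out of the general case.

    #include <cassert>
    #include <cstdint>

    // Mask with the upper BitSize bits set, as used for the XILF immediate.
    uint32_t xilfMask(unsigned BitSize) { return -1U << (32 - BitSize); }

    int main() {
      assert(xilfMask(8)  == 0xFF000000u);   // sub-word case
      assert(xilfMask(32) == 0xFFFFFFFFu);   // former special case
      return 0;
    }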
@@ -2856,7 +2850,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
unsigned BitSize) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2968,7 +2962,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *MBB) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
@@ -3085,7 +3079,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
bool ClearEven, unsigned SubReg) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3117,7 +3111,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3287,7 +3281,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
unsigned Opcode) const {
MachineFunction &MF = *MBB->getParent();
const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index e21b050..887c236 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
-#define LLVM_TARGET_SystemZ_ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
#include "SystemZ.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -208,8 +208,9 @@ public:
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
- bool *Fast) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+ unsigned Align,
+ bool *Fast) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
@@ -321,4 +322,4 @@ private:
};
} // end namespace llvm
-#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
+#endif
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index 84196e9..464f79a 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZINSTRBUILDER_H
-#define SYSTEMZINSTRBUILDER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRBUILDER_H
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index f58ab47..8ff9553 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -633,7 +633,7 @@ struct LogicOp {
LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
: RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
- operator bool() const { return RegSize; }
+ LLVM_EXPLICIT operator bool() const { return RegSize; }
unsigned RegSize, ImmLSB, ImmSize;
};
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 83009cb..d2e3f54 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
-#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
#include "SystemZ.h"
#include "SystemZRegisterInfo.h"
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 8081334..8dab44e 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -448,7 +448,7 @@ void SystemZLongBranch::relaxBranches() {
}
bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
MF = &F;
uint64_t Size = initMBBInfo();
if (Size <= MaxForwardRange || !mustRelaxABranch())
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h
index 90447ff..7173cfa 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.h
+++ b/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEMZMCINSTLOWER_H
-#define LLVM_SYSTEMZMCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMCINSTLOWER_H
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 50865f1..92c2ce7 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
-#define SYSTEMZMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index f03bcc4..64f5eeb 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -35,7 +35,7 @@ SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
BitVector
SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
// R11D is the frame pointer. Reserve all aliases.
@@ -62,8 +62,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
auto *TII =
- static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
DebugLoc DL = MI->getDebugLoc();
// Decompose the frame index into a base and offset.
@@ -134,6 +134,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned
SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 9bffa46..212fe91 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SystemZREGISTERINFO_H
-#define SystemZREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
#include "SystemZ.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index e9de146..a257d6b 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZSELECTIONDAGINFO_H
-#define SYSTEMZSELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index aad899c..ec7a8c4 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -150,7 +150,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
}
bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+ TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
bool Changed = false;
for (auto &MBB : F)
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 4e8c710..f881552 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZSUBTARGET_H
-#define SYSTEMZSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZSUBTARGET_H
#include "SystemZFrameLowering.h"
#include "SystemZISelLowering.h"
@@ -55,14 +55,20 @@ public:
SystemZSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const SystemZRegisterInfo *getRegisterInfo() const {
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const SystemZRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const SystemZTargetLowering *getTargetLowering() const { return &TLInfo; }
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const SystemZTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
// This is important for reducing register pressure in vector code.
bool useAA() const override { return true; }
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 0122e99..d7c432e 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
using namespace llvm;
@@ -25,10 +26,13 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
+SystemZTargetMachine::~SystemZTargetMachine() {}
+
namespace {
/// SystemZ Code Generator Pass Configuration Options.
class SystemZPassConfig : public TargetPassConfig {
@@ -49,7 +53,6 @@ public:
void SystemZPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
- addPass(createPartiallyInlineLibCallsPass());
}
bool SystemZPassConfig::addInstSelector() {
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index ded07e9..9fae5e4 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
-#ifndef SYSTEMZTARGETMACHINE_H
-#define SYSTEMZTARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
#include "SystemZSubtarget.h"
#include "llvm/Target/TargetMachine.h"
@@ -23,6 +23,7 @@ namespace llvm {
class TargetFrameLowering;
class SystemZTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
SystemZSubtarget Subtarget;
public:
@@ -30,32 +31,17 @@ public:
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~SystemZTargetMachine() override;
// Override TargetMachine.
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const SystemZInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
const SystemZSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const SystemZRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const SystemZTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
-
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
} // end namespace llvm
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index d277f82..4b51b3f 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -49,7 +49,7 @@ LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
// The DataLayoutPass must now be in sync with the module. Unfortunately we
// cannot enforce that from the C api.
- unwrap(PM)->add(new DataLayoutPass(*unwrap(TD)));
+ unwrap(PM)->add(new DataLayoutPass());
}
void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 6ec0b1f..bca56b5 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -28,8 +28,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"_IO_putc",
"_ZdaPv",
"_ZdaPvRKSt9nothrow_t",
+ "_ZdaPvj",
+ "_ZdaPvm",
"_ZdlPv",
"_ZdlPvRKSt9nothrow_t",
+ "_ZdlPvj",
+ "_ZdlPvm",
"_Znaj",
"_ZnajRKSt9nothrow_t",
"_Znam",
@@ -47,6 +51,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__isoc99_scanf",
"__isoc99_sscanf",
"__memcpy_chk",
+ "__memmove_chk",
+ "__memset_chk",
"__sincospi_stret",
"__sincospif_stret",
"__sinpi",
@@ -54,7 +60,11 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__sqrt_finite",
"__sqrtf_finite",
"__sqrtl_finite",
+ "__stpcpy_chk",
+ "__stpncpy_chk",
+ "__strcpy_chk",
"__strdup",
+ "__strncpy_chk",
"__strndup",
"__strtok_r",
"abs",
@@ -348,7 +358,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
static bool hasSinCosPiStret(const Triple &T) {
// Only Darwin variants have _stret versions of combined trig functions.
- if (!T.isMacOSX() && T.getOS() != Triple::IOS)
+ if (!T.isOSDarwin())
return false;
// The ABI is rather complicated on x86, so don't do anything special there.
@@ -358,7 +368,7 @@ static bool hasSinCosPiStret(const Triple &T) {
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
return false;
- if (T.getOS() == Triple::IOS && T.isOSVersionLT(7, 0))
+ if (T.isiOS() && T.isOSVersionLT(7, 0))
return false;
return true;
@@ -426,7 +436,7 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::fiprintf);
}
- if (T.isKnownWindowsMSVCEnvironment()) {
+ if (T.isOSWindows() && !T.isOSCygMing()) {
// Win32 does not support long double
TLI.setUnavailable(LibFunc::acosl);
TLI.setUnavailable(LibFunc::asinl);
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 39e0459..01139fb 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -30,6 +30,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -42,7 +43,7 @@ using namespace llvm;
void TargetLoweringObjectFile::Initialize(MCContext &ctx,
const TargetMachine &TM) {
Ctx = &ctx;
- DL = TM.getDataLayout();
+ DL = TM.getSubtargetImpl()->getDataLayout();
InitMCObjectFileInfo(TM.getTargetTriple(),
TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
}
@@ -199,7 +200,8 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Otherwise, just drop it into a mergable constant section. If we have
// a section for this size, use it, otherwise use the arbitrary sized
// mergable section.
- switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) {
+ switch (TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+ C->getType())) {
case 4: return SectionKind::getMergeableConst4();
case 8: return SectionKind::getMergeableConst8();
case 16: return SectionKind::getMergeableConst16();
@@ -273,31 +275,13 @@ bool TargetLoweringObjectFile::isSectionAtomizableBySymbols(
return false;
}
-// Lame default implementation. Calculate the section name for global.
-const MCSection *
-TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const{
- assert(!Kind.isThreadLocal() && "Doesn't support TLS");
-
- if (Kind.isText())
- return getTextSection();
-
- if (Kind.isBSS() && BSSSection != nullptr)
- return BSSSection;
-
- if (Kind.isReadOnly() && ReadOnlySection != nullptr)
- return ReadOnlySection;
-
- return getDataSection();
-}
/// getSectionForConstant - Given a mergable constant with the
/// specified size and relocation information, return a section that it
/// should be placed in.
const MCSection *
-TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
+TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind,
+ const Constant *C) const {
if (Kind.isReadOnly() && ReadOnlySection != nullptr)
return ReadOnlySection;
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 95c8cb6..309e1bf 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
//---------------------------------------------------------------------------
@@ -47,17 +48,13 @@ TargetMachine::~TargetMachine() {
}
/// \brief Reset the target options based on the function's attributes.
-void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
- const Function *F = MF->getFunction();
- TargetOptions &TO = MF->getTarget().Options;
-
-#define RESET_OPTION(X, Y) \
- do { \
- if (F->hasFnAttribute(Y)) \
- TO.X = \
- (F->getAttributes(). \
- getAttribute(AttributeSet::FunctionIndex, \
- Y).getValueAsString() == "true"); \
+void TargetMachine::resetTargetOptions(const Function &F) const {
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F.hasFnAttribute(Y)) \
+ Options.X = (F.getAttributes() \
+ .getAttribute(AttributeSet::FunctionIndex, Y) \
+ .getValueAsString() == "true"); \
} while (0)
RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
@@ -68,7 +65,7 @@ void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
RESET_OPTION(UseSoftFloat, "use-soft-float");
RESET_OPTION(DisableTailCalls, "disable-tail-calls");
- TO.MCOptions.SanitizeAddress = F->hasFnAttribute(Attribute::SanitizeAddress);
+ Options.MCOptions.SanitizeAddress = F.hasFnAttribute(Attribute::SanitizeAddress);
}
/// getRelocationModel - Returns the code generation relocation model. The
@@ -183,7 +180,7 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
}
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, *this);
const TargetLoweringObjectFile &TLOF =
- getTargetLowering()->getObjFileLowering();
+ getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
const MCSection *TheSection = TLOF.SectionForGlobal(GV, GVKind, Mang, *this);
bool CannotUsePrivateLabel = TLOF.isSectionAtomizableBySymbols(*TheSection);
Mang.getNameWithPrefix(Name, GV, CannotUsePrivateLabel);
@@ -193,6 +190,6 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
SmallString<60> NameStr;
getNameWithPrefix(NameStr, GV, Mang);
const TargetLoweringObjectFile &TLOF =
- getTargetLowering()->getObjFileLowering();
+ getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
return TLOF.getContext().GetOrCreateSymbol(NameStr.str());
}
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 20923c9..b3e07df 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
@@ -172,7 +173,7 @@ char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
}
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
- return wrap(unwrap(T)->getDataLayout());
+ return wrap(unwrap(T)->getSubtargetImpl()->getDataLayout());
}
void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
@@ -189,7 +190,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
std::string error;
- const DataLayout* td = TM->getDataLayout();
+ const DataLayout *td = TM->getSubtargetImpl()->getDataLayout();
if (!td) {
error = "No DataLayout in TargetMachine";
@@ -197,7 +198,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
return true;
}
Mod->setDataLayout(td);
- pass.add(new DataLayoutPass(Mod));
+ pass.add(new DataLayoutPass());
TargetMachine::CodeGenFileType ft;
switch (codegen) {
@@ -222,10 +223,10 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
- std::string error;
- raw_fd_ostream dest(Filename, error, sys::fs::F_None);
- if (!error.empty()) {
- *ErrorMessage = strdup(error.c_str());
+ std::error_code EC;
+ raw_fd_ostream dest(Filename, EC, sys::fs::F_None);
+ if (EC) {
+ *ErrorMessage = strdup(EC.message().c_str());
return true;
}
formatted_raw_ostream destf(dest);
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 87b6b66..10597a8 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -39,7 +39,7 @@ bool TargetSubtargetInfo::useMachineScheduler() const {
return enableMachineScheduler();
}
-bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const {
+bool TargetSubtargetInfo::enableAtomicExpand() const {
return true;
}
@@ -53,16 +53,7 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
}
bool TargetSubtargetInfo::enablePostMachineScheduler() const {
- return false;
-}
-
-bool TargetSubtargetInfo::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = ANTIDEP_NONE;
- CriticalPathRCs.clear();
- return false;
+ return getSchedModel().PostRAScheduler;
}
bool TargetSubtargetInfo::useAA() const {
diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk
index e2c4be7..861a41d 100644
--- a/lib/Target/X86/Android.mk
+++ b/lib/Target/X86/Android.mk
@@ -12,8 +12,6 @@ x86_codegen_TBLGEN_TABLES := \
x86_codegen_SRC_FILES := \
X86AsmPrinter.cpp \
- X86AtomicExpandPass.cpp \
- X86CodeEmitter.cpp \
X86FastISel.cpp \
X86FixupLEAs.cpp \
X86FloatingPoint.cpp \
@@ -21,7 +19,6 @@ x86_codegen_SRC_FILES := \
X86ISelDAGToDAG.cpp \
X86ISelLowering.cpp \
X86InstrInfo.cpp \
- X86JITInfo.cpp \
X86MachineFunctionInfo.cpp \
X86MCInstLower.cpp \
X86PadShortFunction.cpp \
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index b022a41..2c1926e 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -1,4 +1,7 @@
add_llvm_library(LLVMX86AsmParser
X86AsmInstrumentation.cpp
X86AsmParser.cpp
+
+ LINK_LIBS
+ LLVMX86CodeGen
)
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
index 9f94d5d..284bfd0 100644
--- a/lib/Target/X86/AsmParser/LLVMBuild.txt
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86AsmParser
parent = X86
-required_libraries = MC MCParser Support X86Desc X86Info
+required_libraries = MC MCParser Support X86CodeGen X86Desc X86Info
add_to_library_groups = X86
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index a365f62..9c49a11 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -10,9 +10,12 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86AsmInstrumentation.h"
#include "X86Operand.h"
+#include "X86RegisterInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
@@ -23,6 +26,73 @@
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+// The following comment describes how assembly instrumentation works.
+// Currently we have only AddressSanitizer instrumentation, but we're
+// planning to implement MemorySanitizer for inline assembly too. If
+// you're not familiar with the AddressSanitizer algorithm, please read
+// https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm.
+//
+// When inline assembly is parsed by an instance of X86AsmParser, all
+// instructions are emitted via the EmitInstruction method. That's the
+// place where X86AsmInstrumentation analyzes an instruction and
+// decides whether the instruction should be emitted as is or whether
+// instrumentation is required. The latter case happens when an
+// instruction reads from or writes to memory. Currently the instruction
+// opcode is explicitly checked, and if an instruction has a memory
+// operand (for instance, movq (%rsi, %rcx, 8), %rax) it should be
+// instrumented. There also exist instructions that modify
+// memory but don't have explicit memory operands, for instance
+// movs.
+//
+// Let's first consider 8-byte memory accesses when an instruction
+// has an explicit memory operand. In this case we need two registers -
+// AddressReg to compute the address of the memory cells being accessed
+// and ShadowReg to compute the corresponding shadow address. So, we need
+// to spill both registers before instrumentation code and restore them
+// after instrumentation. Thus, in general, instrumentation code will
+// look like this:
+// PUSHF # Store flags, otherwise they will be overwritten
+// PUSH AddressReg # spill AddressReg
+// PUSH ShadowReg # spill ShadowReg
+// LEA MemOp, AddressReg # compute address of the memory operand
+// MOV AddressReg, ShadowReg
+// SHR ShadowReg, 3
+// # ShadowOffset(AddressReg >> 3) contains address of a shadow
+// # corresponding to MemOp.
+// CMP ShadowOffset(ShadowReg), 0 # test shadow value
+// JZ .Done # when shadow equals to zero, everything is fine
+// MOV AddressReg, RDI
+// # Call __asan_report function with AddressReg as an argument
+// CALL __asan_report
+// .Done:
+// POP ShadowReg # Restore ShadowReg
+// POP AddressReg # Restore AddressReg
+// POPF # Restore flags
+//
+// Memory accesses of other sizes (1-, 2-, 4- and 16-byte) are
+// handled in a similar manner, but small memory accesses (less than 8
+// bytes) require an additional ScratchReg, which is used for the shadow value.
+//
+// If, for example, we're instrumenting an instruction like movs, only
+// the contents of RDI, RDI + AccessSize * RCX, RSI, RSI + AccessSize *
+// RCX are checked. In this case there is no need to spill and restore
+// AddressReg, ShadowReg or flags four times; they're saved on the stack
+// just once, before instrumentation of these four addresses, and restored
+// at the end of the instrumentation.
+//
+// There exist several things which complicate this simple algorithm.
+// * An instrumented memory operand can have RSP as a base or an index
+// register. So we need to add a constant offset before computing
+// the memory address, since flags, AddressReg, ShadowReg, etc. were
+// already stored on the stack and RSP was modified.
+// * Debug info (usually DWARF) should be adjusted, because sometimes
+// RSP is used as a frame register. So, we need to select some
+// register as a frame register and temporarily override the current CFA
+// register.
namespace llvm {
namespace {
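To make the scheme described in the comment block above easier to follow, here is a hedged C++ sketch of the check that the emitted code performs for a small (1-, 2- or 4-byte) access. ShouldReport and the ShadowOffset parameter are illustrative names rather than anything from the patch; the 3-bit shift and the last-byte comparison mirror the AddressSanitizer algorithm referenced above.

    #include <cstdint>

    // True when the access should be reported via __asan_report_*.
    bool ShouldReport(uintptr_t Addr, unsigned AccessSize, uintptr_t ShadowOffset) {
      // Each shadow byte describes an 8-byte granule of application memory.
      int8_t Shadow = *reinterpret_cast<const int8_t *>((Addr >> 3) + ShadowOffset);
      if (Shadow == 0)
        return false;                              // whole granule is addressable
      // A non-zero shadow byte holds the number of addressable bytes in the
      // granule; the access is bad if its last byte reaches past that prefix.
      int64_t LastAccessedByte = (Addr & 7) + AccessSize - 1;
      return LastAccessedByte >= Shadow;
    }

For 8- and 16-byte accesses the partial-granule arithmetic is unnecessary and the emitted check reduces to comparing the shadow byte (or 16-bit word) against zero.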
@@ -32,10 +102,23 @@ static cl::opt<bool> ClAsanInstrumentAssembly(
cl::desc("instrument assembly with AddressSanitizer checks"), cl::Hidden,
cl::init(false));
-bool IsStackReg(unsigned Reg) {
- return Reg == X86::RSP || Reg == X86::ESP || Reg == X86::SP;
+const int64_t MinAllowedDisplacement = std::numeric_limits<int32_t>::min();
+const int64_t MaxAllowedDisplacement = std::numeric_limits<int32_t>::max();
+
+int64_t ApplyDisplacementBounds(int64_t Displacement) {
+ return std::max(std::min(MaxAllowedDisplacement, Displacement),
+ MinAllowedDisplacement);
+}
+
+void CheckDisplacementBounds(int64_t Displacement) {
+ assert(Displacement >= MinAllowedDisplacement &&
+ Displacement <= MaxAllowedDisplacement);
}
+bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP; }
+
+bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
+
std::string FuncName(unsigned AccessSize, bool IsWrite) {
return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
utostr(AccessSize);
@@ -43,60 +126,245 @@ std::string FuncName(unsigned AccessSize, bool IsWrite) {
class X86AddressSanitizer : public X86AsmInstrumentation {
public:
- X86AddressSanitizer(const MCSubtargetInfo &STI) : STI(STI) {}
+ struct RegisterContext {
+ private:
+ enum RegOffset {
+ REG_OFFSET_ADDRESS = 0,
+ REG_OFFSET_SHADOW,
+ REG_OFFSET_SCRATCH
+ };
+
+ public:
+ RegisterContext(unsigned AddressReg, unsigned ShadowReg,
+ unsigned ScratchReg) {
+ BusyRegs.push_back(convReg(AddressReg, MVT::i64));
+ BusyRegs.push_back(convReg(ShadowReg, MVT::i64));
+ BusyRegs.push_back(convReg(ScratchReg, MVT::i64));
+ }
+
+ unsigned AddressReg(MVT::SimpleValueType VT) const {
+ return convReg(BusyRegs[REG_OFFSET_ADDRESS], VT);
+ }
+
+ unsigned ShadowReg(MVT::SimpleValueType VT) const {
+ return convReg(BusyRegs[REG_OFFSET_SHADOW], VT);
+ }
+
+ unsigned ScratchReg(MVT::SimpleValueType VT) const {
+ return convReg(BusyRegs[REG_OFFSET_SCRATCH], VT);
+ }
+
+ void AddBusyReg(unsigned Reg) {
+ if (Reg != X86::NoRegister)
+ BusyRegs.push_back(convReg(Reg, MVT::i64));
+ }
+
+ void AddBusyRegs(const X86Operand &Op) {
+ AddBusyReg(Op.getMemBaseReg());
+ AddBusyReg(Op.getMemIndexReg());
+ }
+
+ unsigned ChooseFrameReg(MVT::SimpleValueType VT) const {
+ static const unsigned Candidates[] = { X86::RBP, X86::RAX, X86::RBX,
+ X86::RCX, X86::RDX, X86::RDI,
+ X86::RSI };
+ for (unsigned Reg : Candidates) {
+ if (!std::count(BusyRegs.begin(), BusyRegs.end(), Reg))
+ return convReg(Reg, VT);
+ }
+ return X86::NoRegister;
+ }
+
+ private:
+ unsigned convReg(unsigned Reg, MVT::SimpleValueType VT) const {
+ return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, VT);
+ }
+
+ std::vector<unsigned> BusyRegs;
+ };
+
+ X86AddressSanitizer(const MCSubtargetInfo &STI)
+ : X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
+
virtual ~X86AddressSanitizer() {}
// X86AsmInstrumentation implementation:
- virtual void InstrumentInstruction(
- const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII, MCStreamer &Out) override {
+ virtual void InstrumentAndEmitInstruction(const MCInst &Inst,
+ OperandVector &Operands,
+ MCContext &Ctx,
+ const MCInstrInfo &MII,
+ MCStreamer &Out) override {
+ InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
+ if (RepPrefix)
+ EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
+
InstrumentMOV(Inst, Operands, Ctx, MII, Out);
- }
- // Should be implemented differently in x86_32 and x86_64 subclasses.
- virtual void InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) = 0;
- virtual void InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) = 0;
+ RepPrefix = (Inst.getOpcode() == X86::REP_PREFIX);
+ if (!RepPrefix)
+ EmitInstruction(Out, Inst);
+ }
- void InstrumentMemOperand(MCParsedAsmOperand &Op, unsigned AccessSize,
- bool IsWrite, MCContext &Ctx, MCStreamer &Out);
+ // Adjusts up stack and saves all registers used in instrumentation.
+ virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) = 0;
+
+ // Restores all registers used in instrumentation and adjusts stack.
+ virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) = 0;
+
+ virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx, MCStreamer &Out) = 0;
+ virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx, MCStreamer &Out) = 0;
+
+ virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) = 0;
+
+ void InstrumentMemOperand(X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx,
+ MCStreamer &Out);
+ void InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, unsigned CntReg,
+ unsigned AccessSize, MCContext &Ctx, MCStreamer &Out);
+
+ void InstrumentMOVS(const MCInst &Inst, OperandVector &Operands,
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
void InstrumentMOV(const MCInst &Inst, OperandVector &Operands,
MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
- void EmitInstruction(MCStreamer &Out, const MCInst &Inst) {
- Out.EmitInstruction(Inst, STI);
- }
+protected:
void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
-protected:
- const MCSubtargetInfo &STI;
+ void EmitLEA(X86Operand &Op, MVT::SimpleValueType VT, unsigned Reg,
+ MCStreamer &Out) {
+ assert(VT == MVT::i32 || VT == MVT::i64);
+ MCInst Inst;
+ Inst.setOpcode(VT == MVT::i32 ? X86::LEA32r : X86::LEA64r);
+ Inst.addOperand(MCOperand::CreateReg(getX86SubSuperRegister(Reg, VT)));
+ Op.addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+
+ void ComputeMemOperandAddress(X86Operand &Op, MVT::SimpleValueType VT,
+ unsigned Reg, MCContext &Ctx, MCStreamer &Out);
+
+ // Creates new memory operand with Displacement added to an original
+ // displacement. Residue will contain a residue which could happen when the
+ // total displacement exceeds 32-bit limitation.
+ std::unique_ptr<X86Operand> AddDisplacement(X86Operand &Op,
+ int64_t Displacement,
+ MCContext &Ctx, int64_t *Residue);
+
+ // True when previous instruction was actually REP prefix.
+ bool RepPrefix;
+
+ // Offset from the original SP register.
+ int64_t OrigSPOffset;
};
void X86AddressSanitizer::InstrumentMemOperand(
- MCParsedAsmOperand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
assert(Op.isMem() && "Op should be a memory operand.");
assert((AccessSize & (AccessSize - 1)) == 0 && AccessSize <= 16 &&
"AccessSize should be a power of two, less or equal than 16.");
+ // FIXME: take into account load/store alignment.
+ if (IsSmallMemAccess(AccessSize))
+ InstrumentMemOperandSmall(Op, AccessSize, IsWrite, RegCtx, Ctx, Out);
+ else
+ InstrumentMemOperandLarge(Op, AccessSize, IsWrite, RegCtx, Ctx, Out);
+}
+
+void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
+ unsigned CntReg,
+ unsigned AccessSize,
+ MCContext &Ctx, MCStreamer &Out) {
+ // FIXME: check whole ranges [DstReg .. DstReg + AccessSize * (CntReg - 1)]
+ // and [SrcReg .. SrcReg + AccessSize * (CntReg - 1)].
+ RegisterContext RegCtx(X86::RDX /* AddressReg */, X86::RAX /* ShadowReg */,
+ IsSmallMemAccess(AccessSize)
+ ? X86::RBX
+ : X86::NoRegister /* ScratchReg */);
+ RegCtx.AddBusyReg(DstReg);
+ RegCtx.AddBusyReg(SrcReg);
+ RegCtx.AddBusyReg(CntReg);
+
+ InstrumentMemOperandPrologue(RegCtx, Ctx, Out);
+
+ // Test (%SrcReg)
+ {
+ const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+ std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
+ 0, Disp, SrcReg, 0, AccessSize, SMLoc(), SMLoc()));
+ InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
+ Out);
+ }
+
+ // Test -1(%SrcReg, %CntReg, AccessSize)
+ {
+ const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+ std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
+ 0, Disp, SrcReg, CntReg, AccessSize, SMLoc(), SMLoc()));
+ InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
+ Out);
+ }
+
+ // Test (%DstReg)
+ {
+ const MCExpr *Disp = MCConstantExpr::Create(0, Ctx);
+ std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
+ 0, Disp, DstReg, 0, AccessSize, SMLoc(), SMLoc()));
+ InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
+ }
+
+ // Test -1(%DstReg, %CntReg, AccessSize)
+ {
+ const MCExpr *Disp = MCConstantExpr::Create(-1, Ctx);
+ std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
+ 0, Disp, DstReg, CntReg, AccessSize, SMLoc(), SMLoc()));
+ InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
+ }
+
+ InstrumentMemOperandEpilogue(RegCtx, Ctx, Out);
+}
+
+void X86AddressSanitizer::InstrumentMOVS(const MCInst &Inst,
+ OperandVector &Operands,
+ MCContext &Ctx, const MCInstrInfo &MII,
+ MCStreamer &Out) {
+ // Access size in bytes.
+ unsigned AccessSize = 0;
- X86Operand &MemOp = static_cast<X86Operand &>(Op);
- // FIXME: get rid of this limitation.
- if (IsStackReg(MemOp.getMemBaseReg()) || IsStackReg(MemOp.getMemIndexReg()))
+ switch (Inst.getOpcode()) {
+ case X86::MOVSB:
+ AccessSize = 1;
+ break;
+ case X86::MOVSW:
+ AccessSize = 2;
+ break;
+ case X86::MOVSL:
+ AccessSize = 4;
+ break;
+ case X86::MOVSQ:
+ AccessSize = 8;
+ break;
+ default:
return;
+ }
- // FIXME: take into account load/store alignment.
- if (AccessSize < 8)
- InstrumentMemOperandSmallImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
- else
- InstrumentMemOperandLargeImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
+ InstrumentMOVSImpl(AccessSize, Ctx, Out);
}
-void X86AddressSanitizer::InstrumentMOV(
- const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII, MCStreamer &Out) {
+void X86AddressSanitizer::InstrumentMOV(const MCInst &Inst,
+ OperandVector &Operands, MCContext &Ctx,
+ const MCInstrInfo &MII,
+ MCStreamer &Out) {
// Access size in bytes.
unsigned AccessSize = 0;
@@ -132,41 +400,199 @@ void X86AddressSanitizer::InstrumentMOV(
}
const bool IsWrite = MII.get(Inst.getOpcode()).mayStore();
+
for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) {
assert(Operands[Ix]);
MCParsedAsmOperand &Op = *Operands[Ix];
- if (Op.isMem())
- InstrumentMemOperand(Op, AccessSize, IsWrite, Ctx, Out);
+ if (Op.isMem()) {
+ X86Operand &MemOp = static_cast<X86Operand &>(Op);
+ RegisterContext RegCtx(
+ X86::RDI /* AddressReg */, X86::RAX /* ShadowReg */,
+ IsSmallMemAccess(AccessSize) ? X86::RCX
+ : X86::NoRegister /* ScratchReg */);
+ RegCtx.AddBusyRegs(MemOp);
+ InstrumentMemOperandPrologue(RegCtx, Ctx, Out);
+ InstrumentMemOperand(MemOp, AccessSize, IsWrite, RegCtx, Ctx, Out);
+ InstrumentMemOperandEpilogue(RegCtx, Ctx, Out);
+ }
}
}
+void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
+ MVT::SimpleValueType VT,
+ unsigned Reg, MCContext &Ctx,
+ MCStreamer &Out) {
+ int64_t Displacement = 0;
+ if (IsStackReg(Op.getMemBaseReg()))
+ Displacement -= OrigSPOffset;
+ if (IsStackReg(Op.getMemIndexReg()))
+ Displacement -= OrigSPOffset * Op.getMemScale();
+
+ assert(Displacement >= 0);
+
+ // Emit Op as is.
+ if (Displacement == 0) {
+ EmitLEA(Op, VT, Reg, Out);
+ return;
+ }
+
+ int64_t Residue;
+ std::unique_ptr<X86Operand> NewOp =
+ AddDisplacement(Op, Displacement, Ctx, &Residue);
+ EmitLEA(*NewOp, VT, Reg, Out);
+
+ while (Residue != 0) {
+ const MCConstantExpr *Disp =
+ MCConstantExpr::Create(ApplyDisplacementBounds(Residue), Ctx);
+ std::unique_ptr<X86Operand> DispOp =
+ X86Operand::CreateMem(0, Disp, Reg, 0, 1, SMLoc(), SMLoc());
+ EmitLEA(*DispOp, VT, Reg, Out);
+ Residue -= Disp->getValue();
+ }
+}
+
+std::unique_ptr<X86Operand>
+X86AddressSanitizer::AddDisplacement(X86Operand &Op, int64_t Displacement,
+ MCContext &Ctx, int64_t *Residue) {
+ assert(Displacement >= 0);
+
+ if (Displacement == 0 ||
+ (Op.getMemDisp() && Op.getMemDisp()->getKind() != MCExpr::Constant)) {
+ *Residue = Displacement;
+ return X86Operand::CreateMem(Op.getMemSegReg(), Op.getMemDisp(),
+ Op.getMemBaseReg(), Op.getMemIndexReg(),
+ Op.getMemScale(), SMLoc(), SMLoc());
+ }
+
+ int64_t OrigDisplacement =
+ static_cast<const MCConstantExpr *>(Op.getMemDisp())->getValue();
+ CheckDisplacementBounds(OrigDisplacement);
+ Displacement += OrigDisplacement;
+
+ int64_t NewDisplacement = ApplyDisplacementBounds(Displacement);
+ CheckDisplacementBounds(NewDisplacement);
+
+ *Residue = Displacement - NewDisplacement;
+ const MCExpr *Disp = MCConstantExpr::Create(NewDisplacement, Ctx);
+ return X86Operand::CreateMem(Op.getMemSegReg(), Disp, Op.getMemBaseReg(),
+ Op.getMemIndexReg(), Op.getMemScale(), SMLoc(),
+ SMLoc());
+}
+
class X86AddressSanitizer32 : public X86AddressSanitizer {
public:
static const long kShadowOffset = 0x20000000;
X86AddressSanitizer32(const MCSubtargetInfo &STI)
: X86AddressSanitizer(STI) {}
+
virtual ~X86AddressSanitizer32() {}
- virtual void InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
+ unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
+ unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
+ if (FrameReg == X86::NoRegister)
+ return FrameReg;
+ return getX86SubSuperRegister(FrameReg, MVT::i32);
+ }
+
+ void SpillReg(MCStreamer &Out, unsigned Reg) {
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(Reg));
+ OrigSPOffset -= 4;
+ }
+
+ void RestoreReg(MCStreamer &Out, unsigned Reg) {
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(Reg));
+ OrigSPOffset += 4;
+ }
+
+ void StoreFlags(MCStreamer &Out) {
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+ OrigSPOffset -= 4;
+ }
+
+ void RestoreFlags(MCStreamer &Out) {
+ EmitInstruction(Out, MCInstBuilder(X86::POPF32));
+ OrigSPOffset += 4;
+ }
+
+ virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+ assert(LocalFrameReg != X86::NoRegister);
+
+ const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
+ unsigned FrameReg = GetFrameReg(Ctx, Out);
+ if (MRI && FrameReg != X86::NoRegister) {
+ SpillReg(Out, LocalFrameReg);
+ if (FrameReg == X86::ESP) {
+ Out.EmitCFIAdjustCfaOffset(4 /* byte size of the LocalFrameReg */);
+ Out.EmitCFIRelOffset(
+ MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */), 0);
+ }
+ EmitInstruction(
+ Out,
+ MCInstBuilder(X86::MOV32rr).addReg(LocalFrameReg).addReg(FrameReg));
+ Out.EmitCFIRememberState();
+ Out.EmitCFIDefCfaRegister(
+ MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
+ }
+
+ SpillReg(Out, RegCtx.AddressReg(MVT::i32));
+ SpillReg(Out, RegCtx.ShadowReg(MVT::i32));
+ if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
+ SpillReg(Out, RegCtx.ScratchReg(MVT::i32));
+ StoreFlags(Out);
+ }
+
+ virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+ assert(LocalFrameReg != X86::NoRegister);
+
+ RestoreFlags(Out);
+ if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
+ RestoreReg(Out, RegCtx.ScratchReg(MVT::i32));
+ RestoreReg(Out, RegCtx.ShadowReg(MVT::i32));
+ RestoreReg(Out, RegCtx.AddressReg(MVT::i32));
+
+ unsigned FrameReg = GetFrameReg(Ctx, Out);
+ if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
+ RestoreReg(Out, LocalFrameReg);
+ Out.EmitCFIRestoreState();
+ if (FrameReg == X86::ESP)
+ Out.EmitCFIAdjustCfaOffset(-4 /* byte size of the LocalFrameReg */);
+ }
+ }
- private:
- void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize,
- bool IsWrite, unsigned AddressReg) {
+ virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) override;
+
+private:
+ void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out, const RegisterContext &RegCtx) {
EmitInstruction(Out, MCInstBuilder(X86::CLD));
EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
- EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::ESP)
- .addReg(X86::ESP).addImm(-16));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(AddressReg));
+ EmitInstruction(Out, MCInstBuilder(X86::AND64ri8)
+ .addReg(X86::ESP)
+ .addReg(X86::ESP)
+ .addImm(-16));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(MVT::i32)));
-
- const std::string& Fn = FuncName(AccessSize, IsWrite);
+ const std::string &Fn = FuncName(AccessSize, IsWrite);
MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
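As a hedged illustration of the ApplyDisplacementBounds/AddDisplacement logic in the hunk above (not part of the patch): when the stack adjustment pushes an operand's total displacement outside the signed 32-bit range an x86 memory operand can encode, the operand keeps a clamped displacement and the leftover residue is folded in through the follow-up LEAs emitted by ComputeMemOperandAddress. A small sketch of the arithmetic, with ApplyBounds as an illustrative stand-in:

    #include <algorithm>
    #include <cstdint>
    #include <limits>

    // Clamp a displacement into the range an x86 memory operand can encode.
    int64_t ApplyBounds(int64_t Disp) {
      const int64_t Min = std::numeric_limits<int32_t>::min();
      const int64_t Max = std::numeric_limits<int32_t>::max();
      return std::max(std::min(Max, Disp), Min);
    }

    // Example: 0x90000000 does not fit into 32 bits, so the operand is emitted
    // with 0x7fffffff and the residue 0x10000001 is absorbed by an extra LEA.
    const int64_t New     = ApplyBounds(0x90000000);      // 0x7fffffff
    const int64_t Residue = INT64_C(0x90000000) - New;    // 0x10000001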
@@ -174,67 +600,64 @@ public:
}
};
-void X86AddressSanitizer32::InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EDX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+void X86AddressSanitizer32::InstrumentMemOperandSmall(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
+ unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
+ unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
- {
- MCInst Inst;
- Inst.setOpcode(X86::LEA32r);
- Inst.addOperand(MCOperand::CreateReg(X86::EAX));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
+ assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
+ unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX)
- .addReg(X86::ECX).addImm(3));
+ ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+
+ EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
+ AddressRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR32ri)
+ .addReg(ShadowRegI32)
+ .addReg(ShadowRegI32)
+ .addImm(3));
{
MCInst Inst;
Inst.setOpcode(X86::MOV8rm);
- Inst.addOperand(MCOperand::CreateReg(X86::CL));
+ Inst.addOperand(MCOperand::CreateReg(ShadowRegI8));
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(0, Disp, ShadowRegI32, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
- EmitInstruction(Out,
- MCInstBuilder(X86::TEST8rr).addReg(X86::CL).addReg(X86::CL));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
MCSymbol *DoneSym = Ctx.CreateTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV32rr).addReg(X86::EDX).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::EDX)
- .addReg(X86::EDX).addImm(7));
+ EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
+ AddressRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::AND32ri)
+ .addReg(ScratchRegI32)
+ .addReg(ScratchRegI32)
+ .addImm(7));
switch (AccessSize) {
case 1:
break;
case 2: {
- MCInst Inst;
- Inst.setOpcode(X86::LEA32r);
- Inst.addOperand(MCOperand::CreateReg(X86::EDX));
-
const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::EDX, 0, 1, SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
+ X86Operand::CreateMem(0, Disp, ScratchRegI32, 0, 1, SMLoc(), SMLoc()));
+ EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
break;
}
case 4:
- EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::EDX)
- .addReg(X86::EDX).addImm(3));
+ EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8)
+ .addReg(ScratchRegI32)
+ .addReg(ScratchRegI32)
+ .addImm(3));
break;
default:
assert(false && "Incorrect access size");
@@ -242,54 +665,46 @@ void X86AddressSanitizer32::InstrumentMemOperandSmallImpl(
}
EmitInstruction(
- Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::ECX).addReg(X86::CL));
- EmitInstruction(
- Out, MCInstBuilder(X86::CMP32rr).addReg(X86::EDX).addReg(X86::ECX));
+ Out,
+ MCInstBuilder(X86::MOVSX32rr8).addReg(ShadowRegI32).addReg(ShadowRegI8));
+ EmitInstruction(Out, MCInstBuilder(X86::CMP32rr).addReg(ScratchRegI32).addReg(
+ ShadowRegI32));
EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr));
- EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX);
+ EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
EmitLabel(Out, DoneSym);
-
- EmitInstruction(Out, MCInstBuilder(X86::POPF32));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EDX));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
}
-void X86AddressSanitizer32::InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+void X86AddressSanitizer32::InstrumentMemOperandLarge(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
+ unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
- {
- MCInst Inst;
- Inst.setOpcode(X86::LEA32r);
- Inst.addOperand(MCOperand::CreateReg(X86::EAX));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX));
- EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX)
- .addReg(X86::ECX).addImm(3));
+ ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+
+ EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
+ AddressRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR32ri)
+ .addReg(ShadowRegI32)
+ .addReg(ShadowRegI32)
+ .addImm(3));
{
MCInst Inst;
switch (AccessSize) {
- case 8:
- Inst.setOpcode(X86::CMP8mi);
- break;
- case 16:
- Inst.setOpcode(X86::CMP16mi);
- break;
- default:
- assert(false && "Incorrect access size");
- break;
+ case 8:
+ Inst.setOpcode(X86::CMP8mi);
+ break;
+ case 16:
+ Inst.setOpcode(X86::CMP16mi);
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
}
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(0, Disp, ShadowRegI32, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
Inst.addOperand(MCOperand::CreateImm(0));
EmitInstruction(Out, Inst);
@@ -298,12 +713,28 @@ void X86AddressSanitizer32::InstrumentMemOperandLargeImpl(
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
- EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX);
+ EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
EmitLabel(Out, DoneSym);
+}
+
+void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize,
+ MCContext &Ctx,
+ MCStreamer &Out) {
+ StoreFlags(Out);
+
+  // No need to test when ECX equals zero.
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(
+ Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ // Instrument first and last elements in src and dst range.
+ InstrumentMOVSBase(X86::EDI /* DstReg */, X86::ESI /* SrcReg */,
+ X86::ECX /* CntReg */, AccessSize, Ctx, Out);
- EmitInstruction(Out, MCInstBuilder(X86::POPF32));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
+ EmitLabel(Out, DoneSym);
+ RestoreFlags(Out);
}
class X86AddressSanitizer64 : public X86AddressSanitizer {
@@ -312,37 +743,126 @@ public:
X86AddressSanitizer64(const MCSubtargetInfo &STI)
: X86AddressSanitizer(STI) {}
+
virtual ~X86AddressSanitizer64() {}
- virtual void InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
+ unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
+ unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
+ if (FrameReg == X86::NoRegister)
+ return FrameReg;
+ return getX86SubSuperRegister(FrameReg, MVT::i64);
+ }
+
+ void SpillReg(MCStreamer &Out, unsigned Reg) {
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(Reg));
+ OrigSPOffset -= 8;
+ }
+
+ void RestoreReg(MCStreamer &Out, unsigned Reg) {
+ EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(Reg));
+ OrigSPOffset += 8;
+ }
+
+ void StoreFlags(MCStreamer &Out) {
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
+ OrigSPOffset -= 8;
+ }
+
+ void RestoreFlags(MCStreamer &Out) {
+ EmitInstruction(Out, MCInstBuilder(X86::POPF64));
+ OrigSPOffset += 8;
+ }
+
+ virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64);
+ assert(LocalFrameReg != X86::NoRegister);
+
+ const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
+ unsigned FrameReg = GetFrameReg(Ctx, Out);
+ if (MRI && FrameReg != X86::NoRegister) {
+ SpillReg(Out, X86::RBP);
+ if (FrameReg == X86::RSP) {
+ Out.EmitCFIAdjustCfaOffset(8 /* byte size of the LocalFrameReg */);
+ Out.EmitCFIRelOffset(
+ MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */), 0);
+ }
+ EmitInstruction(
+ Out,
+ MCInstBuilder(X86::MOV64rr).addReg(LocalFrameReg).addReg(FrameReg));
+ Out.EmitCFIRememberState();
+ Out.EmitCFIDefCfaRegister(
+ MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
+ }
+
+ EmitAdjustRSP(Ctx, Out, -128);
+ SpillReg(Out, RegCtx.ShadowReg(MVT::i64));
+ SpillReg(Out, RegCtx.AddressReg(MVT::i64));
+ if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister)
+ SpillReg(Out, RegCtx.ScratchReg(MVT::i64));
+ StoreFlags(Out);
+ }
+
+ virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64);
+ assert(LocalFrameReg != X86::NoRegister);
+
+ RestoreFlags(Out);
+ if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister)
+ RestoreReg(Out, RegCtx.ScratchReg(MVT::i64));
+ RestoreReg(Out, RegCtx.AddressReg(MVT::i64));
+ RestoreReg(Out, RegCtx.ShadowReg(MVT::i64));
+ EmitAdjustRSP(Ctx, Out, 128);
+
+ unsigned FrameReg = GetFrameReg(Ctx, Out);
+ if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
+ RestoreReg(Out, LocalFrameReg);
+ Out.EmitCFIRestoreState();
+ if (FrameReg == X86::RSP)
+ Out.EmitCFIAdjustCfaOffset(-8 /* byte size of the LocalFrameReg */);
+ }
+ }
+
+ virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) override;
private:
void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
- MCInst Inst;
- Inst.setOpcode(X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(X86::RSP));
-
const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
+ EmitLEA(*Op, MVT::i64, X86::RSP, Out);
+ OrigSPOffset += Offset;
}
- void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize,
- bool IsWrite) {
+ void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out, const RegisterContext &RegCtx) {
EmitInstruction(Out, MCInstBuilder(X86::CLD));
EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
- EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::RSP)
- .addReg(X86::RSP).addImm(-16));
+ EmitInstruction(Out, MCInstBuilder(X86::AND64ri8)
+ .addReg(X86::RSP)
+ .addReg(X86::RSP)
+ .addImm(-16));
- const std::string& Fn = FuncName(AccessSize, IsWrite);
+ if (RegCtx.AddressReg(MVT::i64) != X86::RDI) {
+ EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg(
+ RegCtx.AddressReg(MVT::i64)));
+ }
+ const std::string &Fn = FuncName(AccessSize, IsWrite);
MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
@@ -350,65 +870,65 @@ private:
}
};
-void X86AddressSanitizer64::InstrumentMemOperandSmallImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
- EmitAdjustRSP(Ctx, Out, -128);
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RCX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RDI));
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
- {
- MCInst Inst;
- Inst.setOpcode(X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(X86::RDI));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RAX).addReg(X86::RDI));
- EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX)
- .addReg(X86::RAX).addImm(3));
+void X86AddressSanitizer64::InstrumentMemOperandSmall(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
+ unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64);
+ unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
+ unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
+ unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
+
+ assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
+ unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
+
+ ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out);
+
+ EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
+ AddressRegI64));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR64ri)
+ .addReg(ShadowRegI64)
+ .addReg(ShadowRegI64)
+ .addImm(3));
{
MCInst Inst;
Inst.setOpcode(X86::MOV8rm);
- Inst.addOperand(MCOperand::CreateReg(X86::AL));
+ Inst.addOperand(MCOperand::CreateReg(ShadowRegI8));
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(0, Disp, ShadowRegI64, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
- EmitInstruction(Out,
- MCInstBuilder(X86::TEST8rr).addReg(X86::AL).addReg(X86::AL));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
MCSymbol *DoneSym = Ctx.CreateTempSymbol();
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
- EmitInstruction(
- Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EDI));
- EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::ECX)
- .addReg(X86::ECX).addImm(7));
+ EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
+ AddressRegI32));
+ EmitInstruction(Out, MCInstBuilder(X86::AND32ri)
+ .addReg(ScratchRegI32)
+ .addReg(ScratchRegI32)
+ .addImm(7));
switch (AccessSize) {
case 1:
break;
case 2: {
- MCInst Inst;
- Inst.setOpcode(X86::LEA32r);
- Inst.addOperand(MCOperand::CreateReg(X86::ECX));
-
const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
+ X86Operand::CreateMem(0, Disp, ScratchRegI32, 0, 1, SMLoc(), SMLoc()));
+ EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
break;
}
case 4:
- EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::ECX)
- .addReg(X86::ECX).addImm(3));
+ EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8)
+ .addReg(ScratchRegI32)
+ .addReg(ScratchRegI32)
+ .addImm(3));
break;
default:
assert(false && "Incorrect access size");
@@ -416,37 +936,30 @@ void X86AddressSanitizer64::InstrumentMemOperandSmallImpl(
}
EmitInstruction(
- Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::EAX).addReg(X86::AL));
- EmitInstruction(
- Out, MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::EAX));
+ Out,
+ MCInstBuilder(X86::MOVSX32rr8).addReg(ShadowRegI32).addReg(ShadowRegI8));
+ EmitInstruction(Out, MCInstBuilder(X86::CMP32rr).addReg(ScratchRegI32).addReg(
+ ShadowRegI32));
EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr));
- EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite);
+ EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
EmitLabel(Out, DoneSym);
-
- EmitInstruction(Out, MCInstBuilder(X86::POPF64));
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RDI));
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RCX));
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX));
- EmitAdjustRSP(Ctx, Out, 128);
}
-void X86AddressSanitizer64::InstrumentMemOperandLargeImpl(
- X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
- EmitAdjustRSP(Ctx, Out, -128);
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
+void X86AddressSanitizer64::InstrumentMemOperandLarge(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite,
+ const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
+ unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64);
+ unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64);
- {
- MCInst Inst;
- Inst.setOpcode(X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(X86::RAX));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
- EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX)
- .addReg(X86::RAX).addImm(3));
+ ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out);
+
+ EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
+ AddressRegI64));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR64ri)
+ .addReg(ShadowRegI64)
+ .addReg(ShadowRegI64)
+ .addImm(3));
{
MCInst Inst;
switch (AccessSize) {
@@ -462,7 +975,7 @@ void X86AddressSanitizer64::InstrumentMemOperandLargeImpl(
}
const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(0, Disp, ShadowRegI64, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
Inst.addOperand(MCOperand::CreateImm(0));
EmitInstruction(Out, Inst);
@@ -472,22 +985,66 @@ void X86AddressSanitizer64::InstrumentMemOperandLargeImpl(
const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
- EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite);
+ EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
EmitLabel(Out, DoneSym);
+}
+
+void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
+ MCContext &Ctx,
+ MCStreamer &Out) {
+ StoreFlags(Out);
+
+  // No need to test when RCX equals zero.
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(
+ Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX));
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
- EmitInstruction(Out, MCInstBuilder(X86::POPF64));
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX));
- EmitAdjustRSP(Ctx, Out, 128);
+ // Instrument first and last elements in src and dst range.
+ InstrumentMOVSBase(X86::RDI /* DstReg */, X86::RSI /* SrcReg */,
+ X86::RCX /* CntReg */, AccessSize, Ctx, Out);
+
+ EmitLabel(Out, DoneSym);
+ RestoreFlags(Out);
}
} // End anonymous namespace
-X86AsmInstrumentation::X86AsmInstrumentation() {}
+X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo &STI)
+ : STI(STI), InitialFrameReg(0) {}
+
X86AsmInstrumentation::~X86AsmInstrumentation() {}
-void X86AsmInstrumentation::InstrumentInstruction(
+void X86AsmInstrumentation::InstrumentAndEmitInstruction(
const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII, MCStreamer &Out) {}
+ const MCInstrInfo &MII, MCStreamer &Out) {
+ EmitInstruction(Out, Inst);
+}
+
+void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out,
+ const MCInst &Inst) {
+ Out.EmitInstruction(Inst, STI);
+}
+
+unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
+ MCStreamer &Out) {
+ if (!Out.getNumFrameInfos()) // No active dwarf frame
+ return X86::NoRegister;
+ const MCDwarfFrameInfo &Frame = Out.getDwarfFrameInfos().back();
+ if (Frame.End) // Active dwarf frame is closed
+ return X86::NoRegister;
+ const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
+ if (!MRI) // No register info
+ return X86::NoRegister;
+
+ if (InitialFrameReg) {
+    // FrameReg is set explicitly; we're instrumenting a MachineFunction.
+ return InitialFrameReg;
+ }
+
+ return MRI->getLLVMRegNum(Frame.CurrentCfaRegister, true /* IsEH */);
+}
X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
@@ -501,7 +1058,7 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
if ((STI.getFeatureBits() & X86::Mode64Bit) != 0)
return new X86AddressSanitizer64(STI);
}
- return new X86AsmInstrumentation();
+ return new X86AsmInstrumentation(STI);
}
} // End llvm namespace
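For reference, the small-access path emitted above (address shift, shadow-byte load, TEST/JE, AND/ADD, MOVSX, CMP/JL) encodes the usual ASan shadow check. The C++ sketch below is illustrative only and not part of the patch; it assumes the standard scale-3 shadow mapping, with ShadowOffset standing in for this file's kShadowOffset constant.

#include <cstdint>

// Sketch of what InstrumentMemOperandSmall's emitted sequence computes for a
// 1-, 2- or 4-byte access at Addr.  Returns true when control would reach the
// asan report callback.
static bool NeedsAsanReport(uintptr_t Addr, unsigned AccessSize,
                            uintptr_t ShadowOffset) {
  int8_t Shadow = *reinterpret_cast<int8_t *>((Addr >> 3) + ShadowOffset);
  if (Shadow == 0)
    return false;                              // whole 8-byte granule is addressable
  intptr_t Last = (Addr & 7) + AccessSize - 1; // in-granule offset of last byte touched
  return Last >= Shadow;                       // granule only partially addressable
}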
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 1bc3c09..19ebcc4 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_ASM_INSTRUMENTATION_H
-#define X86_ASM_INSTRUMENTATION_H
+#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
+#define LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
#include "llvm/ADT/SmallVector.h"
@@ -34,11 +34,15 @@ class X86AsmInstrumentation {
public:
virtual ~X86AsmInstrumentation();
- // Instruments Inst. Should be called just before the original
- // instruction is sent to Out.
- virtual void InstrumentInstruction(
+ // Sets frame register corresponding to a current frame.
+ void SetInitialFrameRegister(unsigned RegNo) {
+ InitialFrameReg = RegNo;
+ }
+
+ // Tries to instrument and emit instruction.
+ virtual void InstrumentAndEmitInstruction(
const MCInst &Inst,
- SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
+ SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand> > &Operands,
MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
protected:
@@ -46,9 +50,17 @@ protected:
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
const MCContext &Ctx, const MCSubtargetInfo &STI);
- X86AsmInstrumentation();
+ X86AsmInstrumentation(const MCSubtargetInfo &STI);
+
+ unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out);
+
+ void EmitInstruction(MCStreamer &Out, const MCInst &Inst);
+
+ const MCSubtargetInfo &STI;
+
+ unsigned InitialFrameReg;
};
} // End llvm namespace
-#endif // X86_ASM_INSTRUMENTATION_H
+#endif
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f0765ed..8ef2a55 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <memory>
using namespace llvm;
@@ -55,12 +56,12 @@ static const char OpPrecedence[] = {
class X86AsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
- MCAsmParser &Parser;
const MCInstrInfo &MII;
ParseInstructionInfo *InstInfo;
std::unique_ptr<X86AsmInstrumentation> Instrumentation;
private:
SMLoc consumeToken() {
+ MCAsmParser &Parser = getParser();
SMLoc Result = Parser.getTok().getLoc();
Parser.Lex();
return Result;
@@ -630,13 +631,10 @@ private:
}
};
- MCAsmParser &getParser() const { return Parser; }
-
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None,
bool MatchingInlineAsm = false) {
+ MCAsmParser &Parser = getParser();
if (MatchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
}
@@ -644,8 +642,9 @@ private:
bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = None,
bool MatchingInlineAsm = false) {
- Parser.eatToEndOfStatement();
- return Error(L, Msg, Ranges, MatchingInlineAsm);
+ MCAsmParser &Parser = getParser();
+ Parser.eatToEndOfStatement();
+ return Error(L, Msg, Ranges, MatchingInlineAsm);
}
std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
@@ -693,9 +692,34 @@ private:
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo,
+ uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
+ void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
+ MCStreamer &Out, bool MatchingInlineAsm);
+
+ bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+ bool MatchingInlineAsm);
+
+ bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ unsigned getPointerSize() {
+ if (is16BitMode()) return 16;
+ if (is32BitMode()) return 32;
+ if (is64BitMode()) return 64;
+ llvm_unreachable("invalid mode");
+ }
+
+ bool OmitRegisterFromClobberLists(unsigned RegNo) override;
+
/// doSrcDstMatch - Returns true if operands are matching in their
/// word size (%si and %di, %esi and %edi, etc.). Order depends on
/// the parsing mode (Intel vs. AT&T).
@@ -728,6 +752,13 @@ private:
(X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
}
+ unsigned getPointerWidth() {
+ if (is16BitMode()) return 16;
+ if (is32BitMode()) return 32;
+ if (is64BitMode()) return 64;
+ llvm_unreachable("invalid mode");
+ }
+
bool isParsingIntelSyntax() {
return getParser().getAssemblerDialect();
}
@@ -741,11 +772,9 @@ private:
/// }
public:
- X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
- const MCInstrInfo &mii,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
- InstInfo(nullptr) {
+ X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
+ const MCInstrInfo &mii, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -755,6 +784,8 @@ public:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ void SetFrameRegister(unsigned RegNo) override;
+
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -830,6 +861,7 @@ bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
bool X86AsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmParser &Parser = getParser();
RegNo = 0;
const AsmToken &PercentTok = Parser.getTok();
StartLoc = PercentTok.getLoc();
@@ -937,6 +969,10 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
return false;
}
+void X86AsmParser::SetFrameRegister(unsigned RegNo) {
+ Instrumentation->SetInitialFrameRegister(RegNo);
+}
+
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
@@ -979,15 +1015,20 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
InlineAsmIdentifierInfo &Info) {
- // If this is not a VarDecl then assume it is a FuncDecl or some other label
- // reference. We need an 'r' constraint here, so we need to create register
- // operand to ensure proper matching. Just pick a GPR based on the size of
- // a pointer.
- if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
- unsigned RegNo =
- is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
- return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
- SMLoc(), Identifier, Info.OpDecl);
+ // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
+ // some other label reference.
+ if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
+ // Insert an explicit size if the user didn't have one.
+ if (!Size) {
+ Size = getPointerWidth();
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
+ /*Len=*/0, Size));
+ }
+
+ // Create an absolute memory reference in order to match against
+ // instructions taking a PC relative operand.
+ return X86Operand::CreateMem(Disp, Start, End, Size, Identifier,
+ Info.OpDecl);
}
// We either have a direct symbol reference, or an offset from a symbol. The
@@ -1076,6 +1117,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
}
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
bool Done = false;
@@ -1197,6 +1239,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
std::unique_ptr<X86Operand>
X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
int64_t ImmDisp, unsigned Size) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
if (getLexer().isNot(AsmToken::LBrac))
@@ -1272,13 +1315,16 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End) {
+ MCAsmParser &Parser = getParser();
assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = nullptr;
StringRef LineBuf(Identifier.data());
- SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
+ void *Result =
+ SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
const AsmToken &Tok = Parser.getTok();
+ SMLoc Loc = Tok.getLoc();
// Advance the token stream until the end of the current token is
// after the end of what the frontend claimed.
@@ -1290,9 +1336,22 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
if (End.getPointer() == EndPtr) break;
}
+ Identifier = LineBuf;
+
+ // If the identifier lookup was unsuccessful, assume that we are dealing with
+ // a label.
+ if (!Result) {
+ StringRef InternalName =
+ SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
+ Loc, false);
+ assert(InternalName.size() && "We should have an internal name here.");
+ // Push a rewrite for replacing the identifier name with the internal name.
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
+ Identifier.size(),
+ InternalName));
+ }
// Create the symbol reference.
- Identifier = LineBuf;
MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
@@ -1303,6 +1362,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
std::unique_ptr<X86Operand>
X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
unsigned Size) {
+ MCAsmParser &Parser = getParser();
assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
const AsmToken &Tok = Parser.getTok(); // Eat colon.
if (Tok.isNot(AsmToken::Colon))
@@ -1354,6 +1414,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
SMLoc Start,
unsigned Size) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc End;
@@ -1413,6 +1474,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
/// Parse the '.' operator.
bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
const MCExpr *&NewDisp) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
int64_t OrigDispVal, DotDispVal;
@@ -1457,6 +1519,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
/// Parse the 'offset' operator. This operator is used to specify the
/// location rather than the content of a variable.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc OffsetOfLoc = Tok.getLoc();
Parser.Lex(); // Eat offset.
@@ -1494,6 +1557,7 @@ enum IntelOperatorKind {
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc TypeLoc = Tok.getLoc();
Parser.Lex(); // Eat operator.
@@ -1527,6 +1591,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
}
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
+ MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
@@ -1547,7 +1612,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
if (Size) {
Parser.Lex(); // Eat operand size (e.g., byte, word).
if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
- return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
+ return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
Parser.Lex(); // Eat ptr.
}
Start = Tok.getLoc();
@@ -1609,6 +1674,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
}
std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
+ MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
default:
// Parse a memory operand with no segment register.
@@ -1629,6 +1695,9 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
+ if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
+ return ErrorOperand(Start, "invalid segment register");
+
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
@@ -1646,6 +1715,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op) {
+ MCAsmParser &Parser = getParser();
if(STI.getFeatureBits() & X86::FeatureAVX512) {
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
@@ -1664,6 +1734,8 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
// Recognize only reasonable suffixes.
const char *BroadcastPrimitive =
StringSwitch<const char*>(getLexer().getTok().getIdentifier())
+ .Case("to2", "{1to2}")
+ .Case("to4", "{1to4}")
.Case("to8", "{1to8}")
.Case("to16", "{1to16}")
.Default(nullptr);
@@ -1715,6 +1787,7 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
SMLoc MemStart) {
+ MCAsmParser &Parser = getParser();
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
@@ -1872,12 +1945,15 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
return nullptr;
}
- return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
- MemStart, MemEnd);
+ if (SegReg || BaseReg || IndexReg)
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
+ MemStart, MemEnd);
+ return X86Operand::CreateMem(Disp, MemStart, MemEnd);
}
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
InstInfo = &Info;
StringRef PatchedName = Name;
@@ -2275,51 +2351,79 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
}
}
-static const char *getSubtargetFeatureName(unsigned Val);
+static const char *getSubtargetFeatureName(uint64_t Val);
void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
MCStreamer &Out) {
- Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
- Out);
- Out.EmitInstruction(Inst, STI);
+ Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
+ MII, Out);
}
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
- assert(!Operands.empty() && "Unexpect empty operand list!");
- X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
- assert(Op.isToken() && "Leading operand should always be a mnemonic!");
- ArrayRef<SMRange> EmptyRanges = None;
+ if (isParsingIntelSyntax())
+ return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
+ MatchingInlineAsm);
+ return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
+ MatchingInlineAsm);
+}
- // First, handle aliases that expand to multiple instructions.
+void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
+ OperandVector &Operands, MCStreamer &Out,
+ bool MatchingInlineAsm) {
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
- if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
- Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
- Op.getToken() == "finit" || Op.getToken() == "fsave" ||
- Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
+ const char *Repl = StringSwitch<const char *>(Op.getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(nullptr);
+ if (Repl) {
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
-
- const char *Repl = StringSwitch<const char *>(Op.getToken())
- .Case("finit", "fninit")
- .Case("fsave", "fnsave")
- .Case("fstcw", "fnstcw")
- .Case("fstcww", "fnstcw")
- .Case("fstenv", "fnstenv")
- .Case("fstsw", "fnstsw")
- .Case("fstsww", "fnstsw")
- .Case("fclex", "fnclex")
- .Default(nullptr);
- assert(Repl && "Unknown wait-prefixed instruction");
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
}
+}
+
+bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+ bool MatchingInlineAsm) {
+ assert(ErrorInfo && "Unknown missing feature!");
+ ArrayRef<SMRange> EmptyRanges = None;
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "instruction requires:";
+ uint64_t Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask)
+ OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
+ Mask <<= 1;
+ }
+ return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
+}
+
+bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ assert(!Operands.empty() && "Unexpect empty operand list!");
+ X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
+ ArrayRef<SMRange> EmptyRanges = None;
+
+ // First, handle aliases that expand to multiple instructions.
+ MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
bool WasOriginallyInvalidOperand = false;
MCInst Inst;
@@ -2342,21 +2446,8 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
EmitInstruction(Inst, Operands, Out);
Opcode = Inst.getOpcode();
return false;
- case Match_MissingFeature: {
- assert(ErrorInfo && "Unknown missing feature!");
- // Special case the error message for the very common case where only
- // a single subtarget feature is missing.
- std::string Msg = "instruction requires:";
- unsigned Mask = 1;
- for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
- if (ErrorInfo & Mask) {
- Msg += " ";
- Msg += getSubtargetFeatureName(ErrorInfo & Mask);
- }
- Mask <<= 1;
- }
- return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
- }
+ case Match_MissingFeature:
+ return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -2385,34 +2476,18 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
// Check for the various suffix matches.
- Tmp[Base.size()] = Suffixes[0];
- unsigned ErrorInfoIgnore;
- unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
- unsigned Match1, Match2, Match3, Match4;
-
- Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match1 == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
- Tmp[Base.size()] = Suffixes[1];
- Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match2 == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
- Tmp[Base.size()] = Suffixes[2];
- Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match3 == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
- Tmp[Base.size()] = Suffixes[3];
- Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
- // If this returned as a missing feature failure, remember that.
- if (Match4 == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
+ uint64_t ErrorInfoIgnore;
+ uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
+ unsigned Match[4];
+
+ for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
+ Tmp.back() = Suffixes[I];
+ Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ MatchingInlineAsm, isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match[I] == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
+ }
// Restore the old token.
Op.setTokenValue(Base);
@@ -2421,8 +2496,7 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
unsigned NumSuccessfulMatches =
- (Match1 == Match_Success) + (Match2 == Match_Success) +
- (Match3 == Match_Success) + (Match4 == Match_Success);
+ std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
@@ -2438,10 +2512,9 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumSuccessfulMatches > 1) {
char MatchChars[4];
unsigned NumMatches = 0;
- if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
- if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
- if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
- if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
+ for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
+ if (Match[I] == Match_Success)
+ MatchChars[NumMatches++] = Suffixes[I];
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
@@ -2462,8 +2535,7 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// If all of the instructions reported an invalid mnemonic, then the original
// mnemonic was invalid.
- if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
- (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+ if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
if (!WasOriginallyInvalidOperand) {
ArrayRef<SMRange> Ranges =
MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
@@ -2472,7 +2544,7 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
// Recover location info for the operand if we know which was the problem.
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction",
EmptyRanges, MatchingInlineAsm);
@@ -2491,27 +2563,19 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// If one instruction matched with a missing feature, report this as a
// missing feature.
- if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
- (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
- std::string Msg = "instruction requires:";
- unsigned Mask = 1;
- for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
- if (ErrorInfoMissingFeature & Mask) {
- Msg += " ";
- Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
- }
- Mask <<= 1;
- }
- return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_MissingFeature) == 1) {
+ ErrorInfo = ErrorInfoMissingFeature;
+ return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+ MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
// operand failure.
- if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
- (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
- Error(IDLoc, "invalid operand for instruction", EmptyRanges,
- MatchingInlineAsm);
- return true;
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_InvalidOperand) == 1) {
+ return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ MatchingInlineAsm);
}
// If all of these were an outright failure, report it in a useless way.
@@ -2520,22 +2584,173 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
+bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ assert(!Operands.empty() && "Unexpect empty operand list!");
+ X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
+ StringRef Mnemonic = Op.getToken();
+ ArrayRef<SMRange> EmptyRanges = None;
+
+ // First, handle aliases that expand to multiple instructions.
+ MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
+
+ MCInst Inst;
+
+ // Find one unsized memory operand, if present.
+ X86Operand *UnsizedMemOp = nullptr;
+ for (const auto &Op : Operands) {
+ X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
+ if (X86Op->isMemUnsized())
+ UnsizedMemOp = X86Op;
+ }
+
+ // Allow some instructions to have implicitly pointer-sized operands. This is
+ // compatible with gas.
+ if (UnsizedMemOp) {
+ static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
+ for (const char *Instr : PtrSizedInstrs) {
+ if (Mnemonic == Instr) {
+ UnsizedMemOp->Mem.Size = getPointerSize();
+ break;
+ }
+ }
+ }
+
+ // If an unsized memory operand is present, try to match with each memory
+ // operand size. In Intel assembly, the size is not part of the instruction
+ // mnemonic.
+ SmallVector<unsigned, 8> Match;
+ uint64_t ErrorInfoMissingFeature = 0;
+ if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
+ static const unsigned MopSizes[] = {8, 16, 32, 64, 80};
+ for (unsigned Size : MopSizes) {
+ UnsizedMemOp->Mem.Size = Size;
+ uint64_t ErrorInfoIgnore;
+ unsigned LastOpcode = Inst.getOpcode();
+ unsigned M =
+ MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+ MatchingInlineAsm, isParsingIntelSyntax());
+ if (Match.empty() || LastOpcode != Inst.getOpcode())
+ Match.push_back(M);
+
+ // If this returned as a missing feature failure, remember that.
+ if (Match.back() == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
+ }
+
+ // Restore the size of the unsized memory operand if we modified it.
+ if (UnsizedMemOp)
+ UnsizedMemOp->Mem.Size = 0;
+ }
+
+ // If we haven't matched anything yet, this is not a basic integer or FPU
+  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
+ // matching with the unsized operand.
+ if (Match.empty()) {
+ Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm,
+ isParsingIntelSyntax()));
+ // If this returned as a missing feature failure, remember that.
+ if (Match.back() == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfo;
+ }
+
+ // Restore the size of the unsized memory operand if we modified it.
+ if (UnsizedMemOp)
+ UnsizedMemOp->Mem.Size = 0;
+
+ // If it's a bad mnemonic, all results will be the same.
+ if (Match.back() == Match_MnemonicFail) {
+ ArrayRef<SMRange> Ranges =
+ MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
+ return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
+ Ranges, MatchingInlineAsm);
+ }
+
+ // If exactly one matched, then we treat that as a successful match (and the
+ // instruction will already have been filled in correctly, since the failing
+ // matches won't have modified it).
+ unsigned NumSuccessfulMatches =
+ std::count(std::begin(Match), std::end(Match), Match_Success);
+ if (NumSuccessfulMatches == 1) {
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the individual
+ // transformations can chain off each other.
+ if (!MatchingInlineAsm)
+ while (processInstruction(Inst, Operands))
+ ;
+ Inst.setLoc(IDLoc);
+ if (!MatchingInlineAsm)
+ EmitInstruction(Inst, Operands, Out);
+ Opcode = Inst.getOpcode();
+ return false;
+ } else if (NumSuccessfulMatches > 1) {
+ assert(UnsizedMemOp &&
+ "multiple matches only possible with unsized memory operands");
+ ArrayRef<SMRange> Ranges =
+ MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
+ return Error(UnsizedMemOp->getStartLoc(),
+ "ambiguous operand size for instruction '" + Mnemonic + "\'",
+ Ranges, MatchingInlineAsm);
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_MissingFeature) == 1) {
+ ErrorInfo = ErrorInfoMissingFeature;
+ return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+ MatchingInlineAsm);
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_InvalidOperand) == 1) {
+ return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ MatchingInlineAsm);
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
+ MatchingInlineAsm);
+}
+
+bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
+ return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
+}
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
+ MCAsmParser &Parser = getParser();
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return ParseDirectiveWord(2, DirectiveID.getLoc());
else if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (Parser.getTok().getString() == "prefix")
+ Parser.Lex();
+ else if (Parser.getTok().getString() == "noprefix")
+ return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
+ "supported: registers must have a "
+ "'%' prefix in .att_syntax");
+ }
getParser().setAssemblerDialect(0);
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- // FIXME: Handle noprefix
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();
+ else if (Parser.getTok().getString() == "prefix")
+ return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
+ "supported: registers must not have "
+ "a '%' prefix in .intel_syntax");
}
return false;
}
@@ -2545,6 +2760,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -2572,6 +2788,7 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
/// ParseDirectiveCode
/// ::= .code16 | .code32 | .code64
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
+ MCAsmParser &Parser = getParser();
if (IDVal == ".code16") {
Parser.Lex();
if (!is16BitMode()) {
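A side effect of the parser changes above is that MatchAndEmitInstruction's ErrorInfo parameter widens from unsigned to uint64_t, so the per-bit missing-feature walk can cover every subtarget feature bit. The standalone sketch below shows the shape of that decode loop; it is illustrative only, and FeatureName() stands in for the TableGen-generated getSubtargetFeatureName().

#include <cstdint>
#include <string>

// Stand-in for the generated getSubtargetFeatureName(): maps a single feature
// bit to its printable name.
static const char *FeatureName(uint64_t /*Bit*/) { return "<feature>"; }

// Expand a 64-bit missing-feature mask into the "instruction requires: ..."
// diagnostic, one set bit at a time.
static std::string MissingFeatureMessage(uint64_t ErrorInfo) {
  std::string Msg = "instruction requires:";
  for (uint64_t Mask = 1; Mask != 0; Mask <<= 1)
    if (ErrorInfo & Mask) {
      Msg += ' ';
      Msg += FeatureName(ErrorInfo & Mask);
    }
  return Msg;
}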
diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index ef1565f..72aeeaa 100644
--- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_ASM_PARSER_COMMON_H
-#define X86_ASM_PARSER_COMMON_H
+#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMPARSERCOMMON_H
+#define LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMPARSERCOMMON_H
namespace llvm {
@@ -24,10 +24,6 @@ inline bool isImmSExti32i8Value(uint64_t Value) {
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
-inline bool isImmZExtu32u8Value(uint64_t Value) {
- return (Value <= 0x00000000000000FFULL);
-}
-
inline bool isImmSExti64i8Value(uint64_t Value) {
return (( Value <= 0x000000000000007FULL)||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
@@ -40,4 +36,4 @@ inline bool isImmSExti64i32Value(uint64_t Value) {
} // End of namespace llvm
-#endif // X86_ASM_PARSER_COMMON_H
+#endif
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 1bbfc11..e0fab8d 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_OPERAND_H
-#define X86_OPERAND_H
+#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
+#define LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
#include "X86AsmParserCommon.h"
#include "llvm/MC/MCExpr.h"
@@ -153,20 +153,6 @@ struct X86Operand : public MCParsedAsmOperand {
// extension.
return isImmSExti32i8Value(CE->getValue());
}
- bool isImmZExtu32u8() const {
- if (!isImm())
- return false;
-
- // If this isn't a constant expr, just assume it fits and let relaxation
- // handle it.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE)
- return true;
-
- // Otherwise, check the value is in a range that makes sense for this
- // extension.
- return isImmZExtu32u8Value(CE->getValue());
- }
bool isImmSExti64i8() const {
if (!isImm())
return false;
@@ -205,6 +191,9 @@ struct X86Operand : public MCParsedAsmOperand {
}
bool isMem() const override { return Kind == Memory; }
+ bool isMemUnsized() const {
+ return Kind == Memory && Mem.Size == 0;
+ }
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
}
@@ -485,4 +474,4 @@ struct X86Operand : public MCParsedAsmOperand {
} // End of namespace llvm
-#endif // X86_OPERAND
+#endif
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index a09767e..1083fad 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -14,15 +14,12 @@ add_public_tablegen_target(X86CommonTableGen)
set(sources
X86AsmPrinter.cpp
- X86AtomicExpandPass.cpp
- X86CodeEmitter.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
X86FrameLowering.cpp
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
X86InstrInfo.cpp
- X86JITInfo.cpp
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
X86PadShortFunction.cpp
diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt
index cac7adf..e003fc9 100644
--- a/lib/Target/X86/Disassembler/LLVMBuild.txt
+++ b/lib/Target/X86/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86Disassembler
parent = X86
-required_libraries = MC Support X86Info
+required_libraries = MCDisassembler Support X86Info
add_to_library_groups = X86
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index c366725..5e8c2d6 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -23,7 +23,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -97,16 +96,26 @@ X86GenericDisassembler::X86GenericDisassembler(
}
}
-/// regionReader - a callback function that wraps the readByte method from
-/// MemoryObject.
+struct Region {
+ ArrayRef<uint8_t> Bytes;
+ uint64_t Base;
+ Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {}
+};
+
+/// A callback function that wraps the readByte method from Region.
///
-/// @param arg - The generic callback parameter. In this case, this should
-/// be a pointer to a MemoryObject.
-/// @param byte - A pointer to the byte to be read.
-/// @param address - The address to be read.
-static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
- const MemoryObject* region = static_cast<const MemoryObject*>(arg);
- return region->readByte(address, byte);
+/// @param Arg - The generic callback parameter. In this case, this should
+/// be a pointer to a Region.
+/// @param Byte - A pointer to the byte to be read.
+/// @param Address - The address to be read.
+static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) {
+ auto *R = static_cast<const Region *>(Arg);
+ ArrayRef<uint8_t> Bytes = R->Bytes;
+ unsigned Index = Address - R->Base;
+ if (Bytes.size() <= Index)
+ return -1;
+ *Byte = Bytes[Index];
+ return 0;
}
/// logger - a callback function that wraps the operator<< method from
@@ -127,38 +136,29 @@ static void logger(void* arg, const char* log) {
// Public interface for the disassembler
//
-MCDisassembler::DecodeStatus
-X86GenericDisassembler::getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
- CommentStream = &cStream;
+MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
+ MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream, raw_ostream &CStream) const {
+ CommentStream = &CStream;
- InternalInstruction internalInstr;
+ InternalInstruction InternalInstr;
- dlog_t loggerFn = logger;
- if (&vStream == &nulls())
- loggerFn = nullptr; // Disable logging completely if it's going to nulls().
-
- int ret = decodeInstruction(&internalInstr,
- regionReader,
- (const void*)&region,
- loggerFn,
- (void*)&vStream,
- (const void*)MII.get(),
- address,
- fMode);
-
- if (ret) {
- size = internalInstr.readerCursor - address;
+ dlog_t LoggerFn = logger;
+ if (&VStream == &nulls())
+ LoggerFn = nullptr; // Disable logging completely if it's going to nulls().
+
+ Region R(Bytes, Address);
+
+ int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R,
+ LoggerFn, (void *)&VStream,
+ (const void *)MII.get(), Address, fMode);
+
+ if (Ret) {
+ Size = InternalInstr.readerCursor - Address;
return Fail;
- }
- else {
- size = internalInstr.length;
- return (!translateInstruction(instr, internalInstr, this)) ?
- Success : Fail;
+ } else {
+ Size = InternalInstr.length;
+ return (!translateInstruction(Instr, InternalInstr, this)) ? Success : Fail;
}
}
@@ -717,7 +717,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
return false;
case ENCODING_WRITEMASK:
return translateMaskRegister(mcInst, insn.writemask);
- case ENCODING_RM:
+ CASE_ENCODING_RM:
return translateRM(mcInst, operand, insn, Dis);
case ENCODING_CB:
case ENCODING_CW:
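
The Region/regionReader change above swaps the MemoryObject callback for a bounds-checked read out of an ArrayRef. A standalone sketch of the same pattern, using std::vector in place of ArrayRef; the names here are illustrative, not the patch's API:

#include <cstdint>
#include <cstdio>
#include <vector>

struct ByteRegion {
  const std::vector<uint8_t> &Bytes; // backing buffer
  uint64_t Base;                     // address of Bytes[0]
};

// Returns 0 and writes *Byte on success, or -1 when Address falls outside the
// region -- the contract the decoder expects from its byte-reader callback.
static int readByte(const void *Arg, uint8_t *Byte, uint64_t Address) {
  const auto *R = static_cast<const ByteRegion *>(Arg);
  uint64_t Index = Address - R->Base; // wraps to a huge value if Address < Base
  if (Index >= R->Bytes.size())
    return -1;
  *Byte = R->Bytes[Index];
  return 0;
}

int main() {
  std::vector<uint8_t> Bytes = {0x90, 0xC3}; // nop; ret
  ByteRegion R{Bytes, 0x1000};
  uint8_t B = 0;
  if (readByte(&R, &B, 0x1001) == 0)
    std::printf("byte at 0x1001: 0x%02X\n", B); // prints 0xC3
  return readByte(&R, &B, 0x2000) == -1 ? 0 : 1; // out of range -> -1
}
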
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 4dc7c29..d7f426b 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -71,8 +71,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86DISASSEMBLER_H
-#define X86DISASSEMBLER_H
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H
#include "X86DisassemblerDecoderCommon.h"
#include "llvm/MC/MCDisassembler.h"
@@ -87,21 +87,17 @@ class raw_ostream;
namespace X86Disassembler {
-/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
-/// All each platform class should have to do is subclass the constructor, and
-/// provide a different disassemblerMode value.
+/// Generic disassembler for all X86 platforms. All that each platform class
+/// should have to do is subclass the constructor, and provide a different
+/// disassemblerMode value.
class X86GenericDisassembler : public MCDisassembler {
std::unique_ptr<const MCInstrInfo> MII;
public:
- /// Constructor - Initializes the disassembler.
- ///
X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
std::unique_ptr<const MCInstrInfo> MII);
public:
-
- /// getInstruction - See MCDisassembler.
DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region, uint64_t address,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &vStream,
raw_ostream &cStream) const override;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 55587d4..98b3440 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1,4 +1,4 @@
-//===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===//
+//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,10 @@
//
//===----------------------------------------------------------------------===//
-#include <stdarg.h> /* for va_*() */
-#include <stdio.h> /* for vsnprintf() */
-#include <stdlib.h> /* for exit() */
-#include <string.h> /* for memset() */
+#include <cstdarg> /* for va_*() */
+#include <cstdio> /* for vsnprintf() */
+#include <cstdlib> /* for exit() */
+#include <cstring> /* for memset() */
#include "X86DisassemblerDecoder.h"
@@ -472,8 +472,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
insn->vectorExtensionType = TYPE_EVEX;
- }
- else {
+ } else {
unconsumeByte(insn); /* unconsume byte1 */
unconsumeByte(insn); /* unconsume byte */
insn->necessaryPrefixLocation = insn->readerCursor - 2;
@@ -504,8 +503,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
}
- }
- else if (byte == 0xc4) {
+ } else if (byte == 0xc4) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
@@ -516,8 +514,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vectorExtensionType = TYPE_VEX_3B;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
- }
- else {
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
@@ -541,8 +538,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
insn->vectorExtensionPrefix[2]);
}
- }
- else if (byte == 0xc5) {
+ } else if (byte == 0xc5) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
@@ -552,8 +548,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vectorExtensionType = TYPE_VEX_2B;
- }
- else {
+ } else {
unconsumeByte(insn);
}
@@ -566,8 +561,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
| (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
}
- switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1]))
- {
+ switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
default:
break;
case VEX_PREFIX_66:
@@ -579,8 +573,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[0],
insn->vectorExtensionPrefix[1]);
}
- }
- else if (byte == 0x8f) {
+ } else if (byte == 0x8f) {
uint8_t byte1;
if (lookAtByte(insn, &byte1)) {
@@ -591,8 +584,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
insn->vectorExtensionType = TYPE_XOP;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
- }
- else {
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
@@ -612,8 +604,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
| (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
}
- switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2]))
- {
+ switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
default:
break;
case VEX_PREFIX_66:
@@ -625,8 +616,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
insn->vectorExtensionPrefix[2]);
}
- }
- else {
+ } else {
if (insn->mode == MODE_64BIT) {
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
@@ -698,8 +688,7 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = ONEBYTE;
- if (insn->vectorExtensionType == TYPE_EVEX)
- {
+ if (insn->vectorExtensionType == TYPE_EVEX) {
switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
default:
dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
@@ -715,8 +704,7 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
}
- }
- else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
@@ -732,12 +720,10 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
}
- }
- else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
- }
- else if (insn->vectorExtensionType == TYPE_XOP) {
+ } else if (insn->vectorExtensionType == TYPE_XOP) {
switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
@@ -866,6 +852,22 @@ static bool is16BitEquivalent(const char* orig, const char* equiv) {
}
/*
+ * is64Bit - Determines whether this instruction is a 64-bit instruction.
+ *
+ * @param name - The name of the instruction to check
+ */
+static bool is64Bit(const char* name) {
+ off_t i;
+
+ for (i = 0;; ++i) {
+ if (name[i] == '\0')
+ return false;
+ if (name[i] == '6' && name[i+1] == '4')
+ return true;
+ }
+}
+
+/*
* getID - Determines the ID of an instruction, consuming the ModR/M byte as
* appropriate for extended and escape opcodes. Determines the attributes and
* context for the instruction before doing so.
@@ -911,8 +913,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
attrMask |= ATTR_EVEXL;
if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
attrMask |= ATTR_EVEXL2;
- }
- else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+ } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
@@ -927,8 +928,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
attrMask |= ATTR_VEXL;
- }
- else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+ } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
@@ -943,8 +943,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
attrMask |= ATTR_VEXL;
- }
- else if (insn->vectorExtensionType == TYPE_XOP) {
+ } else if (insn->vectorExtensionType == TYPE_XOP) {
switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
@@ -959,12 +958,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
attrMask |= ATTR_VEXL;
- }
- else {
+ } else {
return -1;
}
- }
- else {
+ } else {
if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
@@ -1002,6 +999,37 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
/* The following clauses compensate for limitations of the tables. */
+ if (insn->mode != MODE_64BIT &&
+ insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
+ /*
+ * The tables can't distinguish between cases where the W-bit is used to
+ * select register size and cases where it's a required part of the opcode.
+ */
+ if ((insn->vectorExtensionType == TYPE_EVEX &&
+ wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
+ (insn->vectorExtensionType == TYPE_VEX_3B &&
+ wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
+ (insn->vectorExtensionType == TYPE_XOP &&
+ wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
+
+ uint16_t instructionIDWithREXW;
+ if (getIDWithAttrMask(&instructionIDWithREXW,
+ insn, attrMask | ATTR_REXW)) {
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(instructionID);
+ return 0;
+ }
+
+ const char *SpecName = GetInstrName(instructionIDWithREXW, miiArg);
+ // If this is not a 64-bit instruction, switch the opcode.
+ if (!is64Bit(SpecName)) {
+ insn->instructionID = instructionIDWithREXW;
+ insn->spec = specifierForUID(instructionIDWithREXW);
+ return 0;
+ }
+ }
+ }
+
if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) &&
!(attrMask & ATTR_OPSIZE)) {
/*
@@ -1488,7 +1516,7 @@ static int fixupReg(struct InternalInstruction *insn,
if (!valid)
return -1;
break;
- case ENCODING_RM:
+ CASE_ENCODING_RM:
if (insn->eaBase >= insn->eaRegBase) {
insn->eaBase = (EABase)fixupRMValue(insn,
(OperandType)op->type,
@@ -1681,11 +1709,14 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_DI:
break;
case ENCODING_REG:
- case ENCODING_RM:
+ CASE_ENCODING_RM:
if (readModRM(insn))
return -1;
if (fixupReg(insn, &Op))
return -1;
+ // Apply the AVX512 compressed displacement scaling factor.
+ if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
+ insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
break;
case ENCODING_CB:
case ENCODING_CW:
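
The readOperands hunk above scales an 8-bit displacement by the operand's compressed-displacement factor using the distance of the encoding from ENCODING_RM, so the enum order introduced in X86DisassemblerDecoderCommon.h (RM, RM_CD2, RM_CD4, ... RM_CD64) is load-bearing: a distance of N means a scale of 2^N. A standalone model of that arithmetic; the enum values are invented but mirror the relative ordering the patch relies on:

#include <cassert>
#include <cstdint>

enum Encoding { RM = 0, RM_CD2, RM_CD4, RM_CD8, RM_CD16, RM_CD32, RM_CD64 };

// disp8 * 2^(encoding - RM): RM itself scales by 1, RM_CD2 doubles,
// RM_CD64 multiplies by 64 (the EVEX disp8*N rule).
static int64_t scaleDisp8(int64_t Disp, Encoding E) {
  return Disp * (int64_t(1) << (E - RM));
}

int main() {
  assert(scaleDisp8(4, RM_CD64) == 256); // encoded disp8 of 4 -> byte offset 256
  assert(scaleDisp8(-1, RM_CD8) == -8);
  assert(scaleDisp8(7, RM) == 7);        // plain ENCODING_RM is unscaled
  return 0;
}
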
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 8c45402..457b382 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86DISASSEMBLERDECODER_H
-#define X86DISASSEMBLERDECODER_H
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
#include "X86DisassemblerDecoderCommon.h"
#include "llvm/ADT/ArrayRef.h"
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index f59e0b6..bec4f0e 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86DISASSEMBLERDECODERCOMMON_H
-#define X86DISASSEMBLERDECODERCOMMON_H
+#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODERCOMMON_H
+#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODERCOMMON_H
#include "llvm/Support/DataTypes.h"
@@ -265,7 +265,7 @@ enum attributeBits {
ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
- ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
#define ENUM_ENTRY(n, r, d) n,
enum InstructionContext {
@@ -325,11 +325,26 @@ enum ModRMDecisionType {
};
#undef ENUM_ENTRY
+#define CASE_ENCODING_RM \
+ case ENCODING_RM: \
+ case ENCODING_RM_CD2: \
+ case ENCODING_RM_CD4: \
+ case ENCODING_RM_CD8: \
+ case ENCODING_RM_CD16: \
+ case ENCODING_RM_CD32: \
+ case ENCODING_RM_CD64
+
// Physical encodings of instruction operands.
#define ENCODINGS \
ENUM_ENTRY(ENCODING_NONE, "") \
ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_RM_CD2, "R/M operand with CDisp scaling of 2") \
+ ENUM_ENTRY(ENCODING_RM_CD4, "R/M operand with CDisp scaling of 4") \
+ ENUM_ENTRY(ENCODING_RM_CD8, "R/M operand with CDisp scaling of 8") \
+ ENUM_ENTRY(ENCODING_RM_CD16,"R/M operand with CDisp scaling of 16") \
+ ENUM_ENTRY(ENCODING_RM_CD32,"R/M operand with CDisp scaling of 32") \
+ ENUM_ENTRY(ENCODING_RM_CD64,"R/M operand with CDisp scaling of 64") \
ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
ENUM_ENTRY(ENCODING_WRITEMASK, "Register operand in EVEX.aaa byte.") \
ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
@@ -438,8 +453,12 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_XMM256, "32-byte") \
ENUM_ENTRY(TYPE_XMM512, "64-byte") \
ENUM_ENTRY(TYPE_VK1, "1-bit") \
+ ENUM_ENTRY(TYPE_VK2, "2-bit") \
+ ENUM_ENTRY(TYPE_VK4, "4-bit") \
ENUM_ENTRY(TYPE_VK8, "8-bit") \
ENUM_ENTRY(TYPE_VK16, "16-bit") \
+ ENUM_ENTRY(TYPE_VK32, "32-bit") \
+ ENUM_ENTRY(TYPE_VK64, "64-bit") \
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
@@ -481,7 +500,7 @@ enum ModifierType {
};
#undef ENUM_ENTRY
-static const unsigned X86_MAX_OPERANDS = 5;
+static const unsigned X86_MAX_OPERANDS = 6;
/// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
/// are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index b45b118..b72730c 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -45,19 +45,31 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ HasCustomInstComment =
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
+
if (TSFlags & X86II::LOCK)
OS << "\tlock\n";
+ // Output CALLpcrel32 as "callq" in 64-bit mode.
+ // In Intel annotation it's always emitted as "call".
+ //
+ // TODO: Probably this hack should be redesigned via InstAlias in
+ // InstrInfo.td as soon as Requires clause is supported properly
+ // for InstAlias.
+ if (MI->getOpcode() == X86::CALLpcrel32 &&
+ (getAvailableFeatures() & X86::Mode64Bit) != 0) {
+ OS << "\tcallq\t";
+ printPCRelImm(MI, 0, OS);
+ }
// Try to print any aliases first.
- if (!printAliasInstr(MI, OS))
+ else if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
// Next always print the annotation.
printAnnotation(OS, Annot);
-
- // If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream)
- EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
@@ -170,7 +182,11 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
<< '$' << formatImm((int64_t)Op.getImm())
<< markup(">");
- if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
+ // If there are no instruction-specific comments, add a comment clarifying
+ // the hex value of the immediate operand when it isn't in the range
+ // [-256,255].
+ if (CommentStream && !HasCustomInstComment &&
+ (Op.getImm() > 255 || Op.getImm() < -256))
*CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm());
} else {
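
The printer change above records whether EmitAnyX86InstComments produced anything, so the generic "imm = 0x..." comment is only added when no instruction-specific comment exists. A minimal standalone model of that gating; the function names are placeholders, not the printer's API:

#include <cstdint>
#include <cstdio>

// Stand-in comment emitter: returns true if it wrote something.
static bool emitCustomComment(bool HasShuffle, FILE *Out) {
  if (!HasShuffle)
    return false;
  std::fprintf(Out, "# xmm0 = xmm1[0,1],xmm2[2,3]\n");
  return true;
}

static void printImm(int64_t Imm, bool HasCustomComment, FILE *Out) {
  std::fprintf(Out, "$%lld", (long long)Imm);
  // Only clarify the hex value when nothing more specific was printed and the
  // immediate falls outside [-256, 255] -- the same policy as the hunk above.
  if (!HasCustomComment && (Imm > 255 || Imm < -256))
    std::fprintf(Out, " # imm = 0x%llX", (unsigned long long)Imm);
}

int main() {
  bool HasCustom = emitCustomComment(/*HasShuffle=*/false, stdout);
  printImm(4096, HasCustom, stdout); // prints: $4096 # imm = 0x1000
  std::fprintf(stdout, "\n");
  return 0;
}
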
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 531183b..41be14b 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_ATT_INST_PRINTER_H
-#define X86_ATT_INST_PRINTER_H
+#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
+#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
@@ -23,8 +24,11 @@ class MCOperand;
class X86ATTInstPrinter final : public MCInstPrinter {
public:
X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI)
+ : MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+ }
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
@@ -129,6 +133,9 @@ public:
void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemOffset(MI, OpNo, O);
}
+
+private:
+ bool HasCustomInstComment;
};
}
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index baf6507..a8f15e6 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -28,13 +28,116 @@ using namespace llvm;
/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
/// newline terminated strings to the specified string if desired. This
/// information is shown in disassembly dumps when verbose assembly is enabled.
-void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned)) {
// If this is a shuffle operation, the switch should fill in this state.
SmallVector<int, 8> ShuffleMask;
const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr;
switch (MI->getOpcode()) {
+ default:
+ // Not an instruction for which we can decode comments.
+ return false;
+
+ case X86::BLENDPDrri:
+ case X86::VBLENDPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::BLENDPDrmi:
+ case X86::VBLENDPDrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v2f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VBLENDPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VBLENDPDYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::BLENDPSrri:
+ case X86::VBLENDPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::BLENDPSrmi:
+ case X86::VBLENDPSrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VBLENDPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VBLENDPSYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::PBLENDWrri:
+ case X86::VPBLENDWrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PBLENDWrmi:
+ case X86::VPBLENDWrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPBLENDWYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDWYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::VPBLENDDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDDrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::VPBLENDDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDDYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
case X86::INSERTPSrr:
case X86::VINSERTPSrr:
DestName = getRegName(MI->getOperand(0).getReg());
@@ -60,6 +163,80 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::MOVSLDUPrr:
+ case X86::VMOVSLDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSLDUPrm:
+ case X86::VMOVSLDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask);
+ break;
+
+ case X86::VMOVSHDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVSHDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask);
+ break;
+
+ case X86::VMOVSLDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVSLDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask);
+ break;
+
+ case X86::MOVSHDUPrr:
+ case X86::VMOVSHDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSHDUPrm:
+ case X86::VMOVSHDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
+ break;
+
+ case X86::PSLLDQri:
+ case X86::VPSLLDQri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSLLDQMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::VPSLLDQYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSLLDQMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSRLDQri:
+ case X86::VPSRLDQri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSRLDQMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::VPSRLDQYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSRLDQMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
case X86::PALIGNR128rr:
case X86::VPALIGNR128rr:
Src1Name = getRegName(MI->getOperand(2).getReg());
@@ -489,54 +666,59 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
}
+ // The only comments we decode are shuffles, so give up if we were unable to
+ // decode a shuffle mask.
+ if (ShuffleMask.empty())
+ return false;
- // If this was a shuffle operation, print the shuffle mask.
- if (!ShuffleMask.empty()) {
- if (!DestName) DestName = Src1Name;
- OS << (DestName ? DestName : "mem") << " = ";
+ if (!DestName) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
- // If the two sources are the same, canonicalize the input elements to be
- // from the first src so that we get larger element spans.
- if (Src1Name == Src2Name) {
- for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
- if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
- ShuffleMask[i] >= (int)e) // From second mask.
- ShuffleMask[i] -= e;
- }
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= (int)e) // From second mask.
+ ShuffleMask[i] -= e;
}
+ }
- // The shuffle mask specifies which elements of the src1/src2 fill in the
- // destination, with a few sentinel values. Loop through and print them
- // out.
- for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
- if (i != 0)
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&
+ (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
OS << ',';
- if (ShuffleMask[i] == SM_SentinelZero) {
- OS << "zero";
- continue;
- }
-
- // Otherwise, it must come from src1 or src2. Print the span of elements
- // that comes from this src.
- bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
- const char *SrcName = isSrc1 ? Src1Name : Src2Name;
- OS << (SrcName ? SrcName : "mem") << '[';
- bool IsFirst = true;
- while (i != e &&
- (int)ShuffleMask[i] >= 0 &&
- (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
- if (!IsFirst)
- OS << ',';
- else
- IsFirst = false;
+ else
+ IsFirst = false;
+ if (ShuffleMask[i] == SM_SentinelUndef)
+ OS << "u";
+ else
OS << ShuffleMask[i] % ShuffleMask.size();
- ++i;
- }
- OS << ']';
- --i; // For loop increments element #.
+ ++i;
}
- //MI->print(OS, 0);
- OS << "\n";
+ OS << ']';
+ --i; // For loop increments element #.
}
+ //MI->print(OS, 0);
+ OS << "\n";
+ // We successfully added a comment to this instruction.
+ return true;
}
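
The new BLEND cases above feed the instruction's immediate into DecodeBLENDMask and then print the resulting element-by-element shuffle comment. A standalone sketch of the blend decode itself, under the usual convention that mask indices at or above the element count select from the second source; this is a model of the idea, not the Utils/X86ShuffleDecode implementation:

#include <cstdio>
#include <vector>

// Bit i of Imm chooses element i: 0 -> source 1 (index i),
// 1 -> source 2 (index NumElts + i).
static std::vector<int> decodeBlendMask(unsigned NumElts, unsigned Imm) {
  std::vector<int> Mask;
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back((Imm & (1u << i)) ? int(NumElts + i) : int(i));
  return Mask;
}

int main() {
  // blendps $5, %xmm2, %xmm1 would be annotated roughly as
  // xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
  for (int Elt : decodeBlendMask(4, 5))
    std::printf("%d ", Elt); // prints: 4 1 6 3
  std::printf("\n");
  return 0;
}
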
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h
index 13fdf9a..687581b 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.h
+++ b/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_INST_COMMENTS_H
-#define X86_INST_COMMENTS_H
+#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
+#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
namespace llvm {
class MCInst;
class raw_ostream;
- void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ bool EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned));
}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 4d9b481..d082f0b 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_INTEL_INST_PRINTER_H
-#define X86_INTEL_INST_PRINTER_H
+#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
+#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
index 146d111..b9fdc9c 100644
--- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86Desc
parent = X86
-required_libraries = MC Object Support X86AsmPrinter X86Info
+required_libraries = MC MCDisassembler Object Support X86AsmPrinter X86Info
add_to_library_groups = X86
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 23bca0d..befa6c2 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -11,7 +11,6 @@
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
@@ -437,10 +436,30 @@ class DarwinX86AsmBackend : public X86AsmBackend {
bool Is64Bit;
unsigned OffsetSize; ///< Offset of a "push" instruction.
- unsigned PushInstrSize; ///< Size of a "push" instruction.
unsigned MoveInstrSize; ///< Size of a "move" instruction.
- unsigned StackDivide; ///< Amount to adjust stack stize by.
+ unsigned StackDivide; ///< Amount to adjust stack size by.
protected:
+ /// \brief Size of a "push" instruction for the given register.
+ unsigned PushInstrSize(unsigned Reg) const {
+ switch (Reg) {
+ case X86::EBX:
+ case X86::ECX:
+ case X86::EDX:
+ case X86::EDI:
+ case X86::ESI:
+ case X86::EBP:
+ case X86::RBX:
+ case X86::RBP:
+ return 1;
+ case X86::R12:
+ case X86::R13:
+ case X86::R14:
+ case X86::R15:
+ return 2;
+ }
+ return 1;
+ }
+
/// \brief Implementation of algorithm to generate the compact unwind encoding
/// for the CFI instructions.
uint32_t
@@ -530,7 +549,7 @@ protected:
unsigned Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
SavedRegs[SavedRegIdx++] = Reg;
StackAdjust += OffsetSize;
- InstrOffset += PushInstrSize;
+ InstrOffset += PushInstrSize(Reg);
break;
}
}
@@ -724,7 +743,6 @@ public:
OffsetSize = Is64Bit ? 8 : 4;
MoveInstrSize = Is64Bit ? 3 : 2;
StackDivide = Is64Bit ? 8 : 4;
- PushInstrSize = 1;
}
};
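
PushInstrSize becomes register-dependent above because pushes of R12-R15 need a REX.B prefix and therefore encode in two bytes instead of one, and the compact-unwind offset accounting has to track the real instruction sizes. A standalone model of that accounting, with registers as plain strings purely for illustration:

#include <cstdio>
#include <string>
#include <vector>

// 'push %r12'..'push %r15' carry a REX.B prefix: 2 bytes. The other
// callee-saved pushes used by the prologue are a single opcode byte.
static unsigned pushSize(const std::string &Reg) {
  return (Reg == "r12" || Reg == "r13" || Reg == "r14" || Reg == "r15") ? 2 : 1;
}

int main() {
  std::vector<std::string> Prologue = {"rbp", "r15", "r14", "rbx"};
  unsigned InstrOffset = 0;
  for (const auto &Reg : Prologue)
    InstrOffset += pushSize(Reg); // 1 + 2 + 2 + 1 = 6 bytes of pushes
  std::printf("prologue push bytes: %u\n", InstrOffset);
  return 0;
}
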
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 6aeb1f2..365cf0c 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86BASEINFO_H
-#define X86BASEINFO_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86BASEINFO_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86BASEINFO_H
#include "X86MCTargetDesc.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -216,7 +216,7 @@ namespace X86II {
MO_SECREL
};
- enum {
+ enum : uint64_t {
//===------------------------------------------------------------------===//
// Instruction encodings. These are the standard/most common forms for X86
// instructions.
@@ -303,17 +303,18 @@ namespace X86II {
//// MRM_XX - A mod/rm byte of exactly 0xXX.
MRM_C0 = 32, MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35,
MRM_C4 = 36, MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39,
- MRM_CB = 40, MRM_D0 = 41, MRM_D1 = 42, MRM_D4 = 43,
- MRM_D5 = 44, MRM_D6 = 45, MRM_D8 = 46, MRM_D9 = 47,
- MRM_DA = 48, MRM_DB = 49, MRM_DC = 50, MRM_DD = 51,
- MRM_DE = 52, MRM_DF = 53, MRM_E0 = 54, MRM_E1 = 55,
- MRM_E2 = 56, MRM_E3 = 57, MRM_E4 = 58, MRM_E5 = 59,
- MRM_E8 = 60, MRM_E9 = 61, MRM_EA = 62, MRM_EB = 63,
- MRM_EC = 64, MRM_ED = 65, MRM_EE = 66, MRM_F0 = 67,
- MRM_F1 = 68, MRM_F2 = 69, MRM_F3 = 70, MRM_F4 = 71,
- MRM_F5 = 72, MRM_F6 = 73, MRM_F7 = 74, MRM_F8 = 75,
- MRM_F9 = 76, MRM_FA = 77, MRM_FB = 78, MRM_FC = 79,
- MRM_FD = 80, MRM_FE = 81, MRM_FF = 82,
+ MRM_CB = 40, MRM_CF = 41, MRM_D0 = 42, MRM_D1 = 43,
+ MRM_D4 = 44, MRM_D5 = 45, MRM_D6 = 46, MRM_D7 = 47,
+ MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50, MRM_DB = 51,
+ MRM_DC = 52, MRM_DD = 53, MRM_DE = 54, MRM_DF = 55,
+ MRM_E0 = 56, MRM_E1 = 57, MRM_E2 = 58, MRM_E3 = 59,
+ MRM_E4 = 60, MRM_E5 = 61, MRM_E8 = 62, MRM_E9 = 63,
+ MRM_EA = 64, MRM_EB = 65, MRM_EC = 66, MRM_ED = 67,
+ MRM_EE = 68, MRM_F0 = 69, MRM_F1 = 70, MRM_F2 = 71,
+ MRM_F3 = 72, MRM_F4 = 73, MRM_F5 = 74, MRM_F6 = 75,
+ MRM_F7 = 76, MRM_F8 = 77, MRM_F9 = 78, MRM_FA = 79,
+ MRM_FB = 80, MRM_FC = 81, MRM_FD = 82, MRM_FE = 83,
+ MRM_FF = 84,
FormMask = 127,
@@ -327,8 +328,8 @@ namespace X86II {
OpSizeShift = 7,
OpSizeMask = 0x3 << OpSizeShift,
- OpSize16 = 1,
- OpSize32 = 2,
+ OpSize16 = 1 << OpSizeShift,
+ OpSize32 = 2 << OpSizeShift,
// AsSize - Set if this instruction requires an operand size prefix (0x67),
// which most often indicates that the instruction address 16 bit address
@@ -454,51 +455,53 @@ namespace X86II {
EncodingMask = 0x3 << EncodingShift,
// VEX - encoding using 0xC4/0xC5
- VEX = 1,
+ VEX = 1 << EncodingShift,
/// XOP - Opcode prefix used by XOP instructions.
- XOP = 2,
+ XOP = 2 << EncodingShift,
// VEX_EVEX - Specifies that this instruction use EVEX form which provides
// syntax support up to 32 512-bit register operands and up to 7 16-bit
// mask operands as well as source operand data swizzling/memory operand
// conversion, eviction hint, and rounding mode.
- EVEX = 3,
+ EVEX = 3 << EncodingShift,
// Opcode
OpcodeShift = EncodingShift + 2,
- //===------------------------------------------------------------------===//
- /// VEX - The opcode prefix used by AVX instructions
- VEXShift = OpcodeShift + 8,
-
/// VEX_W - Has a opcode specific functionality, but is used in the same
/// way as REX_W is for regular SSE instructions.
- VEX_W = 1U << 0,
+ VEX_WShift = OpcodeShift + 8,
+ VEX_W = 1ULL << VEX_WShift,
/// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
/// address instructions in SSE are represented as 3 address ones in AVX
/// and the additional register is encoded in VEX_VVVV prefix.
- VEX_4V = 1U << 1,
+ VEX_4VShift = VEX_WShift + 1,
+ VEX_4V = 1ULL << VEX_4VShift,
/// VEX_4VOp3 - Similar to VEX_4V, but used on instructions that encode
/// operand 3 with VEX.vvvv.
- VEX_4VOp3 = 1U << 2,
+ VEX_4VOp3Shift = VEX_4VShift + 1,
+ VEX_4VOp3 = 1ULL << VEX_4VOp3Shift,
/// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
/// must be encoded in the i8 immediate field. This usually happens in
/// instructions with 4 operands.
- VEX_I8IMM = 1U << 3,
+ VEX_I8IMMShift = VEX_4VOp3Shift + 1,
+ VEX_I8IMM = 1ULL << VEX_I8IMMShift,
/// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
/// instruction uses 256-bit wide registers. This is usually auto detected
/// if a VR256 register is used, but some AVX instructions also have this
/// field marked when using a f256 memory references.
- VEX_L = 1U << 4,
+ VEX_LShift = VEX_I8IMMShift + 1,
+ VEX_L = 1ULL << VEX_LShift,
// VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
// prefix. Usually used for scalar instructions. Needed by disassembler.
- VEX_LIG = 1U << 5,
+ VEX_LIGShift = VEX_LShift + 1,
+ VEX_LIG = 1ULL << VEX_LIGShift,
// TODO: we should combine VEX_L and VEX_LIG together to form a 2-bit field
// with following encoding:
@@ -509,24 +512,24 @@ namespace X86II {
// this will save 1 tsflag bit
// EVEX_K - Set if this instruction requires masking
- EVEX_K = 1U << 6,
+ EVEX_KShift = VEX_LIGShift + 1,
+ EVEX_K = 1ULL << EVEX_KShift,
// EVEX_Z - Set if this instruction has EVEX.Z field set.
- EVEX_Z = 1U << 7,
+ EVEX_ZShift = EVEX_KShift + 1,
+ EVEX_Z = 1ULL << EVEX_ZShift,
// EVEX_L2 - Set if this instruction has EVEX.L' field set.
- EVEX_L2 = 1U << 8,
+ EVEX_L2Shift = EVEX_ZShift + 1,
+ EVEX_L2 = 1ULL << EVEX_L2Shift,
// EVEX_B - Set if this instruction has EVEX.B field set.
- EVEX_B = 1U << 9,
+ EVEX_BShift = EVEX_L2Shift + 1,
+ EVEX_B = 1ULL << EVEX_BShift,
- // EVEX_CD8E - compressed disp8 form, element-size
- EVEX_CD8EShift = VEXShift + 10,
- EVEX_CD8EMask = 3,
-
- // EVEX_CD8V - compressed disp8 form, vector-width
- EVEX_CD8VShift = EVEX_CD8EShift + 2,
- EVEX_CD8VMask = 7,
+ // The scaling factor for the AVX512's 8-bit compressed displacement.
+ CD8_Scale_Shift = EVEX_BShift + 1,
+ CD8_Scale_Mask = 127ULL << CD8_Scale_Shift,
/// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
/// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
@@ -534,14 +537,17 @@ namespace X86II {
/// storing a classifier in the imm8 field. To simplify our implementation,
/// we handle this by storeing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 15,
+ Has3DNow0F0FOpcodeShift = CD8_Scale_Shift + 7,
+ Has3DNow0F0FOpcode = 1ULL << Has3DNow0F0FOpcodeShift,
/// MemOp4 - Used to indicate swapping of operand 3 and 4 to be encoded in
/// ModRM or I8IMM. This is used for FMA4 and XOP instructions.
- MemOp4 = 1U << 16,
+ MemOp4Shift = Has3DNow0F0FOpcodeShift + 1,
+ MemOp4 = 1ULL << MemOp4Shift,
/// Explicitly specified rounding control
- EVEX_RC = 1U << 17
+ EVEX_RCShift = MemOp4Shift + 1,
+ EVEX_RC = 1ULL << EVEX_RCShift
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -643,10 +649,10 @@ namespace X86II {
/// counted as one operand.
///
inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
- bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
-
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasMemOp4 = TSFlags & X86II::MemOp4;
+ bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+
switch (TSFlags & X86II::FormMask) {
default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
case X86II::Pseudo:
@@ -687,7 +693,7 @@ namespace X86II {
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m: {
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
unsigned FirstMemOp = 0;
if (HasVEX_4V)
++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
@@ -698,20 +704,21 @@ namespace X86II {
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
- case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF: case X86II::MRM_E0: case X86II::MRM_E1:
- case X86II::MRM_E2: case X86II::MRM_E3: case X86II::MRM_E4:
- case X86II::MRM_E5: case X86II::MRM_E8: case X86II::MRM_E9:
- case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
- case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_F0:
- case X86II::MRM_F1: case X86II::MRM_F2: case X86II::MRM_F3:
- case X86II::MRM_F4: case X86II::MRM_F5: case X86II::MRM_F6:
- case X86II::MRM_F7: case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_FA: case X86II::MRM_FB: case X86II::MRM_FC:
- case X86II::MRM_FD: case X86II::MRM_FE: case X86II::MRM_FF:
+ case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
+ case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9:
+ case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
+ case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2:
+ case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5:
+ case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA:
+ case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED:
+ case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1:
+ case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4:
+ case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7:
+ case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA:
+ case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD:
+ case X86II::MRM_FE: case X86II::MRM_FF:
return -1;
}
}
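
The X86BaseInfo.h hunks widen the TSFlags enum to uint64_t and store each VEX/EVEX flag pre-shifted into its final bit position, so call sites can test TSFlags & X86II::VEX_4V directly instead of shifting by VEXShift first. A standalone before/after model of the two call-site styles; the bit positions 38/39 here are invented for illustration and are not the layout in the patch:

#include <cassert>
#include <cstdint>

// Old style: flags packed as small numbers, shifted out at every use.
enum OldFlags : unsigned { OLD_VEX_4V = 1u << 1 };
static const unsigned OldShift = 38;

// New style: the shift is baked into the constant once.
enum NewFlags : uint64_t { NEW_VEX_4V = 1ULL << 39 };

int main() {
  uint64_t TSFlags = NEW_VEX_4V;
  bool OldTest = (TSFlags >> OldShift) & OLD_VEX_4V; // old call-site pattern
  bool NewTest = TSFlags & NEW_VEX_4V;                // new call-site pattern
  assert(OldTest == NewTest);
  return 0;
}
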
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 3fdec87..be6a8e4 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -77,7 +77,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
break;
case MCSymbolRefExpr::VK_GOTTPOFF:
Type = ELF::R_X86_64_GOTTPOFF;
- break;
+ break;
case MCSymbolRefExpr::VK_TLSGD:
Type = ELF::R_X86_64_TLSGD;
break;
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 09396b7..4899900 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_X86_X86FIXUPKINDS_H
-#define LLVM_X86_X86FIXUPKINDS_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86FIXUPKINDS_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86FIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 83b2777..5679d63 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -72,11 +72,10 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
HasWeakDefCanBeHiddenDirective = false;
- // FIXME: this should not depend on the target OS version, but on the ld64
- // version in use. From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified
- // FDE relocs may be used. We also use them for the ios simulator.
- DwarfFDESymbolsUseAbsDiff = (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
- || T.isiOS();
+ // Assume ld64 is new enough that the abs-ified FDE relocs may be used
+ // (actually, must, since otherwise the non-extern relocations we produce
+ // overwhelm ld64's tiny little mind and it fails).
+ DwarfFDESymbolsUseAbsDiff = true;
UseIntegratedAssembler = true;
}
@@ -103,9 +102,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
TextAlignFillValue = 0x90;
- // Set up DWARF directives
- HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
-
// Debug Information
SupportsDebugInformation = true;
@@ -134,19 +130,14 @@ X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
return MCBinaryExpr::CreateAdd(Res, Four, Context);
}
-const MCSection *X86ELFMCAsmInfo::
-getNonexecutableStackSection(MCContext &Ctx) const {
- return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata());
-}
-
void X86MCAsmInfoMicrosoft::anchor() { }
X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PointerSize = 8;
- ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
+ ExceptionsType = ExceptionHandling::ItaniumWinEH;
}
AssemblerDialect = AsmWriterFlavor;
@@ -165,7 +156,8 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PointerSize = 8;
- ExceptionsType = ExceptionHandling::WinEH;
+ WinEHEncodingType = WinEH::EncodingType::Itanium;
+ ExceptionsType = ExceptionHandling::ItaniumWinEH;
} else {
ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index a7509b0..f2f06c3 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86TARGETASMINFO_H
-#define X86TARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCASMINFO_H
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmInfoCOFF.h"
@@ -39,8 +39,6 @@ namespace llvm {
void anchor() override;
public:
explicit X86ELFMCAsmInfo(const Triple &Triple);
- const MCSection *
- getNonexecutableStackSection(MCContext &Ctx) const override;
};
class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 2152b21..31b8e2d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -185,42 +185,21 @@ static bool isDisp8(int Value) {
/// isCDisp8 - Return true if this signed displacement fits in a 8-bit
/// compressed dispacement field.
static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) {
- assert((TSFlags & X86II::EncodingMask) >> X86II::EncodingShift == X86II::EVEX &&
+ assert(((TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
"Compressed 8-bit displacement is only valid for EVEX inst.");
- unsigned CD8E = (TSFlags >> X86II::EVEX_CD8EShift) & X86II::EVEX_CD8EMask;
- unsigned CD8V = (TSFlags >> X86II::EVEX_CD8VShift) & X86II::EVEX_CD8VMask;
-
- if (CD8V == 0 && CD8E == 0) {
+ unsigned CD8_Scale =
+ (TSFlags & X86II::CD8_Scale_Mask) >> X86II::CD8_Scale_Shift;
+ if (CD8_Scale == 0) {
CValue = Value;
return isDisp8(Value);
}
-
- unsigned MemObjSize = 1U << CD8E;
- if (CD8V & 4) {
- // Fixed vector length
- MemObjSize *= 1U << (CD8V & 0x3);
- } else {
- // Modified vector length
- bool EVEX_b = (TSFlags >> X86II::VEXShift) & X86II::EVEX_B;
- if (!EVEX_b) {
- unsigned EVEX_LL = ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) ? 1 : 0;
- EVEX_LL += ((TSFlags >> X86II::VEXShift) & X86II::EVEX_L2) ? 2 : 0;
- assert(EVEX_LL < 3 && "");
-
- unsigned NumElems = (1U << (EVEX_LL + 4)) / MemObjSize;
- NumElems /= 1U << (CD8V & 0x3);
-
- MemObjSize *= NumElems;
- }
- }
- unsigned MemObjMask = MemObjSize - 1;
- assert((MemObjSize & MemObjMask) == 0 && "Invalid memory object size.");
-
- if (Value & MemObjMask) // Unaligned offset
+ unsigned Mask = CD8_Scale - 1;
+ assert((CD8_Scale & Mask) == 0 && "Invalid memory object size.");
+ if (Value & Mask) // Unaligned offset
return false;
- Value /= (int)MemObjSize;
+ Value /= (int)CD8_Scale;
bool Ret = (Value == (signed char)Value);
if (Ret)
@@ -393,9 +372,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
unsigned BaseReg = Base.getReg();
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
- bool HasEVEX = (Encoding == X86II::EVEX);
+ bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
// Handle %rip relative addressing.
if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
@@ -613,13 +590,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
int MemOperand, const MCInst &MI,
const MCInstrDesc &Desc,
raw_ostream &OS) const {
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
- bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
- bool HasEVEX_RC = (TSFlags >> X86II::VEXShift) & X86II::EVEX_RC;
+ uint64_t Encoding = TSFlags & X86II::EncodingMask;
+ bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = TSFlags & X86II::VEX_4VOp3;
+ bool HasMemOp4 = TSFlags & X86II::MemOp4;
+ bool HasEVEX_RC = TSFlags & X86II::EVEX_RC;
// VEX_R: opcode externsion equivalent to REX.R in
// 1's complement (inverted) form
@@ -700,18 +676,18 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
bool EncodeRC = false;
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ if (TSFlags & X86II::VEX_W)
VEX_W = 1;
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ if (TSFlags & X86II::VEX_L)
VEX_L = 1;
- if (((TSFlags >> X86II::VEXShift) & X86II::EVEX_L2))
+ if (TSFlags & X86II::EVEX_L2)
EVEX_L2 = 1;
- if (HasEVEX_K && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_Z))
+ if (HasEVEX_K && (TSFlags & X86II::EVEX_Z))
EVEX_z = 1;
- if (((TSFlags >> X86II::VEXShift) & X86II::EVEX_B))
+ if ((TSFlags & X86II::EVEX_B))
EVEX_b = 1;
switch (TSFlags & X86II::OpPrefixMask) {
@@ -1129,8 +1105,8 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
// Emit the operand size opcode prefix as needed.
- unsigned char OpSize = (TSFlags & X86II::OpSizeMask) >> X86II::OpSizeShift;
- if (OpSize == (is16BitMode(STI) ? X86II::OpSize32 : X86II::OpSize16))
+ if ((TSFlags & X86II::OpSizeMask) == (is16BitMode(STI) ? X86II::OpSize32
+ : X86II::OpSize16))
EmitByte(0x66, CurByte, OS);
switch (TSFlags & X86II::OpPrefixMask) {
@@ -1190,19 +1166,18 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned CurByte = 0;
// Encoding type for this instruction.
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
+ uint64_t Encoding = TSFlags & X86II::EncodingMask;
// It uses the VEX.VVVV field?
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = TSFlags & X86II::VEX_4VOp3;
+ bool HasMemOp4 = TSFlags & X86II::MemOp4;
const unsigned MemOp4_I8IMMOperand = 2;
// It uses the EVEX.aaa field?
- bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
- bool HasEVEX_RC = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_RC);
-
+ bool HasEVEX_K = TSFlags & X86II::EVEX_K;
+ bool HasEVEX_RC = TSFlags & X86II::EVEX_RC;
+
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -1257,7 +1232,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
- if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ if (TSFlags & X86II::Has3DNow0F0FOpcode)
BaseOpcode = 0x0F; // Weird 3DNow! encoding.
unsigned SrcRegNum = 0;
@@ -1457,20 +1432,21 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
- case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF: case X86II::MRM_E0: case X86II::MRM_E1:
- case X86II::MRM_E2: case X86II::MRM_E3: case X86II::MRM_E4:
- case X86II::MRM_E5: case X86II::MRM_E8: case X86II::MRM_E9:
- case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
- case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_F0:
- case X86II::MRM_F1: case X86II::MRM_F2: case X86II::MRM_F3:
- case X86II::MRM_F4: case X86II::MRM_F5: case X86II::MRM_F6:
- case X86II::MRM_F7: case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_FA: case X86II::MRM_FB: case X86II::MRM_FC:
- case X86II::MRM_FD: case X86II::MRM_FE: case X86II::MRM_FF:
+ case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
+ case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9:
+ case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
+ case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2:
+ case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5:
+ case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA:
+ case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED:
+ case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1:
+ case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4:
+ case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7:
+ case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA:
+ case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD:
+ case X86II::MRM_FE: case X86II::MRM_FF:
EmitByte(BaseOpcode, CurByte, OS);
unsigned char MRM;
@@ -1485,11 +1461,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_C9: MRM = 0xC9; break;
case X86II::MRM_CA: MRM = 0xCA; break;
case X86II::MRM_CB: MRM = 0xCB; break;
+ case X86II::MRM_CF: MRM = 0xCF; break;
case X86II::MRM_D0: MRM = 0xD0; break;
case X86II::MRM_D1: MRM = 0xD1; break;
case X86II::MRM_D4: MRM = 0xD4; break;
case X86II::MRM_D5: MRM = 0xD5; break;
case X86II::MRM_D6: MRM = 0xD6; break;
+ case X86II::MRM_D7: MRM = 0xD7; break;
case X86II::MRM_D8: MRM = 0xD8; break;
case X86II::MRM_D9: MRM = 0xD9; break;
case X86II::MRM_DA: MRM = 0xDA; break;
@@ -1538,7 +1516,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
while (CurOp != NumOps && NumOps - CurOp <= 2) {
// The last source register of a 4 operand instruction in AVX is encoded
// in bits[7:4] of a immediate byte.
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
+ if (TSFlags & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
: CurOp);
++CurOp;
@@ -1564,7 +1542,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
}
- if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
+ if (TSFlags & X86II::Has3DNow0F0FOpcode)
EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
#ifndef NDEBUG
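The hunks above all make the same mechanical change: VEX/EVEX attribute bits used to sit above bit 32 of TSFlags and were reached with (TSFlags >> X86II::VEXShift) & Flag, whereas after this patch each attribute is defined as a full 64-bit mask and can be tested directly. A minimal sketch of the two access patterns follows; the shift amount and bit positions are illustrative placeholders, not the real X86II constants.

#include <cstdint>

namespace tsflags_demo {
// Hypothetical layout: the exact shift and bit numbers are made up for
// illustration only.
constexpr unsigned VEXShift = 32;
constexpr uint64_t VEX_4V_OldBit  = 1u << 3;    // tested after shifting by VEXShift
constexpr uint64_t VEX_4V_NewMask = 1ULL << 35; // same bit, expressed as a 64-bit mask

inline bool hasVEX_4V_before(uint64_t TSFlags) {
  return (TSFlags >> VEXShift) & VEX_4V_OldBit; // old idiom: shift, then test
}
inline bool hasVEX_4V_after(uint64_t TSFlags) {
  return TSFlags & VEX_4V_NewMask;              // new idiom: test the mask directly
}
} // namespace tsflags_demo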
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 5e29e5c..5a9181d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -272,7 +272,8 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
MAI = new X86ELFMCAsmInfo(TheTriple);
} else if (TheTriple.isWindowsMSVCEnvironment()) {
MAI = new X86MCAsmInfoMicrosoft(TheTriple);
- } else if (TheTriple.isOSCygMing()) {
+ } else if (TheTriple.isOSCygMing() ||
+ TheTriple.isWindowsItaniumEnvironment()) {
MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
} else {
// The default is ELF.
@@ -350,11 +351,8 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
+ raw_ostream &_OS, MCCodeEmitter *_Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll) {
Triple TheTriple(TT);
switch (TheTriple.getObjectFormat()) {
@@ -365,7 +363,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
assert(TheTriple.isOSWindows() && "only Windows COFF is supported");
return createX86WinCOFFStreamer(Ctx, MAB, _Emitter, _OS, RelaxAll);
case Triple::ELF:
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
}
}
@@ -376,7 +374,7 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI, MII, MRI);
+ return new X86ATTInstPrinter(MAI, MII, MRI, STI);
if (SyntaxVariant == 1)
return new X86IntelInstPrinter(MAI, MII, MRI);
return nullptr;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index ebe74cf..aef9571 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86MCTARGETDESC_H
-#define X86MCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
#include <string>
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index ead3338..5685a7f 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -179,11 +179,14 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
if (A_Base == B_Base && A_Base)
report_fatal_error("unsupported relocation with identical base", false);
- // A subtraction expression where both symbols are undefined is a
+ // A subtraction expression where either symbol is undefined is a
// non-relocatable expression.
- if (A->isUndefined() && B->isUndefined())
- report_fatal_error("unsupported relocation with subtraction expression",
- false);
+ if (A->isUndefined() || B->isUndefined()) {
+ StringRef Name = A->isUndefined() ? A->getName() : B->getName();
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "unsupported relocation with subtraction expression, symbol '" +
+ Name + "' can not be undefined in a subtraction expression");
+ }
Value += Writer->getSymbolAddress(&A_SD, Layout) -
(!A_Base ? 0 : Writer->getSymbolAddress(A_Base, Layout));
@@ -572,7 +575,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
// For external relocations, make sure to offset the fixup value to
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
+ if (!SD->getSymbol().isUndefined())
FixedValue -= Layout.getSymbolOffset(SD);
} else {
// The index is the section ordinal (1-based).
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 7fa4180..5f1596c 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -8,18 +8,21 @@
//===----------------------------------------------------------------------===//
#include "X86MCTargetDesc.h"
+#include "llvm/MC/MCWin64EH.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
using namespace llvm;
namespace {
class X86WinCOFFStreamer : public MCWinCOFFStreamer {
+ Win64EH::UnwindEmitter EHStreamer;
public:
X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE,
raw_ostream &OS)
: MCWinCOFFStreamer(C, AB, *CE, OS) { }
void EmitWinEHHandlerData() override;
+ void EmitWindowsUnwindTables() override;
void FinishImpl() override;
};
@@ -28,12 +31,18 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData() {
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section!
- MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo());
+}
+
+void X86WinCOFFStreamer::EmitWindowsUnwindTables() {
+ if (!getNumWinFrameInfos())
+ return;
+ EHStreamer.Emit(*this);
}
void X86WinCOFFStreamer::FinishImpl() {
EmitFrames(nullptr);
- EmitW64Tables();
+ EmitWindowsUnwindTables();
MCWinCOFFStreamer::FinishImpl();
}
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 52d3c01..19a1832 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2,17 +2,6 @@
// Random ideas for the X86 backend.
//===---------------------------------------------------------------------===//
-This should be one DIV/IDIV instruction, not a libcall:
-
-unsigned test(unsigned long long X, unsigned Y) {
- return X/Y;
-}
-
-This can be done trivially with a custom legalizer. What about overflow
-though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
-
-//===---------------------------------------------------------------------===//
-
Improvements to the multiply -> shift/add algorithm:
http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html
@@ -83,43 +72,6 @@ It appears icc use push for parameter passing. Need to investigate.
//===---------------------------------------------------------------------===//
-This:
-
-void foo(void);
-void bar(int x, int *P) {
- x >>= 2;
- if (x)
- foo();
- *P = x;
-}
-
-compiles into:
-
- movq %rsi, %rbx
- movl %edi, %r14d
- sarl $2, %r14d
- testl %r14d, %r14d
- je LBB0_2
-
-Instead of doing an explicit test, we can use the flags off the sar. This
-occurs in a bigger testcase like this, which is pretty common:
-
-#include <vector>
-int test1(std::vector<int> &X) {
- int Sum = 0;
- for (long i = 0, e = X.size(); i != e; ++i)
- X[i] = 0;
- return Sum;
-}
-
-//===---------------------------------------------------------------------===//
-
-Only use inc/neg/not instructions on processors where they are faster than
-add/sub/xor. They are slower on the P4 due to only updating some processor
-flags.
-
-//===---------------------------------------------------------------------===//
-
The instruction selector sometimes misses folding a load into a compare. The
pattern is written as (cmp reg, (load p)). Because the compare isn't
commutative, it is not matched with the load on both sides. The dag combiner
@@ -303,42 +255,6 @@ opposed to two cycles for the movl+lea variant.
//===---------------------------------------------------------------------===//
-__builtin_ffs codegen is messy.
-
-int ffs_(unsigned X) { return __builtin_ffs(X); }
-
-llvm produces:
-ffs_:
- movl 4(%esp), %ecx
- bsfl %ecx, %eax
- movl $32, %edx
- cmove %edx, %eax
- incl %eax
- xorl %edx, %edx
- testl %ecx, %ecx
- cmove %edx, %eax
- ret
-
-vs gcc:
-
-_ffs_:
- movl $-1, %edx
- bsfl 4(%esp), %eax
- cmove %edx, %eax
- addl $1, %eax
- ret
-
-Another example of __builtin_ffs (use predsimplify to eliminate a select):
-
-int foo (unsigned long j) {
- if (j)
- return __builtin_ffs (j) - 1;
- else
- return 0;
-}
-
-//===---------------------------------------------------------------------===//
-
It appears gcc place string data with linkonce linkage in
.section __TEXT,__const_coal,coalesced instead of
.section __DATA,__const_coal,coalesced.
@@ -466,85 +382,6 @@ We should inline lrintf and probably other libc functions.
//===---------------------------------------------------------------------===//
-Use the FLAGS values from arithmetic instructions more. For example, compile:
-
-int add_zf(int *x, int y, int a, int b) {
- if ((*x += y) == 0)
- return a;
- else
- return b;
-}
-
-to:
- addl %esi, (%rdi)
- movl %edx, %eax
- cmovne %ecx, %eax
- ret
-instead of:
-
-_add_zf:
- addl (%rdi), %esi
- movl %esi, (%rdi)
- testl %esi, %esi
- cmove %edx, %ecx
- movl %ecx, %eax
- ret
-
-As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll
-without a test instruction.
-
-//===---------------------------------------------------------------------===//
-
-These two functions have identical effects:
-
-unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;}
-unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;}
-
-We currently compile them to:
-
-_f:
- movl 4(%esp), %eax
- movl %eax, %ecx
- incl %ecx
- movl 8(%esp), %edx
- cmpl %edx, %ecx
- jne LBB1_2 #UnifiedReturnBlock
-LBB1_1: #cond_true
- addl $2, %eax
- ret
-LBB1_2: #UnifiedReturnBlock
- movl %ecx, %eax
- ret
-_f2:
- movl 4(%esp), %eax
- movl %eax, %ecx
- incl %ecx
- cmpl 8(%esp), %ecx
- sete %cl
- movzbl %cl, %ecx
- leal 1(%ecx,%eax), %eax
- ret
-
-both of which are inferior to GCC's:
-
-_f:
- movl 4(%esp), %edx
- leal 1(%edx), %eax
- addl $2, %edx
- cmpl 8(%esp), %eax
- cmove %edx, %eax
- ret
-_f2:
- movl 4(%esp), %eax
- addl $1, %eax
- xorl %edx, %edx
- cmpl 8(%esp), %eax
- sete %dl
- addl %edx, %eax
- ret
-
-//===---------------------------------------------------------------------===//
-
This code:
void test(int X) {
@@ -1398,20 +1235,6 @@ A similar code sequence works for division.
//===---------------------------------------------------------------------===//
-These should compile to the same code, but the later codegen's to useless
-instructions on X86. This may be a trivial dag combine (GCC PR7061):
-
-struct s1 { unsigned char a, b; };
-unsigned long f1(struct s1 x) {
- return x.a + x.b;
-}
-struct s2 { unsigned a: 8, b: 8; };
-unsigned long f2(struct s2 x) {
- return x.a + x.b;
-}
-
-//===---------------------------------------------------------------------===//
-
We currently compile this:
define i32 @func1(i32 %v1, i32 %v2) nounwind {
diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt
index fdb886f..de0a30f 100644
--- a/lib/Target/X86/Utils/LLVMBuild.txt
+++ b/lib/Target/X86/Utils/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86Utils
parent = X86
-required_libraries = Support
+required_libraries = Core Support
add_to_library_groups = X86
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 5f2441c..ba6cbc8 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86ShuffleDecode.h"
+#include "llvm/IR/Constants.h"
#include "llvm/CodeGen/MachineValueType.h"
//===----------------------------------------------------------------------===//
@@ -62,6 +63,51 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(NElts+i);
}
+void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i);
+ ShuffleMask.push_back(2 * i);
+ }
+}
+
+void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i + 1);
+ ShuffleMask.push_back(2 * i + 1);
+ }
+}
+
+void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned NumElts = VectorSizeInBits / 8;
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i) {
+ int M = SM_SentinelZero;
+ if (i >= Imm) M = i - Imm + l;
+ ShuffleMask.push_back(M);
+ }
+}
+
+void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned NumElts = VectorSizeInBits / 8;
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i) {
+ unsigned Base = i + Imm;
+ int M = Base + l;
+ if (Base >= NumLaneElts) M = SM_SentinelZero;
+ ShuffleMask.push_back(M);
+ }
+}
+
void DecodePALIGNRMask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
@@ -207,6 +253,97 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
}
}
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
+ assert(MaskTy->getVectorElementType()->isIntegerTy(8) &&
+ "Expected i8 constant mask elements!");
+ int NumElements = MaskTy->getVectorNumElements();
+ // FIXME: Add support for AVX-512.
+ assert((NumElements == 16 || NumElements == 32) &&
+ "Only 128-bit and 256-bit vectors supported!");
+ ShuffleMask.reserve(NumElements);
+
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ assert((unsigned)NumElements == CDS->getNumElements() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+ // lane of the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ uint64_t Element = CDS->getElementAsInteger(i);
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (Element & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (Element & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+ } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ assert((unsigned)NumElements == CV->getNumOperands() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+ // lane of the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ Constant *COp = CV->getOperand(i);
+ if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (Element & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (Element & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+ }
+}
+
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ if (M == (uint64_t)SM_SentinelUndef) {
+ ShuffleMask.push_back(M);
+ continue;
+ }
+ // For AVX vectors with 32 bytes the base of the shuffle is the half of
+ // the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (M & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (M & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+}
+
+void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ int ElementBits = VT.getScalarSizeInBits();
+ int NumElements = VT.getVectorNumElements();
+ for (int i = 0; i < NumElements; ++i) {
+ // If there are more than 8 elements in the vector, then any immediate blend
+ // mask applies to each 128-bit lane. There can never be more than
+ // 8 elements in a 128-bit lane with an immediate blend.
+ int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
+ assert(Bit < 8 &&
+ "Immediate blends only operate over 8 elements at a time!");
+ ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
+ }
+}
+
/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
@@ -215,4 +352,44 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
}
}
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
+ assert(MaskTy->getVectorElementType()->isIntegerTy() &&
+ "Expected integer constant mask elements!");
+ int ElementBits = MaskTy->getScalarSizeInBits();
+ int NumElements = MaskTy->getVectorNumElements();
+ assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
+ "Unexpected number of vector elements.");
+ ShuffleMask.reserve(NumElements);
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ assert((unsigned)NumElements == CDS->getNumElements() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ int Base = (i * ElementBits / 128) * (128 / ElementBits);
+ uint64_t Element = CDS->getElementAsInteger(i);
+ // Only the least significant 2 bits of the integer are used.
+ int Index = Base + (Element & 0x3);
+ ShuffleMask.push_back(Index);
+ }
+ } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ assert((unsigned)NumElements == C->getNumOperands() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ int Base = (i * ElementBits / 128) * (128 / ElementBits);
+ Constant *COp = CV->getOperand(i);
+ if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // Only the least significant 2 bits of the integer are used.
+ int Index = Base + (Element & 0x3);
+ ShuffleMask.push_back(Index);
+ }
+ }
+}
+
} // llvm namespace
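For readers checking the PSHUFB decoding above by hand, here is a self-contained sketch of the per-byte rule applied by DecodePSHUFBMask. It is illustrative rather than the LLVM function: -2 stands in for SM_SentinelZero (matching the header change below), and undef elements are not modelled.

#include <cstdint>
#include <vector>

static std::vector<int> decodePshufbBytes(const std::vector<uint8_t> &MaskBytes) {
  std::vector<int> Out;
  for (size_t i = 0; i < MaskBytes.size(); ++i) {
    int Base = i < 16 ? 0 : 16;          // stay inside the current 16-byte lane
    uint8_t B = MaskBytes[i];
    if (B & 0x80)                        // high bit set: element is zeroed
      Out.push_back(-2);
    else
      Out.push_back(Base + (B & 0xf));   // low nibble indexes within the lane
  }
  return Out;
}
// Example: {0x00, 0x81, 0x03} decodes to {0, -2, 3}; the second byte is zeroed
// because bit 7 is set, the others index within the low lane.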
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 9e75b6b..6ba3c64 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -12,21 +12,21 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_SHUFFLE_DECODE_H
-#define X86_SHUFFLE_DECODE_H
+#ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
+#define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ArrayRef.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//
namespace llvm {
+class Constant;
class MVT;
-enum {
- SM_SentinelZero = -1
-};
+enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 };
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
@@ -36,6 +36,14 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
@@ -59,6 +67,16 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// different datatypes and vector widths.
void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decode a PSHUFB mask from an IR-level vector constant.
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a PSHUFB mask from a raw array of constants such as from
+/// BUILD_VECTOR.
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a BLEND immediate mask into a shuffle mask.
+void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
@@ -67,6 +85,9 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+
} // llvm namespace
#endif
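As a usage note for the new DecodeBLENDMask declaration, the following is a small self-contained model of the immediate-blend rule it implements, using plain C++ containers instead of MVT/SmallVectorImpl so the arithmetic is easy to verify by hand.

#include <vector>

static std::vector<int> decodeBlendImm(int NumElements, int ElementBits,
                                       unsigned Imm) {
  std::vector<int> Mask;
  for (int i = 0; i < NumElements; ++i) {
    // With more than 8 elements the immediate repeats per 128-bit lane.
    int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
    Mask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
  }
  return Mask;
}
// decodeBlendImm(4, 32, 0b0101) yields {4, 1, 6, 3}: bits 0 and 2 of the
// immediate select elements from the second source vector.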
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index d5522ed..8bd5817 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_X86_H
-#define TARGET_X86_H
+#ifndef LLVM_LIB_TARGET_X86_X86_H
+#define LLVM_LIB_TARGET_X86_X86_H
#include "llvm/Support/CodeGen.h"
@@ -21,13 +21,8 @@ namespace llvm {
class FunctionPass;
class ImmutablePass;
-class JITCodeEmitter;
class X86TargetMachine;
-/// createX86AtomicExpandPass - This pass expands atomic operations that cannot
-/// be handled natively in terms of a loop using cmpxchg.
-FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
-
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
@@ -54,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
/// AVX and SSE.
FunctionPass *createX86IssueVZeroUpperPass();
-/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
-/// to the specified MCE object.
-FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
- JITCodeEmitter &JCE);
-
/// createX86EmitCodeToMemory - Returns a pass that converts a register
/// allocated function into raw machine code in a dynamically
/// allocated chunk of memory.
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 93f516a..83f55d3 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -104,7 +104,15 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
-
+def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
+ "Enable AVX-512 Doubleword and Quadword Instructions",
+ [FeatureAVX512]>;
+def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
+ "Enable AVX-512 Byte and Word Instructions",
+ [FeatureAVX512]>;
+def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
+ "Enable AVX-512 Vector Length eXtensions",
+ [FeatureAVX512]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
@@ -149,10 +157,14 @@ def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
+def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
+ "Support SGX instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
+def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
+ "Support SMAP instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
@@ -170,6 +182,10 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
+def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true",
+ "Use RSQRT* to optimize square root calculations">;
+def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst",
+ "true", "Use RCP* to optimize division calculations">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -264,8 +280,16 @@ def : ProcessorModel<"core-avx2", HaswellModel,
FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
- FeatureHLE]>;
+ FeatureHLE, FeatureSlowIncDec]>;
+// Broadwell
+def : ProcessorModel<"broadwell", HaswellModel,
+ [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
+ FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
+ FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSMAP,
+ FeatureSlowIncDec]>;
// KNL
// FIXME: define KNL model
def : ProcessorModel<"knl", HaswellModel,
@@ -276,6 +300,17 @@ def : ProcessorModel<"knl", HaswellModel,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
FeatureSlowIncDec]>;
+// SKX
+// FIXME: define SKX model
+def : ProcessorModel<"skx", HaswellModel,
+ [FeatureAVX512, FeatureCDI,
+ FeatureDQI, FeatureBWI, FeatureVLX,
+ FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
+ FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
+ FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
+ FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
+ FeatureSlowIncDec, FeatureSGX]>;
+
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
def : Proc<"k6-3", [Feature3DNow]>;
@@ -311,35 +346,42 @@ def : Proc<"amdfam10", [FeatureSSE4A,
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
FeatureSlowSHLD]>;
+
// Jaguar
-def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
- FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
- FeatureBMI, FeatureF16C, FeatureMOVBE,
- FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>;
+def : ProcessorModel<"btver2", BtVer2Model,
+ [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
+ FeatureBMI, FeatureF16C, FeatureMOVBE,
+ FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD,
+ FeatureUseSqrtEst, FeatureUseRecipEst]>;
+
// Bulldozer
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>;
+ FeatureAVX, FeatureSSE4A, FeatureLZCNT,
+ FeaturePOPCNT, FeatureSlowSHLD]>;
// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureTBM,
- FeatureFMA, FeatureSlowSHLD]>;
+ FeatureAVX, FeatureSSE4A, FeatureF16C,
+ FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
+ FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
// Steamroller
def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureTBM,
- FeatureFMA, FeatureFSGSBase]>;
+ FeatureAVX, FeatureSSE4A, FeatureF16C,
+ FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
+ FeatureTBM, FeatureFMA, FeatureSlowSHLD,
+ FeatureFSGSBase]>;
// Excavator
def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,
FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI, FeatureBMI2,
- FeatureTBM, FeatureFMA, FeatureFSGSBase]>;
+ FeatureTBM, FeatureFMA, FeatureSSE4A,
+ FeatureFSGSBase]>;
def : Proc<"geode", [Feature3DNowA]>;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 1dca568..4e5b7b8 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -29,6 +30,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -45,6 +47,8 @@ using namespace llvm;
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SMShadowTracker.startFunction(MF);
+
SetupMachineFunction(MF);
if (Subtarget->isTargetCOFF()) {
@@ -549,6 +553,28 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
4 /*size*/);
}
+MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ if (Subtarget->isTargetKnownWindowsMSVC()) {
+ const MachineConstantPoolEntry &CPE =
+ MF->getConstantPool()->getConstants()[CPID];
+ if (!CPE.isMachineConstantPoolEntry()) {
+ SectionKind Kind =
+ CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout());
+ const Constant *C = CPE.Val.ConstVal;
+ if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
+ getObjFileLowering().getSectionForConstant(Kind, C))) {
+ if (MCSymbol *Sym = S->getCOMDATSymbol()) {
+ if (Sym->isUndefined())
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ return Sym;
+ }
+ }
+ }
+ }
+
+ return AsmPrinter::GetCPISymbol(CPID);
+}
+
void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
SmallString<128> Directive;
raw_svector_ostream OS(Directive);
@@ -703,7 +729,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
for (const auto &Stub : Stubs) {
OutStreamer.EmitLabel(Stub.first);
@@ -712,6 +738,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
Stubs.clear();
}
+
+ SM.serializeToStackMapSection();
}
}
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index e4eef5d..748b948 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -7,14 +7,19 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86ASMPRINTER_H
-#define X86ASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_X86_X86ASMPRINTER_H
+#define LLVM_LIB_TARGET_X86_X86ASMPRINTER_H
#include "X86Subtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Target/TargetMachine.h"
+// Implemented in X86MCInstLower.cpp
+namespace {
+ class X86MCInstLower;
+}
+
namespace llvm {
class MCStreamer;
class MCSymbol;
@@ -25,9 +30,63 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void GenerateExportDirective(const MCSymbol *Sym, bool IsData);
+ // This utility class tracks the length of a stackmap instruction's 'shadow'.
+ // It is used by the X86AsmPrinter to ensure that the stackmap shadow
+ // invariants (i.e. no other stackmaps, patchpoints, or control flow within
+ // the shadow) are met, while outputting a minimal number of NOPs for padding.
+ //
+ // To minimise the number of NOPs used, the shadow tracker counts the number
+ // of instruction bytes output since the last stackmap. Only if there are too
+ // few instruction bytes to cover the shadow are NOPs used for padding.
+ class StackMapShadowTracker {
+ public:
+ StackMapShadowTracker(TargetMachine &TM);
+ ~StackMapShadowTracker();
+ void startFunction(MachineFunction &MF);
+ void count(MCInst &Inst, const MCSubtargetInfo &STI);
+
+ // Called to signal the start of a shadow of RequiredSize bytes.
+ void reset(unsigned RequiredSize) {
+ RequiredShadowSize = RequiredSize;
+ CurrentShadowSize = 0;
+ InShadow = true;
+ }
+
+ // Called before every stackmap/patchpoint, and at the end of basic blocks,
+ // to emit any necessary padding-NOPs.
+ void emitShadowPadding(MCStreamer &OutStreamer, const MCSubtargetInfo &STI);
+ private:
+ TargetMachine &TM;
+ std::unique_ptr<MCCodeEmitter> CodeEmitter;
+ bool InShadow;
+
+ // RequiredShadowSize holds the length of the shadow specified in the most
+ // recently encountered STACKMAP instruction.
+ // CurrentShadowSize counts the number of bytes encoded since the most
+ // recently encountered STACKMAP, stopping when that number is greater than
+ // or equal to RequiredShadowSize.
+ unsigned RequiredShadowSize, CurrentShadowSize;
+ };
+
+ StackMapShadowTracker SMShadowTracker;
+
+ // All instructions emitted by the X86AsmPrinter should use this helper
+ // method.
+ //
+ // This helper function invokes the SMShadowTracker on each instruction before
+ // outputting it to the OutStream. This allows the shadow tracker to minimise
+ // the number of NOPs used for stackmap padding.
+ void EmitAndCountInstruction(MCInst &Inst);
+
+ void InsertStackMapShadows(MachineFunction &MF);
+ void LowerSTACKMAP(const MachineInstr &MI);
+ void LowerPATCHPOINT(const MachineInstr &MI);
+
+ void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI);
+
public:
explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), SM(*this) {
+ : AsmPrinter(TM, Streamer), SM(*this), SMShadowTracker(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
@@ -43,6 +102,10 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void EmitInstruction(const MachineInstr *MI) override;
+ void EmitBasicBlockEnd(const MachineBasicBlock &MBB) override {
+ SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+ }
+
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
@@ -50,6 +113,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
+ /// \brief Return the symbol for the specified constant pool entry.
+ MCSymbol *GetCPISymbol(unsigned CPID) const override;
+
+ bool doInitialization(Module &M) override {
+ SMShadowTracker.reset(0);
+ SM.reset();
+ return AsmPrinter::doInitialization(M);
+ }
+
bool runOnMachineFunction(MachineFunction &F) override;
};
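The comments above describe the stackmap shadow tracking in prose; below is a rough, self-contained sketch of that bookkeeping. The names and single counter are simplified assumptions, and the real printer measures encoded instruction sizes and emits actual NOP instructions rather than the printf used here.

#include <cstdio>

struct ShadowTrackerSketch {
  bool InShadow = false;
  unsigned RequiredShadowSize = 0, CurrentShadowSize = 0;

  void reset(unsigned Required) {       // called when a STACKMAP is seen
    RequiredShadowSize = Required;
    CurrentShadowSize = 0;
    InShadow = true;
  }
  void count(unsigned InstBytes) {      // called for each instruction emitted
    if (InShadow)
      CurrentShadowSize += InstBytes;
  }
  void emitShadowPadding() {            // called at block ends / next stackmap
    if (InShadow && CurrentShadowSize < RequiredShadowSize)
      std::printf("emit %u NOP bytes\n",
                  RequiredShadowSize - CurrentShadowSize);
    InShadow = false;
  }
};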
diff --git a/lib/Target/X86/X86AtomicExpandPass.cpp b/lib/Target/X86/X86AtomicExpandPass.cpp
deleted file mode 100644
index 61eefbb..0000000
--- a/lib/Target/X86/X86AtomicExpandPass.cpp
+++ /dev/null
@@ -1,287 +0,0 @@
-//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions --0---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass (at IR level) to replace atomic instructions which
-// cannot be implemented as a single instruction with cmpxchg-based loops.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86TargetMachine.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-atomic-expand"
-
-namespace {
- class X86AtomicExpandPass : public FunctionPass {
- const X86TargetMachine *TM;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit X86AtomicExpandPass(const X86TargetMachine *TM)
- : FunctionPass(ID), TM(TM) {}
-
- bool runOnFunction(Function &F) override;
- bool expandAtomicInsts(Function &F);
-
- bool needsCmpXchgNb(Type *MemType);
-
- /// There are four kinds of atomic operations. Two never need expanding:
- /// cmpxchg is what we expand the others *to*, and loads are easily handled
- /// by ISelLowering. Atomicrmw and store can need expanding in some
- /// circumstances.
- bool shouldExpand(Instruction *Inst);
-
- /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
- /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic.
- bool shouldExpandStore(StoreInst *SI);
-
- /// Only some atomicrmw instructions need expanding -- some operations
- /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
- /// limited support but can't return the previous value; some (e.g. add)
- /// have complete support in the instruction set.
- ///
- /// Also, naturally, 128-bit operations always need to be expanded.
- bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
-
- bool expandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicStore(StoreInst *SI);
- };
-}
-
-char X86AtomicExpandPass::ID = 0;
-
-FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
- return new X86AtomicExpandPass(TM);
-}
-
-bool X86AtomicExpandPass::runOnFunction(Function &F) {
- SmallVector<Instruction *, 1> AtomicInsts;
-
- // Changing control-flow while iterating through it is a bad idea, so gather a
- // list of all atomic instructions before we start.
- for (BasicBlock &BB : F)
- for (Instruction &Inst : BB) {
- if (isa<AtomicRMWInst>(&Inst) ||
- (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
- AtomicInsts.push_back(&Inst);
- }
-
- bool MadeChange = false;
- for (Instruction *Inst : AtomicInsts) {
- if (!shouldExpand(Inst))
- continue;
-
- if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
- MadeChange |= expandAtomicRMW(AI);
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- MadeChange |= expandAtomicStore(SI);
-
- assert(MadeChange && "Atomic inst not expanded when it should be?");
- Inst->eraseFromParent();
- }
-
- return MadeChange;
-}
-
-/// Returns true if operations on the given type will need to use either
-/// cmpxchg8b or cmpxchg16b. This occurs if the type is 1 step up from the
-/// native width, and the instructions are available (otherwise we leave them
-/// alone to become __sync_fetch_and_... calls).
-bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
- const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
- if (!Subtarget.hasCmpxchg16b())
- return false;
-
- unsigned CmpXchgNbWidth = Subtarget.is64Bit() ? 128 : 64;
-
- unsigned OpWidth = MemType->getPrimitiveSizeInBits();
- if (OpWidth == CmpXchgNbWidth)
- return true;
-
- return false;
-}
-
-
-bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
- const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
- unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
-
- if (needsCmpXchgNb(AI->getType()))
- return true;
-
- if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
- return false;
-
- AtomicRMWInst::BinOp Op = AI->getOperation();
- switch (Op) {
- default:
- llvm_unreachable("Unknown atomic operation");
- case AtomicRMWInst::Xchg:
- case AtomicRMWInst::Add:
- case AtomicRMWInst::Sub:
- // It's better to use xadd, xsub or xchg for these in all cases.
- return false;
- case AtomicRMWInst::Or:
- case AtomicRMWInst::And:
- case AtomicRMWInst::Xor:
- // If the atomicrmw's result isn't actually used, we can just add a "lock"
- // prefix to a normal instruction for these operations.
- return !AI->use_empty();
- case AtomicRMWInst::Nand:
- case AtomicRMWInst::Max:
- case AtomicRMWInst::Min:
- case AtomicRMWInst::UMax:
- case AtomicRMWInst::UMin:
- // These always require a non-trivial set of data operations on x86. We must
- // use a cmpxchg loop.
- return true;
- }
-}
-
-bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
- if (needsCmpXchgNb(SI->getValueOperand()->getType()))
- return true;
-
- return false;
-}
-
-bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
- if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
- return shouldExpandAtomicRMW(AI);
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return shouldExpandStore(SI);
- return false;
-}
-
-/// Emit IR to implement the given atomicrmw operation on values in registers,
-/// returning the new value.
-static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
- Value *Loaded, Value *Inc) {
- Value *NewVal;
- switch (Op) {
- case AtomicRMWInst::Xchg:
- return Inc;
- case AtomicRMWInst::Add:
- return Builder.CreateAdd(Loaded, Inc, "new");
- case AtomicRMWInst::Sub:
- return Builder.CreateSub(Loaded, Inc, "new");
- case AtomicRMWInst::And:
- return Builder.CreateAnd(Loaded, Inc, "new");
- case AtomicRMWInst::Nand:
- return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
- case AtomicRMWInst::Or:
- return Builder.CreateOr(Loaded, Inc, "new");
- case AtomicRMWInst::Xor:
- return Builder.CreateXor(Loaded, Inc, "new");
- case AtomicRMWInst::Max:
- NewVal = Builder.CreateICmpSGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::Min:
- NewVal = Builder.CreateICmpSLE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMax:
- NewVal = Builder.CreateICmpUGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMin:
- NewVal = Builder.CreateICmpULE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- default:
- break;
- }
- llvm_unreachable("Unknown atomic op");
-}
-
-bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
- AtomicOrdering Order =
- AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: atomicrmw some_op iN* %addr, iN %incr ordering
- //
- // The standard expansion we produce is:
- // [...]
- // %init_loaded = load atomic iN* %addr
- // br label %loop
- // loop:
- // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
- // %new = some_op iN %loaded, %incr
- // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
- // %new_loaded = extractvalue { iN, i1 } %pair, 0
- // %success = extractvalue { iN, i1 } %pair, 1
- // br i1 %success, label %atomicrmw.end, label %loop
- // atomicrmw.end:
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we want a load. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- LoadInst *InitLoaded = Builder.CreateLoad(Addr);
- InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
- Loaded->addIncoming(InitLoaded, BB);
-
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
-
- Value *Pair = Builder.CreateAtomicCmpXchg(
- Addr, Loaded, NewVal, Order,
- AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
- Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
- Loaded->addIncoming(NewLoaded, LoopBB);
-
- Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
- Builder.CreateCondBr(Success, ExitBB, LoopBB);
-
- AI->replaceAllUsesWith(NewLoaded);
-
- return true;
-}
-
-bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
- // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
- // this in terms of the usual expansion to "atomicrmw xchg".
- IRBuilder<> Builder(SI);
- AtomicOrdering Order =
- SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
- AtomicRMWInst *AI =
- Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
- SI->getValueOperand(), Order);
-
- // Now we have an appropriate swap instruction, lower it as usual.
- if (shouldExpandAtomicRMW(AI)) {
- expandAtomicRMW(AI);
- AI->eraseFromParent();
- return true;
- }
-
- return AI;
-}
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index e76f9fd..0eb2494 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -12,14 +12,27 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86CALLINGCONV_H
-#define X86CALLINGCONV_H
+#ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
+#define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"
namespace llvm {
+inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ // Similar to CCPassIndirect, with the addition of inreg.
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::Indirect;
+ ArgFlags.setInReg();
+ return false; // Continue the search, but now for i32.
+}
+
+
inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
CCState &) {
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 0824d4e..75a2ec0 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -14,7 +14,9 @@
/// CCIfSubtarget - Match if the current subtarget has a feature F.
class CCIfSubtarget<string F, CCAction A>
- : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+ : CCIf<!strconcat("static_cast<const X86Subtarget&>"
+ "(State.getMachineFunction().getSubtarget()).", F),
+ A>;
//===----------------------------------------------------------------------===//
// Return Value Calling Conventions
@@ -52,27 +54,27 @@ def RetCC_X86Common : CallingConv<[
// 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX-512 target feature.
- CCIfType<[v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
- // Long double types are always returned in ST0 (even with SSE).
- CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
+ // Long double types are always returned in FP0 (even with SSE).
+ CCIfType<[f80], CCAssignToReg<[FP0, FP1]>>
]>;
// X86-32 C return-value convention.
def RetCC_X86_32_C : CallingConv<[
- // The X86-32 calling convention returns FP values in ST0, unless marked
+ // The X86-32 calling convention returns FP values in FP0, unless marked
// with "inreg" (used here to distinguish one kind of reg from another,
// weirdly; this is really the sse-regparm calling convention) in which
// case they use XMM0, otherwise it is the same as the common X86 calling
// conv.
CCIfInReg<CCIfSubtarget<"hasSSE2()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
- CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
+ CCIfType<[f32,f64], CCAssignToReg<[FP0, FP1]>>,
CCDelegateTo<RetCC_X86Common>
]>;
@@ -122,6 +124,24 @@ def RetCC_X86_32_HiPE : CallingConv<[
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
]>;
+// X86-32 vectorcall return-value convention.
+// X86-32 vectorcall return-value convention.
+def RetCC_X86_32_VectorCall : CallingConv<[
+ // Vector types are returned in XMM0,XMM1,XMM2 and XMM3.

+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit FP vectors
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // 512-bit FP vectors
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
+
+ // Return integers in the standard way.
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -177,6 +197,7 @@ def RetCC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
@@ -224,6 +245,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[i8, i16], CCPromoteToType<i32>>,
// The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCIfSubtarget<"isTarget64BitILP32()", CCAssignToReg<[R10D]>>>,
CCIfNest<CCAssignToReg<[R10]>>,
// The first 6 integer arguments are passed in integer registers.
@@ -252,7 +274,7 @@ def CC_X86_64_C : CallingConv<[
YMM4, YMM5, YMM6, YMM7]>>>>,
// The first 8 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v16i32, v8i64, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCIfSubtarget<"hasAVX512()",
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,
@@ -327,6 +349,25 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
+def CC_X86_Win64_VectorCall : CallingConv<[
+ // The first 6 floating point and vector types of 128 bits or less use
+ // XMM0-XMM5.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+ // 256-bit vectors use YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+ // 512-bit vectors use ZMM registers.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+ // Delegate to fastcall to handle integer types.
+ CCDelegateTo<CC_X86_Win64_C>
+]>;
+
+
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -460,6 +501,30 @@ def CC_X86_32_FastCall : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_VectorCall : CallingConv<[
+ // The first 6 floating point and vector types of 128 bits or less use
+ // XMM0-XMM5.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+ // 256-bit vectors use YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+ // 512-bit vectors use ZMM registers.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+ // Otherwise, pass it indirectly.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
+ v32i8, v16i16, v8i32, v4i64, v8f32, v4f64,
+ v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCCustom<"CC_X86_32_VectorCallIndirect">>,
+
+ // Delegate to fastcall to handle integer types.
+ CCDelegateTo<CC_X86_32_FastCall>
+]>;
+
def CC_X86_32_ThisCall_Common : CallingConv<[
// The first integer argument is passed in ECX
CCIfType<[i32], CCAssignToReg<[ECX]>>,
@@ -573,6 +638,7 @@ def CC_Intel_OCL_BI : CallingConv<[
// This is the root argument convention for the X86-32 backend.
def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
@@ -590,6 +656,7 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
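To make the new vectorcall tables concrete, here is a hypothetical Win64 declaration and how a plain reading of CC_X86_Win64_VectorCall above would classify its arguments; the register choices are inferred from the TableGen, not verified compiler output, and the syntax assumes MSVC or clang-cl.

#include <immintrin.h>

// Hypothetical signature, for illustration only.
__m128 __vectorcall blend_accumulate(__m128 a, __m128 b, __m256 c, int n);
// a -> XMM0 and b -> XMM1 (first two vector arguments of 128 bits or less),
// c -> a register from the YMM0-YMM5 pool,
// n -> handled by the delegated Win64 integer rules (CCDelegateTo<CC_X86_Win64_C>).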
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
deleted file mode 100644
index a3ae7ee..0000000
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ /dev/null
@@ -1,1498 +0,0 @@
-//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the X86 machine instructions into
-// relocatable machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86JITInfo.h"
-#include "X86Relocations.h"
-#include "X86Subtarget.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-emitter"
-
-STATISTIC(NumEmitted, "Number of machine instructions emitted");
-
-namespace {
- template<class CodeEmitter>
- class Emitter : public MachineFunctionPass {
- const X86InstrInfo *II;
- const DataLayout *TD;
- X86TargetMachine &TM;
- CodeEmitter &MCE;
- MachineModuleInfo *MMI;
- intptr_t PICBaseOffset;
- bool Is64BitMode;
- bool IsPIC;
- public:
- static char ID;
- explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
- : MachineFunctionPass(ID), II(nullptr), TD(nullptr), TM(tm),
- MCE(mce), PICBaseOffset(0), Is64BitMode(false),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "X86 Machine Code Emitter";
- }
-
- void emitOpcodePrefix(uint64_t TSFlags, int MemOperand,
- const MachineInstr &MI,
- const MCInstrDesc *Desc) const;
-
- void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand,
- const MachineInstr &MI,
- const MCInstrDesc *Desc) const;
-
- void emitSegmentOverridePrefix(uint64_t TSFlags,
- int MemOperand,
- const MachineInstr &MI) const;
-
- void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- private:
- void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- intptr_t Disp = 0, intptr_t PCAdj = 0,
- bool Indirect = false);
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
- intptr_t PCAdj = 0);
- void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
- intptr_t PCAdj = 0);
-
- void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
- intptr_t Adj = 0, bool IsPCRel = true);
-
- void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
- void emitRegModRMByte(unsigned RegOpcodeField);
- void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
- void emitConstant(uint64_t Val, unsigned Size);
-
- void emitMemModRMByte(const MachineInstr &MI,
- unsigned Op, unsigned RegOpcodeField,
- intptr_t PCAdj = 0);
-
- unsigned getX86RegNum(unsigned RegNo) const {
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- return TRI->getEncodingValue(RegNo) & 0x7;
- }
-
- unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
- unsigned OpNum) const;
- };
-
-template<class CodeEmitter>
- char Emitter<CodeEmitter>::ID = 0;
-} // end anonymous namespace.
-
-/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
-/// to the specified JITCodeEmitter object.
-FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new Emitter<JITCodeEmitter>(TM, JCE);
-}
-
-template<class CodeEmitter>
-bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
- MMI = &getAnalysis<MachineModuleInfo>();
- MCE.setModuleInfo(MMI);
-
- II = TM.getInstrInfo();
- TD = TM.getDataLayout();
- Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
- IsPIC = TM.getRelocationModel() == Reloc::PIC_;
-
- do {
- DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n");
- MCE.startFunction(MF);
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
- MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MCInstrDesc &Desc = I->getDesc();
- emitInstruction(*I, &Desc);
- // MOVPC32r is basically a call plus a pop instruction.
- if (Desc.getOpcode() == X86::MOVPC32r)
- emitInstruction(*I, &II->get(X86::POP32r));
- ++NumEmitted; // Keep track of the # of mi's emitted
- }
- }
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-/// determineREX - Determine if the MachineInstr has to be encoded with an X86-64
-/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
-/// size, and 3) use of X86-64 extended registers.
-static unsigned determineREX(const MachineInstr &MI) {
- unsigned REX = 0;
- const MCInstrDesc &Desc = MI.getDesc();
-
- // Pseudo instructions do not need REX prefix byte.
- if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
- return 0;
- if (Desc.TSFlags & X86II::REX_W)
- REX |= 1 << 3;
-
- unsigned NumOps = Desc.getNumOperands();
- if (NumOps) {
- bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- unsigned i = isTwoAddr ? 1 : 0;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (X86II::isX86_64NonExtLowByteReg(Reg))
- REX |= 0x40;
- }
- }
-
- switch (Desc.TSFlags & X86II::FormMask) {
- case X86II::MRMSrcReg: {
- if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (X86InstrInfo::isX86_64ExtendedReg(MO))
- REX |= 1 << 0;
- }
- break;
- }
- case X86II::MRMSrcMem: {
- if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- i = isTwoAddr ? 2 : 1;
- for (; i != NumOps; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (X86InstrInfo::isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- case X86II::MRMXm:
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
- i = isTwoAddr ? 1 : 0;
- if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- for (; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (X86InstrInfo::isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- default: {
- if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 0;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (X86InstrInfo::isX86_64ExtendedReg(MO))
- REX |= 1 << 2;
- }
- break;
- }
- }
- }
- return REX;
-}
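
A worked example of the bit layout determineREX builds up (the prefix byte actually emitted later is 0x40 | REX, as in emitOpcodePrefix below); a standalone sketch of the arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Same bit positions as determineREX: W = bit 3, R = bit 2, X = bit 1, B = bit 0.
      uint8_t REX = 0;
      REX |= 1 << 3;                // REX.W: 64-bit operand size (X86II::REX_W)
      REX |= 1 << 0;                // REX.B: extended base/RM register
      assert((0x40 | REX) == 0x49); // the prefix byte that would be emitted
      return 0;
    }
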
-
-
-/// emitPCRelativeBlockAddress - This method keeps track of the information
-/// necessary to resolve the address of this block later and emits a dummy
-/// value.
-///
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
-  // Remember where this reference was and what it refers to so we can
-  // deal with it later.
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- X86::reloc_pcrel_word, MBB));
- MCE.emitWordLE(0);
-}
-
-/// emitGlobalAddress - Emit the specified address to the code stream assuming
-/// this is part of a "take the address of a global" instruction.
-///
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
- unsigned Reloc,
- intptr_t Disp /* = 0 */,
- intptr_t PCAdj /* = 0 */,
- bool Indirect /* = false */) {
- intptr_t RelocCST = Disp;
- if (Reloc == X86::reloc_picrel_word)
- RelocCST = PICBaseOffset;
- else if (Reloc == X86::reloc_pcrel_word)
- RelocCST = PCAdj;
- MachineRelocation MR = Indirect
- ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV),
- RelocCST, false)
- : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), RelocCST, false);
- MCE.addRelocation(MR);
- // The relocated value will be added to the displacement
- if (Reloc == X86::reloc_absolute_dword)
- MCE.emitDWordLE(Disp);
- else
- MCE.emitWordLE((int32_t)Disp);
-}
-
-/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
-/// be emitted to the current location in the function, and allow it to be PC
-/// relative.
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
- unsigned Reloc) {
- intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
-
- // X86 never needs stubs because instruction selection will always pick
- // an instruction sequence that is large enough to hold any address
- // to a symbol.
- // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall)
- bool NeedStub = false;
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, ES, RelocCST,
- 0, NeedStub));
- if (Reloc == X86::reloc_absolute_dword)
- MCE.emitDWordLE(0);
- else
- MCE.emitWordLE(0);
-}
-
-/// emitConstPoolAddress - Arrange for the address of a constant pool
-/// to be emitted to the current location in the function, and allow it to be PC
-/// relative.
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
- intptr_t Disp /* = 0 */,
- intptr_t PCAdj /* = 0 */) {
- intptr_t RelocCST = 0;
- if (Reloc == X86::reloc_picrel_word)
- RelocCST = PICBaseOffset;
- else if (Reloc == X86::reloc_pcrel_word)
- RelocCST = PCAdj;
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, CPI, RelocCST));
- // The relocated value will be added to the displacement
- if (Reloc == X86::reloc_absolute_dword)
- MCE.emitDWordLE(Disp);
- else
- MCE.emitWordLE((int32_t)Disp);
-}
-
-/// emitJumpTableAddress - Arrange for the address of a jump table to
-/// be emitted to the current location in the function, and allow it to be PC
-/// relative.
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
- intptr_t PCAdj /* = 0 */) {
- intptr_t RelocCST = 0;
- if (Reloc == X86::reloc_picrel_word)
- RelocCST = PICBaseOffset;
- else if (Reloc == X86::reloc_pcrel_word)
- RelocCST = PCAdj;
- MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- Reloc, JTI, RelocCST));
- // The relocated value will be added to the displacement
- if (Reloc == X86::reloc_absolute_dword)
- MCE.emitDWordLE(0);
- else
- MCE.emitWordLE(0);
-}
-
-inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
- unsigned RM) {
- assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
- return RM | (RegOpcode << 3) | (Mod << 6);
-}
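
The register form used by emitRegModRMByte below always passes Mod = 3; a small standalone check of the packing performed by ModRMByte:

    #include <cassert>

    // Mirrors ModRMByte above: RM | (RegOpcode << 3) | (Mod << 6).
    static unsigned char modRM(unsigned Mod, unsigned RegOpcode, unsigned RM) {
      return RM | (RegOpcode << 3) | (Mod << 6);
    }

    int main() {
      assert(modRM(3, 0, 1) == 0xC1); // mod=11, reg=000, r/m=001 (register form)
      assert(modRM(0, 2, 5) == 0x15); // mod=00, reg=010, r/m=101 (disp32 form)
      return 0;
    }
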
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
- unsigned RegOpcodeFld){
- MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
- MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
- unsigned Index,
- unsigned Base) {
- // SIB byte is in the same format as the ModRMByte...
- MCE.emitByte(ModRMByte(SS, Index, Base));
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
- // Output the constant in little endian byte order...
- for (unsigned i = 0; i != Size; ++i) {
- MCE.emitByte(Val & 255);
- Val >>= 8;
- }
-}
-
-/// isDisp8 - Return true if this signed displacement fits in an 8-bit
-/// sign-extended field.
-static bool isDisp8(int Value) {
- return Value == (signed char)Value;
-}
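
The cast to signed char makes this a sign-extension check rather than a test on the low byte; a few boundary values, as a standalone sketch of the same predicate:

    #include <cassert>

    static bool isDisp8(int Value) { return Value == (signed char)Value; }

    int main() {
      assert(isDisp8(127));   // fits in a signed byte
      assert(isDisp8(-128));  // still fits
      assert(!isDisp8(128));  // needs a disp32
      assert(!isDisp8(-129)); // needs a disp32
      return 0;
    }
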
-
-static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
- const TargetMachine &TM) {
- // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
- // mechanism as 32-bit mode.
- if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
- !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
- return false;
-
- // Return true if this is a reference to a stub containing the address of the
- // global, not the global itself.
- return isGlobalStubReference(GVOp.getTargetFlags());
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
- int DispVal,
- intptr_t Adj /* = 0 */,
- bool IsPCRel /* = true */) {
- // If this is a simple integer displacement that doesn't require a relocation,
- // emit it now.
- if (!RelocOp) {
- emitConstant(DispVal, 4);
- return;
- }
-
- // Otherwise, this is something that requires a relocation. Emit it as such
- // now.
- unsigned RelocType = Is64BitMode ?
- (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (RelocOp->isGlobal()) {
- // In 64-bit static small code model, we could potentially emit absolute.
- // But it's probably not beneficial. If the MCE supports using RIP directly
-    // do it, otherwise fall back to absolute (this is determined by IsPCRel).
- // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
- // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
- emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
- Adj, Indirect);
- } else if (RelocOp->isSymbol()) {
- emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
- } else if (RelocOp->isCPI()) {
- emitConstPoolAddress(RelocOp->getIndex(), RelocType,
- RelocOp->getOffset(), Adj);
- } else {
- assert(RelocOp->isJTI() && "Unexpected machine operand!");
- emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
- }
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
- unsigned Op,unsigned RegOpcodeField,
- intptr_t PCAdj) {
- const MachineOperand &Op3 = MI.getOperand(Op+3);
- int DispVal = 0;
- const MachineOperand *DispForReloc = nullptr;
-
- // Figure out what sort of displacement we have to handle here.
- if (Op3.isGlobal()) {
- DispForReloc = &Op3;
- } else if (Op3.isSymbol()) {
- DispForReloc = &Op3;
- } else if (Op3.isCPI()) {
- if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
- DispVal += Op3.getOffset();
- }
- } else if (Op3.isJTI()) {
- if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
- }
- } else {
- DispVal = Op3.getImm();
- }
-
- const MachineOperand &Base = MI.getOperand(Op);
- const MachineOperand &Scale = MI.getOperand(Op+1);
- const MachineOperand &IndexReg = MI.getOperand(Op+2);
-
- unsigned BaseReg = Base.getReg();
-
- // Handle %rip relative addressing.
- if (BaseReg == X86::RIP ||
- (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
- assert(IndexReg.getReg() == 0 && Is64BitMode &&
- "Invalid rip-relative address");
- MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
- emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
- return;
- }
-
-  // Indicate that the displacement will use a pcrel or absolute reference
-  // by default. MCEs able to resolve addresses on-the-fly use pcrel by default
-  // while others, unless explicitly asked to use RIP, use absolute references.
- bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;
-
- // Is a SIB byte needed?
- // If no BaseReg, issue a RIP relative instruction only if the MCE can
- // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
- // 2-7) and absolute references.
- unsigned BaseRegNo = -1U;
- if (BaseReg != 0 && BaseReg != X86::RIP)
- BaseRegNo = getX86RegNum(BaseReg);
-
- if (// The SIB byte must be used if there is an index register.
- IndexReg.getReg() == 0 &&
- // The SIB byte must be used if the base is ESP/RSP/R12, all of which
- // encode to an R/M value of 4, which indicates that a SIB byte is
- // present.
- BaseRegNo != N86::ESP &&
- // If there is no base register and we're in 64-bit mode, we need a SIB
- // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
- (!Is64BitMode || BaseReg != 0)) {
- if (BaseReg == 0 || // [disp32] in X86-32 mode
- BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
- MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
- emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
- return;
- }
-
- // If the base is not EBP/ESP and there is no displacement, use simple
-    // indirect register encoding; this handles addresses like [EAX].  The
- // encoding for [EBP] with no displacement means [disp32] so we handle it
- // by emitting a displacement of 0 below.
- if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
- MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
- return;
- }
-
- // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
- if (!DispForReloc && isDisp8(DispVal)) {
- MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
- emitConstant(DispVal, 1);
- return;
- }
-
- // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
- MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
- emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
- return;
- }
-
- // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
- assert(IndexReg.getReg() != X86::ESP &&
- IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
-
- bool ForceDisp32 = false;
- bool ForceDisp8 = false;
- if (BaseReg == 0) {
- // If there is no base register, we emit the special case SIB byte with
- // MOD=0, BASE=4, to JUST get the index, scale, and displacement.
- MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
- ForceDisp32 = true;
- } else if (DispForReloc) {
- // Emit the normal disp32 encoding.
- MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
- ForceDisp32 = true;
- } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
- // Emit no displacement ModR/M byte
- MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
- } else if (isDisp8(DispVal)) {
- // Emit the disp8 encoding...
- MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
- ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
- } else {
- // Emit the normal disp32 encoding...
- MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
- }
-
- // Calculate what the SS field value should be...
- static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
- unsigned SS = SSTable[Scale.getImm()];
-
- if (BaseReg == 0) {
- // Handle the SIB byte for the case where there is no base, see Intel
- // Manual 2A, table 2-7. The displacement has already been output.
- unsigned IndexRegNo;
- if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg.getReg());
- else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
- IndexRegNo = 4;
- emitSIBByte(SS, IndexRegNo, 5);
- } else {
- unsigned BaseRegNo = getX86RegNum(BaseReg);
- unsigned IndexRegNo;
- if (IndexReg.getReg())
- IndexRegNo = getX86RegNum(IndexReg.getReg());
- else
- IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
- emitSIBByte(SS, IndexRegNo, BaseRegNo);
- }
-
- // Do we need to output a displacement?
- if (ForceDisp8) {
- emitConstant(DispVal, 1);
- } else if (DispVal != 0 || ForceDisp32) {
- emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
- }
-}
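
Putting the non-SIB path together: for an address like [EAX+8] (base is not ESP/EBP, no index register, displacement fits in a byte) the code above emits a ModR/M byte with Mod = 1 followed by a single disp8. A sketch of that two-byte sequence, assuming EAX encodes as register number 0:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      std::vector<uint8_t> Out;
      unsigned RegOpcodeField = 2;  // the /r or /digit field of the opcode
      unsigned BaseRegNo = 0;       // EAX
      uint8_t Disp = 8;             // small displacement, passes isDisp8
      Out.push_back(BaseRegNo | (RegOpcodeField << 3) | (1 << 6)); // ModR/M, Mod=1
      Out.push_back(Disp);                                         // disp8
      assert(Out[0] == 0x50 && Out[1] == 0x08);
      return 0;
    }
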
-
-static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II,
- unsigned Opcode) {
- const MCInstrDesc *Desc = &II->get(Opcode);
- MI.setDesc(*Desc);
- return Desc;
-}
-
-/// Is16BitMemOperand - Return true if the specified instruction has
-/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
-static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) {
- const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
- return true;
- return false;
-}
-
-/// Is32BitMemOperand - Return true if the specified instruction has
-/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
-static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) {
- const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
- return true;
- return false;
-}
-
-/// Is64BitMemOperand - Return true if the specified instruction has
-/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
-#ifndef NDEBUG
-static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) {
- const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
- const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
-
- if ((BaseReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
- (IndexReg.getReg() != 0 &&
- X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
- return true;
- return false;
-}
-#endif
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitOpcodePrefix(uint64_t TSFlags,
- int MemOperand,
- const MachineInstr &MI,
- const MCInstrDesc *Desc) const {
- // Emit the operand size opcode prefix as needed.
- if (((TSFlags & X86II::OpSizeMask) >> X86II::OpSizeShift) == X86II::OpSize16)
- MCE.emitByte(0x66);
-
- switch (Desc->TSFlags & X86II::OpPrefixMask) {
- case X86II::PD: // 66
- MCE.emitByte(0x66);
- break;
- case X86II::XS: // F3
- MCE.emitByte(0xF3);
- break;
- case X86II::XD: // F2
- MCE.emitByte(0xF2);
- break;
- }
-
- // Handle REX prefix.
- if (Is64BitMode) {
- if (unsigned REX = determineREX(MI))
- MCE.emitByte(0x40 | REX);
- }
-
- // 0x0F escape code must be emitted just before the opcode.
- switch (Desc->TSFlags & X86II::OpMapMask) {
- case X86II::TB: // Two-byte opcode map
- case X86II::T8: // 0F 38
- case X86II::TA: // 0F 3A
- MCE.emitByte(0x0F);
- break;
- }
-
- switch (Desc->TSFlags & X86II::OpMapMask) {
- case X86II::T8: // 0F 38
- MCE.emitByte(0x38);
- break;
- case X86II::TA: // 0F 3A
- MCE.emitByte(0x3A);
- break;
- }
-}
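
The two OpMapMask switches above compose the escape bytes for the longer opcode maps: a TB-map instruction gets a single 0F, T8 gets 0F 38, and TA gets 0F 3A, all emitted just before the base opcode. A standalone sketch (the enum values are simplified stand-ins, not the real X86II flags):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    enum OpMap { OneByte, TB, T8, TA }; // simplified stand-ins for the opcode maps

    static void emitEscape(OpMap Map, std::vector<uint8_t> &Out) {
      if (Map == TB || Map == T8 || Map == TA)
        Out.push_back(0x0F); // first switch above
      if (Map == T8)
        Out.push_back(0x38); // second switch above
      else if (Map == TA)
        Out.push_back(0x3A);
    }

    int main() {
      std::vector<uint8_t> Out;
      emitEscape(T8, Out);
      assert(Out.size() == 2 && Out[0] == 0x0F && Out[1] == 0x38);
      return 0;
    }
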
-
-// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
-// 0-7 and the difference between the 2 groups is given by the REX prefix.
-// In the VEX prefix, registers are seen sequentially from 0-15 and encoded
-// in 1's complement form, example:
-//
-// ModRM field => XMM9 => 1
-// VEX.VVVV => XMM9 => ~9
-//
-// See table 4-35 of Intel AVX Programming Reference for details.
-template<class CodeEmitter>
-unsigned char
-Emitter<CodeEmitter>::getVEXRegisterEncoding(const MachineInstr &MI,
- unsigned OpNum) const {
- unsigned SrcReg = MI.getOperand(OpNum).getReg();
- unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg());
- if (X86II::isX86_64ExtendedReg(SrcReg))
- SrcRegNum |= 8;
-
- // The registers represented through VEX_VVVV should
- // be encoded in 1's complement form.
- return (~SrcRegNum) & 0xf;
-}
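
For the XMM9 example in the comment above: its 3-bit encoding is 1, the extended-register bit adds 8, and the 4-bit result is stored inverted in VEX.VVVV. The same arithmetic as a standalone check:

    #include <cassert>

    int main() {
      unsigned SrcRegNum = 9 & 0x7;      // low 3 bits of XMM9's encoding -> 1
      SrcRegNum |= 8;                    // XMM9 is an extended register -> 9
      assert(((~SrcRegNum) & 0xf) == 6); // 1's complement form kept in VEX.VVVV
      return 0;
    }
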
-
-/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitSegmentOverridePrefix(uint64_t TSFlags,
- int MemOperand,
- const MachineInstr &MI) const {
- if (MemOperand < 0)
- return; // No memory operand
-
- // Check for explicit segment override on memory operand.
- switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
- default: llvm_unreachable("Unknown segment register!");
- case 0: break;
- case X86::CS: MCE.emitByte(0x2E); break;
- case X86::SS: MCE.emitByte(0x36); break;
- case X86::DS: MCE.emitByte(0x3E); break;
- case X86::ES: MCE.emitByte(0x26); break;
- case X86::FS: MCE.emitByte(0x64); break;
- case X86::GS: MCE.emitByte(0x65); break;
- }
-}
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
- int MemOperand,
- const MachineInstr &MI,
- const MCInstrDesc *Desc) const {
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
-
-  // VEX_R: opcode extension equivalent to REX.R in
- // 1's complement (inverted) form
- //
- // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
- // 0: Same as REX_R=1 (64 bit mode only)
- //
- unsigned char VEX_R = 0x1;
-
- // VEX_X: equivalent to REX.X, only used when a
- // register is used for index in SIB Byte.
- //
- // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
- // 0: Same as REX.X=1 (64-bit mode only)
- unsigned char VEX_X = 0x1;
-
- // VEX_B:
- //
- // 1: Same as REX_B=0 (ignored in 32-bit mode)
- // 0: Same as REX_B=1 (64 bit mode only)
- //
- unsigned char VEX_B = 0x1;
-
- // VEX_W: opcode specific (use like REX.W, or used for
- // opcode extension, or ignored, depending on the opcode byte)
- unsigned char VEX_W = 0;
-
- // VEX_5M (VEX m-mmmmm field):
- //
- // 0b00000: Reserved for future use
- // 0b00001: implied 0F leading opcode
- // 0b00010: implied 0F 38 leading opcode bytes
- // 0b00011: implied 0F 3A leading opcode bytes
- // 0b00100-0b11111: Reserved for future use
- // 0b01000: XOP map select - 08h instructions with imm byte
- // 0b01001: XOP map select - 09h instructions with no imm byte
- // 0b01010: XOP map select - 0Ah instructions with imm dword
- unsigned char VEX_5M = 0;
-
- // VEX_4V (VEX vvvv field): a register specifier
- // (in 1's complement form) or 1111 if unused.
- unsigned char VEX_4V = 0xf;
-
- // VEX_L (Vector Length):
- //
- // 0: scalar or 128-bit vector
- // 1: 256-bit vector
- //
- unsigned char VEX_L = 0;
-
- // VEX_PP: opcode extension providing equivalent
- // functionality of a SIMD prefix
- //
- // 0b00: None
- // 0b01: 66
- // 0b10: F3
- // 0b11: F2
- //
- unsigned char VEX_PP = 0;
-
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
- VEX_W = 1;
-
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
- VEX_L = 1;
-
- switch (TSFlags & X86II::OpPrefixMask) {
- default: break; // VEX_PP already correct
- case X86II::PD: VEX_PP = 0x1; break; // 66
- case X86II::XS: VEX_PP = 0x2; break; // F3
- case X86II::XD: VEX_PP = 0x3; break; // F2
- }
-
- switch (TSFlags & X86II::OpMapMask) {
- default: llvm_unreachable("Invalid prefix!");
- case X86II::TB: VEX_5M = 0x1; break; // 0F
- case X86II::T8: VEX_5M = 0x2; break; // 0F 38
- case X86II::TA: VEX_5M = 0x3; break; // 0F 3A
- case X86II::XOP8: VEX_5M = 0x8; break;
- case X86II::XOP9: VEX_5M = 0x9; break;
- case X86II::XOPA: VEX_5M = 0xA; break;
- }
-
- // Classify VEX_B, VEX_4V, VEX_R, VEX_X
- unsigned NumOps = Desc->getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
- ++CurOp;
- else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
- assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
- // Special case for GATHER with 2 TIED_TO operands
- // Skip the first 2 operands: dst, mask_wb
- CurOp += 2;
- }
-
- switch (TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!");
- case X86II::RawFrm:
- break;
- case X86II::MRMDestMem: {
- // MRMDestMem instructions forms:
- // MemAddr, src1(ModR/M)
- // MemAddr, src1(VEX_4V), src2(ModR/M)
- // MemAddr, src1(ModR/M), imm8
- //
- if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
- VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
- VEX_X = 0x0;
-
- CurOp = X86::AddrNumOperands;
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
-
- const MachineOperand &MO = MI.getOperand(CurOp);
- if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
- VEX_R = 0x0;
- break;
- }
- case X86II::MRMSrcMem:
- // MRMSrcMem instructions forms:
- // src1(ModR/M), MemAddr
- // src1(ModR/M), src2(VEX_4V), MemAddr
- // src1(ModR/M), MemAddr, imm8
- // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
- //
- // FMA4:
- // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
- // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_R = 0x0;
- CurOp++;
-
- if (HasVEX_4V) {
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
- CurOp++;
- }
-
- if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
- VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
- VEX_X = 0x0;
-
- if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
- break;
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
- // MRM[0-9]m instructions forms:
- // MemAddr
- // src1(VEX_4V), MemAddr
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
-
- if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
- VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
- VEX_X = 0x0;
- break;
- }
- case X86II::MRMSrcReg:
- // MRMSrcReg instructions forms:
- // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
- // dst(ModR/M), src1(ModR/M)
- // dst(ModR/M), src1(ModR/M), imm8
- //
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_R = 0x0;
- CurOp++;
-
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
-
- if (HasMemOp4) // Skip second register source (encoded in I8IMM)
- CurOp++;
-
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_B = 0x0;
- CurOp++;
- if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
- break;
- case X86II::MRMDestReg:
- // MRMDestReg instructions forms:
- // dst(ModR/M), src(ModR/M)
- // dst(ModR/M), src(ModR/M), imm8
- // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_B = 0x0;
- CurOp++;
-
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
-
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_R = 0x0;
- break;
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- // MRM0r-MRM7r instructions forms:
- // dst(VEX_4V), src(ModR/M), imm8
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
- CurOp++;
-
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
- VEX_B = 0x0;
- break;
- }
-
- // Emit segment override opcode prefix as needed.
- emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
-
- // VEX opcode prefix can have 2 or 3 bytes
- //
- // 3 bytes:
- // +-----+ +--------------+ +-------------------+
- // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
- // +-----+ +--------------+ +-------------------+
- // 2 bytes:
- // +-----+ +-------------------+
- // | C5h | | R | vvvv | L | pp |
- // +-----+ +-------------------+
- //
- // XOP uses a similar prefix:
- // +-----+ +--------------+ +-------------------+
- // | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp |
- // +-----+ +--------------+ +-------------------+
- unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
-
- // Can this use the 2 byte VEX prefix?
- if (Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
- MCE.emitByte(0xC5);
- MCE.emitByte(LastByte | (VEX_R << 7));
- return;
- }
-
- // 3 byte VEX prefix
- MCE.emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4);
- MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M);
- MCE.emitByte(LastByte | (VEX_W << 7));
-}
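
So the compact C5 form is only usable when nothing forces the third byte: VEX_B and VEX_X still 1 (no extended base or index register), VEX_W clear, and the plain 0F opcode map (VEX_5M == 1). A sketch of the byte values for such a case, using the same packing as the code above:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Simple case: 128-bit operation, 0F map, no extended registers, vvvv unused.
      uint8_t VEX_R = 1, VEX_4V = 0xF, VEX_L = 0, VEX_PP = 1; // PP = 1 means a 66 prefix
      uint8_t LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
      assert(LastByte == 0x79);
      // Two-byte prefix: C5, then R folded into bit 7 of the last byte.
      assert((uint8_t)(LastByte | (VEX_R << 7)) == 0xF9);
      return 0;
    }
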
-
-template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
- const MCInstrDesc *Desc) {
- DEBUG(dbgs() << MI);
-
- // If this is a pseudo instruction, lower it.
- switch (Desc->getOpcode()) {
- case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break;
- case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break;
- case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break;
- case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break;
- case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break;
- case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break;
- case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break;
- case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break;
- case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break;
- case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break;
- case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
- case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
- case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
- case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break;
- case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
- case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
- case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;
- }
-
-
- MCE.processDebugLoc(MI.getDebugLoc(), true);
-
- unsigned Opcode = Desc->Opcode;
-
- // If this is a two-address instruction, skip one of the register operands.
- unsigned NumOps = Desc->getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
- ++CurOp;
- else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
- assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
- // Special case for GATHER with 2 TIED_TO operands
- // Skip the first 2 operands: dst, mask_wb
- CurOp += 2;
- }
-
- uint64_t TSFlags = Desc->TSFlags;
-
- // Encoding type for this instruction.
- unsigned char Encoding = (TSFlags & X86II::EncodingMask) >>
- X86II::EncodingShift;
-
-  // Does it use the VEX.VVVV field?
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
- const unsigned MemOp4_I8IMMOperand = 2;
-
- // Determine where the memory operand starts, if present.
- int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
- if (MemoryOperand != -1) MemoryOperand += CurOp;
-
- // Emit the lock opcode prefix as needed.
- if (Desc->TSFlags & X86II::LOCK)
- MCE.emitByte(0xF0);
-
- // Emit segment override opcode prefix as needed.
- emitSegmentOverridePrefix(TSFlags, MemoryOperand, MI);
-
- // Emit the repeat opcode prefix as needed.
- if (Desc->TSFlags & X86II::REP)
- MCE.emitByte(0xF3);
-
- // Emit the address size opcode prefix as needed.
- bool need_address_override;
- if (TSFlags & X86II::AdSize) {
- need_address_override = true;
- } else if (MemoryOperand < 0) {
- need_address_override = false;
- } else if (Is64BitMode) {
- assert(!Is16BitMemOperand(MI, MemoryOperand));
- need_address_override = Is32BitMemOperand(MI, MemoryOperand);
- } else {
- assert(!Is64BitMemOperand(MI, MemoryOperand));
- need_address_override = Is16BitMemOperand(MI, MemoryOperand);
- }
-
- if (need_address_override)
- MCE.emitByte(0x67);
-
- if (Encoding == 0)
- emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
- else
- emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
-
- unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
- switch (TSFlags & X86II::FormMask) {
- default:
- llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
- case X86II::Pseudo:
- // Remember the current PC offset, this is the PIC relocation
- // base address.
- switch (Opcode) {
- default:
- llvm_unreachable("pseudo instructions should be removed before code"
- " emission");
-      // Do nothing for Int_MemBarrier - it's just a comment.  Emit a debug
-      // message to make it slightly easier to see.
- case X86::Int_MemBarrier:
- DEBUG(dbgs() << "#MEMBARRIER\n");
- break;
-
- case TargetOpcode::INLINEASM:
- // We allow inline assembler nodes with empty bodies - they can
- // implicitly define registers, which is ok for JIT.
- if (MI.getOperand(0).getSymbolName()[0]) {
- DebugLoc DL = MI.getDebugLoc();
- DL.print(MI.getParent()->getParent()->getFunction()->getContext(),
- llvm::errs());
- report_fatal_error("JIT does not support inline asm!");
- }
- break;
- case TargetOpcode::DBG_VALUE:
- case TargetOpcode::CFI_INSTRUCTION:
- break;
- case TargetOpcode::GC_LABEL:
- case TargetOpcode::EH_LABEL:
- MCE.emitLabel(MI.getOperand(0).getMCSymbol());
- break;
-
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- break;
-
- case X86::SEH_PushReg:
- case X86::SEH_SaveReg:
- case X86::SEH_SaveXMM:
- case X86::SEH_StackAlloc:
- case X86::SEH_SetFrame:
- case X86::SEH_PushFrame:
- case X86::SEH_EndPrologue:
- break;
-
- case X86::MOVPC32r: {
- // This emits the "call" portion of this pseudo instruction.
- MCE.emitByte(BaseOpcode);
- emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags));
- // Remember PIC base.
- PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
- X86JITInfo *JTI = TM.getJITInfo();
- JTI->setPICBase(MCE.getCurrentPCValue());
- break;
- }
- }
- CurOp = NumOps;
- break;
- case X86II::RawFrm: {
- MCE.emitByte(BaseOpcode);
-
- if (CurOp == NumOps)
- break;
-
- const MachineOperand &MO = MI.getOperand(CurOp++);
-
- DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
- DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
- DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
- DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
- DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
-
- if (MO.isMBB()) {
- emitPCRelativeBlockAddress(MO.getMBB());
- break;
- }
-
- if (MO.isGlobal()) {
- emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0);
- break;
- }
-
- if (MO.isSymbol()) {
- emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
- break;
- }
-
- // FIXME: Only used by hackish MCCodeEmitter, remove when dead.
- if (MO.isJTI()) {
- emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
- break;
- }
-
- assert(MO.isImm() && "Unknown RawFrm operand!");
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
- // Fix up immediate operand for pc relative calls.
- intptr_t Imm = (intptr_t)MO.getImm();
- Imm = Imm - MCE.getCurrentPCValue() - 4;
- emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags));
- } else
- emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
- break;
- }
-
- case X86II::AddRegFrm: {
- MCE.emitByte(BaseOpcode +
- getX86RegNum(MI.getOperand(CurOp++).getReg()));
-
- if (CurOp == NumOps)
- break;
-
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm()) {
- emitConstant(MO1.getImm(), Size);
- break;
- }
-
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV32ri64)
- rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
- // This should not occur on Darwin for relocatable objects.
- if (Opcode == X86::MOV64ri)
- rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
- break;
- }
-
- case X86II::MRMDestReg: {
- MCE.emitByte(BaseOpcode);
-
- unsigned SrcRegNum = CurOp+1;
- if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
- SrcRegNum++;
-
- emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
- CurOp = SrcRegNum + 1;
- break;
- }
- case X86II::MRMDestMem: {
- MCE.emitByte(BaseOpcode);
-
- unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
- if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
- SrcRegNum++;
- emitMemModRMByte(MI, CurOp,
- getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
- CurOp = SrcRegNum + 1;
- break;
- }
-
- case X86II::MRMSrcReg: {
- MCE.emitByte(BaseOpcode);
-
- unsigned SrcRegNum = CurOp+1;
- if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
- ++SrcRegNum;
-
- if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
- ++SrcRegNum;
-
- emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(),
- getX86RegNum(MI.getOperand(CurOp).getReg()));
- // 2 operands skipped with HasMemOp4, compensate accordingly
- CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
- if (HasVEX_4VOp3)
- ++CurOp;
- break;
- }
- case X86II::MRMSrcMem: {
- int AddrOperands = X86::AddrNumOperands;
- unsigned FirstMemOp = CurOp+1;
- if (HasVEX_4V) {
- ++AddrOperands;
- ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
- }
- if (HasMemOp4) // Skip second register source (encoded in I8IMM)
- ++FirstMemOp;
-
- MCE.emitByte(BaseOpcode);
-
- intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
- X86II::getSizeOfImm(Desc->TSFlags) : 0;
- emitMemModRMByte(MI, FirstMemOp,
- getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
- CurOp += AddrOperands + 1;
- if (HasVEX_4VOp3)
- ++CurOp;
- break;
- }
-
- case X86II::MRMXr:
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r: {
- if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
- ++CurOp;
- MCE.emitByte(BaseOpcode);
- uint64_t Form = (Desc->TSFlags & X86II::FormMask);
- emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
- (Form == X86II::MRMXr) ? 0 : Form-X86II::MRM0r);
-
- if (CurOp == NumOps)
- break;
-
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm()) {
- emitConstant(MO1.getImm(), Size);
- break;
- }
-
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64ri32)
- rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
- if (MO1.isGlobal()) {
- bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- Indirect);
- } else if (MO1.isSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isCPI())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJTI())
- emitJumpTableAddress(MO1.getIndex(), rt);
- break;
- }
-
- case X86II::MRMXm:
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
- if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
- ++CurOp;
- intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
- (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
- X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
-
- MCE.emitByte(BaseOpcode);
- uint64_t Form = (Desc->TSFlags & X86II::FormMask);
- emitMemModRMByte(MI, CurOp, (Form==X86II::MRMXm) ? 0 : Form - X86II::MRM0m,
- PCAdj);
- CurOp += X86::AddrNumOperands;
-
- if (CurOp == NumOps)
- break;
-
- const MachineOperand &MO = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO.isImm()) {
- emitConstant(MO.getImm(), Size);
- break;
- }
-
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64mi32)
- rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
- if (MO.isGlobal()) {
- bool Indirect = gvNeedsNonLazyPtr(MO, TM);
- emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- Indirect);
- } else if (MO.isSymbol())
- emitExternalSymbolAddress(MO.getSymbolName(), rt);
- else if (MO.isCPI())
- emitConstPoolAddress(MO.getIndex(), rt);
- else if (MO.isJTI())
- emitJumpTableAddress(MO.getIndex(), rt);
- break;
- }
-
- case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
- case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
- case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF: case X86II::MRM_E0: case X86II::MRM_E1:
- case X86II::MRM_E2: case X86II::MRM_E3: case X86II::MRM_E4:
- case X86II::MRM_E5: case X86II::MRM_E8: case X86II::MRM_E9:
- case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
- case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_F0:
- case X86II::MRM_F1: case X86II::MRM_F2: case X86II::MRM_F3:
- case X86II::MRM_F4: case X86II::MRM_F5: case X86II::MRM_F6:
- case X86II::MRM_F7: case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_FA: case X86II::MRM_FB: case X86II::MRM_FC:
- case X86II::MRM_FD: case X86II::MRM_FE: case X86II::MRM_FF:
- MCE.emitByte(BaseOpcode);
-
- unsigned char MRM;
- switch (TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Invalid Form");
- case X86II::MRM_C0: MRM = 0xC0; break;
- case X86II::MRM_C1: MRM = 0xC1; break;
- case X86II::MRM_C2: MRM = 0xC2; break;
- case X86II::MRM_C3: MRM = 0xC3; break;
- case X86II::MRM_C4: MRM = 0xC4; break;
- case X86II::MRM_C8: MRM = 0xC8; break;
- case X86II::MRM_C9: MRM = 0xC9; break;
- case X86II::MRM_CA: MRM = 0xCA; break;
- case X86II::MRM_CB: MRM = 0xCB; break;
- case X86II::MRM_D0: MRM = 0xD0; break;
- case X86II::MRM_D1: MRM = 0xD1; break;
- case X86II::MRM_D4: MRM = 0xD4; break;
- case X86II::MRM_D5: MRM = 0xD5; break;
- case X86II::MRM_D6: MRM = 0xD6; break;
- case X86II::MRM_D8: MRM = 0xD8; break;
- case X86II::MRM_D9: MRM = 0xD9; break;
- case X86II::MRM_DA: MRM = 0xDA; break;
- case X86II::MRM_DB: MRM = 0xDB; break;
- case X86II::MRM_DC: MRM = 0xDC; break;
- case X86II::MRM_DD: MRM = 0xDD; break;
- case X86II::MRM_DE: MRM = 0xDE; break;
- case X86II::MRM_DF: MRM = 0xDF; break;
- case X86II::MRM_E0: MRM = 0xE0; break;
- case X86II::MRM_E1: MRM = 0xE1; break;
- case X86II::MRM_E2: MRM = 0xE2; break;
- case X86II::MRM_E3: MRM = 0xE3; break;
- case X86II::MRM_E4: MRM = 0xE4; break;
- case X86II::MRM_E5: MRM = 0xE5; break;
- case X86II::MRM_E8: MRM = 0xE8; break;
- case X86II::MRM_E9: MRM = 0xE9; break;
- case X86II::MRM_EA: MRM = 0xEA; break;
- case X86II::MRM_EB: MRM = 0xEB; break;
- case X86II::MRM_EC: MRM = 0xEC; break;
- case X86II::MRM_ED: MRM = 0xED; break;
- case X86II::MRM_EE: MRM = 0xEE; break;
- case X86II::MRM_F0: MRM = 0xF0; break;
- case X86II::MRM_F1: MRM = 0xF1; break;
- case X86II::MRM_F2: MRM = 0xF2; break;
- case X86II::MRM_F3: MRM = 0xF3; break;
- case X86II::MRM_F4: MRM = 0xF4; break;
- case X86II::MRM_F5: MRM = 0xF5; break;
- case X86II::MRM_F6: MRM = 0xF6; break;
- case X86II::MRM_F7: MRM = 0xF7; break;
- case X86II::MRM_F8: MRM = 0xF8; break;
- case X86II::MRM_F9: MRM = 0xF9; break;
- case X86II::MRM_FA: MRM = 0xFA; break;
- case X86II::MRM_FB: MRM = 0xFB; break;
- case X86II::MRM_FC: MRM = 0xFC; break;
- case X86II::MRM_FD: MRM = 0xFD; break;
- case X86II::MRM_FE: MRM = 0xFE; break;
- case X86II::MRM_FF: MRM = 0xFF; break;
- }
- MCE.emitByte(MRM);
- break;
- }
-
- while (CurOp != NumOps && NumOps - CurOp <= 2) {
-    // The last source register of a 4-operand instruction in AVX is encoded
-    // in bits[7:4] of an immediate byte.
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
- const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
- : CurOp);
- ++CurOp;
- unsigned RegNum = getX86RegNum(MO.getReg()) << 4;
- if (X86II::isX86_64ExtendedReg(MO.getReg()))
- RegNum |= 1 << 7;
- // If there is an additional 5th operand it must be an immediate, which
- // is encoded in bits[3:0]
- if (CurOp != NumOps) {
- const MachineOperand &MIMM = MI.getOperand(CurOp++);
- if (MIMM.isImm()) {
- unsigned Val = MIMM.getImm();
- assert(Val < 16 && "Immediate operand value out of range");
- RegNum |= Val;
- }
- }
- emitConstant(RegNum, 1);
- } else {
- emitConstant(MI.getOperand(CurOp++).getImm(),
- X86II::getSizeOfImm(Desc->TSFlags));
- }
- }
-
- if (!MI.isVariadic() && CurOp != NumOps) {
-#ifndef NDEBUG
- dbgs() << "Cannot encode all operands of: " << MI << "\n";
-#endif
- llvm_unreachable(nullptr);
- }
-
- MCE.processDebugLoc(MI.getDebugLoc(), false);
-}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ce554ba..95cb718 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -64,7 +64,7 @@ public:
X86ScalarSSEf32 = Subtarget->hasSSE1();
}
- bool TargetSelectInstruction(const Instruction *I) override;
+ bool fastSelectInstruction(const Instruction *I) override;
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
@@ -73,7 +73,9 @@ public:
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
- bool FastLowerArguments() override;
+ bool fastLowerArguments() override;
+ bool fastLowerCall(CallLoweringInfo &CLI) override;
+ bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
#include "X86GenFastISel.inc"
@@ -124,13 +126,8 @@ private:
bool X86SelectFPExt(const Instruction *I);
bool X86SelectFPTrunc(const Instruction *I);
- bool X86VisitIntrinsicCall(const IntrinsicInst &I);
- bool X86SelectCall(const Instruction *I);
-
- bool DoSelectCall(const Instruction *I, const char *MemIntName);
-
const X86InstrInfo *getInstrInfo() const {
- return getTargetMachine()->getInstrInfo();
+ return getTargetMachine()->getSubtargetImpl()->getInstrInfo();
}
const X86TargetMachine *getTargetMachine() const {
return static_cast<const X86TargetMachine *>(&TM);
@@ -138,11 +135,14 @@ private:
bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
- unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
+ unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned X86MaterializeGV(const GlobalValue *GV,MVT VT);
+ unsigned fastMaterializeConstant(const Constant *C) override;
- unsigned TargetMaterializeAlloca(const AllocaInst *C) override;
+ unsigned fastMaterializeAlloca(const AllocaInst *C) override;
- unsigned TargetMaterializeFloatZero(const ConstantFP *CF) override;
+ unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
@@ -164,46 +164,6 @@ private:
} // end anonymous namespace.
-static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) {
- // If both operands are the same, then try to optimize or fold the cmp.
- CmpInst::Predicate Predicate = CI->getPredicate();
- if (CI->getOperand(0) != CI->getOperand(1))
- return Predicate;
-
- switch (Predicate) {
- default: llvm_unreachable("Invalid predicate!");
- case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
- case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
- case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
- case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
- case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
- case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
- case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
- case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
- case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
-
- case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
- case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
- case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
- }
-
- return Predicate;
-}
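
The table only applies when both operands are the same value, so each row is the predicate's truth value for x compared with itself; NaN is the only wrinkle for the floating-point rows (x == x is true exactly when x is ordered). Two rows checked in a standalone sketch:

    #include <cassert>
    #include <cmath>

    int main() {
      double x = 1.0, n = std::nan("");
      assert((x == x) == !std::isnan(x)); // FCMP_OEQ(x, x) behaves like FCMP_ORD(x, x)
      assert((n == n) == !std::isnan(n)); // false on NaN, again matching FCMP_ORD
      assert(!(x != x));                  // "not equal" of a value with itself is false,
      return 0;                           // matching the ICMP_NE -> FCMP_FALSE row
    }
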
-
static std::pair<X86::CondCode, bool>
getX86ConditionCode(CmpInst::Predicate Predicate) {
X86::CondCode CC = X86::COND_INVALID;
@@ -532,7 +492,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned Src, EVT SrcVT,
unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
if (RR == 0)
return false;
@@ -996,8 +956,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
- I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
@@ -1020,7 +979,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// The calling-convention tables for x87 returns don't tell
// the whole story.
- if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
unsigned SrcReg = Reg + VA.getValNo();
@@ -1039,12 +998,12 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (SrcVT == MVT::i1) {
if (Outs[0].Flags.isSExt())
return false;
- SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
}
unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
ISD::SIGN_EXTEND;
- SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
SrcReg, /*TODO: Kill=*/false);
}
@@ -1107,7 +1066,7 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1197,7 +1156,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
ResultReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
ResultReg);
- ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
X86::sub_8bit);
if (!ResultReg)
return false;
@@ -1212,7 +1171,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
}
if (ResultReg) {
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1253,7 +1212,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
FlagReg2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
ResultReg).addReg(FlagReg1).addReg(FlagReg2);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1271,7 +1230,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1288,7 +1247,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
if (SrcVT.SimpleTy == MVT::i1) {
// Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
if (ResultReg == 0)
@@ -1315,13 +1274,13 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
ResultReg)
.addImm(0).addReg(Result32).addImm(X86::sub_32bit);
} else if (DstVT != MVT::i8) {
- ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
ResultReg, /*Kill=*/true);
if (ResultReg == 0)
return false;
}
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1345,8 +1304,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
switch (Predicate) {
default: break;
- case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true;
- case CmpInst::FCMP_TRUE: FastEmitBranch(TrueMBB, DbgLoc); return true;
+ case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
+ case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
}
const Value *CmpLHS = CI->getOperand(0);
@@ -1416,7 +1375,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Emits an unconditional branch to the FalseBB, obtains the branch
// weight, and adds it to the successor list.
- FastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, DbgLoc);
return true;
}
@@ -1448,7 +1407,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
.addMBB(TrueMBB);
- FastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, DbgLoc);
uint32_t BranchWeight = 0;
if (FuncInfo.BPI)
BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
@@ -1468,7 +1427,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
.addMBB(TrueMBB);
- FastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, DbgLoc);
uint32_t BranchWeight = 0;
if (FuncInfo.BPI)
BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
@@ -1487,7 +1446,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
.addReg(OpReg).addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4))
.addMBB(TrueMBB);
- FastEmitBranch(FalseMBB, DbgLoc);
+ fastEmitBranch(FalseMBB, DbgLoc);
uint32_t BranchWeight = 0;
if (FuncInfo.BPI)
BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
@@ -1561,7 +1520,7 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
.addReg(Op0Reg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1715,7 +1674,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
ResultSuperReg).addReg(SourceSuperReg).addImm(8);
// Now reference the 8-bit subreg of the result.
- ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
/*Kill=*/true, X86::sub_8bit);
}
// Copy the result out of the physreg if we haven't already.
@@ -1724,7 +1683,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
.addReg(OpEntry.DivRemResultReg);
}
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1840,9 +1799,9 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return false;
unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
- unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
+ unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
LHSReg, LHSIsKill);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1920,15 +1879,15 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
return false;
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
LHSReg, LHSIsKill);
- unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
RHSReg, RHSIsKill);
- unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
AndReg, /*IsKill=*/true);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -1991,8 +1950,8 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
unsigned ResultReg =
- FastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
- UpdateValueMap(I, ResultReg);
+ fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -2021,7 +1980,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(OpReg, getKillRegState(OpIsKill));
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -2054,7 +2013,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::CVTSS2SDrr), ResultReg)
.addReg(OpReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -2073,7 +2032,7 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::CVTSD2SSrr), ResultReg)
.addReg(OpReg);
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
}
@@ -2099,7 +2058,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
if (SrcVT == MVT::i8) {
// Truncate from i8 to i1; no code needed.
- UpdateValueMap(I, InputReg);
+ updateValueMap(I, InputReg);
return true;
}
@@ -2116,13 +2075,13 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
}
// Issue an extract_subreg.
- unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
+ unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
InputReg, /*Kill=*/true,
X86::sub_8bit);
if (!ResultReg)
return false;
- UpdateValueMap(I, ResultReg);
+ updateValueMap(I, ResultReg);
return true;
}
@@ -2166,24 +2125,12 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
return true;
}
-static bool isCommutativeIntrinsic(IntrinsicInst const &I) {
- switch (I.getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- return true;
- default:
- return false;
- }
-}
-
-bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
+bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// FIXME: Handle more intrinsics.
- switch (I.getIntrinsicID()) {
+ switch (II->getIntrinsicID()) {
default: return false;
case Intrinsic::frameaddress: {
- Type *RetTy = I.getCalledFunction()->getReturnType();
+ Type *RetTy = II->getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeLegal(RetTy, VT))
@@ -2203,8 +2150,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF));
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
@@ -2223,7 +2170,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// movq (%rax), %rax
// ...
unsigned DestReg;
- unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
+ unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
while (Depth--) {
DestReg = createResultReg(RC);
addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -2231,23 +2178,23 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
SrcReg = DestReg;
}
- UpdateValueMap(&I, SrcReg);
+ updateValueMap(II, SrcReg);
return true;
}
case Intrinsic::memcpy: {
- const MemCpyInst &MCI = cast<MemCpyInst>(I);
+ const MemCpyInst *MCI = cast<MemCpyInst>(II);
// Don't handle volatile or variable length memcpys.
- if (MCI.isVolatile())
+ if (MCI->isVolatile())
return false;
- if (isa<ConstantInt>(MCI.getLength())) {
+ if (isa<ConstantInt>(MCI->getLength())) {
// Small memcpy's are common enough that we want to do them
// without a call if possible.
- uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+ uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
if (IsMemcpySmall(Len)) {
X86AddressMode DestAM, SrcAM;
- if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
- !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI->getRawSource(), SrcAM))
return false;
TryEmitSmallMemcpy(DestAM, SrcAM, Len);
return true;
@@ -2255,35 +2202,35 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
- if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
+ if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
return false;
- if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
+ if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
return false;
- return DoSelectCall(&I, "memcpy");
+ return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
}
case Intrinsic::memset: {
- const MemSetInst &MSI = cast<MemSetInst>(I);
+ const MemSetInst *MSI = cast<MemSetInst>(II);
- if (MSI.isVolatile())
+ if (MSI->isVolatile())
return false;
unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
- if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
+ if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
return false;
- if (MSI.getDestAddressSpace() > 255)
+ if (MSI->getDestAddressSpace() > 255)
return false;
- return DoSelectCall(&I, "memset");
+ return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
}
case Intrinsic::stackprotector: {
// Emit code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
- const Value *Op1 = I.getArgOperand(0); // The guard's value.
- const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+ const Value *Op1 = II->getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
@@ -2294,7 +2241,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return true;
}
case Intrinsic::dbg_declare: {
- const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
X86AddressMode AM;
assert(DI->getAddress() && "Null address should be checked earlier!");
if (!X86SelectAddress(DI->getAddress(), AM))
@@ -2302,8 +2249,10 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM).
- addImm(0).addMetadata(DI->getVariable());
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
+ .addImm(0)
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
return true;
}
case Intrinsic::trap: {
@@ -2314,13 +2263,13 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
if (!Subtarget->hasSSE1())
return false;
- Type *RetTy = I.getCalledFunction()->getReturnType();
+ Type *RetTy = II->getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
- // Unfortunately we can't use FastEmit_r, because the AVX version of FSQRT
+ // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
// is not generated by FastISel yet.
// FIXME: Update this code once tablegen can handle it.
static const unsigned SqrtOpc[2][2] = {
@@ -2336,7 +2285,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
}
- const Value *SrcVal = I.getArgOperand(0);
+ const Value *SrcVal = II->getArgOperand(0);
unsigned SrcReg = getRegForValue(SrcVal);
if (SrcReg == 0)
@@ -2359,7 +2308,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
MIB.addReg(SrcReg);
- UpdateValueMap(&I, ResultReg);
+ updateValueMap(II, ResultReg);
return true;
}
case Intrinsic::sadd_with_overflow:
@@ -2370,7 +2319,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
case Intrinsic::umul_with_overflow: {
// This implements the basic lowering of the xalu with overflow intrinsics
// into add/sub/mul followed by either seto or setb.
- const Function *Callee = I.getCalledFunction();
+ const Function *Callee = II->getCalledFunction();
auto *Ty = cast<StructType>(Callee->getReturnType());
Type *RetTy = Ty->getTypeAtIndex(0U);
Type *CondTy = Ty->getTypeAtIndex(1);
@@ -2382,23 +2331,31 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
if (VT < MVT::i8 || VT > MVT::i64)
return false;
- const Value *LHS = I.getArgOperand(0);
- const Value *RHS = I.getArgOperand(1);
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
// Canonicalize immediate to the RHS.
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
- isCommutativeIntrinsic(I))
+ isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
+ bool UseIncDec = false;
+ if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
+ UseIncDec = true;
+
unsigned BaseOpc, CondOpc;
- switch (I.getIntrinsicID()) {
+ switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
+ BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
+ CondOpc = X86::SETOr;
+ break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
+ BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
+ CondOpc = X86::SETOr;
+ break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
@@ -2414,9 +2371,24 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
unsigned ResultReg = 0;
// Check if we have an immediate version.
- if (auto const *C = dyn_cast<ConstantInt>(RHS)) {
- ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
- C->getZExtValue());
+ if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
+ static const unsigned Opc[2][2][4] = {
+ { { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
+ { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } },
+ { { X86::INC8r, X86::INC64_16r, X86::INC64_32r, X86::INC64r },
+ { X86::DEC8r, X86::DEC64_16r, X86::DEC64_32r, X86::DEC64r } }
+ };
+
+ if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsDec = BaseOpc == X86ISD::DEC;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc[Is64Bit][IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ } else
+ ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
+ CI->getZExtValue());
}
unsigned RHSReg;
@@ -2426,7 +2398,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
if (RHSReg == 0)
return false;
RHSIsKill = hasTrivialKill(RHS);
- ResultReg = FastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
+ ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
RHSIsKill);
}
@@ -2441,7 +2413,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
.addReg(LHSReg, getKillRegState(LHSIsKill));
- ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
+ ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
} else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
static const unsigned MULOpc[] =
@@ -2452,10 +2424,10 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), X86::AL)
.addReg(LHSReg, getKillRegState(LHSIsKill));
- ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
+ ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
RHSIsKill);
} else
- ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
+ ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
RHSReg, RHSIsKill);
}
@@ -2468,7 +2440,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
ResultReg2);
- UpdateValueMap(&I, ResultReg, 2);
+ updateValueMap(II, ResultReg, 2);
return true;
}
case Intrinsic::x86_sse_cvttss2si:
@@ -2476,7 +2448,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64: {
bool IsInputDouble;
- switch (I.getIntrinsicID()) {
+ switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic.");
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
@@ -2492,7 +2464,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
break;
}
- Type *RetTy = I.getCalledFunction()->getReturnType();
+ Type *RetTy = II->getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
@@ -2512,7 +2484,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
// Check if we can fold insertelement instructions into the convert.
- const Value *Op = I.getArgOperand(0);
+ const Value *Op = II->getArgOperand(0);
while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
const Value *Index = IE->getOperand(2);
if (!isa<ConstantInt>(Index))
@@ -2534,13 +2506,13 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(Reg);
- UpdateValueMap(&I, ResultReg);
+ updateValueMap(II, ResultReg);
return true;
}
}
}
-bool X86FastISel::FastLowerArguments() {
+bool X86FastISel::fastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
@@ -2630,58 +2602,57 @@ bool X86FastISel::FastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(DstReg, getKillRegState(true));
- UpdateValueMap(&Arg, ResultReg);
+ updateValueMap(&Arg, ResultReg);
}
return true;
}
-bool X86FastISel::X86SelectCall(const Instruction *I) {
- const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = CI->getCalledValue();
-
- // Can't handle inline asm yet.
- if (isa<InlineAsm>(Callee))
- return false;
-
- // Handle intrinsic calls.
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
- return X86VisitIntrinsicCall(*II);
-
- // Allow SelectionDAG isel to handle tail calls.
- if (cast<CallInst>(I)->isTailCall())
- return false;
-
- return DoSelectCall(I, nullptr);
-}
-
-static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
- const ImmutableCallSite &CS) {
- if (Subtarget.is64Bit())
+static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
+ CallingConv::ID CC,
+ ImmutableCallSite *CS) {
+ if (Subtarget->is64Bit())
return 0;
- if (Subtarget.getTargetTriple().isOSMSVCRT())
+ if (Subtarget->getTargetTriple().isOSMSVCRT())
return 0;
- CallingConv::ID CC = CS.getCallingConv();
- if (CC == CallingConv::Fast || CC == CallingConv::GHC)
+ if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::HiPE)
return 0;
- if (!CS.paramHasAttr(1, Attribute::StructRet))
+ if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
return 0;
- if (CS.paramHasAttr(1, Attribute::InReg))
+ if (CS && CS->paramHasAttr(1, Attribute::InReg))
return 0;
return 4;
}
-// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
-bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
- const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = CI->getCalledValue();
-
- // Handle only C and fastcc calling conventions for now.
- ImmutableCallSite CS(CI);
- CallingConv::ID CC = CS.getCallingConv();
- bool isWin64 = Subtarget->isCallingConvWin64(CC);
- if (CC != CallingConv::C && CC != CallingConv::Fast &&
- CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 &&
- CC != CallingConv::X86_64_SysV)
+bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
+ auto &OutVals = CLI.OutVals;
+ auto &OutFlags = CLI.OutFlags;
+ auto &OutRegs = CLI.OutRegs;
+ auto &Ins = CLI.Ins;
+ auto &InRegs = CLI.InRegs;
+ CallingConv::ID CC = CLI.CallConv;
+ bool &IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ const char *SymName = CLI.SymName;
+
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isCallingConvWin64(CC);
+
+ // Handle only C, fastcc, and webkit_js calling conventions for now.
+ switch (CC) {
+ default: return false;
+ case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::WebKit_JS:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_64_Win64:
+ case CallingConv::X86_64_SysV:
+ break;
+ }
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
return false;
// fastcc with -tailcallopt is intended to provide a guaranteed
@@ -2689,150 +2660,77 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- bool isVarArg = FTy->isVarArg();
-
// Don't know how to handle Win64 varargs yet. Nothing special needed for
- // x86-32. Special handling for x86-64 is implemented.
- if (isVarArg && isWin64)
+ // x86-32. Special handling for x86-64 is implemented.
+ if (IsVarArg && IsWin64)
return false;
// Don't know about inalloca yet.
- if (CS.hasInAllocaArgument())
+ if (CLI.CS && CLI.CS->hasInAllocaArgument())
return false;
// Fast-isel doesn't know about callee-pop yet.
- if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
+ if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
TM.Options.GuaranteedTailCallOpt))
return false;
- // Check whether the function can return without sret-demotion.
- SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
- bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- *FuncInfo.MF, FTy->isVarArg(),
- Outs, FTy->getContext());
- if (!CanLowerReturn)
- return false;
-
- // Materialize callee address in a register. FIXME: GV address can be
- // handled with a CALLpcrel32 instead.
- X86AddressMode CalleeAM;
- if (!X86SelectCallAddress(Callee, CalleeAM))
- return false;
- unsigned CalleeOp = 0;
- const GlobalValue *GV = nullptr;
- if (CalleeAM.GV != nullptr) {
- GV = CalleeAM.GV;
- } else if (CalleeAM.Base.Reg != 0) {
- CalleeOp = CalleeAM.Base.Reg;
- } else
- return false;
-
- // Deal with call operands first.
- SmallVector<const Value *, 8> ArgVals;
- SmallVector<unsigned, 8> Args;
- SmallVector<MVT, 8> ArgVTs;
- SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- unsigned arg_size = CS.arg_size();
- Args.reserve(arg_size);
- ArgVals.reserve(arg_size);
- ArgVTs.reserve(arg_size);
- ArgFlags.reserve(arg_size);
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
- // If we're lowering a mem intrinsic instead of a regular call, skip the
- // last two arguments, which should not be passed to the underlying functions.
- if (MemIntName && e-i <= 2)
- break;
- Value *ArgVal = *i;
- ISD::ArgFlagsTy Flags;
- unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
- Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
- Flags.setZExt();
-
- if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
- PointerType *Ty = cast<PointerType>(ArgVal->getType());
- Type *ElementTy = Ty->getElementType();
- unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
- unsigned FrameAlign = CS.getParamAlignment(AttrInd);
- if (!FrameAlign)
- FrameAlign = TLI.getByValTypeAlignment(ElementTy);
- Flags.setByVal();
- Flags.setByValSize(FrameSize);
- Flags.setByValAlign(FrameAlign);
- if (!IsMemcpySmall(FrameSize))
- return false;
- }
-
- if (CS.paramHasAttr(AttrInd, Attribute::InReg))
- Flags.setInReg();
- if (CS.paramHasAttr(AttrInd, Attribute::Nest))
- Flags.setNest();
-
- // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
- // instruction. This is safe because it is common to all fastisel supported
- // calling conventions on x86.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
- if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
- CI->getBitWidth() == 16) {
+ // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all FastISel supported
+ // calling conventions on x86.
+ for (int i = 0, e = OutVals.size(); i != e; ++i) {
+ Value *&Val = OutVals[i];
+ ISD::ArgFlagsTy Flags = OutFlags[i];
+ if (auto *CI = dyn_cast<ConstantInt>(Val)) {
+ if (CI->getBitWidth() < 32) {
if (Flags.isSExt())
- ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+ Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
else
- ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+ Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
}
}
- unsigned ArgReg;
-
// Passing bools around ends up doing a trunc to i1 and passing it.
// Codegen this as an argument + "and 1".
- if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
- cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
- ArgVal->hasOneUse()) {
- ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
- ArgReg = getRegForValue(ArgVal);
- if (ArgReg == 0) return false;
-
- MVT ArgVT;
- if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
-
- ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
- ArgVal->hasOneUse(), 1);
- } else {
- ArgReg = getRegForValue(ArgVal);
- }
+ if (auto *TI = dyn_cast<TruncInst>(Val)) {
+ if (TI->getType()->isIntegerTy(1) && CLI.CS &&
+ (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
+ TI->hasOneUse()) {
+ Val = cast<TruncInst>(Val)->getOperand(0);
+ unsigned ResultReg = getRegForValue(Val);
+
+ if (!ResultReg)
+ return false;
- if (ArgReg == 0) return false;
+ MVT ArgVT;
+ if (!isTypeLegal(Val->getType(), ArgVT))
+ return false;
- Type *ArgTy = ArgVal->getType();
- MVT ArgVT;
- if (!isTypeLegal(ArgTy, ArgVT))
- return false;
- if (ArgVT == MVT::x86mmx)
- return false;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- Flags.setOrigAlign(OriginalAlignment);
+ ResultReg =
+ fastEmit_ri(ArgVT, ArgVT, ISD::AND, ResultReg, Val->hasOneUse(), 1);
- Args.push_back(ArgReg);
- ArgVals.push_back(ArgVal);
- ArgVTs.push_back(ArgVT);
- ArgFlags.push_back(Flags);
+ if (!ResultReg)
+ return false;
+ updateValueMap(Val, ResultReg);
+ }
+ }
}
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
- I->getParent()->getContext());
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
// Allocate shadow area for Win64
- if (isWin64)
+ if (IsWin64)
CCInfo.AllocateStack(32, 8);
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
+ SmallVector<MVT, 16> OutVTs;
+ for (auto *Val : OutVals) {
+ MVT VT;
+ if (!isTypeLegal(Val->getType(), VT))
+ return false;
+ OutVTs.push_back(VT);
+ }
+ CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -2842,13 +2740,20 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
.addImm(NumBytes);
- // Process argument: walk the register/memloc assignments, inserting
- // copies / loads.
- SmallVector<unsigned, 4> RegArgs;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- unsigned Arg = Args[VA.getValNo()];
- EVT ArgVT = ArgVTs[VA.getValNo()];
+ CCValAssign const &VA = ArgLocs[i];
+ const Value *ArgVal = OutVals[VA.getValNo()];
+ MVT ArgVT = OutVTs[VA.getValNo()];
+
+ if (ArgVT == MVT::x86mmx)
+ return false;
+
+ unsigned ArgReg = getRegForValue(ArgVal);
+ if (!ArgReg)
+ return false;
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -2856,8 +2761,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
case CCValAssign::SExt: {
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
"Unexpected extend");
- bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
+ bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
@@ -2865,8 +2770,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
case CCValAssign::ZExt: {
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
"Unexpected extend");
- bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
+ bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
@@ -2874,66 +2779,75 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
case CCValAssign::AExt: {
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
"Unexpected extend");
- bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
+ bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
if (!Emitted)
- Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
+ Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
if (!Emitted)
- Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
+ Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
+ ArgVT, ArgReg);
assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::BCvt: {
- unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
- ISD::BITCAST, Arg, /*TODO: Kill=*/false);
- assert(BC != 0 && "Failed to emit a bitcast!");
- Arg = BC;
+ ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
+ /*TODO: Kill=*/false);
+ assert(ArgReg && "Failed to emit a bitcast!");
ArgVT = VA.getLocVT();
break;
}
- case CCValAssign::VExt:
+ case CCValAssign::VExt:
// VExt has not been implemented, so this should be impossible to reach
// for now. However, fallback to Selection DAG isel once implemented.
return false;
+ case CCValAssign::AExtUpper:
+ case CCValAssign::SExtUpper:
+ case CCValAssign::ZExtUpper:
+ case CCValAssign::FPExt:
+ llvm_unreachable("Unexpected loc info!");
case CCValAssign::Indirect:
// FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
// support this.
return false;
- case CCValAssign::FPExt:
- llvm_unreachable("Unexpected loc info!");
}
if (VA.isRegLoc()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
- RegArgs.push_back(VA.getLocReg());
+ TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
+ OutRegs.push_back(VA.getLocReg());
} else {
+ assert(VA.isMemLoc());
+
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
unsigned LocMemOffset = VA.getLocMemOffset();
X86AddressMode AM;
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(
- getTargetMachine()->getRegisterInfo());
AM.Base.Reg = RegInfo->getStackRegister();
AM.Disp = LocMemOffset;
- const Value *ArgVal = ArgVals[VA.getValNo()];
- ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
-
+ ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
+ unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
+ ArgVT.getStoreSize(), Alignment);
if (Flags.isByVal()) {
X86AddressMode SrcAM;
- SrcAM.Base.Reg = Arg;
- bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
- assert(Res && "memcpy length already checked!"); (void)Res;
+ SrcAM.Base.Reg = ArgReg;
+ if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
+ return false;
} else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
// If this is a really simple value, emit this with the Value* version
// of X86FastEmitStore. If it isn't simple, we don't want to do this,
// as it can cause us to reevaluate the argument.
- if (!X86FastEmitStore(ArgVT, ArgVal, AM))
+ if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
return false;
} else {
- if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM))
+ bool ValIsKill = hasTrivialKill(ArgVal);
+ if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
return false;
}
}
@@ -2947,37 +2861,53 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
}
- if (Subtarget->is64Bit() && isVarArg && !isWin64) {
+ if (Is64Bit && IsVarArg && !IsWin64) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+ // the declaration), %al is used as a hidden argument to specify the number
+ // of SSE registers used. The contents of %al do not need to match exactly
+ // the number of registers, but must be an upper bound on the number of SSE
+ // registers used, and must be in the range 0 - 8 inclusive.
+
// Count the number of XMM registers allocated.
static const MCPhysReg XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
X86::AL).addImm(NumXMMRegs);
}
+ // Materialize callee address in a register. FIXME: GV address can be
+ // handled with a CALLpcrel32 instead.
+ X86AddressMode CalleeAM;
+ if (!X86SelectCallAddress(Callee, CalleeAM))
+ return false;
+
+ unsigned CalleeOp = 0;
+ const GlobalValue *GV = nullptr;
+ if (CalleeAM.GV != nullptr) {
+ GV = CalleeAM.GV;
+ } else if (CalleeAM.Base.Reg != 0) {
+ CalleeOp = CalleeAM.Base.Reg;
+ } else
+ return false;
+
// Issue the call.
MachineInstrBuilder MIB;
if (CalleeOp) {
// Register-indirect call.
- unsigned CallOpc;
- if (Subtarget->is64Bit())
- CallOpc = X86::CALL64r;
- else
- CallOpc = X86::CALL32r;
+ unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
.addReg(CalleeOp);
-
} else {
// Direct call.
assert(GV && "Not a direct call");
- unsigned CallOpc;
- if (Subtarget->is64Bit())
- CallOpc = X86::CALL64pcrel32;
- else
- CallOpc = X86::CALLpcrel32;
+ unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = 0;
@@ -3000,92 +2930,72 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
OpFlags = X86II::MO_DARWIN_STUB;
}
-
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
- if (MemIntName)
- MIB.addExternalSymbol(MemIntName, OpFlags);
+ if (SymName)
+ MIB.addExternalSymbol(SymName, OpFlags);
else
MIB.addGlobalAddress(GV, 0, OpFlags);
}
- // Add a register mask with the call-preserved registers.
+ // Add a register mask operand representing the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
// Add an implicit use GOT pointer in EBX.
if (Subtarget->isPICStyleGOT())
MIB.addReg(X86::EBX, RegState::Implicit);
- if (Subtarget->is64Bit() && isVarArg && !isWin64)
+ if (Is64Bit && IsVarArg && !IsWin64)
MIB.addReg(X86::AL, RegState::Implicit);
// Add implicit physical register uses to the call.
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i], RegState::Implicit);
+ for (auto Reg : OutRegs)
+ MIB.addReg(Reg, RegState::Implicit);
// Issue CALLSEQ_END
+ unsigned NumBytesForCalleeToPop =
+ computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
- const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
- .addImm(NumBytes).addImm(NumBytesCallee);
-
- // Build info for return calling conv lowering code.
- // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
- SmallVector<ISD::InputArg, 32> Ins;
- SmallVector<EVT, 4> RetTys;
- ComputeValueVTs(TLI, I->getType(), RetTys);
- for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
- EVT VT = RetTys[i];
- MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
- unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
- for (unsigned j = 0; j != NumRegs; ++j) {
- ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
- MyFlags.Used = !CS.getInstruction()->use_empty();
- if (CS.paramHasAttr(0, Attribute::SExt))
- MyFlags.Flags.setSExt();
- if (CS.paramHasAttr(0, Attribute::ZExt))
- MyFlags.Flags.setZExt();
- if (CS.paramHasAttr(0, Attribute::InReg))
- MyFlags.Flags.setInReg();
- Ins.push_back(MyFlags);
- }
- }
+ .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
// Now handle call return values.
- SmallVector<unsigned, 4> UsedRegs;
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
- I->getParent()->getContext());
- unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
+ CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
+ CLI.RetTy->getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
for (unsigned i = 0; i != RVLocs.size(); ++i) {
- EVT CopyVT = RVLocs[i].getValVT();
+ CCValAssign &VA = RVLocs[i];
+ EVT CopyVT = VA.getValVT();
unsigned CopyReg = ResultReg + i;
- // If this is a call to a function that returns an fp value on the x87 fp
- // stack, but where we prefer to use the value in xmm registers, copy it
- // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
- if ((RVLocs[i].getLocReg() == X86::ST0 ||
- RVLocs[i].getLocReg() == X86::ST1)) {
- if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
- CopyVT = MVT::f80;
- CopyReg = createResultReg(&X86::RFP80RegClass);
- }
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::FpPOP_RETVAL), CopyReg);
- } else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY),
- CopyReg).addReg(RVLocs[i].getLocReg());
- UsedRegs.push_back(RVLocs[i].getLocReg());
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ report_fatal_error("SSE register return with SSE disabled");
}
- if (CopyVT != RVLocs[i].getValVT()) {
- // Round the F80 the right size, which also moves to the appropriate xmm
- // register. This is accomplished by storing the F80 value in memory and
- // then loading it back. Ewww...
- EVT ResVT = RVLocs[i].getValVT();
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ isScalarFPTypeInSSEReg(VA.getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(&X86::RFP80RegClass);
+ }
+
+ // Copy out the result.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+ InRegs.push_back(VA.getLocReg());
+
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register. This is accomplished by storing the f80 value in memory
+ // and then loading it back.
+ if (CopyVT != VA.getValVT()) {
+ EVT ResVT = VA.getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
int FI = MFI.CreateStackObject(MemSize, MemSize, false);
@@ -3098,18 +3008,15 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
}
}
- if (RVLocs.size())
- UpdateValueMap(I, ResultReg, RVLocs.size());
-
- // Set all unused physreg defs as dead.
- static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = RVLocs.size();
+ CLI.Call = MIB;
return true;
}
-
bool
-X86FastISel::TargetSelectInstruction(const Instruction *I) {
+X86FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default: break;
case Instruction::Load:
@@ -3125,8 +3032,6 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return X86SelectZExt(I);
case Instruction::Br:
return X86SelectBranch(I);
- case Instruction::Call:
- return X86SelectCall(I);
case Instruction::LShr:
case Instruction::AShr:
case Instruction::Shl:
@@ -3154,7 +3059,7 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return X86SelectTrunc(I);
unsigned Reg = getRegForValue(I->getOperand(0));
if (Reg == 0) return false;
- UpdateValueMap(I, Reg);
+ updateValueMap(I, Reg);
return true;
}
}
@@ -3162,13 +3067,69 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return false;
}
-unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
- MVT VT;
- if (!isTypeLegal(C->getType(), VT))
+unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
+ if (VT > MVT::i64)
return 0;
+ uint64_t Imm = CI->getZExtValue();
+ if (Imm == 0) {
+ unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type");
+ case MVT::i1:
+ case MVT::i8:
+ return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+ X86::sub_8bit);
+ case MVT::i16:
+ return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
+ X86::sub_16bit);
+ case MVT::i32:
+ return SrcReg;
+ case MVT::i64: {
+ unsigned ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
+ .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
+ return ResultReg;
+ }
+ }
+ }
+
+ unsigned Opc = 0;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type");
+ case MVT::i1: VT = MVT::i8; // fall-through
+ case MVT::i8: Opc = X86::MOV8ri; break;
+ case MVT::i16: Opc = X86::MOV16ri; break;
+ case MVT::i32: Opc = X86::MOV32ri; break;
+ case MVT::i64: {
+ if (isUInt<32>(Imm))
+ Opc = X86::MOV32ri;
+ else if (isInt<32>(Imm))
+ Opc = X86::MOV64ri32;
+ else
+ Opc = X86::MOV64ri;
+ break;
+ }
+ }
+ if (VT == MVT::i64 && Opc == X86::MOV32ri) {
+ unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
+ unsigned ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
+ .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
+ return ResultReg;
+ }
+ return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
+}
+
+unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
+ if (CFP->isNullValue())
+ return fastMaterializeFloatZero(CFP);
+
// Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Small)
+ CodeModel::Model CM = TM.getCodeModel();
+ if (CM != CodeModel::Small && CM != CodeModel::Large)
return 0;
// Get opcode and regclass of the output for the given load instruction.
@@ -3176,23 +3137,6 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
const TargetRegisterClass *RC = nullptr;
switch (VT.SimpleTy) {
default: return 0;
- case MVT::i8:
- Opc = X86::MOV8rm;
- RC = &X86::GR8RegClass;
- break;
- case MVT::i16:
- Opc = X86::MOV16rm;
- RC = &X86::GR16RegClass;
- break;
- case MVT::i32:
- Opc = X86::MOV32rm;
- RC = &X86::GR32RegClass;
- break;
- case MVT::i64:
- // Must be in x86-64 mode.
- Opc = X86::MOV64rm;
- RC = &X86::GR64RegClass;
- break;
case MVT::f32:
if (X86ScalarSSEf32) {
Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
@@ -3216,39 +3160,11 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
}
- // Materialize addresses with LEA/MOV instructions.
- if (isa<GlobalValue>(C)) {
- X86AddressMode AM;
- if (X86SelectAddress(C, AM)) {
- // If the expression is just a basereg, then we're done, otherwise we need
- // to emit an LEA.
- if (AM.BaseType == X86AddressMode::RegBase &&
- AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
- return AM.Base.Reg;
-
- unsigned ResultReg = createResultReg(RC);
- if (TM.getRelocationModel() == Reloc::Static &&
- TLI.getPointerTy() == MVT::i64) {
- // The displacement code be more than 32 bits away so we need to use
- // an instruction with a 64 bit immediate
- Opc = X86::MOV64ri;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Opc), ResultReg).addGlobalAddress(cast<GlobalValue>(C));
- } else {
- Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Opc), ResultReg), AM);
- }
- return ResultReg;
- }
- return 0;
- }
-
// MachineConstantPool wants an explicit alignment.
- unsigned Align = DL.getPrefTypeAlignment(C->getType());
+ unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
if (Align == 0) {
- // Alignment of vector types. FIXME!
- Align = DL.getTypeAllocSize(C->getType());
+ // Alignment of vector types. FIXME!
+ Align = DL.getTypeAllocSize(CFP->getType());
}
// x86-32 PIC requires a PIC base register for constant pools.
@@ -3266,23 +3182,88 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
}
// Create the load from the constant pool.
- unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+ unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
unsigned ResultReg = createResultReg(RC);
+
+ if (CM == CodeModel::Large) {
+ unsigned AddrReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
+ AddrReg)
+ .addConstantPoolIndex(CPI, 0, OpFlag);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg);
+ addDirectMem(MIB, AddrReg);
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
+ TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), Align);
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
+ return ResultReg;
+ }
+
addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg),
- MCPOffset, PICBase, OpFlag);
-
+ CPI, PICBase, OpFlag);
return ResultReg;
}
-unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
+unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return 0;
+
+ // Materialize addresses with LEA/MOV instructions.
+ X86AddressMode AM;
+ if (X86SelectAddress(GV, AM)) {
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
+ return AM.Base.Reg;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ if (TM.getRelocationModel() == Reloc::Static &&
+ TLI.getPointerTy() == MVT::i64) {
+ // The displacement code could be more than 32 bits away so we need to use
+ // an instruction with a 64 bit immediate
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
+ ResultReg)
+ .addGlobalAddress(GV);
+ } else {
+ unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg), AM);
+ }
+ return ResultReg;
+ }
+ return 0;
+}
+
+unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple())
+ return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const auto *CI = dyn_cast<ConstantInt>(C))
+ return X86MaterializeInt(CI, VT);
+ else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return X86MaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return X86MaterializeGV(GV, VT);
+
+ return 0;
+}
+
+unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
// Fail on dynamic allocas. At this point, getRegForValue has already
// checked its CSE maps, so if we're here trying to handle a dynamic
// alloca, we're not going to succeed. X86SelectAddress has a
// check for dynamic allocas, because it's called directly from
- // various places, but TargetMaterializeAlloca also needs a check
+ // various places, but targetMaterializeAlloca also needs a check
// in order to avoid recursion between getRegForValue,
- // X86SelectAddrss, and TargetMaterializeAlloca.
+ // X86SelectAddress, and targetMaterializeAlloca.
if (!FuncInfo.StaticAllocaMap.count(C))
return 0;
assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
@@ -3290,7 +3271,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
X86AddressMode AM;
if (!X86SelectAddress(C, AM))
return 0;
- unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3298,7 +3279,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
return ResultReg;
}
-unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
MVT VT;
if (!isTypeLegal(CF->getType(), VT))
return 0;
@@ -3356,7 +3337,8 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
AM.getFullAddress(AddrOps);
MachineInstr *Result =
- XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
+ Size, Alignment, /*AllowCommute=*/true);
if (!Result)
return false;
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 4be766a..02736ac 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -7,9 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the pass which will find instructions which
-// can be re-written as LEA instructions in order to reduce pipeline
-// delays for some models of the Intel Atom family.
+// This file defines the pass that finds instructions that can be
+// re-written as LEA instructions in order to reduce pipeline delays.
//
//===----------------------------------------------------------------------===//
@@ -40,7 +39,7 @@ class FixupLEAPass : public MachineFunctionPass {
/// where appropriate.
bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
- const char *getPassName() const override { return "X86 Atom LEA Fixup"; }
+ const char *getPassName() const override { return "X86 LEA Fixup"; }
/// \brief Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
@@ -156,7 +155,8 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
if (!ST.LEAusesAG() && !ST.slowLEA())
return false;
- TII = static_cast<const X86InstrInfo *>(TM->getInstrInfo());
+ TII =
+ static_cast<const X86InstrInfo *>(TM->getSubtargetImpl()->getInstrInfo());
DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
@@ -218,7 +218,8 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
if (usesRegister(p, CurInst) == RU_Write) {
return CurInst;
}
- InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst);
+ InstrDistance += TII->getInstrLatency(
+ TM->getSubtargetImpl()->getInstrItineraryData(), CurInst);
Found = getPreviousInstr(CurInst, MFI);
}
return nullptr;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index c8a3ab3..6189109 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -28,12 +28,14 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
@@ -41,7 +43,9 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <bitset>
using namespace llvm;
#define DEBUG_TYPE "x86-codegen"
@@ -50,6 +54,8 @@ STATISTIC(NumFXCH, "Number of fxch instructions inserted");
STATISTIC(NumFP , "Number of floating point instructions");
namespace {
+ const unsigned ScratchFPReg = 7;
+
struct FPS : public MachineFunctionPass {
static char ID;
FPS() : MachineFunctionPass(ID) {
@@ -137,7 +143,7 @@ namespace {
unsigned StackTop; // The current top of the FP stack.
enum {
- NumFPRegs = 16 // Including scratch pseudo-registers.
+ NumFPRegs = 8 // Including scratch pseudo-registers.
};
// For each live FP<n> register, point to its Stack[] entry.
@@ -146,27 +152,6 @@ namespace {
// register allocator thinks.
unsigned RegMap[NumFPRegs];
- // Pending fixed registers - Inline assembly needs FP registers to appear
- // in fixed stack slot positions. This is handled by copying FP registers
- // to ST registers before the instruction, and copying back after the
- // instruction.
- //
- // This is modeled with pending ST registers. NumPendingSTs is the number
- // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
- // register that holds the ST value. The ST registers are not moved into
- // place until immediately before the instruction that needs them.
- //
- // It can happen that we need an ST register to be live when no FP register
- // holds the value:
- //
- // %ST0 = COPY %FP4<kill>
- //
- // When that happens, we allocate a scratch FP register to hold the ST
- // value. That means every register in PendingST must be live.
-
- unsigned NumPendingSTs;
- unsigned char PendingST[8];
-
// Set up our stack model to match the incoming registers to MBB.
void setupBlockStack();
@@ -180,9 +165,6 @@ namespace {
dbgs() << " FP" << Stack[i];
assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
}
- for (unsigned i = 0; i != NumPendingSTs; ++i)
- dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
- dbgs() << "\n";
}
#endif
@@ -199,19 +181,6 @@ namespace {
return Slot < StackTop && Stack[Slot] == RegNo;
}
- /// getScratchReg - Return an FP register that is not currently in use.
- unsigned getScratchReg() const {
- for (int i = NumFPRegs - 1; i >= 8; --i)
- if (!isLive(i))
- return i;
- llvm_unreachable("Ran out of scratch FP registers");
- }
-
- /// isScratchReg - Returns trus if RegNo is a scratch FP register.
- static bool isScratchReg(unsigned RegNo) {
- return RegNo > 8 && RegNo < NumFPRegs;
- }
-
/// getStackEntry - Return the X86::FP<n> register in register ST(i).
unsigned getStackEntry(unsigned STi) const {
if (STi >= StackTop)
@@ -263,21 +232,6 @@ namespace {
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
- /// duplicatePendingSTBeforeKill - The instruction at I is about to kill
- /// RegNo. If any PendingST registers still need the RegNo value, duplicate
- /// them to new scratch registers.
- void duplicatePendingSTBeforeKill(unsigned RegNo, MachineInstr *I) {
- for (unsigned i = 0; i != NumPendingSTs; ++i) {
- if (PendingST[i] != RegNo)
- continue;
- unsigned SR = getScratchReg();
- DEBUG(dbgs() << "Duplicating pending ST" << i
- << " in FP" << RegNo << " to FP" << SR << '\n');
- duplicateToTop(RegNo, SR, I);
- PendingST[i] = SR;
- }
- }
-
/// popStackAfter - Pop the current value off of the top of the FP stack
/// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
@@ -304,6 +258,7 @@ namespace {
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+ void handleCall(MachineBasicBlock::iterator &I);
void handleZeroArgFP(MachineBasicBlock::iterator &I);
void handleOneArgFP(MachineBasicBlock::iterator &I);
void handleOneArgFPRW(MachineBasicBlock::iterator &I);
@@ -320,6 +275,8 @@ namespace {
return X86::RFP80RegClass.contains(DstReg) ||
X86::RFP80RegClass.contains(SrcReg);
}
+
+ void setKillFlags(MachineBasicBlock &MBB) const;
};
char FPS::ID = 0;
}
@@ -354,7 +311,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
if (!FPIsUsed) return false;
Bundles = &getAnalysis<EdgeBundles>();
- TII = MF.getTarget().getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
// Prepare cross-MBB liveness.
bundleCFG(MF);
@@ -367,15 +324,13 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *Entry = MF.begin();
bool Changed = false;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
- I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
- I != E; ++I)
- Changed |= processBasicBlock(MF, **I);
+ for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
+ Changed |= processBasicBlock(MF, *BB);
// Process any unreachable blocks in arbitrary order now.
if (MF.size() != Processed.size())
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- if (Processed.insert(BB))
+ if (Processed.insert(BB).second)
Changed |= processBasicBlock(MF, *BB);
LiveBundles.clear();
@@ -409,8 +364,8 @@ void FPS::bundleCFG(MachineFunction &MF) {
bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
bool Changed = false;
MBB = &BB;
- NumPendingSTs = 0;
+ setKillFlags(BB);
setupBlockStack();
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
@@ -428,6 +383,9 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
X86::RFP80RegClass.contains(MI->getOperand(0).getReg()))
FPInstClass = X86II::SpecialFP;
+ if (MI->isCall())
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
@@ -462,7 +420,9 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// after definition. If so, pop them.
for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
unsigned Reg = DeadRegs[i];
- if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ // Check if Reg is live on the stack. An inline-asm register operand that
+ // is in the clobber list and marked dead might not be live on the stack.
+ if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) {
DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
freeStackSlotAfter(I, Reg-X86::FP0);
}
@@ -874,7 +834,9 @@ FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
RegMap[TopReg] = OldSlot;
RegMap[FPRegNo] = ~0;
Stack[--StackTop] = ~0;
- return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr))
+ .addReg(STReg)
+ .getInstr();
}
/// adjustLiveRegs - Kill and revive registers such that exactly the FP
@@ -966,6 +928,31 @@ void FPS::shuffleStackTop(const unsigned char *FixStack,
// Instruction transformation implementation
//===----------------------------------------------------------------------===//
+void FPS::handleCall(MachineBasicBlock::iterator &I) {
+ unsigned STReturns = 0;
+
+ for (const auto &MO : I->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned R = MO.getReg() - X86::FP0;
+
+ if (R < 8) {
+ assert(MO.isDef() && MO.isImplicit());
+ STReturns |= 1 << R;
+ }
+ }
+
+ unsigned N = CountTrailingOnes_32(STReturns);
+
+ // FP registers used for function return must be consecutive starting at
+ // FP0.
+ assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
+
+ for (unsigned I = 0; I < N; ++I)
+ pushReg(N - I - 1);
+}
+
/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
///
void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
@@ -992,9 +979,6 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
- if (KillsSrc)
- duplicatePendingSTBeforeKill(Reg, I);
-
// FISTP64m is strange because there isn't a non-popping version.
// If we have one _and_ we don't want to pop the operand, duplicate the value
// on the stack instead of moving it. This ensures that popping the value is
@@ -1015,7 +999,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MI->getOpcode() == X86::ISTT_Fp32m80 ||
MI->getOpcode() == X86::ISTT_Fp64m80 ||
MI->getOpcode() == X86::ST_FpP80m)) {
- duplicateToTop(Reg, getScratchReg(), I);
+ duplicateToTop(Reg, ScratchFPReg, I);
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
@@ -1058,7 +1042,6 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
if (KillsSrc) {
- duplicatePendingSTBeforeKill(Reg, I);
// If this is the last use of the source register, just make sure it's on
// the top of the stack.
moveToTop(Reg, I);
@@ -1314,71 +1297,22 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
/// floating point instructions. This is primarily intended for use by pseudo
/// instructions.
///
-void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
- MachineInstr *MI = I;
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
+ MachineInstr *MI = Inst;
+
+ if (MI->isCall()) {
+ handleCall(Inst);
+ return;
+ }
+
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown SpecialFP instruction!");
case TargetOpcode::COPY: {
// We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
const MachineOperand &MO1 = MI->getOperand(1);
const MachineOperand &MO0 = MI->getOperand(0);
- unsigned DstST = MO0.getReg() - X86::ST0;
- unsigned SrcST = MO1.getReg() - X86::ST0;
bool KillsSrc = MI->killsRegister(MO1.getReg());
- // ST = COPY FP. Set up a pending ST register.
- if (DstST < 8) {
- unsigned SrcFP = getFPReg(MO1);
- assert(isLive(SrcFP) && "Cannot copy dead register");
- assert(!MO0.isDead() && "Cannot copy to dead ST register");
-
- // Unallocated STs are marked as the nonexistent FP255.
- while (NumPendingSTs <= DstST)
- PendingST[NumPendingSTs++] = NumFPRegs;
-
- // STi could still be live from a previous inline asm.
- if (isScratchReg(PendingST[DstST])) {
- DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
- << '\n');
- freeStackSlotBefore(MI, PendingST[DstST]);
- }
-
- // When the source is killed, allocate a scratch FP register.
- if (KillsSrc) {
- duplicatePendingSTBeforeKill(SrcFP, I);
- unsigned Slot = getSlot(SrcFP);
- unsigned SR = getScratchReg();
- PendingST[DstST] = SR;
- Stack[Slot] = SR;
- RegMap[SR] = Slot;
- } else
- PendingST[DstST] = SrcFP;
- break;
- }
-
- // FP = COPY ST. Extract fixed stack value.
- // Any instruction defining ST registers must have assigned them to a
- // scratch register.
- if (SrcST < 8) {
- unsigned DstFP = getFPReg(MO0);
- assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
- assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
- unsigned SrcFP = PendingST[SrcST];
- assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
- assert(isLive(SrcFP) && "Scratch holding ST is dead");
-
- // DstFP steals the stack slot from SrcFP.
- unsigned Slot = getSlot(SrcFP);
- Stack[Slot] = DstFP;
- RegMap[DstFP] = Slot;
-
- // Always treat the ST as killed.
- PendingST[SrcST] = NumFPRegs;
- while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
- --NumPendingSTs;
- break;
- }
-
// FP <- FP copy.
unsigned DstFP = getFPReg(MO0);
unsigned SrcFP = getFPReg(MO1);
@@ -1392,7 +1326,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
} else {
// For COPY we just duplicate the specified value to a new stack slot.
// This could be made better, but would require substantial changes.
- duplicateToTop(SrcFP, DstFP, I);
+ duplicateToTop(SrcFP, DstFP, Inst);
}
break;
}
@@ -1401,41 +1335,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// All FP registers must be explicitly defined, so load a 0 instead.
unsigned Reg = MI->getOperand(0).getReg() - X86::FP0;
DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
- BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::LD_F0));
pushReg(Reg);
break;
}
- case X86::FpPOP_RETVAL: {
- // The FpPOP_RETVAL instruction is used after calls that return a value on
- // the floating point stack. We cannot model this with ST defs since CALL
- // instructions have fixed clobber lists. This instruction is interpreted
- // to mean that there is one more live register on the stack than we
- // thought.
- //
- // This means that StackTop does not match the hardware stack between a
- // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
- // between CALL and FpPOP_RETVAL as long as they don't overflow the
- // hardware stack.
- unsigned DstFP = getFPReg(MI->getOperand(0));
-
- // Move existing stack elements up to reflect reality.
- assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
- if (StackTop) {
- std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
- for (unsigned i = 0; i != NumFPRegs; ++i)
- ++RegMap[i];
- }
- ++StackTop;
-
- // DstFP is the new bottom of the stack.
- Stack[0] = DstFP;
- RegMap[DstFP] = 0;
-
- // DstFP will be killed by processBasicBlock if this was a dead def.
- break;
- }
-
case TargetOpcode::INLINEASM: {
// The inline asm MachineInstr currently only *uses* FP registers for the
// 'f' constraint. These should be turned into the current ST(x) register
@@ -1472,19 +1376,30 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// only tell clobbers from defs by looking at the asm descriptor.
unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
unsigned NumOps = 0;
+ SmallSet<unsigned, 1> FRegIdx;
+ unsigned RCID;
+
for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
unsigned Flags = MI->getOperand(i).getImm();
+
NumOps = InlineAsm::getNumOperandRegisters(Flags);
if (NumOps != 1)
continue;
const MachineOperand &MO = MI->getOperand(i + 1);
if (!MO.isReg())
continue;
- unsigned STReg = MO.getReg() - X86::ST0;
+ unsigned STReg = MO.getReg() - X86::FP0;
if (STReg >= 8)
continue;
+ // If the flag has a register class constraint, this must be an operand
+ // with constraint "f". Record its index and continue.
+ if (InlineAsm::hasRegClassConstraint(Flags, RCID)) {
+ FRegIdx.insert(i + 1);
+ continue;
+ }
+
switch (InlineAsm::getKind(Flags)) {
case InlineAsm::Kind_RegUse:
STUses |= (1u << STReg);
@@ -1527,71 +1442,42 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
<< NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
- // Scan the instruction for FP uses corresponding to "f" constraints.
- // Collect FP registers to kill afer the instruction.
- // Always kill all the scratch regs.
+#ifndef NDEBUG
+ // If any input operand uses constraint "f", all output register
+ // constraints must be early-clobber defs.
+ for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I)
+ if (FRegIdx.count(I)) {
+ assert((1 << getFPReg(MI->getOperand(I)) & STDefs) == 0 &&
+ "Operands with constraint \"f\" cannot overlap with defs");
+ }
+#endif
+
+ // Collect all FP registers (register operands with constraints "t", "u",
+ // and "f") to kill after the instruction.
unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
- unsigned FPUsed = 0;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
- if (!Op.isUse())
- MI->emitError("illegal \"f\" output constraint");
unsigned FPReg = getFPReg(Op);
- FPUsed |= 1U << FPReg;
// If we kill this operand, make sure to pop it from the stack after the
// asm. We just remember it for now, and pop them all off at the end in
// a batch.
- if (Op.isKill())
+ if (Op.isUse() && Op.isKill())
FPKills |= 1U << FPReg;
}
- // The popped inputs will be killed by the instruction, so duplicate them
- // if the FP register needs to be live after the instruction, or if it is
- // used in the instruction itself. We effectively treat the popped inputs
- // as early clobbers.
- for (unsigned i = 0; i < NumSTPopped; ++i) {
- if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
- continue;
- unsigned SR = getScratchReg();
- duplicateToTop(PendingST[i], SR, I);
- DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
- << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
- PendingST[i] = SR;
- }
-
- // Make sure we have a unique live register for every fixed use. Some of
- // them could be undef uses, and we need to emit LD_F0 instructions.
- for (unsigned i = 0; i < NumSTUses; ++i) {
- if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
- // Check for shared assignments.
- for (unsigned j = 0; j < i; ++j) {
- if (PendingST[j] != PendingST[i])
- continue;
- // STi and STj are inn the same register, create a copy.
- unsigned SR = getScratchReg();
- duplicateToTop(PendingST[i], SR, I);
- DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
- << unsigned(PendingST[i])
- << " to avoid collision with ST" << j << '\n');
- PendingST[i] = SR;
- }
- continue;
- }
- unsigned SR = getScratchReg();
- DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
- BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
- pushReg(SR);
- PendingST[i] = SR;
- if (NumPendingSTs == i)
- ++NumPendingSTs;
- }
- assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+ // Do not include registers that are implicitly popped by defs/clobbers.
+ FPKills &= ~(STDefs | STClobbers);
// Now we can rearrange the live registers to match what was requested.
- shuffleStackTop(PendingST, NumPendingSTs, I);
+ unsigned char STUsesArray[8];
+
+ for (unsigned I = 0; I < NumSTUses; ++I)
+ STUsesArray[I] = I;
+
+ shuffleStackTop(STUsesArray, NumSTUses, Inst);
DEBUG({dbgs() << "Before asm: "; dumpStack();});
// With the stack layout fixed, rewrite the FP registers.
@@ -1599,36 +1485,22 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
+
unsigned FPReg = getFPReg(Op);
- Op.setReg(getSTReg(FPReg));
+
+ if (FRegIdx.count(i))
+ // Operand with constraint "f".
+ Op.setReg(getSTReg(FPReg));
+ else
+ // Operand with a single register class constraint ("t" or "u").
+ Op.setReg(X86::ST0 + FPReg);
}
// Simulate the inline asm popping its inputs and pushing its outputs.
StackTop -= NumSTPopped;
- // Hold the fixed output registers in scratch FP registers. They will be
- // transferred to real FP registers by copies.
- NumPendingSTs = 0;
- for (unsigned i = 0; i < NumSTDefs; ++i) {
- unsigned SR = getScratchReg();
- pushReg(SR);
- FPKills &= ~(1u << SR);
- }
for (unsigned i = 0; i < NumSTDefs; ++i)
- PendingST[NumPendingSTs++] = getStackEntry(i);
- DEBUG({dbgs() << "After asm: "; dumpStack();});
-
- // If any of the ST defs were dead, pop them immediately. Our caller only
- // handles dead FP defs.
- MachineBasicBlock::iterator InsertPt = MI;
- for (unsigned i = 0; STDefs & (1u << i); ++i) {
- if (!(STDeadDefs & (1u << i)))
- continue;
- freeStackSlotAfter(InsertPt, PendingST[i]);
- PendingST[i] = NumFPRegs;
- }
- while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
- --NumPendingSTs;
+ pushReg(NumSTDefs - i - 1);
// If this asm kills any FP registers (is the last use of them) we must
// explicitly emit pop instructions for them. Do this now after the asm has
@@ -1640,9 +1512,10 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
while (FPKills) {
unsigned FPReg = countTrailingZeros(FPKills);
if (isLive(FPReg))
- freeStackSlotAfter(InsertPt, FPReg);
+ freeStackSlotAfter(Inst, FPReg);
FPKills &= ~(1U << FPReg);
}
+
// Don't delete the inline asm!
return;
}
@@ -1655,12 +1528,12 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
unsigned FPReg = getFPReg(Op);
if (Op.isKill())
- moveToTop(FPReg, I);
+ moveToTop(FPReg, Inst);
else
- duplicateToTop(FPReg, FPReg, I);
+ duplicateToTop(FPReg, FPReg, Inst);
// Emit the call. This will pop the operand.
- BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
+ BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
.addExternalSymbol("_ftol2")
.addReg(X86::ST0, RegState::ImplicitKill)
.addReg(X86::ECX, RegState::ImplicitDefine)
@@ -1738,7 +1611,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
- unsigned NewReg = getScratchReg();
+ unsigned NewReg = ScratchFPReg;
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
@@ -1761,13 +1634,54 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
return;
}
- I = MBB->erase(I); // Remove the pseudo instruction
+ Inst = MBB->erase(Inst); // Remove the pseudo instruction
// We want to leave I pointing to the previous instruction, but what if we
// just erased the first instruction?
- if (I == MBB->begin()) {
+ if (Inst == MBB->begin()) {
DEBUG(dbgs() << "Inserting dummy KILL\n");
- I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ Inst = BuildMI(*MBB, Inst, DebugLoc(), TII->get(TargetOpcode::KILL));
} else
- --I;
+ --Inst;
+}
+
+void FPS::setKillFlags(MachineBasicBlock &MBB) const {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ LivePhysRegs LPR(TRI);
+
+ LPR.addLiveOuts(&MBB);
+
+ for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ std::bitset<8> Defs;
+ SmallVector<MachineOperand *, 2> Uses;
+ MachineInstr &MI = *I;
+
+ for (auto &MO : I->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg() - X86::FP0;
+
+ if (Reg >= 8)
+ continue;
+
+ if (MO.isDef()) {
+ Defs.set(Reg);
+ if (!LPR.contains(MO.getReg()))
+ MO.setIsDead();
+ } else
+ Uses.push_back(&MO);
+ }
+
+ for (auto *MO : Uses)
+ if (Defs.test(getFPReg(*MO)) || !LPR.contains(MO->getReg()))
+ MO->setIsKill();
+
+ LPR.stepBackward(MI);
+ }
}
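A sketch of how the new handleCall() above behaves, assuming a call that returns values in ST0 and ST1 (implicit defs of FP0 and FP1):

    // STReturns == 0b11, so N = CountTrailingOnes_32(STReturns) == 2.
    // The loop "for (unsigned I = 0; I < N; ++I) pushReg(N - I - 1);" runs:
    //   pushReg(1);  // FP1 pushed first, ends up in ST1
    //   pushReg(0);  // FP0 pushed last, ends up on top of the stack (ST0)
    // so the simulated stack matches what the hardware leaves after the call.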
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 8c029a8..b9920b1 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
+#include <cstdlib>
using namespace llvm;
@@ -46,14 +47,15 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo &MMI = MF.getMMI();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit() || MMI.callsEHReturn());
+ MMI.callsUnwindInit() || MMI.callsEHReturn() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint());
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -80,6 +82,17 @@ static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
}
}
+static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
+ if (IsLP64) {
+ if (isInt<8>(Imm))
+ return X86::AND64ri8;
+ return X86::AND64ri32;
+ }
+ if (isInt<8>(Imm))
+ return X86::AND32ri8;
+ return X86::AND32ri;
+}
+
static unsigned getLEArOpcode(unsigned IsLP64) {
return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
@@ -148,32 +161,32 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, bool IsLP64, bool UseLEA,
+ bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
unsigned Opc;
if (UseLEA)
- Opc = getLEArOpcode(IsLP64);
+ Opc = getLEArOpcode(Is64BitStackPtr);
else
Opc = isSub
- ? getSUBriOpcode(IsLP64, Offset)
- : getADDriOpcode(IsLP64, Offset);
+ ? getSUBriOpcode(Is64BitStackPtr, Offset)
+ : getADDriOpcode(Is64BitStackPtr, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
while (Offset) {
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
- if (ThisVal == (Is64Bit ? 8 : 4)) {
+ if (ThisVal == (Is64BitTarget ? 8 : 4)) {
// Use push / pop instead.
unsigned Reg = isSub
- ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
- : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
if (Reg) {
Opc = isSub
- ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
- : (Is64Bit ? X86::POP64r : X86::POP32r);
+ ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64BitTarget ? X86::POP64r : X86::POP32r);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
if (isSub)
@@ -314,7 +327,7 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -352,6 +365,23 @@ static bool usesTheStack(const MachineFunction &MF) {
return false;
}
+void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
+ unsigned &CallOp,
+ const char *&Symbol) {
+ CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32;
+
+ if (STI.is64Bit()) {
+ if (STI.isTargetCygMing()) {
+ Symbol = "___chkstk_ms";
+ } else {
+ Symbol = "__chkstk";
+ }
+ } else if (STI.isTargetCygMing())
+ Symbol = "_alloca";
+ else
+ Symbol = "_chkstk";
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -440,8 +470,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
@@ -449,11 +479,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
- bool IsLP64 = STI.isTarget64BitLP64();
+ // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit ones.
+ const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
bool IsWin64 = STI.isTargetWin64();
- bool IsWinEH =
- MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
- ExceptionHandling::WinEH; // Not necessarily synonymous with IsWin64.
+ // Not necessarily synonymous with IsWin64.
+ bool IsWinEH = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::ItaniumWinEH;
bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
bool NeedsDwarfCFI =
!IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
@@ -461,6 +492,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ const unsigned MachineFramePtr = STI.isTarget64BitILP32() ?
+ getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
unsigned StackPtr = RegInfo->getStackRegister();
unsigned BasePtr = RegInfo->getBaseRegister();
DebugLoc DL;
@@ -482,6 +515,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+ bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMacho());
+
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
@@ -507,7 +542,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
+ TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta)
@@ -551,7 +586,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
- .addReg(FramePtr, RegState::Kill)
+ .addReg(MachineFramePtr, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsDwarfCFI) {
@@ -564,7 +599,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
// Change the rule for the FramePtr to be an "offset" rule.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
+ unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr,
DwarfFramePtr, 2 * stackGrowth));
@@ -580,14 +615,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
// Define the current CFA to use the EBP/RBP register.
- unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
+ unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -596,7 +631,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Mark the FramePtr as live-in in every block.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- I->addLiveIn(FramePtr);
+ I->addLiveIn(MachineFramePtr);
} else {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
@@ -633,11 +668,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// able to calculate their offsets from the frame pointer).
if (RegInfo->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
+ uint64_t Val = -MaxAlign;
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+ TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr)
.addReg(StackPtr)
- .addImm(-MaxAlign)
+ .addImm(Val)
.setMIFlag(MachineInstr::FrameSetup);
// The EFLAGS implicit def is dead.
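A worked example for the realignment above, with an assumed MaxAlign of 32 on an LP64 target: Val = -MaxAlign = -32 fits in a signed 8-bit immediate, so getANDriOpcode() returns X86::AND64ri8 and the emitted instruction is equivalent to:

    andq $-32, %rsp    # clears the low 5 bits of RSP, realigning the stack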
@@ -655,6 +691,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Adjust stack pointer: ESP -= numbytes.
+ static const size_t PageSize = 4096;
+
// Windows and cygwin/mingw require a prologue helper routine when allocating
// more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
// uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
@@ -663,19 +701,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
- if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetMacho()) {
+ if (NumBytes >= PageSize && UseStackProbe) {
const char *StackProbeSymbol;
+ unsigned CallOp;
- if (Is64Bit) {
- if (STI.isTargetCygMing()) {
- StackProbeSymbol = "___chkstk_ms";
- } else {
- StackProbeSymbol = "__chkstk";
- }
- } else if (STI.isTargetCygMing())
- StackProbeSymbol = "_alloca";
- else
- StackProbeSymbol = "_chkstk";
+ getStackProbeFunction(STI, CallOp, StackProbeSymbol);
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
@@ -706,7 +736,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+ TII.get(CallOp))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
@@ -722,15 +752,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.setMIFlag(MachineInstr::FrameSetup);
}
if (isEAXAlive) {
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
- MI->setFlag(MachineInstr::FrameSetup);
- MBB.insert(MBBI, MI);
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MI->setFlag(MachineInstr::FrameSetup);
+ MBB.insert(MBBI, MI);
}
} else if (NumBytes) {
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
UseLEA, TII, *RegInfo);
}
@@ -746,7 +776,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// will restore SP to (BP - SEHFrameOffset)
for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
int offset = MFI->getObjectOffset(Info.getFrameIdx());
- SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
+ SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset));
}
SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
@@ -804,7 +834,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// to reference locals.
if (RegInfo->hasBasePointer(MF)) {
// Update the base pointer with the current stack pointer.
- unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
+ unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
@@ -834,21 +864,29 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no instructions");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
- bool IsLP64 = STI.isTarget64BitLP64();
+ // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit ones.
+ const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
+ const bool Is64BitILP32 = STI.isTarget64BitILP32();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned MachineFramePtr = Is64BitILP32 ?
+ getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
unsigned StackPtr = RegInfo->getStackRegister();
+ bool IsWinEH = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::ItaniumWinEH;
+ bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
+
switch (RetOpcode) {
default:
llvm_unreachable("Can only insert epilog into returning blocks");
@@ -898,7 +936,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Pop EBP.
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
} else {
NumBytes = StackSize - CSSize;
}
@@ -930,27 +968,39 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (RegInfo->needsStackRealignment(MF))
MBBI = FirstCSPop;
if (CSSize != 0) {
- unsigned Opc = getLEArOpcode(IsLP64);
+ unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
FramePtr, false, -CSSize);
+ --MBBI;
} else {
- unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(FramePtr);
+ --MBBI;
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA,
TII, *RegInfo);
+ --MBBI;
}
+ // Windows unwinder will not invoke function's exception handler if IP is
+ // either in prologue or in epilogue. This behavior causes a problem when a
+ // call immediately precedes an epilogue, because the return address points
+ // into the epilogue. To cope with that, we insert an epilogue marker here,
+ // then replace it with a 'nop' if it ends up immediately after a CALL in the
+ // final emitted code.
+ if (NeedsWinEH)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
+
// We're returning from function via eh_return.
if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &DestAddr = MBBI->getOperand(0);
assert(DestAddr.isReg() && "Offset should be in register!");
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
StackPtr).addReg(DestAddr.getReg());
} else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
RetOpcode == X86::TCRETURNmi ||
@@ -976,7 +1026,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
UseLEA, TII, *RegInfo);
}
@@ -1021,7 +1071,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII,
*RegInfo);
}
}
@@ -1029,7 +1079,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
const MachineFrameInfo *MFI = MF.getFrameInfo();
int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
uint64_t StackSize = MFI->getStackSize();
@@ -1072,7 +1122,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
// We can't calculate offset from frame pointer if the stack is realigned,
// so enforce usage of stack/base pointer. The base pointer is used when we
// have dynamic allocas in addition to dynamic realignment.
@@ -1090,7 +1140,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
std::vector<CalleeSavedInfo> &CSI) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1107,7 +1157,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// about avoiding it later.
unsigned FPReg = RegInfo->getFrameRegister(MF);
for (unsigned i = 0; i < CSI.size(); ++i) {
- if (CSI[i].getReg() == FPReg) {
+ if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
CSI.erase(CSI.begin() + i);
break;
}
@@ -1138,7 +1188,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
// ensure alignment
- SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
+ SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
// spill into slot
SpillSlotOffset -= RC->getSize();
int SlotIndex =
@@ -1157,7 +1207,7 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Push GPRs. It increases frame size.
@@ -1205,7 +1255,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Reload XMMs from stack frame.
@@ -1237,7 +1287,7 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1278,7 +1328,7 @@ HasNestArgument(const MachineFunction *MF) {
/// and the properties of the function either one or two registers will be
/// needed. Set primary to true for the first register, false for the second.
static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
// Erlang stuff.
@@ -1289,8 +1339,12 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
return Primary ? X86::EBX : X86::EDI;
}
- if (Is64Bit)
- return Primary ? X86::R11 : X86::R12;
+ if (Is64Bit) {
+ if (IsLP64)
+ return Primary ? X86::R11 : X86::R12;
+ else
+ return Primary ? X86::R11D : X86::R12D;
+ }
bool IsNested = HasNestArgument(&MF);
@@ -1314,14 +1368,15 @@ void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
uint64_t StackSize;
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
+ const bool IsLP64 = STI.isTarget64BitLP64();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
- unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
@@ -1359,7 +1414,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
}
if (IsNested)
- allocMBB->addLiveIn(X86::R10);
+ allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
MF.push_front(allocMBB);
MF.push_front(checkMBB);
@@ -1372,7 +1427,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
if (Is64Bit) {
if (STI.isTargetLinux()) {
TlsReg = X86::FS;
- TlsOffset = 0x70;
+ TlsOffset = IsLP64 ? 0x70 : 0x40;
} else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
@@ -1387,12 +1442,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
}
if (CompareStackPointer)
- ScratchReg = X86::RSP;
+ ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
else
- BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+ BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
if (STI.isTargetLinux()) {
@@ -1426,11 +1481,11 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
bool SaveScratch2;
if (CompareStackPointer) {
// The primary scratch register is available for holding the TLS offset.
- ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
SaveScratch2 = false;
} else {
// Need to use a second register to hold the TLS offset
- ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+ ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
// Unfortunately, with fastcc the second scratch register may hold an
// argument.
@@ -1468,15 +1523,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// Functions with nested arguments use R10, so it needs to be saved across
// the call to _morestack
+ const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
+ const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
+ const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
+ const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
+ const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
+
if (IsNested)
- BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+ BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
- BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+ BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
.addImm(StackSize);
- BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+ BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
.addImm(X86FI->getArgumentStackSize());
- MF.getRegInfo().setPhysRegUsed(X86::R10);
- MF.getRegInfo().setPhysRegUsed(X86::R11);
+ MF.getRegInfo().setPhysRegUsed(Reg10);
+ MF.getRegInfo().setPhysRegUsed(Reg11);
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
@@ -1523,13 +1584,14 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
/// temp0 = sp - MaxStack
/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
const unsigned SlotSize =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo())
+ static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo())
->getSlotSize();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
const bool Is64Bit = STI.is64Bit();
+ const bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL;
// HiPE-specific values
const unsigned HipeLeafWords = 24;
@@ -1623,7 +1685,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
SPLimitOffset = 0x4c;
}
- ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"HiPE prologue scratch register is live-in");
@@ -1657,9 +1719,9 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- const X86RegisterInfo &RegInfo =
- *static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
unsigned StackPtr = RegInfo.getStackRegister();
bool reseveCallFrame = hasReservedCallFrame(MF);
int Opcode = I->getOpcode();
@@ -1682,8 +1744,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned StackAlign =
- MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment();
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
MachineInstr *New = nullptr;
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 5ad3d4d..7740c3a 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_FRAMELOWERING_H
-#define X86_FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
+#define LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
@@ -20,12 +20,17 @@ namespace llvm {
class MCSymbol;
class X86TargetMachine;
+class X86Subtarget;
class X86FrameLowering : public TargetFrameLowering {
public:
explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
: TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
+ static void getStackProbeFunction(const X86Subtarget &STI,
+ unsigned &CallOp,
+ const char *&Symbol);
+
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL) const;
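A usage sketch for the static helper declared above, mirroring the call site added in emitPrologue (STI here is assumed to be the current X86Subtarget):

    const char *StackProbeSymbol;
    unsigned CallOp;
    X86FrameLowering::getStackProbeFunction(STI, CallOp, StackProbeSymbol);
    // CallOp is X86::W64ALLOCA or X86::CALLpcrel32; StackProbeSymbol is one of
    // "__chkstk", "___chkstk_ms", "_chkstk" or "_alloca", depending on target.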
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index ba2f5f6..3ef7b2c 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
@@ -33,6 +34,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <stdint.h>
using namespace llvm;
#define DEBUG_TYPE "x86-isel"
@@ -192,7 +194,6 @@ namespace {
private:
SDNode *Select(SDNode *N) override;
SDNode *SelectGather(SDNode *N, unsigned Opc);
- SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
@@ -237,10 +238,10 @@ namespace {
inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
- Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
- CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
- getTargetLowering()->getPointerTy()) :
- AM.Base_Reg;
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ ? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
+ TLI->getPointerTy())
+ : AM.Base_Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
// These are 32-bit even in 64-bit mode since RIP relative offset
@@ -297,7 +298,14 @@ namespace {
/// getInstrInfo - Return a reference to the TargetInstrInfo, casted
/// to the target-specific type.
const X86InstrInfo *getInstrInfo() const {
- return getTargetMachine().getInstrInfo();
+ return getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ }
+
+ /// \brief Address-mode matching performs shift-of-and to and-of-shift
+ /// reassociation in order to expose more scaled addressing
+ /// opportunities.
+ bool ComplexPatternFuncMutatesDAG() const override {
+ return true;
}
};
}
@@ -510,7 +518,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// If the source and destination are SSE registers, then this is a legal
// conversion that should not be lowered.
const X86TargetLowering *X86Lowering =
- static_cast<const X86TargetLowering *>(getTargetLowering());
+ static_cast<const X86TargetLowering *>(TLI);
bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
if (SrcIsSSE && DstIsSSE)
@@ -544,7 +552,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
false, false, 0);
SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
MachinePointerInfo(),
- MemVT, false, false, 0);
+ MemVT, false, false, false, 0);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
// extload we created. This will cause general havok on the dag because
@@ -565,7 +573,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
MachineFrameInfo *MFI) {
- const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
if (Subtarget->isTargetCygMing()) {
unsigned CallOp =
Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
@@ -775,9 +783,10 @@ static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
}
}
-// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
-// allows us to convert the shift and and into an h-register extract and
-// a scaled index. Returns false if the simplification is performed.
+// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
+// safe. This allows us to convert the shift and and into an h-register
+// extract and a scaled index. Returns false if the simplification is
+// performed.
static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
@@ -1429,7 +1438,7 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
if (RN && RN->getReg() == 0)
Base = CurDAG->getRegister(0, MVT::i64);
- else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
+ else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
// Base could already be %rip, particularly in the x32 ABI.
Base = SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
@@ -1563,26 +1572,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg,
- getTargetLowering()->getPointerTy()).getNode();
-}
-
-SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
- SDValue Chain = Node->getOperand(0);
- SDValue In1 = Node->getOperand(1);
- SDValue In2L = Node->getOperand(2);
- SDValue In2H = Node->getOperand(3);
-
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
- return nullptr;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
- SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
- MVT::i32, MVT::i32, MVT::Other, Ops);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
- return ResNode;
+ return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
}
/// Atomic opcode table
@@ -1716,16 +1706,23 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
SDLoc dl,
enum AtomicOpc &Op, MVT NVT,
- SDValue Val) {
+ SDValue Val,
+ const X86Subtarget *Subtarget) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
int64_t CNVal = CN->getSExtValue();
// Quit if not 32-bit imm.
if ((int32_t)CNVal != CNVal)
return Val;
+ // Quit if INT32_MIN: it is negative, so negating it would overflow,
+ // producing an immediate that does not fit in the 32 bits available for
+ // an immediate operand to sub. However, it still fits in 32 bits for the
+ // add (since it is not negated) so we can return target-constant.
+ if (CNVal == INT32_MIN)
+ return CurDAG->getTargetConstant(CNVal, NVT);
// For atomic-load-add, we could do some optimizations.
if (Op == ADD) {
// Translate to INC/DEC if ADD by 1 or -1.
- if ((CNVal == 1) || (CNVal == -1)) {
+ if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
Op = (CNVal == 1) ? INC : DEC;
// No more constant operand after being translated into INC/DEC.
return SDValue();
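Worked numbers for the INT32_MIN guard above:

    // CNVal = INT32_MIN = -2147483648
    //   (int32_t)CNVal == CNVal   -> passes the 32-bit immediate check
    //   -CNVal = +2147483648      -> no longer fits a signed 32-bit immediate,
    //                                so the negated sub forms cannot encode it
    //   CNVal (not negated)       -> still encodable by the add forms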
@@ -1774,8 +1771,8 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
SDValue Chain = Node->getOperand(0);
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
return nullptr;
// Which index into the table.
@@ -1797,7 +1794,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
break;
}
- Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+ Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget);
bool isUnOp = !Val.getNode();
bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
@@ -1829,31 +1826,40 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
Opc = AtomicOpcTbl[Op][I32];
break;
case MVT::i64:
- Opc = AtomicOpcTbl[Op][I64];
if (isCN) {
if (immSext8(Val.getNode()))
Opc = AtomicOpcTbl[Op][SextConstantI64];
else if (i64immSExt32(Val.getNode()))
Opc = AtomicOpcTbl[Op][ConstantI64];
- }
+ else
+ llvm_unreachable("True 64 bits constant in SelectAtomicLoadArith");
+ } else
+ Opc = AtomicOpcTbl[Op][I64];
break;
}
assert(Opc != 0 && "Invalid arith lock transform!");
+ // Building the new node.
SDValue Ret;
- SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, NVT), 0);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
if (isUnOp) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain };
Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
} else {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain };
Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
}
+
+ // Copying the MachineMemOperand.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+
+ // We need to have two outputs as that is what the original instruction had.
+ // So we add a dummy, undefined output. This is safe as we checked first
+ // that no-one uses our output anyway.
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
SDValue RetVals[] = { Undef, Ret };
return CurDAG->getMergeValues(RetVals, dl).getNode();
}
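
The IMPLICIT_DEF above stands in for the value result that the original ATOMIC_LOAD_* node
produced: the LOCK'ed RMW machine instruction only yields a chain, so a dummy value is merged
back in. As the comment notes, this is only safe because the loaded value is known to be dead;
a hedged sketch of that precondition (illustrative helper, not part of the patch):

    // Result 0 of the atomic node is the loaded value, result 1 the chain. The
    // undef substitution above is valid only when result 0 has no users.
    static bool atomicLoadValueIsDead(SDNode *Node) {
      return SDValue(Node, 0).use_empty();
    }
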
@@ -2125,6 +2131,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
+ case X86ISD::SHRUNKBLEND: {
+ // SHRUNKBLEND selects like a regular VSELECT.
+ SDValue VSelect = CurDAG->getNode(
+ ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2));
+ ReplaceUses(SDValue(Node, 0), VSelect);
+ SelectCode(VSelect.getNode());
+ // We already called ReplaceUses.
+ return nullptr;
+ }
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
@@ -2212,6 +2228,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
}
+ case X86ISD::UMUL8:
+ case X86ISD::SMUL8: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ return nullptr;
+ }
+
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
@@ -2387,11 +2422,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
case ISD::SDIVREM:
- case ISD::UDIVREM: {
+ case ISD::UDIVREM:
+ case X86ISD::SDIVREM8_SEXT_HREG:
+ case X86ISD::UDIVREM8_ZEXT_HREG: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- bool isSigned = Opcode == ISD::SDIVREM;
+ bool isSigned = (Opcode == ISD::SDIVREM ||
+ Opcode == X86ISD::SDIVREM8_SEXT_HREG);
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
@@ -2507,33 +2545,43 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
}
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
+ // Prevent use of AH in a REX instruction by explicitly copying it to
+ // an ABCD_L register.
//
// The current assumption of the register allocator is that isel
- // won't generate explicit references to the GPR8_NOREX registers. If
+ // won't generate explicit references to the GR8_ABCD_H registers. If
// the allocator and/or the backend get enhanced to be more robust in
// that regard, this can be, and should be, removed.
- if (HiReg == X86::AH && Subtarget->is64Bit() &&
- !SDValue(Node, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
-
- // If we also need AL (the quotient), get it by extracting a subreg from
- // Result. The fast register allocator does not like multiple CopyFromReg
- // nodes using aliasing registers.
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0),
- CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
-
- // Shift AX right by 8 bits instead of using AH.
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)),
- 0);
- ReplaceUses(SDValue(Node, 1),
- CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
+ SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
+ unsigned AHExtOpcode =
+ isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
+
+ SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
+ MVT::Glue, AHCopy, InFlag);
+ SDValue Result(RNode, 0);
+ InFlag = SDValue(RNode, 1);
+
+ if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
+ Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
+ if (Node->getValueType(1) == MVT::i64) {
+          // It's not possible to directly movsx AH to a 64-bit register, because
+ // the latter needs the REX prefix, but the former can't have it.
+ assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG &&
+ "Unexpected i64 sext of h-register");
+ Result =
+ SDValue(CurDAG->getMachineNode(
+ TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
+ CurDAG->getTargetConstant(0, MVT::i64), Result,
+ CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
+ 0);
+ }
+ } else {
+ Result =
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
+ }
+ ReplaceUses(SDValue(Node, 1), Result);
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
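
In 64-bit mode an instruction that carries a REX prefix cannot encode AH, which is why the
remainder is now read through the _NOREX movzx/movsx forms instead of shifting AX. For
reference, a scalar sketch of what the 8-bit divide leaves in AL/AH (illustrative helper, not
part of the patch):

    #include <cstdint>
    // AX is divided by an 8-bit operand; the quotient ends up in AL and the
    // remainder in AH. The code above copies AH out with
    // MOVZX32_NOREXrr8 / MOVSX32_NOREXrr8 because a REX-prefixed move cannot
    // address AH directly.
    static void div8(uint16_t AX, uint8_t Divisor,
                     uint8_t &Quotient, uint8_t &Remainder) {
      Quotient  = static_cast<uint8_t>(AX / Divisor);  // -> AL
      Remainder = static_cast<uint8_t>(AX % Divisor);  // -> AH
    }
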
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
@@ -2563,12 +2611,30 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
- // use a smaller encoding.
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- HasNoSignedComparisonUses(Node))
- // Look past the truncate if CMP is the only use of it.
+ HasNoSignedComparisonUses(Node)) {
+ // Look for (X86cmp (truncate $op, i1), 0) and try to convert to a
+ // smaller encoding
+ if (Opcode == X86ISD::CMP && N0.getValueType() == MVT::i1 &&
+ X86::isZeroNode(N1)) {
+ SDValue Reg = N0.getOperand(0);
+ SDValue Imm = CurDAG->getTargetConstant(1, MVT::i8);
+
+        // Extract the low byte if the operand is wider than 8 bits.
+ if (Reg.getScalarValueSizeInBits() > 8)
+ Reg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Reg);
+ // Emit a testb.
+ SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
+ Reg, Imm);
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ return nullptr;
+ }
+
N0 = N0.getOperand(0);
+ }
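
A comparison of a value truncated to i1 against zero only depends on bit 0, which is exactly
what the TEST8ri with immediate 1 emitted above computes; a hedged one-line restatement
(standalone, illustrative only):

    // Scalar equivalent of "testb $1, %reg" feeding a zero/non-zero comparison:
    // only the low bit of the wider value is observed.
    static bool i1CompareNonZero(uint32_t WideVal) {
      return (WideVal & 1u) != 0;
    }
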
+ // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
+ // use a smaller encoding.
+ // Look past the truncate if CMP is the only use of it.
if ((N0.getNode()->getOpcode() == ISD::AND ||
(N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
N0.getNode()->hasOneUse() &&
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5ccff20..f05b6c6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19,6 +19,7 @@
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -49,6 +50,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
+#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
#include <cctype>
@@ -65,10 +67,16 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
cl::Hidden);
static cl::opt<bool> ExperimentalVectorShuffleLowering(
- "x86-experimental-vector-shuffle-lowering", cl::init(false),
+ "x86-experimental-vector-shuffle-lowering", cl::init(true),
cl::desc("Enable an experimental vector shuffle lowering code path."),
cl::Hidden);
+static cl::opt<int> ReciprocalEstimateRefinementSteps(
+ "x86-recip-refinement-steps", cl::init(1),
+ cl::desc("Specify the number of Newton-Raphson iterations applied to the "
+ "result of the hardware reciprocal estimate instruction."),
+ cl::NotHidden);
+
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
@@ -191,28 +199,10 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}
-static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
- if (TT.isOSBinFormatMachO()) {
- if (TT.getArch() == Triple::x86_64)
- return new X86_64MachoTargetObjectFile();
- return new TargetLoweringObjectFileMachO();
- }
-
- if (TT.isOSLinux())
- return new X86LinuxTargetObjectFile();
- if (TT.isOSBinFormatELF())
- return new TargetLoweringObjectFileELF();
- if (TT.isKnownWindowsMSVCEnvironment())
- return new X86WindowsTargetObjectFile();
- if (TT.isOSBinFormatCOFF())
- return new TargetLoweringObjectFileCOFF();
- llvm_unreachable("unknown subtarget type");
-}
-
// FIXME: This should stop caching the target machine as soon as
// we can remove resetOperationActions et al.
-X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
- : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
+X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM)
+ : TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
@@ -255,7 +245,7 @@ void X86TargetLowering::resetOperationActions() {
else
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
+ TM.getSubtarget<X86Subtarget>().getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides on Atom when compiling with O2
@@ -316,6 +306,8 @@ void X86TargetLowering::resetOperationActions() {
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// SETOEQ and SETUNE require checking two conditions.
setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
@@ -519,10 +511,21 @@ void X86TargetLowering::resetOperationActions() {
// If we don't have F16C support, then lower half float conversions
// into library calls.
if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
- setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
- setOperationAction(ISD::FP32_TO_FP16, MVT::i16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
+ // There's never any support for operations beyond MVT::f32.
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f80, MVT::f16, Expand);
+
if (Subtarget->hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
@@ -648,8 +651,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
- MVT::i64 : MVT::i32, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
@@ -797,6 +799,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
@@ -878,7 +882,12 @@ void X86TargetLowering::resetOperationActions() {
(MVT::SimpleValueType)InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, Expand);
+
+      // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like types;
+ // we have to deal with them whether we ask for Expansion or not. Setting
+ // Expand causes its own optimisation problems though, so leave them legal.
+ if (VT.getVectorElementType() == MVT::i1)
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -935,12 +944,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
- // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
+ // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
@@ -995,6 +1005,20 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
+ // We support custom legalizing of sext and anyext loads for specific
+ // memory vector types which we can load as a scalar (or sequence of
+ // scalars) and extend in-register to a legal 128-bit vector type. For sext
+ // loads these must work with a single scalar load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8i8, Custom);
+
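
Marking these narrow memory vector types Custom lets the lowering turn the extending load into
one scalar load plus an in-register extend rather than scalarizing it. A scalar sketch of the
shape of that lowering for a v4i8 sext load (little-endian, illustrative only, not part of the
patch):

    #include <cstdint>
    #include <cstring>
    // One 32-bit scalar load, then a per-lane in-register sign extension.
    static void sextload_v4i8(const void *Mem, int32_t Out[4]) {
      uint32_t Bits;
      std::memcpy(&Bits, Mem, sizeof(Bits));            // single scalar load
      for (int i = 0; i != 4; ++i)
        Out[i] = static_cast<int8_t>(Bits >> (8 * i));  // sign-extend lane i
    }
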
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
@@ -1027,8 +1051,6 @@ void X86TargetLowering::resetOperationActions() {
AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
@@ -1090,7 +1112,13 @@ void X86TargetLowering::resetOperationActions() {
// some vselects for now.
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
- // i8 and i16 vectors are custom , because the source register and source
+ // SSE41 brings specific instructions for doing vector sign extend even in
+ // cases where we don't have SRA.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, Custom);
+
+    // i8 and i16 vectors are custom because the source register and
// source memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
// information.
@@ -1104,7 +1132,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
- // FIXME: these should be Legal but thats only for the case where
+ // FIXME: these should be Legal, but that's only for the case where
// the index is constant. For now custom expand to deal with that.
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
@@ -1254,6 +1282,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::VSELECT, MVT::v16i16, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+
+ // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
+ // when we have a 256bit-wide blend with immediate.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1378,6 +1410,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
@@ -1489,6 +1525,43 @@ void X86TargetLowering::resetOperationActions() {
}
}// has AVX-512
+ if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
+ addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
+ addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
+
+ addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
+ addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
+
+ setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
+ setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
+ setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
+
+ for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
+ const MVT VT = (MVT::SimpleValueType)i;
+
+ const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+      // Do not attempt to promote non-512-bit vectors
+ if (!VT.is512BitVector())
+ continue;
+
+      if (EltSize < 32) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ }
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
+ addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
+ addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
+
+ setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
+ }
+
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
// of this type with custom code.
for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
@@ -1521,9 +1594,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::UMULO, VT, Custom);
}
- // There are no 8-bit 3-address imul/mul instructions
- setOperationAction(ISD::SMULO, MVT::i8, Expand);
- setOperationAction(ISD::UMULO, MVT::i8, Expand);
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
@@ -1600,6 +1670,14 @@ void X86TargetLowering::resetOperationActions() {
PredictableSelectIsExpensive = !Subtarget->isAtom();
setPrefFunctionAlignment(4); // 2^4 bytes.
+
+ verifyIntrinsicTables();
+}
+
+// This has so far only been implemented for 64-bit MachO.
+bool X86TargetLowering::useLoadStackGuardNode() const {
+ return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO &&
+ Subtarget->is64Bit();
}
TargetLoweringBase::LegalizeTypeAction
@@ -1616,10 +1694,40 @@ EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
- if (Subtarget->hasAVX512())
- switch(VT.getVectorNumElements()) {
- case 8: return MVT::v8i1;
- case 16: return MVT::v16i1;
+ const unsigned NumElts = VT.getVectorNumElements();
+ const EVT EltVT = VT.getVectorElementType();
+ if (VT.is512BitVector()) {
+ if (Subtarget->hasAVX512())
+ if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+ EltVT == MVT::f32 || EltVT == MVT::f64)
+ switch(NumElts) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ }
+ if (Subtarget->hasBWI())
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ switch(NumElts) {
+ case 32: return MVT::v32i1;
+ case 64: return MVT::v64i1;
+ }
+ }
+
+ if (VT.is256BitVector() || VT.is128BitVector()) {
+ if (Subtarget->hasVLX())
+ if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+ EltVT == MVT::f32 || EltVT == MVT::f64)
+ switch(NumElts) {
+ case 2: return MVT::v2i1;
+ case 4: return MVT::v4i1;
+ case 8: return MVT::v8i1;
+ }
+ if (Subtarget->hasBWI() && Subtarget->hasVLX())
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ switch(NumElts) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ case 32: return MVT::v32i1;
+ }
}
return VT.changeVectorElementTypeToInteger();
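
A condensed restatement of the mapping implemented above may help when reading the nested
switches; the helper below is a standalone sketch for the 512-bit case only (the feature
booleans and the string return value are illustrative, the real function returns an EVT):

    static const char *maskTypeFor512BitSetCC(unsigned NumElts, unsigned EltBits,
                                              bool HasAVX512, bool HasBWI) {
      if (HasAVX512 && (EltBits == 32 || EltBits == 64)) {
        if (NumElts == 8)  return "v8i1";
        if (NumElts == 16) return "v16i1";
      }
      if (HasBWI && (EltBits == 8 || EltBits == 16)) {
        if (NumElts == 32) return "v32i1";
        if (NumElts == 64) return "v64i1";
      }
      // Otherwise fall back to changeVectorElementTypeToInteger().
      return "integer vector of the same width";
    }
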
@@ -1726,9 +1834,10 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
}
bool
-X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
- unsigned,
- bool *Fast) const {
+X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned,
+ unsigned,
+ bool *Fast) const {
if (Fast)
*Fast = Subtarget->isUnalignedMemAccessFast();
return true;
@@ -1794,9 +1903,7 @@ X86TargetLowering::findRepresentativeClass(MVT VT) const{
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
- RRC = Subtarget->is64Bit() ?
- (const TargetRegisterClass*)&X86::GR64RegClass :
- (const TargetRegisterClass*)&X86::GR32RegClass;
+ RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
@@ -1851,8 +1958,7 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(),
- RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
@@ -1871,8 +1977,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(),
- RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
SDValue Flag;
@@ -1918,8 +2023,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
- if (VA.getLocReg() == X86::ST0 ||
- VA.getLocReg() == X86::ST1) {
+ if (VA.getLocReg() == X86::FP0 ||
+ VA.getLocReg() == X86::FP1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
@@ -2005,6 +2110,13 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
UI != UE; ++UI) {
if (UI->getOpcode() != X86ISD::RET_FLAG)
return false;
+ // If we are returning more than one value, we can definitely
+      // not make a tail call; see PR19530.
+ if (UI->getNumOperands() > 4)
+ return false;
+ if (UI->getNumOperands() == 4 &&
+ UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
+ return false;
HasRet = true;
}
@@ -2015,8 +2127,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-MVT
-X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
@@ -2025,7 +2137,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
else
ReturnMVT = MVT::i32;
- MVT MinVT = getRegisterType(ReturnMVT);
+ EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
@@ -2042,8 +2154,8 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
@@ -2057,33 +2169,21 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
report_fatal_error("SSE register return with SSE disabled");
}
- SDValue Val;
-
- // If this is a call to a function that returns an fp value on the floating
- // point stack, we must guarantee the value is popped from the stack, so
- // a CopyFromReg is not good enough - the copy instruction may be eliminated
- // if the return value is not used. We use the FpPOP_RETVAL instruction
- // instead.
- if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
- // If we prefer to use the value in xmm registers, copy it out as f80 and
- // use a truncate to move it from fp stack reg to xmm reg.
- if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
- SDValue Ops[] = { Chain, InFlag };
- Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
- MVT::Other, MVT::Glue, Ops), 1);
- Val = Chain.getValue(0);
-
- // Round the f80 to the right size, which also moves it to the appropriate
- // xmm register.
- if (CopyVT != VA.getValVT())
- Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
- // This truncation won't change the value.
- DAG.getIntPtrConstant(1));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- CopyVT, InFlag).getValue(1);
- Val = Chain.getValue(0);
- }
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ isScalarFPTypeInSSEReg(VA.getValVT()))
+ CopyVT = MVT::f80;
+
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ SDValue Val = Chain.getValue(0);
+
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+
InFlag = Chain.getValue(2);
InVals.push_back(Val);
}
@@ -2224,6 +2324,55 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
}
}
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
+ const X86Subtarget *Subtarget) {
+ assert(Subtarget->is64Bit());
+
+ if (Subtarget->isCallingConvWin64(CallConv)) {
+ static const MCPhysReg GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
+ }
+
+ static const MCPhysReg GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
+}
+
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
+ CallingConv::ID CallConv,
+ const X86Subtarget *Subtarget) {
+ assert(Subtarget->is64Bit());
+ if (Subtarget->isCallingConvWin64(CallConv)) {
+ // The XMM registers which might contain var arg parameters are shadowed
+ // in their paired GPR. So we only need to save the GPR to their home
+ // slots.
+ // TODO: __vectorcall will change this.
+ return None;
+ }
+
+ const Function *Fn = MF.getFunction();
+ bool NoImplicitFloatOps = Fn->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
+ "SSE register cannot be used when SSE is disabled!");
+ if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+ !Subtarget->hasSSE1())
+ // Kernel mode asks for SSE to be disabled, so there are no XMM argument
+ // registers.
+ return None;
+
+ static const MCPhysReg XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
+}
+
SDValue
X86TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -2251,8 +2400,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(),
- ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64)
@@ -2296,6 +2444,10 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = &X86::VK8RegClass;
else if (RegVT == MVT::v16i1)
RC = &X86::VK16RegClass;
+ else if (RegVT == MVT::v32i1)
+ RC = &X86::VK32RegClass;
+ else if (RegVT == MVT::v64i1)
+ RC = &X86::VK64RegClass;
else
llvm_unreachable("Unknown argument type!");
@@ -2362,60 +2514,53 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
- // the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg) {
- if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall)) {
- FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
- }
- if (Is64Bit) {
- unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
-
- // FIXME: We should really autogenerate these arrays
- static const MCPhysReg GPR64ArgRegsWin64[] = {
- X86::RCX, X86::RDX, X86::R8, X86::R9
- };
- static const MCPhysReg GPR64ArgRegs64Bit[] = {
- X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
- };
- static const MCPhysReg XMMArgRegs64Bit[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
- X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
- };
- const MCPhysReg *GPR64ArgRegs;
- unsigned NumXMMRegs = 0;
-
- if (IsWin64) {
- // The XMM registers which might contain var arg parameters are shadowed
- // in their paired GPR. So we only need to save the GPR to their home
- // slots.
- TotalNumIntRegs = 4;
- GPR64ArgRegs = GPR64ArgRegsWin64;
- } else {
- TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
- GPR64ArgRegs = GPR64ArgRegs64Bit;
-
- NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
- TotalNumXMMRegs);
+ // the start of the first vararg value... for expansion of llvm.va_start. We
+ // can skip this if there are no va_start calls.
+ if (MFI->hasVAStart() &&
+ (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall))) {
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(1, StackSize, true));
+ }
+
+ // 64-bit calling conventions support varargs and register parameters, so we
+ // have to do extra work to spill them in the prologue or forward them to
+ // musttail calls.
+ if (Is64Bit && isVarArg &&
+ (MFI->hasVAStart() || MFI->hasMustTailInVarArgFunc())) {
+ // Find the first unallocated argument registers.
+ ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
+ ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
+ unsigned NumIntRegs =
+ CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
+ unsigned NumXMMRegs =
+ CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
+ assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ "SSE register cannot be used when SSE is disabled!");
+
+ // Gather all the live in physical registers.
+ SmallVector<SDValue, 6> LiveGPRs;
+ SmallVector<SDValue, 8> LiveXMMRegs;
+ SDValue ALVal;
+ for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
+ unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
+ LiveGPRs.push_back(
+ DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
+ }
+ if (!ArgXMMs.empty()) {
+ unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
+ ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
+ for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
+ unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
+ LiveXMMRegs.push_back(
+ DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
}
- unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
- TotalNumIntRegs);
-
- bool NoImplicitFloatOps = Fn->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
- assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
- "SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
- NoImplicitFloatOps) &&
- "SSE register cannot be used when SSE is disabled!");
- if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
- !Subtarget->hasSSE1())
- // Kernel mode asks for SSE to be disabled, so don't push them
- // on the stack.
- TotalNumXMMRegs = 0;
+ }
+ // Store them to the va_list returned by va_start.
+ if (MFI->hasVAStart()) {
if (IsWin64) {
- const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -2429,10 +2574,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// registers, then we must store them to their spots on the stack so
       // they may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
- FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
- FuncInfo->setRegSaveFrameIndex(
- MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
- false));
+ FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
+ ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
}
// Store the integer parameter registers.
@@ -2440,12 +2584,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
unsigned Offset = FuncInfo->getVarArgsGPOffset();
- for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ for (SDValue Val : LiveGPRs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
- unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
- &X86::GR64RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
@@ -2455,32 +2596,51 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
Offset += 8;
}
- if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
+ if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
// Now store the XMM (fp + vector) parameter registers.
- SmallVector<SDValue, 11> SaveXMMOps;
+ SmallVector<SDValue, 12> SaveXMMOps;
SaveXMMOps.push_back(Chain);
-
- unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
- SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
-
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getRegSaveFrameIndex()));
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getVarArgsFPOffset()));
-
- for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
- unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
- &X86::VR128RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
- SaveXMMOps.push_back(Val);
- }
+ SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
+ LiveXMMRegs.end());
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
MVT::Other, SaveXMMOps));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ } else {
+    // Add all GPRs, AL, and XMMs to the list of forwards. We will add them
+ // to the liveout set on a musttail call.
+ assert(MFI->hasMustTailInVarArgFunc());
+ auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
+ typedef X86MachineFunctionInfo::Forward Forward;
+
+ for (unsigned I = 0, E = LiveGPRs.size(); I != E; ++I) {
+ unsigned VReg =
+ MF.getRegInfo().createVirtualRegister(&X86::GR64RegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveGPRs[I]);
+ Forwards.push_back(Forward(VReg, ArgGPRs[NumIntRegs + I], MVT::i64));
+ }
+
+ if (!ArgXMMs.empty()) {
+ unsigned ALVReg =
+ MF.getRegInfo().createVirtualRegister(&X86::GR8RegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, ALVReg, ALVal);
+ Forwards.push_back(Forward(ALVReg, X86::AL, MVT::i8));
+
+ for (unsigned I = 0, E = LiveXMMRegs.size(); I != E; ++I) {
+ unsigned VReg =
+ MF.getRegInfo().createVirtualRegister(&X86::VR128RegClass);
+ Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveXMMRegs[I]);
+ Forwards.push_back(
+ Forward(VReg, ArgXMMs[NumXMMRegs + I], MVT::v4f32));
+ }
+ }
}
}
@@ -2583,6 +2743,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
+ X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
if (MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
@@ -2614,8 +2775,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(),
- ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64)
@@ -2636,7 +2796,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
int FPDiff = 0;
if (isTailCall && !IsSibcall && !IsMustTail) {
// Lower arguments at fp - stackoffset + fpdiff.
- X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
@@ -2655,8 +2814,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// arguments passed in memory when using inalloca.
if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
NumBytesToPush = 0;
- assert(ArgLocs.back().getLocMemOffset() == 0 &&
- "an inalloca argument must be the only memory argument");
+ if (!ArgLocs.back().isMemLoc())
+ report_fatal_error("cannot use inalloca attribute on a register "
+ "parameter");
+ if (ArgLocs.back().getLocMemOffset() != 0)
+ report_fatal_error("any parameter with the inalloca attribute must be "
+ "the only memory argument");
}
if (!IsSibcall)
@@ -2675,8 +2838,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -2775,7 +2938,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- if (Is64Bit && isVarArg && !IsWin64) {
+ if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -2797,6 +2960,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(NumXMMRegs, MVT::i8)));
}
+ if (Is64Bit && isVarArg && IsMustTail) {
+ const auto &Forwards = X86Info->getForwardedMustTailRegParms();
+ for (const auto &F : Forwards) {
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
+ RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
+ }
+ }
+
// For tail calls lower the arguments to the 'real' stack slots. Sibcalls
// don't need this because the eligibility check rejects calls that require
// shuffling arguments passed in memory.
@@ -2946,6 +3117,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
+ } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) {
+    // Zero-extend the 32-bit Callee address to 64 bits, as required by the x32 ABI.
+ Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
// Returns a chain & a flag for retval copy to use.
@@ -2972,7 +3146,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3043,7 +3217,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If a tail called function callee has more arguments than the caller the
// caller needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after the
-// original REtADDR, but before the saved framepointer or the spilled registers
+// original RETADDR, but before the saved framepointer or the spilled registers
// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
// stack layout:
// arg1
@@ -3063,9 +3237,9 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
- const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
@@ -3178,8 +3352,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
if (RegInfo->needsStackRealignment(MF))
return false;
@@ -3207,8 +3381,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
@@ -3228,12 +3402,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
- if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
}
}
@@ -3242,13 +3416,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs1, *DAG.getContext());
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
+ *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
- CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs2, *DAG.getContext());
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
+ *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
@@ -3274,8 +3448,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// Allocate shadow area for Win64
if (IsCalleeWin64)
@@ -3292,7 +3466,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII =
- static_cast<const X86InstrInfo *>(DAG.getTarget().getInstrInfo());
+ static_cast<const X86InstrInfo *>(DAG.getSubtarget().getInstrInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
@@ -3362,6 +3536,8 @@ static bool MayFoldIntoStore(SDValue Op) {
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
+ case X86ISD::BLENDI:
+ case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
@@ -3379,7 +3555,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVSD:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
- case X86ISD::VPERMILP:
+ case X86ISD::VPERMILPI:
case X86ISD::VPERM2X128:
case X86ISD::VPERMI:
return true;
@@ -3405,7 +3581,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::VPERMILP:
+ case X86ISD::VPERMILPI:
case X86ISD::VPERMI:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
@@ -3417,6 +3593,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PALIGNR:
+ case X86ISD::VALIGN:
case X86ISD::SHUFP:
case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -3443,8 +3620,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
@@ -3494,23 +3671,18 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
/// own arguments. Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool TailCallOpt) {
- if (IsVarArg)
- return false;
-
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
- return !is64Bit;
case CallingConv::X86_FastCall:
- return !is64Bit;
case CallingConv::X86_ThisCall:
return !is64Bit;
case CallingConv::Fast:
- return TailCallOpt;
case CallingConv::GHC:
- return TailCallOpt;
case CallingConv::HiPE:
+ if (IsVarArg)
+ return false;
return TailCallOpt;
}
}
@@ -3687,14 +3859,23 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
}
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
-/// the second operand.
-static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
- if (VT == MVT::v4f32 || VT == MVT::v4i32 )
- return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
- if (VT == MVT::v2f64 || VT == MVT::v2i64)
- return (Mask[0] < 2 && Mask[1] < 2);
- return false;
+/// is suitable for input to PSHUFD. That is, it doesn't reference the other
+/// operand - by default it matches against the first operand.
+static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT,
+ bool TestSecondOperand = false) {
+ if (VT != MVT::v4f32 && VT != MVT::v4i32 &&
+ VT != MVT::v2f64 && VT != MVT::v2i64)
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Lo = TestSecondOperand ? NumElems : 0;
+ unsigned Hi = Lo + NumElems;
+
+ for (unsigned i = 0; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], (int)Lo, (int)Hi))
+ return false;
+
+ return true;
}
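
For clarity, a standalone restatement of the predicate above specialized to 4-element masks
(illustrative only; -1 marks an undef lane):

    #include <array>
    static bool isPshufdLikeMask4(const std::array<int, 4> &Mask,
                                  bool TestSecondOperand = false) {
      const int Lo = TestSecondOperand ? 4 : 0;
      const int Hi = Lo + 4;
      for (int M : Mask)
        if (M >= 0 && (M < Lo || M >= Hi))
          return false;
      return true;
    }
    // e.g. {2,1,0,3} -> true; {4,5,6,7} with TestSecondOperand -> true;
    //      {0,5,2,7} -> false, since it references both operands.
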
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
@@ -3755,16 +3936,12 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
return true;
}
-/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
-/// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
- const X86Subtarget *Subtarget) {
- if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
- (VT.is256BitVector() && !Subtarget->hasInt256()))
- return false;
-
+/// \brief Return true if the mask specifies a shuffle of elements that is
+/// suitable for input to intralane (palignr) or interlane (valign) vector
+/// right-shift.
+static bool isAlignrMask(ArrayRef<int> Mask, MVT VT, bool InterLane) {
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.is512BitVector() ? 1: VT.getSizeInBits()/128;
+ unsigned NumLanes = InterLane ? 1: VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
// Do not handle 64-bit element shuffles with palignr.
@@ -3828,6 +4005,29 @@ static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
return true;
}
+/// \brief Return true if the node specifies a shuffle of elements that is
+/// suitable for input to PALIGNR.
+static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
+ const X86Subtarget *Subtarget) {
+ if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget->hasInt256()) ||
+ VT.is512BitVector())
+ // FIXME: Add AVX512BW.
+ return false;
+
+ return isAlignrMask(Mask, VT, false);
+}
+
+/// \brief Return true if the node specifies a shuffle of elements that is
+/// suitable for input to VALIGN.
+static bool isVALIGNMask(ArrayRef<int> Mask, MVT VT,
+ const X86Subtarget *Subtarget) {
+ // FIXME: Add AVX512VL.
+ if (!VT.is512BitVector() || !Subtarget->hasAVX512())
+ return false;
+ return isAlignrMask(Mask, VT, true);
+}
+
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
@@ -4070,43 +4270,34 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckl");
- // AVX defines UNPCK* to operate independently on 128-bit lanes.
- unsigned NumLanes;
- unsigned NumOf256BitLanes;
unsigned NumElts = VT.getVectorNumElements();
- if (VT.is256BitVector()) {
- if (NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
- NumLanes = 2;
- NumOf256BitLanes = 1;
- } else if (VT.is512BitVector()) {
- assert(VT.getScalarType().getSizeInBits() >= 32 &&
- "Unsupported vector type for unpckh");
- NumLanes = 2;
- NumOf256BitLanes = 2;
- } else {
- NumLanes = 1;
- NumOf256BitLanes = 1;
- }
- unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
- unsigned NumLaneElts = NumEltsInStride/NumLanes;
+ assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
+         "Unsupported vector type for unpckl");
- for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
- for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
- for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l256*NumEltsInStride+l+i];
- int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
- if (!isUndefOrEqual(BitI, j+l256*NumElts))
- return false;
- if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l+i];
+ int BitI1 = Mask[l+i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
return false;
- if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
return false;
}
}
}
+
return true;
}
@@ -4117,39 +4308,29 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckh");
- // AVX defines UNPCK* to operate independently on 128-bit lanes.
- unsigned NumLanes;
- unsigned NumOf256BitLanes;
unsigned NumElts = VT.getVectorNumElements();
- if (VT.is256BitVector()) {
- if (NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
- NumLanes = 2;
- NumOf256BitLanes = 1;
- } else if (VT.is512BitVector()) {
- assert(VT.getScalarType().getSizeInBits() >= 32 &&
- "Unsupported vector type for unpckh");
- NumLanes = 2;
- NumOf256BitLanes = 2;
- } else {
- NumLanes = 1;
- NumOf256BitLanes = 1;
- }
- unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
- unsigned NumLaneElts = NumEltsInStride/NumLanes;
+ assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
+ "Unsupported vector type for unpckh");
- for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
- for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
- for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l256*NumEltsInStride+l+i];
- int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
- if (!isUndefOrEqual(BitI, j+l256*NumElts))
- return false;
- if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l+i];
+ int BitI1 = Mask[l+i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
return false;
- if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
+ } else {
+ if (!isUndefOrEqual(BitI1, j+NumElts))
return false;
}
}
@@ -4652,11 +4833,13 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
return Mask;
}
-/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+/// \brief Return the appropriate immediate to shuffle the specified
+/// VECTOR_SHUFFLE mask with the PALIGNR (if InterLane is false) or with
+/// VALIGN (if InterLane is true) instructions.
+static unsigned getShuffleAlignrImmediate(ShuffleVectorSDNode *SVOp,
+ bool InterLane) {
MVT VT = SVOp->getSimpleValueType(0);
- unsigned EltSize = VT.is512BitVector() ? 1 :
+ unsigned EltSize = InterLane ? 1 :
VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
@@ -4677,6 +4860,19 @@ static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
return (Val - i) * EltSize;
}
+/// \brief Return the appropriate immediate to shuffle the specified
+/// VECTOR_SHUFFLE mask with the PALIGNR instruction.
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+ return getShuffleAlignrImmediate(SVOp, false);
+}
+
+/// \brief Return the appropriate immediate to shuffle the specified
+/// VECTOR_SHUFFLE mask with the VALIGN instruction.
+static unsigned getShuffleVALIGNImmediate(ShuffleVectorSDNode *SVOp) {
+ return getShuffleAlignrImmediate(SVOp, true);
+}
+
+
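
The immediate returned above is the rotation amount, measured in bytes for PALIGNR and in
elements for VALIGN. A hedged standalone sketch of that computation (illustrative only; the
elided part of the function above locates the first defined mask element):

    // Mask uses -1 for undef lanes; EltSizeInBytes is the vector element size.
    static unsigned alignrImmediate(const int *Mask, unsigned NumElts,
                                    unsigned EltSizeInBytes, bool InterLane) {
      unsigned Granularity = InterLane ? 1 : EltSizeInBytes;
      for (unsigned i = 0; i != NumElts; ++i)
        if (Mask[i] >= 0)
          return (Mask[i] - i) * Granularity; // e.g. <3,4,5,...> on v8i16 -> 6
      return 0;
    }
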
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
@@ -4751,28 +4947,6 @@ bool X86::isZeroNode(SDValue Elt) {
return false;
}
-/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
-/// their permute mask.
-static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG) {
- MVT VT = SVOp->getSimpleValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> MaskVec;
-
- for (unsigned i = 0; i != NumElems; ++i) {
- int Idx = SVOp->getMaskElt(i);
- if (Idx >= 0) {
- if (Idx < (int)NumElems)
- Idx += NumElems;
- else
- Idx -= NumElems;
- }
- MaskVec.push_back(Idx);
- }
- return DAG.getVectorShuffle(VT, SDLoc(SVOp), SVOp->getOperand(1),
- SVOp->getOperand(0), &MaskVec[0]);
-}
-
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
@@ -4897,32 +5071,32 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Vec;
if (VT.is128BitVector()) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
- SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Cst = DAG.getConstantFP(+0.0, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.is256BitVector()) { // AVX
if (Subtarget->hasInt256()) { // AVX2
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
} else {
// 256-bit logic and arithmetic instructions in AVX are all
// floating-point, no support for integer ops. Emit fp zeroed vectors.
- SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Cst = DAG.getConstantFP(+0.0, MVT::f32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
}
} else if (VT.is512BitVector()) { // AVX-512
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
} else if (VT.getScalarType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
- SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
+ SDValue Cst = DAG.getConstant(0, MVT::i1);
SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
} else
@@ -4939,7 +5113,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDValue Cst = DAG.getConstant(~0U, MVT::i32);
SDValue Vec;
if (VT.is256BitVector()) {
if (HasInt256) { // AVX2
@@ -5109,37 +5283,49 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
}
/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
-/// target specific opcode. Returns true if the Mask could be calculated.
-/// Sets IsUnary to true if only uses one source.
+/// target specific opcode. Returns true if the Mask could be calculated. Sets
+/// IsUnary to true if it uses only one source. Note that this will set IsUnary for
+/// shuffles which use a single input multiple times, and in those cases it will
+/// adjust the mask to only have indices within that single input.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
IsUnary = false;
+ bool IsFakeUnary = false;
switch(N->getOpcode()) {
+ case X86ISD::BLENDI:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
DecodeUNPCKLMask(VT, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::PALIGNR:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
break;
case X86ISD::PSHUFD:
- case X86ISD::VPERMILP:
+ case X86ISD::VPERMILPI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
@@ -5154,6 +5340,72 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
+ case X86ISD::PSHUFB: {
+ IsUnary = true;
+ SDValue MaskNode = N->getOperand(1);
+ while (MaskNode->getOpcode() == ISD::BITCAST)
+ MaskNode = MaskNode->getOperand(0);
+
+ if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
+ // If we have a build-vector, then things are easy.
+ EVT VT = MaskNode.getValueType();
+ assert(VT.isVector() &&
+ "Can't produce a non-vector with a build_vector!");
+ if (!VT.isInteger())
+ return false;
+
+ int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
+
+ SmallVector<uint64_t, 32> RawMask;
+ for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
+ SDValue Op = MaskNode->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF) {
+ RawMask.push_back((uint64_t)SM_SentinelUndef);
+ continue;
+ }
+ auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
+ if (!CN)
+ return false;
+ APInt MaskElement = CN->getAPIntValue();
+
+ // We now have to decode the element which could be any integer size and
+ // extract each byte of it.
+ for (int j = 0; j < NumBytesPerElement; ++j) {
+ // Note that this is x86 and so always little endian: the low byte is
+ // the first byte of the mask.
+ RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
+ MaskElement = MaskElement.lshr(8);
+ }
+ }
+ DecodePSHUFBMask(RawMask, Mask);
+ break;
+ }
+
+ auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
+ if (!MaskLoad)
+ return false;
+
+ SDValue Ptr = MaskLoad->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper)
+ Ptr = Ptr->getOperand(0);
+
+ auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
+ return false;
+
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
+ // FIXME: Support AVX-512 here.
+ Type *Ty = C->getType();
+ if (!Ty->isVectorTy() || (Ty->getVectorNumElements() != 16 &&
+ Ty->getVectorNumElements() != 32))
+ return false;
+
+ DecodePSHUFBMask(C, Mask);
+ break;
+ }
+
+ return false;
+ }
case X86ISD::VPERMI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
@@ -5175,17 +5427,29 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
break;
+ case X86ISD::MOVSLDUP:
+ DecodeMOVSLDUPMask(VT, Mask);
+ break;
+ case X86ISD::MOVSHDUP:
+ DecodeMOVSHDUPMask(VT, Mask);
+ break;
case X86ISD::MOVDDUP:
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVLPS:
- case X86ISD::MOVSHDUP:
- case X86ISD::MOVSLDUP:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
}
+ // If we have a fake unary shuffle, the shuffle mask is spread across two
+ // inputs that are actually the same node. Re-map the mask to always point
+ // into the first input.
+ if (IsFakeUnary)
+ for (int &M : Mask)
+ if (M >= (int)Mask.size())
+ M -= Mask.size();
+
return true;
}
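The IsFakeUnary remapping at the end of this function is easy to exercise on its own; the following standalone sketch (plain std::vector instead of the DAG types, names illustrative) shows the index fold-down for an UNPCKL-style mask.

#include <cassert>
#include <vector>

// When both operands of a target shuffle are the same node, fold mask
// entries that point into the "second" operand (indices >= N) back into
// the first so the mask reads from a single input.
static void remapFakeUnaryMask(std::vector<int> &Mask) {
  int Size = (int)Mask.size();
  for (int &M : Mask)
    if (M >= Size)
      M -= Size;
}

int main() {
  // UNPCKL on v4f32 with V1 == V2 decodes to {0, 4, 1, 5}; after the
  // remapping it reads entirely from the single input: {0, 0, 1, 1}.
  std::vector<int> Mask = {0, 4, 1, 5};
  remapFakeUnaryMask(Mask);
  assert((Mask == std::vector<int>{0, 0, 1, 1}));
  return 0;
}
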
@@ -5476,76 +5740,109 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
}
/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
-static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
- unsigned NonZeros, unsigned NumNonZero,
- unsigned NumZero, SelectionDAG &DAG,
+static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
- // We know there's at least one non-zero element
- unsigned FirstNonZeroIdx = 0;
- SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx);
- while (FirstNonZero.getOpcode() == ISD::UNDEF ||
- X86::isZeroNode(FirstNonZero)) {
- ++FirstNonZeroIdx;
- FirstNonZero = Op->getOperand(FirstNonZeroIdx);
+ // Find all zeroable elements.
+ bool Zeroable[4];
+ for (int i=0; i < 4; ++i) {
+ SDValue Elt = Op->getOperand(i);
+ Zeroable[i] = (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt));
+ }
+ assert(std::count_if(&Zeroable[0], &Zeroable[4],
+ [](bool M) { return !M; }) > 1 &&
+ "We expect at least two non-zero elements!");
+
+ // We only know how to deal with build_vector nodes where elements are either
+ // zeroable or extract_vector_elt with constant index.
+ SDValue FirstNonZero;
+ for (int i=0; i < 4; ++i) {
+ if (Zeroable[i])
+ continue;
+ SDValue Elt = Op->getOperand(i);
+ if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Elt.getOperand(1)))
+ return SDValue();
+ // Make sure that this node is extracting from a 128-bit vector.
+ MVT VT = Elt.getOperand(0).getSimpleValueType();
+ if (!VT.is128BitVector())
+ return SDValue();
+ if (!FirstNonZero.getNode())
+ FirstNonZero = Elt;
}
- if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(FirstNonZero.getOperand(1)))
- return SDValue();
+ assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
+ SDValue V1 = FirstNonZero.getOperand(0);
+ MVT VT = V1.getSimpleValueType();
- SDValue V = FirstNonZero.getOperand(0);
- MVT VVT = V.getSimpleValueType();
- if (!Subtarget->hasSSE41() || (VVT != MVT::v4f32 && VVT != MVT::v4i32))
- return SDValue();
+ // See if this build_vector can be lowered as a blend with zero.
+ SDValue Elt;
+ unsigned EltMaskIdx, EltIdx;
+ int Mask[4];
+ for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
+ if (Zeroable[EltIdx]) {
+ // The zero vector will be on the right hand side.
+ Mask[EltIdx] = EltIdx+4;
+ continue;
+ }
- unsigned FirstNonZeroDst =
- cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
- unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
- unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
- unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
+ Elt = Op->getOperand(EltIdx);
+ // By construction, Elt is an EXTRACT_VECTOR_ELT with constant index.
+ EltMaskIdx = cast<ConstantSDNode>(Elt.getOperand(1))->getZExtValue();
+ if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
+ break;
+ Mask[EltIdx] = EltIdx;
+ }
- for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) {
- SDValue Elem = Op.getOperand(Idx);
- if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem))
- continue;
+ if (EltIdx == 4) {
+ // Let the shuffle legalizer deal with blend operations.
+ SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
+ if (V1.getSimpleValueType() != VT)
+ V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), VT, V1);
+ return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, &Mask[0]);
+ }
- // TODO: What else can be here? Deal with it.
- if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return SDValue();
+ // See if we can lower this build_vector to an INSERTPS.
+ if (!Subtarget->hasSSE41())
+ return SDValue();
- // TODO: Some optimizations are still possible here
- // ex: Getting one element from a vector, and the rest from another.
- if (Elem.getOperand(0) != V)
- return SDValue();
+ SDValue V2 = Elt.getOperand(0);
+ if (Elt == FirstNonZero)
+ V1 = SDValue();
- unsigned Dst = cast<ConstantSDNode>(Elem.getOperand(1))->getZExtValue();
- if (Dst == Idx)
- ++CorrectIdx;
- else if (IncorrectIdx == -1U) {
- IncorrectIdx = Idx;
- IncorrectDst = Dst;
- } else
- // There was already one element with an incorrect index.
- // We can't optimize this case to an insertps.
- return SDValue();
+ bool CanFold = true;
+ for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
+ if (Zeroable[i])
+ continue;
+
+ SDValue Current = Op->getOperand(i);
+ SDValue SrcVector = Current->getOperand(0);
+ if (!V1.getNode())
+ V1 = SrcVector;
+ CanFold = SrcVector == V1 &&
+ cast<ConstantSDNode>(Current.getOperand(1))->getZExtValue() == i;
}
- if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) {
- SDLoc dl(Op);
- EVT VT = Op.getSimpleValueType();
- unsigned ElementMoveMask = 0;
- if (IncorrectIdx == -1U)
- ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4;
- else
- ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
+ if (!CanFold)
+ return SDValue();
- SDValue InsertpsMask =
- DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
- }
+ assert(V1.getNode() && "Expected at least two non-zero elements!");
+ if (V1.getSimpleValueType() != MVT::v4f32)
+ V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), MVT::v4f32, V1);
+ if (V2.getSimpleValueType() != MVT::v4f32)
+ V2 = DAG.getNode(ISD::BITCAST, SDLoc(V2), MVT::v4f32, V2);
- return SDValue();
+ // Ok, we can emit an INSERTPS instruction.
+ unsigned ZMask = 0;
+ for (int i = 0; i < 4; ++i)
+ if (Zeroable[i])
+ ZMask |= 1 << i;
+
+ unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
+ assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
+ SDValue Result = DAG.getNode(X86ISD::INSERTPS, SDLoc(Op), MVT::v4f32, V1, V2,
+ DAG.getIntPtrConstant(InsertPSMask));
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op), VT, Result);
}
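The InsertPSMask value built just above packs three fields into one byte; a minimal standalone sketch of that encoding (plain C++, helper name is illustrative) follows.

#include <cassert>

// INSERTPS immediate layout: bits [7:6] pick the source element, bits [5:4]
// the destination slot, and bits [3:0] zero out result elements.
static unsigned makeInsertPSImm(unsigned SrcIdx, unsigned DstIdx,
                                unsigned ZeroMask) {
  assert(SrcIdx < 4 && DstIdx < 4 && ZeroMask < 16 && "Out-of-range field");
  return (SrcIdx << 6) | (DstIdx << 4) | ZeroMask;
}

int main() {
  // Take element 2 of the source, place it in result slot 1, and zero
  // result slot 3: 0b10'01'1000 == 0x98.
  assert(makeInsertPSImm(2, 1, 0x8) == 0x98);
  return 0;
}
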
/// getVShift - Return a vector logical shift node.
@@ -5748,7 +6045,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
/// or SDValue() otherwise.
static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
- if (!Subtarget->hasFp256())
+ // VBROADCAST requires AVX.
+ // TODO: Splats could be generated for non-AVX CPUs using SSE
+ // instructions, but there's less potential gain for only 128-bit vectors.
+ if (!Subtarget->hasAVX())
return SDValue();
MVT VT = Op.getSimpleValueType();
@@ -5825,17 +6125,34 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
}
}
+ unsigned ScalarSize = Ld.getValueType().getSizeInBits();
bool IsGE256 = (VT.getSizeInBits() >= 256);
- // Handle the broadcasting a single constant scalar from the constant pool
- // into a vector. On Sandybridge it is still better to load a constant vector
+ // When optimizing for size, generate up to 5 extra bytes for a broadcast
+ // instruction to save 8 or more bytes of constant pool data.
+ // TODO: If multiple splats are generated to load the same constant,
+ // it may be detrimental to overall size. There needs to be a way to detect
+ // that condition to know if this is truly a size win.
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool OptForSize = F->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+ // Handle broadcasting a single constant scalar from the constant pool
+ // into a vector.
+ // On Sandybridge (no AVX2), it is still better to load a constant vector
// from the constant pool and not to broadcast it from a scalar.
- if (ConstSplatVal && Subtarget->hasInt256()) {
+ // But override that restriction when optimizing for size.
+ // TODO: Check if splatting is recommended for other AVX-capable CPUs.
+ if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
- unsigned ScalarSize = CVT.getSizeInBits();
- if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) {
+ // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
+ // For size optimization, also splat v2f64 and v2i64, and for size opt
+ // with AVX2, also splat i8 and i16.
+ // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
+ if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
+ (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
@@ -5856,7 +6173,6 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
}
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
- unsigned ScalarSize = Ld.getValueType().getSizeInBits();
// Handle AVX2 in-register broadcasts.
if (!IsLoad && Subtarget->hasInt256() &&
@@ -6241,11 +6557,6 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
VT == MVT::v2f64) && "build_vector with an invalid type found!");
- // Don't try to emit a VSELECT that cannot be lowered into a blend.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
- return SDValue();
-
// Odd-numbered elements in the input build vector are obtained from
// adding two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
@@ -6257,14 +6568,14 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
for (unsigned i = 0, e = NumElts; i != e; i++) {
SDValue Op = BV->getOperand(i);
-
+
// Skip 'undef' values.
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::UNDEF) {
std::swap(ExpectedOpcode, NextExpectedOpcode);
continue;
}
-
+
// Early exit if we found an unexpected opcode.
if (Opcode != ExpectedOpcode)
return SDValue();
@@ -6318,34 +6629,11 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
std::swap(ExpectedOpcode, NextExpectedOpcode);
}
- // Don't try to fold this build_vector into a VSELECT if it has
- // too many UNDEF operands.
+ // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
- InVec1.getOpcode() != ISD::UNDEF) {
- // Emit a sequence of vector add and sub followed by a VSELECT.
- // The new VSELECT will be lowered into a BLENDI.
- // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
- // and emit a single ADDSUB instruction.
- SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
- SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
-
- // Construct the VSELECT mask.
- EVT MaskVT = VT.changeVectorElementTypeToInteger();
- EVT SVT = MaskVT.getVectorElementType();
- unsigned SVTBits = SVT.getSizeInBits();
- SmallVector<SDValue, 8> Ops;
-
- for (unsigned i = 0, e = NumElts; i != e; ++i) {
- APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
- APInt::getAllOnesValue(SVTBits);
- SDValue Constant = DAG.getConstant(Value, SVT);
- Ops.push_back(Constant);
- }
+ InVec1.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
- return DAG.getSelect(DL, VT, Mask, Sub, Add);
- }
-
return SDValue();
}
@@ -6581,6 +6869,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
+
+ // If using the new shuffle lowering, just directly insert this.
+ if (ExperimentalVectorShuffleLowering)
+ return DAG.getNode(
+ ISD::BITCAST, dl, VT,
+ getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG));
+
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
// Now we have our 32-bit value zero extended in the low element of
@@ -6654,6 +6949,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // If using the new shuffle lowering, just directly insert this.
+ if (ExperimentalVectorShuffleLowering)
+ return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
+
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
SmallVector<int, 8> MaskVec;
@@ -6731,8 +7030,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
if (EVTBits == 32 && NumElems == 4) {
- SDValue V = LowerBuildVectorv4x32(Op, NumElems, NonZeros, NumNonZero,
- NumZero, DAG, Subtarget, *this);
+ SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this);
if (V.getNode())
return V;
}
@@ -6923,6 +7221,89 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
return true;
}
+/// \brief Test whether there are elements crossing 128-bit lanes in this
+/// shuffle mask.
+///
+/// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
+/// and we routinely test for these.
+static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
+ int LaneSize = 128 / VT.getScalarSizeInBits();
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i)
+ if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
+ return true;
+ return false;
+}
+
+/// \brief Test whether a shuffle mask is equivalent within each 128-bit lane.
+///
+/// This checks a shuffle mask to see if it is performing the same
+/// 128-bit lane-relative shuffle in each 128-bit lane. This trivially implies
+/// that it is also not lane-crossing. It may however involve a blend from the
+/// same lane of a second vector.
+///
+/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
+/// non-trivial to compute in the face of undef lanes. The representation is
+/// *not* suitable for use with existing 128-bit shuffles as it will contain
+/// entries from both V1 and V2 inputs to the wider mask.
+static bool
+is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ int LaneSize = 128 / VT.getScalarSizeInBits();
+ RepeatedMask.resize(LaneSize, -1);
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((Mask[i] % Size) / LaneSize != i / LaneSize)
+ // This entry crosses lanes, so there is no way to model this shuffle.
+ return false;
+
+ // Ok, handle the in-lane shuffles by detecting if and when they repeat.
+ if (RepeatedMask[i % LaneSize] == -1)
+ // This is the first non-undef entry in this slot of a 128-bit lane.
+ RepeatedMask[i % LaneSize] =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
+ else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
+ // Found a mismatch with the repeated mask.
+ return false;
+ }
+ return true;
+}
+
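As a concrete illustration of the repeated-lane check, this standalone sketch restates the same logic over plain integer vectors; as noted above, entries taken from the second input are biased by the full mask size.

#include <cassert>
#include <vector>

// Each 128-bit lane must apply the same lane-relative shuffle; -1 acts as a
// wildcard, and second-input entries are stored offset by the mask size.
static bool isLaneRepeated(const std::vector<int> &Mask, int LaneSize,
                           std::vector<int> &Repeated) {
  Repeated.assign(LaneSize, -1);
  int Size = (int)Mask.size();
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      return false; // crosses a 128-bit lane
    int LaneRel = Mask[i] < Size ? Mask[i] % LaneSize
                                 : Mask[i] % LaneSize + Size;
    if (Repeated[i % LaneSize] == -1)
      Repeated[i % LaneSize] = LaneRel;
    else if (Repeated[i % LaneSize] != LaneRel)
      return false; // the lanes disagree
  }
  return true;
}

int main() {
  // A v8f32-style unpack-low of V1 and V2 repeats the same per-lane pattern
  // in both 128-bit lanes; entries >= 8 refer to the second input.
  std::vector<int> Repeated;
  std::vector<int> Mask = {0, 8, 1, 9, 4, 12, 5, 13};
  assert(isLaneRepeated(Mask, /*LaneSize=*/4, Repeated));
  assert((Repeated == std::vector<int>{0, 8, 1, 9}));
  return 0;
}
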
+// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
+// 2013 will allow us to use it as a non-type template parameter.
+namespace {
+
+/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
+///
+/// See its documentation for details.
+bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
+ if (Mask.size() != Args.size())
+ return false;
+ for (int i = 0, e = Mask.size(); i < e; ++i) {
+ assert(*Args[i] >= 0 && "Arguments must be positive integers!");
+ if (Mask[i] != -1 && Mask[i] != *Args[i])
+ return false;
+ }
+ return true;
+}
+
+} // namespace
+
+/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
+/// arguments.
+///
+/// This is a fast way to test a shuffle mask against a fixed pattern:
+///
+/// if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... }
+///
+/// It returns true if the mask is exactly as wide as the argument list, and
+/// each element of the mask is either -1 (signifying undef) or the value given
+/// in the argument.
+static const VariadicFunction1<
+ bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
+
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
@@ -6947,6 +7328,764 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
return DAG.getConstant(Imm, MVT::i8);
}
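The 8-bit immediate returned here follows the usual x86 convention of two selector bits per result lane. A small standalone sketch of that encoding, assuming undef lanes default to selector 0 as the surrounding masks suggest:

#include <cassert>

// Result lane i takes the source lane encoded in Imm bits [2*i+1 : 2*i].
static unsigned shuffleImm8ForMask(const int (&Mask)[4]) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i) {
    int M = Mask[i] < 0 ? 0 : Mask[i]; // treat undef as lane 0
    assert(M < 4 && "Each lane selector is only 2 bits wide");
    Imm |= (unsigned)M << (2 * i);
  }
  return Imm;
}

int main() {
  // The full reverse {3,2,1,0} encodes as 0b00011011 == 0x1B.
  int Reverse[4] = {3, 2, 1, 0};
  assert(shuffleImm8ForMask(Reverse) == 0x1B);
  // A broadcast of lane 0 is simply 0.
  int Splat0[4] = {0, 0, 0, 0};
  assert(shuffleImm8ForMask(Splat0) == 0x00);
  return 0;
}
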
+/// \brief Try to emit a blend instruction for a shuffle.
+///
+/// This doesn't do any checks for the availability of instructions for blending
+/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
+/// be matched in the backend with the type given. What it does check for is
+/// that the shuffle mask is in fact a blend.
+static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+
+ unsigned BlendMask = 0;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] >= Size) {
+ if (Mask[i] != i + Size)
+ return SDValue(); // Shuffled V2 input!
+ BlendMask |= 1u << i;
+ continue;
+ }
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return SDValue(); // Shuffled V1 input!
+ }
+ switch (VT.SimpleTy) {
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4f64:
+ case MVT::v8f32:
+ return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8));
+
+ case MVT::v4i64:
+ case MVT::v8i32:
+ assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ // FALLTHROUGH
+ case MVT::v2i64:
+ case MVT::v4i32:
+ // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
+ // that instruction.
+ if (Subtarget->hasAVX2()) {
+ // Scale the blend by the number of 32-bit dwords per element.
+ int Scale = VT.getScalarSizeInBits() / 32;
+ BlendMask = 0;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= Size)
+ for (int j = 0; j < Scale; ++j)
+ BlendMask |= 1u << (i * Scale + j);
+
+ MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
+ V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8)));
+ }
+ // FALLTHROUGH
+ case MVT::v8i16: {
+ // For integer shuffles we need to expand the mask and cast the inputs to
+ // v8i16s prior to blending.
+ int Scale = 8 / VT.getVectorNumElements();
+ BlendMask = 0;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= Size)
+ for (int j = 0; j < Scale; ++j)
+ BlendMask |= 1u << (i * Scale + j);
+
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8)));
+ }
+
+ case MVT::v16i16: {
+ assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ SmallVector<int, 8> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
+ // We can lower these with PBLENDW which is mirrored across 128-bit lanes.
+ assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
+ BlendMask = 0;
+ for (int i = 0; i < 8; ++i)
+ if (RepeatedMask[i] >= 16)
+ BlendMask |= 1u << i;
+ return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
+ DAG.getConstant(BlendMask, MVT::i8));
+ }
+ }
+ // FALLTHROUGH
+ case MVT::v32i8: {
+ assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
+ // Scale the blend by the number of bytes per element.
+ int Scale = VT.getScalarSizeInBits() / 8;
+ assert(Mask.size() * Scale == 32 && "Not a 256-bit vector!");
+
+ // Compute the VSELECT mask. Note that VSELECT is really confusing in the
+ // mix of LLVM's code generator and the x86 backend. We tell the code
+ // generator that boolean values in the elements of an x86 vector register
+ // are -1 for true and 0 for false. We then use the LLVM semantics of 'true'
+ // mapping a select to operand #1, and 'false' mapping to operand #2. The
+ // reality in x86 is that vector masks (pre-AVX-512) use only the high bit
+ // of the element (the remaining are ignored) and 0 in that high bit would
+ // mean operand #1 while 1 in the high bit would mean operand #2. So while
+ // the LLVM model for boolean values in vector elements gets the relevant
+ // bit set, it is set backwards and over constrained relative to x86's
+ // actual model.
+ SDValue VSELECTMask[32];
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ for (int j = 0; j < Scale; ++j)
+ VSELECTMask[Scale * i + j] =
+ Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
+ : DAG.getConstant(Mask[i] < Size ? -1 : 0, MVT::i8);
+
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V2);
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::VSELECT, DL, MVT::v32i8,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, VSELECTMask),
+ V1, V2));
+ }
+
+ default:
+ llvm_unreachable("Not a supported integer vector type!");
+ }
+}
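A standalone sketch of the blend-immediate computation used in this routine (plain C++, independent of SelectionDAG; the Scale parameter models the widening to 32-bit dwords described above):

#include <cassert>
#include <vector>

// Bit i of the BLENDI immediate selects V2 for result lane i; for wider
// elements emitted on a narrower-granularity blend, each bit is replicated
// Scale times. Returns -1 if the mask is not a pure blend.
static int computeBlendImm(const std::vector<int> &Mask, int Scale = 1) {
  int Size = (int)Mask.size();
  unsigned Imm = 0;
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0)
      continue; // undef lanes can come from either input
    if (Mask[i] != i && Mask[i] != i + Size)
      return -1; // the element is shuffled, not just blended
    if (Mask[i] >= Size)
      for (int j = 0; j < Scale; ++j)
        Imm |= 1u << (i * Scale + j);
  }
  return (int)Imm;
}

int main() {
  // v4i32 blend taking lanes 1 and 3 from V2: immediate 0b1010.
  assert(computeBlendImm({0, 5, 2, 7}) == 0xA);
  // A v2i64 blend {0, 3} scaled to 32-bit dwords (Scale = 2) sets two bits
  // per 64-bit element: 0b1100.
  assert(computeBlendImm({0, 3}, /*Scale=*/2) == 0xC);
  // {1, 4, 2, 7} is not a blend: lane 0 takes element 1 of V1.
  assert(computeBlendImm({1, 4, 2, 7}) == -1);
  return 0;
}
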
+
+/// \brief Generic routine to lower a shuffle and blend as a decomposed set of
+/// unblended shuffles followed by an unshuffled blend.
+///
+/// This matches the extremely common pattern for handling combined
+/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
+/// operations.
+static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT,
+ SDValue V1,
+ SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ // Shuffle the input elements into the desired positions in V1 and V2 and
+ // blend them together.
+ SmallVector<int, 32> V1Mask(Mask.size(), -1);
+ SmallVector<int, 32> V2Mask(Mask.size(), -1);
+ SmallVector<int, 32> BlendMask(Mask.size(), -1);
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= 0 && Mask[i] < Size) {
+ V1Mask[i] = Mask[i];
+ BlendMask[i] = i;
+ } else if (Mask[i] >= Size) {
+ V2Mask[i] = Mask[i] - Size;
+ BlendMask[i] = i + Size;
+ }
+
+ V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
+ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
+ return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
+}
+
+/// \brief Try to lower a vector shuffle as a byte rotation.
+///
+/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
+/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
+/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
+/// try to generically lower a vector shuffle through such a pattern. It
+/// does not check for the profitability of lowering either as PALIGNR or
+/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
+/// This matches shuffle vectors that look like:
+///
+/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
+///
+/// Essentially it concatenates V1 and V2, shifts right by some number of
+/// elements, and takes the low elements as the result. Note that while this is
+/// specified as a *right shift* because x86 is little-endian, it is a *left
+/// rotate* of the vector lanes.
+///
+/// Note that this only handles 128-bit vector widths currently.
+static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2,
+ ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+
+ // We need to detect various ways of spelling a rotation:
+ // [11, 12, 13, 14, 15, 0, 1, 2]
+ // [-1, 12, 13, 14, -1, -1, 1, -1]
+ // [-1, -1, -1, -1, -1, -1, 1, 2]
+ // [ 3, 4, 5, 6, 7, 8, 9, 10]
+ // [-1, 4, 5, 6, -1, -1, 9, -1]
+ // [-1, 4, 5, 6, -1, -1, -1, -1]
+ int Rotation = 0;
+ SDValue Lo, Hi;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] == -1)
+ continue;
+ assert(Mask[i] >= 0 && "Only -1 is a valid negative mask element!");
+
+ // Based on the mod-Size value of this mask element determine where
+ // a rotated vector would have started.
+ int StartIdx = i - (Mask[i] % Size);
+ if (StartIdx == 0)
+ // The identity rotation isn't interesting, stop.
+ return SDValue();
+
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, it must be how much of the head.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ // The rotations don't match, so we can't match this mask.
+ return SDValue();
+
+ // Compute which value this mask is pointing at.
+ SDValue MaskV = Mask[i] < Size ? V1 : V2;
+
+ // Compute which of the two target values this index should be assigned to.
+ // This reflects whether the high elements are remaining or the low elements
+ // are remaining.
+ SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (!TargetV)
+ TargetV = MaskV;
+ else if (TargetV != MaskV)
+ // This may be a rotation, but it pulls from the inputs in some
+ // unsupported interleaving.
+ return SDValue();
+ }
+
+ // Check that we successfully analyzed the mask, and normalize the results.
+ assert(Rotation != 0 && "Failed to locate a viable rotation!");
+ assert((Lo || Hi) && "Failed to find a rotated input vector!");
+ if (!Lo)
+ Lo = Hi;
+ else if (!Hi)
+ Hi = Lo;
+
+ assert(VT.getSizeInBits() == 128 &&
+ "Rotate-based lowering only supports 128-bit lowering!");
+ assert(Mask.size() <= 16 &&
+ "Can shuffle at most 16 bytes in a 128-bit vector!");
+
+ // The actual rotate instruction rotates bytes, so we need to scale the
+ // rotation based on how many bytes are in the vector.
+ int Scale = 16 / Mask.size();
+
+ // SSSE3 targets can use the palignr instruction
+ if (Subtarget->hasSSSE3()) {
+ // Cast the inputs to v16i8 to match PALIGNR.
+ Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Hi);
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PALIGNR, DL, MVT::v16i8, Hi, Lo,
+ DAG.getConstant(Rotation * Scale, MVT::i8)));
+ }
+
+ // Default SSE2 implementation
+ int LoByteShift = 16 - Rotation * Scale;
+ int HiByteShift = Rotation * Scale;
+
+ // Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ.
+ Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi);
+
+ SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
+ DAG.getConstant(8 * LoByteShift, MVT::i8));
+ SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
+ DAG.getConstant(8 * HiByteShift, MVT::i8));
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
+}
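The rotation-detection loop above can be exercised separately from the DAG plumbing; the sketch below checks only the rotation amount and deliberately leaves out the Lo/Hi input assignment and instruction selection.

#include <cassert>
#include <vector>

// Every defined mask element must imply the same rotation amount, computed
// from its position modulo the vector width. Returns -1 if Mask is not a
// (non-identity) rotation.
static int matchRotation(const std::vector<int> &Mask) {
  int Size = (int)Mask.size();
  int Rotation = 0;
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0)
      continue; // undef elements are unconstrained
    int StartIdx = i - (Mask[i] % Size);
    if (StartIdx == 0)
      return -1; // the identity rotation isn't interesting
    int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = Candidate;
    else if (Rotation != Candidate)
      return -1; // elements disagree on the rotation amount
  }
  return Rotation;
}

int main() {
  // Both spellings from the comment above imply a rotation by 3 elements,
  // which for a v8i16 shuffle scales to a 6-byte PALIGNR immediate.
  assert(matchRotation({11, 12, 13, 14, 15, 0, 1, 2}) == 3);
  assert(matchRotation({-1, 12, 13, 14, -1, -1, 1, -1}) == 3);
  assert(matchRotation({0, 1, 2, 3, 4, 5, 6, 7}) == -1); // identity
  return 0;
}
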
+
+/// \brief Compute whether each element of a shuffle is zeroable.
+///
+/// A "zeroable" vector shuffle element is one which can be lowered to zero.
+/// Either it is an undef element in the shuffle mask, the element of the input
+/// referenced is undef, or the element of the input referenced is known to be
+/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
+/// as many lanes with this technique as possible to simplify the remaining
+/// shuffle.
+static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
+ SDValue V1, SDValue V2) {
+ SmallBitVector Zeroable(Mask.size(), false);
+
+ bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ int M = Mask[i];
+ // Handle the easy cases.
+ if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
+ Zeroable[i] = true;
+ continue;
+ }
+
+ // If this is an index into a build_vector node, dig out the input value and
+ // use it.
+ SDValue V = M < Size ? V1 : V2;
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ continue;
+
+ SDValue Input = V.getOperand(M % Size);
+ // The UNDEF opcode check really should be dead code here, but not quite
+ // worth asserting on (it isn't invalid, just unexpected).
+ if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
+ Zeroable[i] = true;
+ }
+
+ return Zeroable;
+}
+
+/// \brief Try to lower a vector shuffle as a byte shift (shifts in zeros).
+///
+/// Attempts to match a shuffle mask against the PSRLDQ and PSLLDQ SSE2
+/// byte-shift instructions. The mask must consist of a shifted sequential
+/// shuffle from one of the input vectors and zeroable elements for the
+/// remaining 'shifted in' elements.
+///
+/// Note that this only handles 128-bit vector widths currently.
+static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ int Size = Mask.size();
+ int Scale = 16 / Size;
+
+ auto isSequential = [](int Base, int StartIndex, int EndIndex, int MaskOffset,
+ ArrayRef<int> Mask) {
+ for (int i = StartIndex; i < EndIndex; i++) {
+ if (Mask[i] < 0)
+ continue;
+ if (i + Base != Mask[i] - MaskOffset)
+ return false;
+ }
+ return true;
+ };
+
+ for (int Shift = 1; Shift < Size; Shift++) {
+ int ByteShift = Shift * Scale;
+
+ // PSRLDQ : (little-endian) right byte shift
+ // [ 5, 6, 7, zz, zz, zz, zz, zz]
+ // [ -1, 5, 6, 7, zz, zz, zz, zz]
+ // [ 1, 2, -1, -1, -1, -1, zz, zz]
+ bool ZeroableRight = true;
+ for (int i = Size - Shift; i < Size; i++) {
+ ZeroableRight &= Zeroable[i];
+ }
+
+ if (ZeroableRight) {
+ bool ValidShiftRight1 = isSequential(Shift, 0, Size - Shift, 0, Mask);
+ bool ValidShiftRight2 = isSequential(Shift, 0, Size - Shift, Size, Mask);
+
+ if (ValidShiftRight1 || ValidShiftRight2) {
+ // Cast the inputs to v2i64 to match PSRLDQ.
+ SDValue &TargetV = ValidShiftRight1 ? V1 : V2;
+ SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, TargetV);
+ SDValue Shifted = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, V,
+ DAG.getConstant(ByteShift * 8, MVT::i8));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
+ }
+ }
+
+ // PSLLDQ : (little-endian) left byte shift
+ // [ zz, 0, 1, 2, 3, 4, 5, 6]
+ // [ zz, zz, -1, -1, 2, 3, 4, -1]
+ // [ zz, zz, zz, zz, zz, zz, -1, 1]
+ bool ZeroableLeft = true;
+ for (int i = 0; i < Shift; i++) {
+ ZeroableLeft &= Zeroable[i];
+ }
+
+ if (ZeroableLeft) {
+ bool ValidShiftLeft1 = isSequential(-Shift, Shift, Size, 0, Mask);
+ bool ValidShiftLeft2 = isSequential(-Shift, Shift, Size, Size, Mask);
+
+ if (ValidShiftLeft1 || ValidShiftLeft2) {
+ // Cast the inputs to v2i64 to match PSLLDQ.
+ SDValue &TargetV = ValidShiftLeft1 ? V1 : V2;
+ SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, TargetV);
+ SDValue Shifted = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, V,
+ DAG.getConstant(ByteShift * 8, MVT::i8));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// \brief Lower a vector shuffle as a zero or any extension.
+///
+/// Given a specific number of elements, element bit width, and extension
+/// stride, produce either a zero or any extension based on the available
+/// features of the subtarget.
+static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
+ SDLoc DL, MVT VT, int NumElements, int Scale, bool AnyExt, SDValue InputV,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ assert(Scale > 1 && "Need a scale to extend.");
+ int EltBits = VT.getSizeInBits() / NumElements;
+ assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
+ "Only 8, 16, and 32 bit elements can be extended.");
+ assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
+
+ // Found a valid zext mask! Try various lowering strategies based on the
+ // input type and available ISA extensions.
+ if (Subtarget->hasSSE41()) {
+ MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
+ NumElements / Scale);
+ InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
+ }
+
+ // For any extends we can cheat for larger element sizes and use shuffle
+ // instructions that can fold with a load and/or copy.
+ if (AnyExt && EltBits == 32) {
+ int PSHUFDMask[4] = {0, -1, 1, -1};
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+ }
+ if (AnyExt && EltBits == 16 && Scale > 2) {
+ int PSHUFDMask[4] = {0, -1, 0, -1};
+ InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG));
+ int PSHUFHWMask[4] = {1, -1, -1, -1};
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV),
+ getV4X86ShuffleImm8ForMask(PSHUFHWMask, DAG)));
+ }
+
+ // If this would require more than 2 unpack instructions to expand, use
+ // pshufb when available. We can only use more than 2 unpack instructions
+ // when zero extending i8 elements which also makes it easier to use pshufb.
+ if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) {
+ assert(NumElements == 16 && "Unexpected byte vector width!");
+ SDValue PSHUFBMask[16];
+ for (int i = 0; i < 16; ++i)
+ PSHUFBMask[i] =
+ DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, MVT::i8);
+ InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
+ DAG.getNode(ISD::BUILD_VECTOR, DL,
+ MVT::v16i8, PSHUFBMask)));
+ }
+
+ // Otherwise emit a sequence of unpacks.
+ do {
+ MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
+ SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
+ : getZeroVector(InputVT, Subtarget, DAG, DL);
+ InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV);
+ InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext);
+ Scale /= 2;
+ EltBits *= 2;
+ NumElements /= 2;
+ } while (Scale > 1);
+ return DAG.getNode(ISD::BITCAST, DL, VT, InputV);
+}
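The PSHUFB control bytes built above can be illustrated standalone; 0x80 is the PSHUFB "write a zero byte" control value.

#include <cassert>
#include <cstdint>
#include <vector>

// Byte i of the result takes source byte i/Scale when i is a multiple of
// Scale, and is zeroed (control 0x80) otherwise.
static std::vector<uint8_t> pshufbZExtControl(int Scale) {
  std::vector<uint8_t> Control(16);
  for (int i = 0; i < 16; ++i)
    Control[i] = (i % Scale == 0) ? (uint8_t)(i / Scale) : 0x80;
  return Control;
}

int main() {
  // Zero-extending i8 -> i32 (Scale = 4) spreads bytes 0..3 across the
  // result and zeroes everything in between.
  std::vector<uint8_t> C = pshufbZExtControl(4);
  assert(C[0] == 0 && C[4] == 1 && C[8] == 2 && C[12] == 3);
  assert(C[1] == 0x80 && C[15] == 0x80);
  return 0;
}
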
+
+/// \brief Try to lower a vector shuffle as a zero extension on any microarch.
+///
+/// This routine will try to do everything in its power to cleverly lower
+/// a shuffle which happens to match the pattern of a zero extend. It doesn't
+/// check for the profitability of this lowering, it tries to aggressively
+/// match this pattern. It will use all of the micro-architectural details it
+/// can to emit an efficient lowering. It handles both blends with all-zero
+/// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to
+/// masking out later).
+///
+/// The reason we have dedicated lowering for zext-style shuffles is that they
+/// are both incredibly common and often quite performance sensitive.
+static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
+ SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ int Bits = VT.getSizeInBits();
+ int NumElements = Mask.size();
+
+ // Define a helper function to check a particular ext-scale and lower to it if
+ // valid.
+ auto Lower = [&](int Scale) -> SDValue {
+ SDValue InputV;
+ bool AnyExt = true;
+ for (int i = 0; i < NumElements; ++i) {
+ if (Mask[i] == -1)
+ continue; // Valid anywhere but doesn't tell us anything.
+ if (i % Scale != 0) {
+ // Each of the extend elements needs to be zeroable.
+ if (!Zeroable[i])
+ return SDValue();
+
+ // We are no longer in the any-extend case.
+ AnyExt = false;
+ continue;
+ }
+
+ // Each of the base elements needs to be consecutive indices into the
+ // same input vector.
+ SDValue V = Mask[i] < NumElements ? V1 : V2;
+ if (!InputV)
+ InputV = V;
+ else if (InputV != V)
+ return SDValue(); // Flip-flopping inputs.
+
+ if (Mask[i] % NumElements != i / Scale)
+ return SDValue(); // Non-consecutive strided elements.
+ }
+
+ // If we fail to find an input, we have a zero-shuffle which should always
+ // have already been handled.
+ // FIXME: Maybe handle this here in case during blending we end up with one?
+ if (!InputV)
+ return SDValue();
+
+ return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
+ DL, VT, NumElements, Scale, AnyExt, InputV, Subtarget, DAG);
+ };
+
+ // The widest scale possible for extending is to a 64-bit integer.
+ assert(Bits % 64 == 0 &&
+ "The number of bits in a vector must be divisible by 64 on x86!");
+ int NumExtElements = Bits / 64;
+
+ // Each iteration, try extending the elements half as much, but into twice as
+ // many elements.
+ for (; NumExtElements < NumElements; NumExtElements *= 2) {
+ assert(NumElements % NumExtElements == 0 &&
+ "The input vector size must be divisble by the extended size.");
+ if (SDValue V = Lower(NumElements / NumExtElements))
+ return V;
+ }
+
+ // No viable ext lowering found.
+ return SDValue();
+}
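A simplified standalone sketch of the per-scale matching performed by the Lower lambda above (it omits the check that all base elements come from a single input vector):

#include <cassert>
#include <vector>

// For a given Scale, every position i that is a multiple of Scale must read
// element i/Scale of an input, and every other position must be zeroable.
static bool matchesZExt(const std::vector<int> &Mask,
                        const std::vector<bool> &Zeroable, int Scale) {
  int NumElements = (int)Mask.size();
  for (int i = 0; i < NumElements; ++i) {
    if (Mask[i] < 0)
      continue; // undef is valid anywhere
    if (i % Scale != 0) {
      if (!Zeroable[i])
        return false; // extended positions must be zeroable
      continue;
    }
    if (Mask[i] % NumElements != i / Scale)
      return false; // base elements must be consecutive, strided by Scale
  }
  return true;
}

int main() {
  // A v8i16 mask that zero-extends the low four i16 elements to i32:
  // {0, z, 1, z, 2, z, 3, z} where the 'z' lanes are known zero.
  std::vector<int> Mask = {0, 8, 1, 8, 2, 8, 3, 8};
  std::vector<bool> Zeroable = {false, true, false, true,
                                false, true, false, true};
  assert(matchesZExt(Mask, Zeroable, /*Scale=*/2));
  assert(!matchesZExt(Mask, Zeroable, /*Scale=*/4));
  return 0;
}
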
+
+/// \brief Try to get a scalar value for a specific element of a vector.
+///
+/// Looks through BUILD_VECTOR and SCALAR_TO_VECTOR nodes to find a scalar.
+static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
+ SelectionDAG &DAG) {
+ MVT VT = V.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ // If the bitcasts shift the element size, we can't extract an equivalent
+ // element from it.
+ MVT NewVT = V.getSimpleValueType();
+ if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
+ return SDValue();
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR ||
+ (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR))
+ return DAG.getNode(ISD::BITCAST, SDLoc(V), EltVT, V.getOperand(Idx));
+
+ return SDValue();
+}
+
+/// \brief Helper to test for a load that can be folded with x86 shuffles.
+///
+/// This is particularly important because the set of instructions varies
+/// significantly based on whether the operand is a load or not.
+static bool isShuffleFoldableLoad(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ return ISD::isNON_EXTLoad(V.getNode());
+}
+
+/// \brief Try to lower insertion of a single element into a zero vector.
+///
+/// This is a common pattern for which we have especially efficient lowerings
+/// across all subtarget feature sets.
+static SDValue lowerVectorShuffleAsElementInsertion(
+ MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ MVT ExtVT = VT;
+ MVT EltVT = VT.getVectorElementType();
+
+ int V2Index = std::find_if(Mask.begin(), Mask.end(),
+ [&Mask](int M) { return M >= (int)Mask.size(); }) -
+ Mask.begin();
+ bool IsV1Zeroable = true;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (i != V2Index && !Zeroable[i]) {
+ IsV1Zeroable = false;
+ break;
+ }
+
+ // Check for a single input from a SCALAR_TO_VECTOR node.
+ // FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
+ // all the smarts here sunk into that routine. However, the current
+ // lowering of BUILD_VECTOR makes that nearly impossible until the old
+ // vector shuffle lowering is dead.
+ if (SDValue V2S = getScalarValueForVectorElement(
+ V2, Mask[V2Index] - Mask.size(), DAG)) {
+ // We need to zext the scalar if it is smaller than an i32.
+ V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
+ if (EltVT == MVT::i8 || EltVT == MVT::i16) {
+ // Using zext to expand a narrow element won't work for non-zero
+ // insertions.
+ if (!IsV1Zeroable)
+ return SDValue();
+
+ // Zero-extend directly to i32.
+ ExtVT = MVT::v4i32;
+ V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
+ }
+ V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
+ } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
+ EltVT == MVT::i16) {
+ // Either not inserting from the low element of the input or the input
+ // element size is too small to use VZEXT_MOVL to clear the high bits.
+ return SDValue();
+ }
+
+ if (!IsV1Zeroable) {
+ // If V1 can't be treated as a zero vector we have fewer options to lower
+ // this. We can't support integer vectors or non-zero targets cheaply, and
+ // the V1 elements can't be permuted in any way.
+ assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
+ if (!VT.isFloatingPoint() || V2Index != 0)
+ return SDValue();
+ SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end());
+ V1Mask[V2Index] = -1;
+ if (!isNoopShuffleMask(V1Mask))
+ return SDValue();
+ // This is essentially a special case blend operation, but if we have
+ // general purpose blend operations, they are always faster. Bail and let
+ // the rest of the lowering handle these as blends.
+ if (Subtarget->hasSSE41())
+ return SDValue();
+
+ // Otherwise, use MOVSD or MOVSS.
+ assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
+ "Only two types of floating point element types to handle!");
+ return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
+ ExtVT, V1, V2);
+ }
+
+ V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
+ if (ExtVT != VT)
+ V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+
+ if (V2Index != 0) {
+ // If we have 4 or fewer lanes we can cheaply shuffle the element into
+ // the desired position. Otherwise it is more efficient to do a vector
+ // shift left. We know that we can do a vector shift left because all
+ // the inputs are zero.
+ if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
+ SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
+ V2Shuffle[V2Index] = 0;
+ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
+ } else {
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2);
+ V2 = DAG.getNode(
+ X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
+ DAG.getConstant(
+ V2Index * EltVT.getSizeInBits(),
+ DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
+ V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
+ }
+ }
+ return V2;
+}
+
+/// \brief Try to lower broadcast of a single element.
+///
+/// For convenience, this code also bundles all of the subtarget feature set
+/// filtering. While a little annoying to re-dispatch on type here, there isn't
+/// a convenient way to factor it out.
+static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V,
+ ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ if (VT.isInteger() && !Subtarget->hasAVX2())
+ return SDValue();
+
+ // Check that the mask is a broadcast.
+ int BroadcastIdx = -1;
+ for (int M : Mask)
+ if (M >= 0 && BroadcastIdx == -1)
+ BroadcastIdx = M;
+ else if (M >= 0 && M != BroadcastIdx)
+ return SDValue();
+
+ assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
+ "a sorted mask where the broadcast "
+ "comes from V1.");
+
+ // Go up the chain of (vector) values to try and find a scalar load that
+ // we can combine with the broadcast.
+ for (;;) {
+ switch (V.getOpcode()) {
+ case ISD::CONCAT_VECTORS: {
+ int OperandSize = Mask.size() / V.getNumOperands();
+ V = V.getOperand(BroadcastIdx / OperandSize);
+ BroadcastIdx %= OperandSize;
+ continue;
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
+ auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
+ if (!ConstantIdx)
+ break;
+
+ int BeginIdx = (int)ConstantIdx->getZExtValue();
+ int EndIdx =
+ BeginIdx + (int)VInner.getValueType().getVectorNumElements();
+ if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
+ BroadcastIdx -= BeginIdx;
+ V = VInner;
+ } else {
+ V = VOuter;
+ }
+ continue;
+ }
+ }
+ break;
+ }
+
+ // Check if this is a broadcast of a scalar. We special case lowering
+ // for scalars so that we can more effectively fold with loads.
+ if (V.getOpcode() == ISD::BUILD_VECTOR ||
+ (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
+ V = V.getOperand(BroadcastIdx);
+
+ // If the scalar isn't a load we can't broadcast from it in AVX1, only with
+ // AVX2.
+ if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
+ return SDValue();
+ } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
+ // We can't broadcast from a vector register w/o AVX2, and we can only
+ // broadcast from the zero-element of a vector register.
+ return SDValue();
+ }
+
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V);
+}
+
/// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
///
/// This is the basis function for the 2-lane 64-bit shuffles as we have full
@@ -6969,12 +8108,56 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// Straight shuffle of a single input vector. Simulate this by using the
// single input as both of the "inputs" to this instruction..
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
+
+ if (Subtarget->hasAVX()) {
+ // If we have AVX, we can use VPERMILPD which will allow folding a load
+ // into the shuffle.
+ return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+ }
+
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1,
DAG.getConstant(SHUFPDMask, MVT::i8));
}
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+
+ // If we have a single input, insert that into V1 if we can do so cheaply.
+ if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v2f64, DL, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+ // Try inverting the insertion since for v2 masks it is easy to do and we
+ // can't reliably sort the mask one way or the other.
+ int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
+ Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v2f64, DL, V2, V1, InverseMask, Subtarget, DAG))
+ return Insertion;
+ }
+
+ // Try to use one of the special instruction patterns to handle two common
+ // blend patterns if a zero-blend above didn't work.
+ if (isShuffleEquivalent(Mask, 0, 3) || isShuffleEquivalent(Mask, 1, 3))
+ if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
+ // We can either use a special instruction to load over the low double or
+ // to move just the low double.
+ return DAG.getNode(
+ isShuffleFoldableLoad(V1S) ? X86ISD::MOVLPD : X86ISD::MOVSD,
+ DL, MVT::v2f64, V2,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, MVT::i8));
@@ -6998,6 +8181,11 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (isSingleInputShuffleMask(Mask)) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v2i64, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
@@ -7011,6 +8199,44 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
}
+ // If we have a single input from V2 insert that into V1 if we can do so
+ // cheaply.
+ if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v2i64, DL, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+ // Try inverting the insertion since for v2 masks it is easy to do and we
+ // can't reliably sort the mask one way or the other.
+ int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
+ Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v2i64, DL, V2, V1, InverseMask, Subtarget, DAG))
+ return Insertion;
+ }
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v2i64, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
+ if (Subtarget->hasSSSE3())
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
@@ -7021,38 +8247,25 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
-/// \brief Lower 4-lane 32-bit floating point shuffles.
+/// \brief Lower a vector shuffle using the SHUFPS instruction.
///
-/// Uses instructions exclusively from the floating point unit to minimize
-/// domain crossing penalties, as these are sufficient to implement all v4f32
-/// shuffles.
-static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- SDLoc DL(Op);
- assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
- assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
- assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- ArrayRef<int> Mask = SVOp->getMask();
- assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
-
+/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
+/// It makes no assumptions about whether this is the *best* lowering; it simply
+/// uses it.
+static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
int NumV2Elements =
std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
- if (NumV2Elements == 0)
- // Straight shuffle of a single input vector. We pass the input vector to
- // both operands to simulate this with a SHUFPS.
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
- getV4X86ShuffleImm8ForMask(Mask, DAG));
-
if (NumV2Elements == 1) {
int V2Index =
std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
Mask.begin();
+
// Compute the index adjacent to V2Index and in the same half by toggling
// the low bit.
int V2AdjIndex = V2Index ^ 1;
@@ -7069,7 +8282,7 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// To make this work, blend them together as the first step.
int V1Index = V2AdjIndex;
int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
- V2 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V2, V1,
+ V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
getV4X86ShuffleImm8ForMask(BlendMask, DAG));
// Now proceed to reconstruct the final blend as we have the necessary
@@ -7086,9 +8299,17 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
} else if (NumV2Elements == 2) {
if (Mask[0] < 4 && Mask[1] < 4) {
// Handle the easy case where we have V1 in the low lanes and V2 in the
- // high lanes. We never see this reversed because we sort the shuffle.
+ // high lanes.
NewMask[2] -= 4;
NewMask[3] -= 4;
+ } else if (Mask[2] < 4 && Mask[3] < 4) {
+ // We also handle the reversed case because this utility may get called
+ // when we detect a SHUFPS pattern but can't easily commute the shuffle to
+ // arrange things in the right direction.
+ NewMask[0] -= 4;
+ NewMask[1] -= 4;
+ HighV = V1;
+ LowV = V2;
} else {
// We have a mixture of V1 and V2 in both low and high lanes. Rather than
// trying to place elements directly, just blend them and set up the final
@@ -7100,7 +8321,7 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask[2] < 4 ? Mask[2] : Mask[3],
(Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
(Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
- V1 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V2,
+ V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
getV4X86ShuffleImm8ForMask(BlendMask, DAG));
// Now we do a normal shuffle of V1 by giving V1 as both operands to
@@ -7112,10 +8333,116 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
NewMask[3] = Mask[2] < 4 ? 3 : 1;
}
}
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, LowV, HighV,
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
getV4X86ShuffleImm8ForMask(NewMask, DAG));
}
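
The 4-element immediates used throughout these routines come from getV4X86ShuffleImm8ForMask, which packs four 2-bit lane selectors little-endian (lane i lands in bits [2i+1:2i]) for PSHUFD/SHUFPS-style instructions. A self-contained sketch of that packing; undef lanes are mapped to zero here purely to keep the sketch small, which may differ from the in-tree helper:

  #include <cassert>

  // Pack a 4-element shuffle mask (values 0-3; -1 treated as 0 here) into the
  // 8-bit immediate used by PSHUFD/SHUFPS-style instructions.
  static unsigned imm8ForV4Mask(const int Mask[4]) {
    unsigned Imm = 0;
    for (int i = 0; i < 4; ++i)
      Imm |= (unsigned)(Mask[i] < 0 ? 0 : (Mask[i] & 3)) << (2 * i);
    return Imm;
  }

  int main() {
    const int Identity[4] = {0, 1, 2, 3};
    const int Reverse[4] = {3, 2, 1, 0};
    assert(imm8ForV4Mask(Identity) == 0xE4); // 0b11100100
    assert(imm8ForV4Mask(Reverse) == 0x1B);  // 0b00011011
    return 0;
  }
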
+/// \brief Lower 4-lane 32-bit floating point shuffles.
+///
+/// Uses instructions exclusively from the floating point unit to minimize
+/// domain crossing penalties, as these are sufficient to implement all v4f32
+/// shuffles.
+static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
+
+ if (NumV2Elements == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4f32, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ if (Subtarget->hasAVX()) {
+ // If we have AVX, we can use VPERMILPS which will allow folding a load
+ // into the shuffle.
+ return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+ }
+
+ // Otherwise, use a straight shuffle of a single input vector. We pass the
+ // input vector to both operands to simulate this with a SHUFPS.
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+ }
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+
+ // There are special ways we can lower some single-element blends. However, we
+ // have custom ways we can lower more complex single-element blends below that
+ // we defer to if both this and BLENDPS fail to match, so restrict this to
+ // when the V2 input is targeting element 0 of the mask -- that is the fast
+ // case here.
+ if (NumV2Elements == 1 && Mask[0] >= 4)
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4f32, DL, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check for whether we can use INSERTPS to perform the blend. We only use
+ // INSERTPS when the V1 elements are already in the correct locations
+ // because otherwise we can just always use two SHUFPS instructions which
+ // are much smaller to encode than a SHUFPS and an INSERTPS.
+ if (NumV2Elements == 1 && Subtarget->hasSSE41()) {
+ int V2Index =
+ std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
+ Mask.begin();
+
+ // When using INSERTPS we can zero any lane of the destination. Collect
+ // the zero inputs into a mask and drop them from the lanes of V1 which
+ // actually need to be present as inputs to the INSERTPS.
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+
+ // Synthesize a shuffle mask for the non-zero and non-v2 inputs.
+ bool InsertNeedsShuffle = false;
+ unsigned ZMask = 0;
+ for (int i = 0; i < 4; ++i)
+ if (i != V2Index) {
+ if (Zeroable[i]) {
+ ZMask |= 1 << i;
+ } else if (Mask[i] != i) {
+ InsertNeedsShuffle = true;
+ break;
+ }
+ }
+
+ // We don't want to use INSERTPS or other insertion techniques if it will
+ // require shuffling anyway.
+ if (!InsertNeedsShuffle) {
+ // If all of V1 is zeroable, replace it with undef.
+ if ((ZMask | 1 << V2Index) == 0xF)
+ V1 = DAG.getUNDEF(MVT::v4f32);
+
+ unsigned InsertPSMask = (Mask[V2Index] - 4) << 6 | V2Index << 4 | ZMask;
+ assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
+
+ // Insert the V2 element into the desired position.
+ return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
+ DAG.getConstant(InsertPSMask, MVT::i8));
+ }
+ }
+
+ // Otherwise fall back to a SHUFPS lowering strategy.
+ return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
+}
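
For readers unfamiliar with the INSERTPS immediate built above: bits [7:6] select the source element of V2, bits [5:4] select the destination lane, and bits [3:0] zero destination lanes. A small self-contained sketch of that packing (the helper name here is made up for illustration):

  #include <cassert>

  // Hypothetical helper mirroring the packing above: SrcElt is the element of
  // V2 being inserted (0-3), DstLane is the destination lane (0-3), and ZMask
  // has one bit per destination lane that should be zeroed.
  static unsigned insertpsImm(unsigned SrcElt, unsigned DstLane, unsigned ZMask) {
    assert(SrcElt < 4 && DstLane < 4 && ZMask < 16 && "out of range");
    return (SrcElt << 6) | (DstLane << 4) | ZMask;
  }

  int main() {
    // Insert V2[3] into lane 1 and zero lane 2: 0b11'01'0100 == 0xD4.
    assert(insertpsImm(3, 1, 0x4) == 0xD4);
    return 0;
  }
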
+
/// \brief Lower 4-lane i32 vector shuffles.
///
/// We try to handle these with integer-domain shuffles where we can, but for
@@ -7131,11 +8458,66 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
- if (isSingleInputShuffleMask(Mask))
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative. It also allows us to fold memory operands into the
+ // shuffle in many cases.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return ZExt;
+
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
+
+ if (NumV2Elements == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4i32, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
+ // We coerce the shuffle pattern to be compatible with UNPCK instructions
+ // but we aren't actually going to use the UNPCK instruction because doing
+ // so prevents folding a load into this instruction or making a copy.
+ const int UnpackLoMask[] = {0, 0, 1, 1};
+ const int UnpackHiMask[] = {2, 2, 3, 3};
+ if (isShuffleEquivalent(Mask, 0, 0, 1, 1))
+ Mask = UnpackLoMask;
+ else if (isShuffleEquivalent(Mask, 2, 2, 3, 3))
+ Mask = UnpackHiMask;
+
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DAG));
+ }
+
+ // There are special ways we can lower some single-element blends.
+ if (NumV2Elements == 1)
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4i32, DL, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v4i32, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
+ if (Subtarget->hasSSSE3())
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
@@ -7188,6 +8570,27 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8i16, DL, V,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
+ if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v8i16, V, V, Mask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v8i16, V, V, Mask, Subtarget, DAG))
+ return Rotate;
+
// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
// with <=2 inputs to each half in each half. Once there, we can fall through
@@ -7196,22 +8599,126 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
//
- // Before we had 3-1 in the low half and 3-1 in the high half. Afterward, 2-2
- // and 2-2.
- auto balanceSides = [&](ArrayRef<int> ThreeInputs, int OneInput,
- int ThreeInputHalfSum, int OneInputHalfOffset) {
+ // However in some very rare cases we have a 1-into-3 or 3-into-1 on one half
+ // and an existing 2-into-2 on the other half. In this case we may have to
+ // pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or
+ // 1-into-3 which could cause us to cycle endlessly fixing each side in turn.
+ // Fortunately, we don't have to handle anything but a 2-into-2 pattern
+ // because any other situation (including a 3-into-1 or 1-into-3 in the other
+ // half than the one we target for fixing) will be fixed when we re-enter this
+ // path. We will also combine away any resulting sequence of PSHUFD
+ // instructions into a single instruction. Here is an example of the tricky case:
+ //
+ // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
+ // Mask: [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3]
+ //
+ // This now has a 1-into-3 in the high half! Instead, we do two shuffles:
+ //
+ // Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h]
+ // Mask: [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6]
+ //
+ // Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h]
+ // Mask: [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6]
+ //
+ // The result is fine to be handled by the generic logic.
+ auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs,
+ ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs,
+ int AOffset, int BOffset) {
+ assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
+ "Must call this with A having 3 or 1 inputs from the A half.");
+ assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
+ "Must call this with B having 1 or 3 inputs from the B half.");
+ assert(AToAInputs.size() + BToAInputs.size() == 4 &&
+ "Must call this with either 3:1 or 1:3 inputs (summing to 4).");
+
// Compute the index of dword with only one word among the three inputs in
// a half by taking the sum of the half with three inputs and subtracting
// the sum of the actual three inputs. The difference is the remaining
// slot.
- int DWordA = (ThreeInputHalfSum -
- std::accumulate(ThreeInputs.begin(), ThreeInputs.end(), 0)) /
- 2;
- int DWordB = OneInputHalfOffset / 2 + (OneInput / 2 + 1) % 2;
+ int ADWord, BDWord;
+ int &TripleDWord = AToAInputs.size() == 3 ? ADWord : BDWord;
+ int &OneInputDWord = AToAInputs.size() == 3 ? BDWord : ADWord;
+ int TripleInputOffset = AToAInputs.size() == 3 ? AOffset : BOffset;
+ ArrayRef<int> TripleInputs = AToAInputs.size() == 3 ? AToAInputs : BToAInputs;
+ int OneInput = AToAInputs.size() == 3 ? BToAInputs[0] : AToAInputs[0];
+ int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
+ int TripleNonInputIdx =
+ TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
+ TripleDWord = TripleNonInputIdx / 2;
+
+ // We use xor with one to compute the adjacent DWord to whichever one the
+ // OneInput is in.
+ OneInputDWord = (OneInput / 2) ^ 1;
+
+ // Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA
+ // and BToA inputs. If there is also such a problem with the BToB and AToB
+ // inputs, we don't try to fix it necessarily -- we'll recurse and see it in
+ // the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it
+ // is essential that we don't *create* a 3<-1 as then we might oscillate.
+ if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
+ // Compute how many inputs will be flipped by swapping these DWords. We
+ // need to balance this to ensure we don't form a 3-1 shuffle in the
+ // other half.
+ int NumFlippedAToBInputs =
+ std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) +
+ std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1);
+ int NumFlippedBToBInputs =
+ std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) +
+ std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1);
+ if ((NumFlippedAToBInputs == 1 &&
+ (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
+ (NumFlippedBToBInputs == 1 &&
+ (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
+ // We choose whether to fix the A half or B half based on whether that
+ // half has zero flipped inputs. At zero, we may not be able to fix it
+ // with that half. We also bias towards fixing the B half because that
+ // will more commonly be the high half, and we have to bias one way.
+ auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
+ ArrayRef<int> Inputs) {
+ int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned slot.
+ bool IsFixIdxInput = std::find(Inputs.begin(), Inputs.end(),
+ PinnedIdx ^ 1) != Inputs.end();
+ // Determine whether the free index is in the flipped dword or the
+ // unflipped dword based on where the pinned index is. We use this bit
+ // in an xor to conditionally select the adjacent dword.
+ int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
+ bool IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(),
+ FixFreeIdx) != Inputs.end();
+ if (IsFixIdxInput == IsFixFreeIdxInput)
+ FixFreeIdx += 1;
+ IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(),
+ FixFreeIdx) != Inputs.end();
+ assert(IsFixIdxInput != IsFixFreeIdxInput &&
+ "We need to be changing the number of flipped inputs!");
+ int PSHUFHalfMask[] = {0, 1, 2, 3};
+ std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
+ V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
+ MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DAG));
+
+ for (int &M : Mask)
+ if (M != -1 && M == FixIdx)
+ M = FixFreeIdx;
+ else if (M != -1 && M == FixFreeIdx)
+ M = FixIdx;
+ };
+ if (NumFlippedBToBInputs != 0) {
+ int BPinnedIdx =
+ BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
+ FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
+ } else {
+ assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
+ int APinnedIdx =
+ AToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
+ FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
+ }
+ }
+ }
int PSHUFDMask[] = {0, 1, 2, 3};
- PSHUFDMask[DWordA] = DWordB;
- PSHUFDMask[DWordB] = DWordA;
+ PSHUFDMask[ADWord] = BDWord;
+ PSHUFDMask[BDWord] = ADWord;
V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
@@ -7219,24 +8726,20 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
- if (M != -1 && M/2 == DWordA)
- M = 2 * DWordB + M % 2;
- else if (M != -1 && M/2 == DWordB)
- M = 2 * DWordA + M % 2;
+ if (M != -1 && M/2 == ADWord)
+ M = 2 * BDWord + M % 2;
+ else if (M != -1 && M/2 == BDWord)
+ M = 2 * ADWord + M % 2;
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
Mask);
};
- if (NumLToL == 3 && NumHToL == 1)
- return balanceSides(LToLInputs, HToLInputs[0], 0 + 1 + 2 + 3, 4);
- else if (NumLToL == 1 && NumHToL == 3)
- return balanceSides(HToLInputs, LToLInputs[0], 4 + 5 + 6 + 7, 0);
- else if (NumLToH == 1 && NumHToH == 3)
- return balanceSides(HToHInputs, LToHInputs[0], 4 + 5 + 6 + 7, 0);
- else if (NumLToH == 3 && NumHToH == 1)
- return balanceSides(LToHInputs, HToHInputs[0], 0 + 1 + 2 + 3, 4);
+ if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
+ return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
+ else if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
+ return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
// At this point there are at most two inputs to the low and high halves from
// each half. That means the inputs can always be grouped into dwords and
@@ -7250,9 +8753,10 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
// First fix the masks for all the inputs that are staying in their
// original halves. This will then dictate the targets of the cross-half
// shuffles.
- auto fixInPlaceInputs = [&PSHUFDMask](
- ArrayRef<int> InPlaceInputs, MutableArrayRef<int> SourceHalfMask,
- MutableArrayRef<int> HalfMask, int HalfOffset) {
+ auto fixInPlaceInputs =
+ [&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
+ MutableArrayRef<int> SourceHalfMask,
+ MutableArrayRef<int> HalfMask, int HalfOffset) {
if (InPlaceInputs.empty())
return;
if (InPlaceInputs.size() == 1) {
@@ -7261,6 +8765,14 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
return;
}
+ if (IncomingInputs.empty()) {
+ // Just fix all of the in place inputs.
+ for (int Input : InPlaceInputs) {
+ SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
+ PSHUFDMask[Input / 2] = Input / 2;
+ }
+ return;
+ }
assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
@@ -7272,10 +8784,8 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
};
- if (!HToLInputs.empty())
- fixInPlaceInputs(LToLInputs, PSHUFLMask, LoMask, 0);
- if (!LToHInputs.empty())
- fixInPlaceInputs(HToHInputs, PSHUFHMask, HiMask, 4);
+ fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
+ fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
// Now gather the cross-half inputs and place them into a free dword of
// their target half.
@@ -7284,7 +8794,8 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
auto moveInputsToRightHalf = [&PSHUFDMask](
MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
- int SourceOffset, int DestOffset) {
+ MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
+ int DestOffset) {
auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
return SourceHalfMask[Word] != -1 && SourceHalfMask[Word] != Word;
};
@@ -7310,7 +8821,7 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
Input - SourceOffset;
// We have to swap the uses in our half mask in one sweep.
for (int &M : HalfMask)
- if (M == SourceHalfMask[Input - SourceOffset])
+ if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
M = Input;
else if (M == Input)
M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
@@ -7362,18 +8873,68 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
} else if (IncomingInputs.size() == 2) {
if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
- int SourceDWordBase = !isDWordClobbered(SourceHalfMask, 0) ? 0 : 2;
- assert(!isDWordClobbered(SourceHalfMask, SourceDWordBase) &&
- "Not all dwords can be clobbered!");
- SourceHalfMask[SourceDWordBase] = IncomingInputs[0] - SourceOffset;
- SourceHalfMask[SourceDWordBase + 1] = IncomingInputs[1] - SourceOffset;
+ // We have two non-adjacent or clobbered inputs we need to extract from
+ // the source half. To do this, we need to map them into some adjacent
+ // dword slot in the source mask.
+ int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
+ IncomingInputs[1] - SourceOffset};
+
+ // If there is a free slot in the source half mask adjacent to one of
+ // the inputs, place the other input in it. We use (Index XOR 1) to
+ // compute an adjacent index.
+ if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
+ SourceHalfMask[InputsFixed[0] ^ 1] == -1) {
+ SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
+ SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
+ InputsFixed[1] = InputsFixed[0] ^ 1;
+ } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
+ SourceHalfMask[InputsFixed[1] ^ 1] == -1) {
+ SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
+ SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
+ InputsFixed[0] = InputsFixed[1] ^ 1;
+ } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] == -1 &&
+ SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] == -1) {
+ // The two inputs are in the same DWord but it is clobbered and the
+ // adjacent DWord isn't used at all. Move both inputs to the free
+ // slot.
+ SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
+ SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
+ InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
+ InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
+ } else {
+ // The only way we hit this point is if there is no clobbering
+ // (because there are no off-half inputs to this half) and there is no
+ // free slot adjacent to one of the inputs. In this case, we have to
+ // swap an input with a non-input.
+ for (int i = 0; i < 4; ++i)
+ assert((SourceHalfMask[i] == -1 || SourceHalfMask[i] == i) &&
+ "We can't handle any clobbers here!");
+ assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
+ "Cannot have adjacent inputs here!");
+
+ SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
+ SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;
+
+ // We also have to update the final source mask in this case because
+ // it may need to undo the above swap.
+ for (int &M : FinalSourceHalfMask)
+ if (M == (InputsFixed[0] ^ 1) + SourceOffset)
+ M = InputsFixed[1] + SourceOffset;
+ else if (M == InputsFixed[1] + SourceOffset)
+ M = (InputsFixed[0] ^ 1) + SourceOffset;
+
+ InputsFixed[1] = InputsFixed[0] ^ 1;
+ }
+
+ // Point everything at the fixed inputs.
for (int &M : HalfMask)
if (M == IncomingInputs[0])
- M = SourceDWordBase + SourceOffset;
+ M = InputsFixed[0] + SourceOffset;
else if (M == IncomingInputs[1])
- M = SourceDWordBase + 1 + SourceOffset;
- IncomingInputs[0] = SourceDWordBase + SourceOffset;
- IncomingInputs[1] = SourceDWordBase + 1 + SourceOffset;
+ M = InputsFixed[1] + SourceOffset;
+
+ IncomingInputs[0] = InputsFixed[0] + SourceOffset;
+ IncomingInputs[1] = InputsFixed[1] + SourceOffset;
}
} else {
llvm_unreachable("Unhandled input size!");
@@ -7383,13 +8944,14 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
int FreeDWord = (PSHUFDMask[DestOffset / 2] == -1 ? 0 : 1) + DestOffset / 2;
assert(PSHUFDMask[FreeDWord] == -1 && "DWord not free");
PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
- for (int Input : IncomingInputs)
- std::replace(HalfMask.begin(), HalfMask.end(), Input,
- FreeDWord * 2 + Input % 2);
+ for (int &M : HalfMask)
+ for (int Input : IncomingInputs)
+ if (M == Input)
+ M = FreeDWord * 2 + Input % 2;
};
- moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask,
+ moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
/*SourceOffset*/ 4, /*DestOffset*/ 0);
- moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask,
+ moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
/*SourceOffset*/ 0, /*DestOffset*/ 4);
// Now enact all the shuffles we've computed to move the inputs into their
@@ -7526,34 +9088,37 @@ static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1,
if (GoodInputs.size() == 2) {
// If the low inputs are spread across two dwords, pack them into
// a single dword.
- MoveMask[Mask[GoodInputs[0]] % 2 + MoveOffset] =
- Mask[GoodInputs[0]] - MaskOffset;
- MoveMask[Mask[GoodInputs[1]] % 2 + MoveOffset] =
- Mask[GoodInputs[1]] - MaskOffset;
- Mask[GoodInputs[0]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
- Mask[GoodInputs[1]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
+ MoveMask[MoveOffset] = Mask[GoodInputs[0]] - MaskOffset;
+ MoveMask[MoveOffset + 1] = Mask[GoodInputs[1]] - MaskOffset;
+ Mask[GoodInputs[0]] = MoveOffset + MaskOffset;
+ Mask[GoodInputs[1]] = MoveOffset + 1 + MaskOffset;
} else {
- // Otherwise pin the low inputs.
+ // Otherwise pin the good inputs.
for (int GoodInput : GoodInputs)
MoveMask[Mask[GoodInput] - MaskOffset] = Mask[GoodInput] - MaskOffset;
}
- int MoveMaskIdx =
- std::find(std::begin(MoveMask) + MoveOffset, std::end(MoveMask), -1) -
- std::begin(MoveMask);
- assert(MoveMaskIdx >= MoveOffset && "Established above");
-
if (BadInputs.size() == 2) {
+ // If we have two bad inputs then there may be either one or two good
+ // inputs fixed in place. Find a fixed input, and then find the *other*
+ // two adjacent indices by using modular arithmetic.
+ int GoodMaskIdx =
+ std::find_if(std::begin(MoveMask) + MoveOffset, std::end(MoveMask),
+ [](int M) { return M >= 0; }) -
+ std::begin(MoveMask);
+ int MoveMaskIdx =
+ ((((GoodMaskIdx - MoveOffset) & ~1) + 2) % 4) + MoveOffset;
assert(MoveMask[MoveMaskIdx] == -1 && "Expected empty slot");
assert(MoveMask[MoveMaskIdx + 1] == -1 && "Expected empty slot");
- MoveMask[MoveMaskIdx + Mask[BadInputs[0]] % 2] =
- Mask[BadInputs[0]] - MaskOffset;
- MoveMask[MoveMaskIdx + Mask[BadInputs[1]] % 2] =
- Mask[BadInputs[1]] - MaskOffset;
- Mask[BadInputs[0]] = MoveMaskIdx + Mask[BadInputs[0]] % 2 + MaskOffset;
- Mask[BadInputs[1]] = MoveMaskIdx + Mask[BadInputs[1]] % 2 + MaskOffset;
+ MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
+ MoveMask[MoveMaskIdx + 1] = Mask[BadInputs[1]] - MaskOffset;
+ Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
+ Mask[BadInputs[1]] = MoveMaskIdx + 1 + MaskOffset;
} else {
assert(BadInputs.size() == 1 && "All sizes handled");
+ int MoveMaskIdx = std::find(std::begin(MoveMask) + MoveOffset,
+ std::end(MoveMask), -1) -
+ std::begin(MoveMask);
MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
}
@@ -7609,6 +9174,12 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v8i16, V1, V2, OrigMask, Subtarget, DAG))
+ return ZExt;
+
auto isV1 = [](int M) { return M >= 0 && M < 8; };
auto isV2 = [](int M) { return M >= 8; };
@@ -7621,6 +9192,33 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
"to be V1-input shuffles.");
+ // There are special ways we can lower some single-element blends.
+ if (NumV2Inputs == 1)
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v8i16, DL, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 2, 10, 3, 11))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
+ if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
+
+ if (Subtarget->hasSSE41())
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+ return Rotate;
+
if (NumV1Inputs + NumV2Inputs <= 4)
return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);
@@ -7664,6 +9262,74 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, LoV, HiV));
}
+/// \brief Check whether a compaction lowering can be done by dropping even
+/// elements and compute how many times even elements must be dropped.
+///
+/// This handles shuffles which take every Nth element where N is a power of
+/// two. Example shuffle masks:
+///
+/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14
+/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+/// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12
+/// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28
+/// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8
+/// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24
+///
+/// Any of these lanes can of course be undef.
+///
+/// This routine only supports N <= 3.
+/// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here
+/// for larger N.
+///
+/// \returns N above, or the number of times even elements must be dropped if
+/// there is such a number. Otherwise returns zero.
+static int canLowerByDroppingEvenElements(ArrayRef<int> Mask) {
+ // Figure out whether we're looping over two inputs or just one.
+ bool IsSingleInput = isSingleInputShuffleMask(Mask);
+
+ // The modulus for the shuffle vector entries is based on whether this is
+ // a single input or not.
+ int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
+ assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
+ "We should only be called with masks with a power-of-2 size!");
+
+ uint64_t ModMask = (uint64_t)ShuffleModulus - 1;
+
+ // We track whether the input is viable for all power-of-2 strides 2^1, 2^2,
+ // and 2^3 simultaneously. This is because we may have ambiguity with
+ // partially undef inputs.
+ bool ViableForN[3] = {true, true, true};
+
+ for (int i = 0, e = Mask.size(); i < e; ++i) {
+ // Ignore undef lanes, we'll optimistically collapse them to the pattern we
+ // want.
+ if (Mask[i] == -1)
+ continue;
+
+ bool IsAnyViable = false;
+ for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
+ if (ViableForN[j]) {
+ uint64_t N = j + 1;
+
+ // The shuffle mask must be equal to (i * 2^N) % M.
+ if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask))
+ IsAnyViable = true;
+ else
+ ViableForN[j] = false;
+ }
+ // Early exit if we exhaust the possible powers of two.
+ if (!IsAnyViable)
+ break;
+ }
+
+ for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
+ if (ViableForN[j])
+ return j + 1;
+
+ // Return 0 as there is no viable power of two.
+ return 0;
+}
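
To make the N computation concrete, here is a plain-STL restatement of the viability check run over two of the example masks from the comment. This is an illustrative sketch under the same assumptions (power-of-two mask size, -1 for undef), not the routine itself:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Return N (1..3) if Mask takes every 2^N-th element as described above,
  // otherwise 0. Undef lanes are written as -1.
  static int droppingEvenElementsN(const std::vector<int> &Mask, bool SingleInput) {
    uint64_t Mod = Mask.size() * (SingleInput ? 1 : 2);
    bool Viable[3] = {true, true, true};
    for (int i = 0, e = (int)Mask.size(); i < e; ++i) {
      if (Mask[i] == -1)
        continue;
      bool Any = false;
      for (uint64_t N = 1; N <= 3; ++N)
        if (Viable[N - 1]) {
          if ((uint64_t)Mask[i] == (((uint64_t)i << N) & (Mod - 1)))
            Any = true;
          else
            Viable[N - 1] = false;
        }
      if (!Any)
        return 0;
    }
    for (int N = 1; N <= 3; ++N)
      if (Viable[N - 1])
        return N;
    return 0;
  }

  int main() {
    // Two-input form of the N = 1 example from the comment.
    std::vector<int> N1 = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30};
    // Single-input form of the N = 2 example.
    std::vector<int> N2 = {0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12};
    assert(droppingEvenElementsN(N1, /*SingleInput=*/false) == 1);
    assert(droppingEvenElementsN(N2, /*SingleInput=*/true) == 2);
    return 0;
  }
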
+
/// \brief Generic lowering of v16i8 shuffles.
///
/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to
@@ -7681,6 +9347,22 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> OrigMask = SVOp->getMask();
assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+
+ // Try to use byte shift instructions.
+ if (SDValue Shift = lowerVectorShuffleAsByteShift(
+ DL, MVT::v16i8, V1, V2, OrigMask, DAG))
+ return Shift;
+
+ // Try to use byte rotation instructions.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
+ DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
+ return Rotate;
+
+ // Try to use a zext lowering.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
+ return ZExt;
+
int MaskStorage[16] = {
OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7],
@@ -7690,8 +9372,16 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
MutableArrayRef<int> LoMask = Mask.slice(0, 8);
MutableArrayRef<int> HiMask = Mask.slice(8, 8);
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; });
+
// For single-input shuffles, there are some nicer lowering tricks we can use.
- if (isSingleInputShuffleMask(Mask)) {
+ if (NumV2Elements == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v16i8, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
// Notably, this handles splat and partial-splat shuffles more efficiently.
// However, it only makes sense if the pre-duplication shuffle simplifies
@@ -7701,10 +9391,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// FIXME: We should check for other patterns which can be widened into an
// i16 shuffle as well.
auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
- for (int i = 0; i < 16; i += 2) {
- if (Mask[i] != Mask[i + 1])
+ for (int i = 0; i < 16; i += 2)
+ if (Mask[i] != -1 && Mask[i + 1] != -1 && Mask[i] != Mask[i + 1])
return false;
- }
+
return true;
};
auto tryToWidenViaDuplication = [&]() -> SDValue {
@@ -7765,11 +9455,16 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
MVT::v16i8, V1, V1);
int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
- for (int i = 0; i < 16; i += 2) {
- if (Mask[i] != -1)
- PostDupI16Shuffle[i / 2] = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
- assert(PostDupI16Shuffle[i / 2] < 8 && "Invalid v8 shuffle mask!");
- }
+ for (int i = 0; i < 16; ++i)
+ if (Mask[i] != -1) {
+ int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
+ assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
+ if (PostDupI16Shuffle[i / 2] == -1)
+ PostDupI16Shuffle[i / 2] = MappedMask;
+ else
+ assert(PostDupI16Shuffle[i / 2] == MappedMask &&
+ "Conflicting entries in the original shuffle!");
+ }
return DAG.getNode(
ISD::BITCAST, DL, MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL,
@@ -7786,21 +9481,108 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
//
// FIXME: We need to handle other interleaving widths (i16, i32, ...).
if (shouldLowerAsInterleaving(Mask)) {
- // FIXME: Figure out whether we should pack these into the low or high
- // halves.
-
- int EMask[16], OMask[16];
+ int NumLoHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+ return (M >= 0 && M < 8) || (M >= 16 && M < 24);
+ });
+ int NumHiHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
+ return (M >= 8 && M < 16) || M >= 24;
+ });
+ int EMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ int OMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+ bool UnpackLo = NumLoHalf >= NumHiHalf;
+ MutableArrayRef<int> TargetEMask(UnpackLo ? EMask : EMask + 8, 8);
+ MutableArrayRef<int> TargetOMask(UnpackLo ? OMask : OMask + 8, 8);
for (int i = 0; i < 8; ++i) {
- EMask[i] = Mask[2*i];
- OMask[i] = Mask[2*i + 1];
- EMask[i + 8] = -1;
- OMask[i + 8] = -1;
+ TargetEMask[i] = Mask[2 * i];
+ TargetOMask[i] = Mask[2 * i + 1];
}
SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds);
+ return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+ MVT::v16i8, Evens, Odds);
+ }
+
+ // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
+ // with PSHUFB. It is important to do this before we attempt to generate any
+ // blends but after all of the single-input lowerings. If the single input
+ // lowerings can find an instruction sequence that is faster than a PSHUFB, we
+ // want to preserve that and we can DAG combine any longer sequences into
+ // a PSHUFB in the end. But once we start blending from multiple inputs,
+ // the complexity of DAG combining bad patterns back into PSHUFB is too high,
+ // and there are *very* few patterns that would actually be faster than the
+ // PSHUFB approach because of its ability to zero lanes.
+ //
+ // FIXME: The only exceptions to the above are blends which are exact
+ // interleavings with direct instructions supporting them. We currently don't
+ // handle those well here.
+ if (Subtarget->hasSSSE3()) {
+ SDValue V1Mask[16];
+ SDValue V2Mask[16];
+ for (int i = 0; i < 16; ++i)
+ if (Mask[i] == -1) {
+ V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
+ } else {
+ V1Mask[i] = DAG.getConstant(Mask[i] < 16 ? Mask[i] : 0x80, MVT::i8);
+ V2Mask[i] =
+ DAG.getConstant(Mask[i] < 16 ? 0x80 : Mask[i] - 16, MVT::i8);
+ }
+ V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
+ if (isSingleInputShuffleMask(Mask))
+ return V1; // Single inputs are easy.
+
+ // Otherwise, blend the two.
+ V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
+ return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
+ }
+
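
The PSHUFB path above builds one control vector per input: a control byte with its high bit set (0x80) yields zero, otherwise its low four bits index the source bytes, and the two partial results are OR'd together. A scalar emulation of that scheme, illustrative only, is shown here:

  #include <array>
  #include <cassert>
  #include <cstdint>

  // Scalar emulation of PSHUFB: a control byte with the high bit set yields
  // zero; otherwise its low 4 bits index into Src.
  static std::array<uint8_t, 16> pshufb(const std::array<uint8_t, 16> &Src,
                                        const std::array<uint8_t, 16> &Ctl) {
    std::array<uint8_t, 16> R{};
    for (int i = 0; i < 16; ++i)
      R[i] = (Ctl[i] & 0x80) ? 0 : Src[Ctl[i] & 0xF];
    return R;
  }

  int main() {
    std::array<uint8_t, 16> V1{}, V2{};
    for (int i = 0; i < 16; ++i) { V1[i] = i; V2[i] = 0x40 + i; }
    // Shuffle mask <0, 16, 1, 17, ...>: interleave the low bytes of V1 and V2.
    std::array<uint8_t, 16> C1{}, C2{};
    for (int i = 0; i < 16; ++i) {
      int M = (i % 2 == 0) ? i / 2 : 16 + i / 2;   // the logical shuffle mask
      C1[i] = (M < 16) ? (uint8_t)M : 0x80;        // V1 lanes, zero elsewhere
      C2[i] = (M < 16) ? 0x80 : (uint8_t)(M - 16); // V2 lanes, zero elsewhere
    }
    std::array<uint8_t, 16> A = pshufb(V1, C1), B = pshufb(V2, C2);
    for (int i = 0; i < 16; ++i) {
      uint8_t Blend = A[i] | B[i];                 // the final OR blend
      assert(Blend == ((i % 2 == 0) ? V1[i / 2] : V2[i / 2]));
    }
    return 0;
  }

The zero-through-0x80 trick is what makes the OR blend safe: every result byte is non-zero in at most one of the two shuffled vectors.
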
+ // There are special ways we can lower some single-element blends.
+ if (NumV2Elements == 1)
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v16i8, DL, V1, V2,
+ Mask, Subtarget, DAG))
+ return V;
+
+ // Check whether a compaction lowering can be done. This handles shuffles
+ // which take every Nth element for some even N. See the helper function for
+ // details.
+ //
+ // We special case these as they can be particularly efficiently handled with
+ // the PACKUSWB instruction on x86 and they show up in common patterns of
+ // rearranging bytes to truncate wide elements.
+ if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask)) {
+ // NumEvenDrops is the power of two stride of the elements. Another way of
+ // thinking about it is that we need to drop the even elements this many
+ // times to get the original input.
+ bool IsSingleInput = isSingleInputShuffleMask(Mask);
+
+ // First we need to zero all the dropped bytes.
+ assert(NumEvenDrops <= 3 &&
+ "No support for dropping even elements more than 3 times.");
+ // We use the mask type to pick which bytes are preserved based on how many
+ // elements are dropped.
+ MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
+ SDValue ByteClearMask =
+ DAG.getNode(ISD::BITCAST, DL, MVT::v16i8,
+ DAG.getConstant(0xFF, MaskVTs[NumEvenDrops - 1]));
+ V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
+ if (!IsSingleInput)
+ V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
+
+ // Now pack things back together.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
+ V2 = IsSingleInput ? V1 : DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
+ SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2);
+ for (int i = 1; i < NumEvenDrops; ++i) {
+ Result = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, Result);
+ Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
+ }
+
+ return Result;
}
int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
@@ -7899,15 +9681,933 @@ static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
}
-/// \brief Tiny helper function to test whether adjacent masks are sequential.
-static bool areAdjacentMasksSequential(ArrayRef<int> Mask) {
- for (int i = 0, Size = Mask.size(); i < Size; i += 2)
- if (Mask[i] + 1 != Mask[i+1])
+/// \brief Helper function to test whether a shuffle mask could be
+/// simplified by widening the elements being shuffled.
+///
+/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
+/// leaves it in an unspecified state.
+///
+/// NOTE: This must handle normal vector shuffle masks and *target* vector
+/// shuffle masks. The latter have the special property of a '-2' representing
+/// a zeroed lane of a vector.
+static bool canWidenShuffleElements(ArrayRef<int> Mask,
+ SmallVectorImpl<int> &WidenedMask) {
+ for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
+ // If both elements are undef, it's trivial.
+ if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) {
+ WidenedMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+
+ // Check for an undef mask and a mask value properly aligned to fit with
+ // a pair of values. If we find such a case, use the non-undef mask's value.
+ if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 && Mask[i + 1] % 2 == 1) {
+ WidenedMask.push_back(Mask[i + 1] / 2);
+ continue;
+ }
+ if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) {
+ WidenedMask.push_back(Mask[i] / 2);
+ continue;
+ }
+
+ // When zeroing, we need to spread the zeroing across both lanes to widen.
+ if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) {
+ if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) &&
+ (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) {
+ WidenedMask.push_back(SM_SentinelZero);
+ continue;
+ }
return false;
+ }
+
+ // Finally check if the two mask values are adjacent and aligned with
+ // a pair.
+ if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) {
+ WidenedMask.push_back(Mask[i] / 2);
+ continue;
+ }
+
+ // Otherwise we can't safely widen the elements used in this shuffle.
+ return false;
+ }
+ assert(WidenedMask.size() == Mask.size() / 2 &&
+ "Incorrect size of mask after widening the elements!");
return true;
}
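
A simplified standalone illustration of the pairing rules for plain shuffle masks follows; the sentinel-zero handling above is intentionally omitted, and the helper name is invented here, so treat this as a sketch only:

  #include <cassert>
  #include <vector>

  // Widen a shuffle mask by pairing adjacent lanes. Returns false if some pair
  // cannot be expressed as a single wider-element index. -1 means undef.
  static bool widenMask(const std::vector<int> &Mask, std::vector<int> &Widened) {
    Widened.clear();
    for (size_t i = 0; i + 1 < Mask.size(); i += 2) {
      int Lo = Mask[i], Hi = Mask[i + 1];
      if (Lo == -1 && Hi == -1) {
        Widened.push_back(-1);
      } else if (Lo == -1 && Hi >= 0 && Hi % 2 == 1) {
        Widened.push_back(Hi / 2);
      } else if (Hi == -1 && Lo >= 0 && Lo % 2 == 0) {
        Widened.push_back(Lo / 2);
      } else if (Lo >= 0 && Lo % 2 == 0 && Lo + 1 == Hi) {
        Widened.push_back(Lo / 2);
      } else {
        return false;
      }
    }
    return true;
  }

  int main() {
    std::vector<int> W;
    // v4 mask <2, 3, 4, 5> widens to the v2 mask <1, 2>.
    assert(widenMask({2, 3, 4, 5}, W) && W == std::vector<int>({1, 2}));
    // <1, 2, 4, 5> cannot widen: the first pair straddles an element boundary.
    assert(!widenMask({1, 2, 4, 5}, W));
    return 0;
  }
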
+/// \brief Generic routine to split a vector shuffle into half-sized shuffles.
+///
+/// This routine just extracts two subvectors, shuffles them independently, and
+/// then concatenates them back together. This should work effectively with all
+/// AVX vector shuffle types.
+static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(VT.getSizeInBits() >= 256 &&
+ "Only for 256-bit or wider vector shuffles!");
+ assert(V1.getSimpleValueType() == VT && "Bad operand type!");
+ assert(V2.getSimpleValueType() == VT && "Bad operand type!");
+
+ ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
+ ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);
+
+ int NumElements = VT.getVectorNumElements();
+ int SplitNumElements = NumElements / 2;
+ MVT ScalarVT = VT.getScalarType();
+ MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
+
+ SDValue LoV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
+ DAG.getIntPtrConstant(SplitNumElements));
+ SDValue LoV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
+ DAG.getIntPtrConstant(SplitNumElements));
+
+ // Now create two 4-way blends of these half-width vectors.
+ auto HalfBlend = [&](ArrayRef<int> HalfMask) {
+ bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
+ SmallVector<int, 32> V1BlendMask, V2BlendMask, BlendMask;
+ for (int i = 0; i < SplitNumElements; ++i) {
+ int M = HalfMask[i];
+ if (M >= NumElements) {
+ if (M >= NumElements + SplitNumElements)
+ UseHiV2 = true;
+ else
+ UseLoV2 = true;
+ V2BlendMask.push_back(M - NumElements);
+ V1BlendMask.push_back(-1);
+ BlendMask.push_back(SplitNumElements + i);
+ } else if (M >= 0) {
+ if (M >= SplitNumElements)
+ UseHiV1 = true;
+ else
+ UseLoV1 = true;
+ V2BlendMask.push_back(-1);
+ V1BlendMask.push_back(M);
+ BlendMask.push_back(i);
+ } else {
+ V2BlendMask.push_back(-1);
+ V1BlendMask.push_back(-1);
+ BlendMask.push_back(-1);
+ }
+ }
+
+ // Because the lowering happens after all combining takes place, we need to
+ // manually combine these blend masks as much as possible so that we create
+ // a minimal number of high-level vector shuffle nodes.
+
+ // First try just blending the halves of V1 or V2.
+ if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
+ return DAG.getUNDEF(SplitVT);
+ if (!UseLoV2 && !UseHiV2)
+ return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
+ if (!UseLoV1 && !UseHiV1)
+ return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
+
+ SDValue V1Blend, V2Blend;
+ if (UseLoV1 && UseHiV1) {
+ V1Blend =
+ DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
+ } else {
+ // We only use half of V1 so map the usage down into the final blend mask.
+ V1Blend = UseLoV1 ? LoV1 : HiV1;
+ for (int i = 0; i < SplitNumElements; ++i)
+ if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
+ BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
+ }
+ if (UseLoV2 && UseHiV2) {
+ V2Blend =
+ DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
+ } else {
+ // We only use half of V2 so map the usage down into the final blend mask.
+ V2Blend = UseLoV2 ? LoV2 : HiV2;
+ for (int i = 0; i < SplitNumElements; ++i)
+ if (BlendMask[i] >= SplitNumElements)
+ BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
+ }
+ return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
+ };
+ SDValue Lo = HalfBlend(LoMask);
+ SDValue Hi = HalfBlend(HiMask);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+}
+
+/// \brief Either split a vector in halves or decompose the shuffles and the
+/// blend.
+///
+/// This is provided as a good fallback for many lowerings of non-single-input
+/// shuffles with more than one 128-bit lane. In those cases, we want to select
+/// between splitting the shuffle into 128-bit components and stitching those
+/// back together vs. extracting the single-input shuffles and blending those
+/// results.
+static SDValue lowerVectorShuffleAsSplitOrBlend(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(!isSingleInputShuffleMask(Mask) && "This routine must not be used to "
+ "lower single-input shuffles as it "
+ "could then recurse on itself.");
+ int Size = Mask.size();
+
+ // If this can be modeled as a broadcast of two elements followed by a blend,
+ // prefer that lowering. This is especially important because broadcasts can
+ // often fold with memory operands.
+ auto DoBothBroadcast = [&] {
+ int V1BroadcastIdx = -1, V2BroadcastIdx = -1;
+ for (int M : Mask)
+ if (M >= Size) {
+ if (V2BroadcastIdx == -1)
+ V2BroadcastIdx = M - Size;
+ else if (M - Size != V2BroadcastIdx)
+ return false;
+ } else if (M >= 0) {
+ if (V1BroadcastIdx == -1)
+ V1BroadcastIdx = M;
+ else if (M != V1BroadcastIdx)
+ return false;
+ }
+ return true;
+ };
+ if (DoBothBroadcast())
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
+ DAG);
+
+ // If the inputs all stem from a single 128-bit lane of each input, then we
+ // split them rather than blending because the split will decompose to
+ // unusually few instructions.
+ int LaneCount = VT.getSizeInBits() / 128;
+ int LaneSize = Size / LaneCount;
+ SmallBitVector LaneInputs[2];
+ LaneInputs[0].resize(LaneCount, false);
+ LaneInputs[1].resize(LaneCount, false);
+ for (int i = 0; i < Size; ++i)
+ if (Mask[i] >= 0)
+ LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
+ if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
+ return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+
+ // Otherwise, just fall back to decomposed shuffles and a blend. This requires
+ // that the decomposed single-input shuffles don't end up here.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
+}
+
+/// \brief Lower a vector shuffle crossing multiple 128-bit lanes as
+/// a permutation and blend of those lanes.
+///
+/// This essentially blends the out-of-lane inputs to each lane into the lane
+/// from a permuted copy of the vector. This lowering strategy results in four
+/// instructions in the worst case for a single-input cross lane shuffle which
+/// is lower than any other fully general cross-lane shuffle strategy I'm aware
+/// of. Special cases for each particular shuffle pattern should be handled
+/// prior to trying this lowering.
+static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ // FIXME: This should probably be generalized for 512-bit vectors as well.
+ assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!");
+ int LaneSize = Mask.size() / 2;
+
+ // If there are only inputs from one 128-bit lane, splitting will in fact be
+ // less expensive. The flags track whether the given lane contains an element
+ // that crosses to another lane.
+ bool LaneCrossing[2] = {false, false};
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
+ LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
+ if (!LaneCrossing[0] || !LaneCrossing[1])
+ return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+
+ if (isSingleInputShuffleMask(Mask)) {
+ SmallVector<int, 32> FlippedBlendMask;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ FlippedBlendMask.push_back(
+ Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize)
+ ? Mask[i]
+ : Mask[i] % LaneSize +
+ (i / LaneSize) * LaneSize + Size));
+
+ // Flip the vector, and blend the results which should now be in-lane. The
+ // VPERM2X128 mask uses the low 2 bits for the low source and bits 4 and
+ // 5 for the high source. The value 3 selects the high half of source 2 and
+ // the value 2 selects the low half of source 2. We only use source 2 to
+ // allow folding it into a memory operand.
+ unsigned PERMMask = 3 | 2 << 4;
+ SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT),
+ V1, DAG.getConstant(PERMMask, MVT::i8));
+ return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
+ }
+
+ // This now reduces to two single-input shuffles of V1 and V2 which at worst
+ // will be handled by the above logic and a blend of the results, much like
+ // other patterns in AVX.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering 2-lane 128-bit shuffles.
+static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ // Blends are faster and handle all the non-lane-crossing cases.
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+ // Check for patterns which can be matched with a single insert of a 128-bit
+ // subvector.
+ if (isShuffleEquivalent(Mask, 0, 1, 0, 1) ||
+ isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+ Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+ if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
+ DAG.getIntPtrConstant(2));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+
+ // Otherwise form a 128-bit permutation.
+ // FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
+ unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4;
+ return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
+ DAG.getConstant(PermMask, MVT::i8));
+}
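
The VPERM2X128 immediate built just above uses one nibble per destination half: the low two bits of each nibble pick a 128-bit half numbered 0-3 across the concatenated (V1, V2) pair. A quick standalone sketch of that selector math (the helper name is invented here):

  #include <cassert>

  // Hypothetical helper: given a v4 shuffle mask whose pairs stay within
  // 128-bit halves (so Mask[0] and Mask[2] identify the source half of each
  // destination half), build the VPERM2X128 immediate.
  static unsigned vperm2x128Imm(const int Mask[4]) {
    return (unsigned)(Mask[0] / 2) | ((unsigned)(Mask[2] / 2) << 4);
  }

  int main() {
    // <2, 3, 4, 5>: low half of the result is V1's high half (selector 1),
    // high half is V2's low half (selector 2) -> immediate 0x21.
    const int M[4] = {2, 3, 4, 5};
    assert(vperm2x128Imm(M) == 0x21);
    return 0;
  }
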
+
+/// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
+///
+/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
+/// isn't available.
+static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ SmallVector<int, 4> WidenedMask;
+ if (canWidenShuffleElements(Mask, WidenedMask))
+ return lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, Subtarget,
+ DAG);
+
+ if (isSingleInputShuffleMask(Mask)) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4f64, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
+ // Non-half-crossing single input shuffles can be lowered with an
+ // interleaved permutation.
+ unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
+ ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
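+ // One bit per element selects the high double within its 128-bit lane, e.g.
+ // the in-lane swap {1, 0, 3, 2} produces the immediate 0b0101.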
+ return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
+ DAG.getConstant(VPERMILPMask, MVT::i8));
+ }
+
+ // With AVX2 we have direct support for this permutation.
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // Otherwise, fall back.
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
+ DAG);
+ }
+
+ // X86 has dedicated unpack instructions that can handle specific blend
+ // operations: UNPCKH and UNPCKL.
+ if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
+
+ // If we have a single input to the zero element, insert that into V1 if we
+ // can do so cheaply.
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
+ if (NumV2Elements == 1 && Mask[0] >= 4)
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ MVT::v4f64, DL, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check if the blend happens to exactly fit the pattern of SHUFPD.
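+ // SHUFPD on a 256-bit vector interleaves per lane: result elements 0 and 2
+ // come from V1, elements 1 and 3 from V2, and one immediate bit per element
+ // picks the low or high double within the corresponding lane.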
+ if ((Mask[0] == -1 || Mask[0] < 2) &&
+ (Mask[1] == -1 || (Mask[1] >= 4 && Mask[1] < 6)) &&
+ (Mask[2] == -1 || (Mask[2] >= 2 && Mask[2] < 4)) &&
+ (Mask[3] == -1 || Mask[3] >= 6)) {
+ unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) |
+ ((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3);
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+ }
+ if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) &&
+ (Mask[1] == -1 || Mask[1] < 2) &&
+ (Mask[2] == -1 || Mask[2] >= 6) &&
+ (Mask[3] == -1 || (Mask[3] >= 2 && Mask[3] < 4))) {
+ unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) |
+ ((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3);
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+ }
+
+ // If we have AVX2 then we always want to lower with a blend because at v4 we
+ // can fully permute the elements.
+ if (Subtarget->hasAVX2())
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
+ Mask, DAG);
+
+ // Otherwise fall back on generic lowering.
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 4-lane 64-bit integer shuffles.
+///
+/// This routine is only called when we have AVX2 and thus a reasonable
+/// instruction set for v4i64 shuffling.
+static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+ assert(Subtarget->hasAVX2() && "We can only lower v4i64 with AVX2!");
+
+ SmallVector<int, 4> WidenedMask;
+ if (canWidenShuffleElements(Mask, WidenedMask))
+ return lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, Subtarget,
+ DAG);
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4i64, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // When the shuffle is mirrored between the 128-bit lanes of the unit, we can
+ // use lower latency instructions that will operate on both 128-bit lanes.
+ SmallVector<int, 2> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
+ if (isSingleInputShuffleMask(Mask)) {
+ int PSHUFDMask[] = {-1, -1, -1, -1};
+ for (int i = 0; i < 2; ++i)
+ if (RepeatedMask[i] >= 0) {
+ PSHUFDMask[2 * i] = 2 * RepeatedMask[i];
+ PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1;
+ }
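+ // E.g., the lane-repeated v4i64 mask {1, 0, 3, 2} has RepeatedMask {1, 0},
+ // which expands to the per-lane dword pattern {2, 3, 0, 1} for the PSHUFD
+ // below.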
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v4i64,
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+ }
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
+ }
+
+ // AVX2 provides a direct instruction for permuting a single input across
+ // lanes.
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // Otherwise fall back on generic blend lowering.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
+ Mask, DAG);
+}
+
+/// \brief Handle lowering of 8-lane 32-bit floating point shuffles.
+///
+/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
+/// isn't available.
+static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8f32, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // If the shuffle mask is repeated in each 128-bit lane, we have many more
+ // options to efficiently lower the shuffle.
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
+ assert(RepeatedMask.size() == 4 &&
+ "Repeated masks must be half the mask width!");
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
+
+ // Otherwise, fall back to a SHUFPS sequence. Here it is important that we
+ // have already handled any direct blends. We also need to squash the
+ // repeated mask into a simulated v4f32 mask.
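+ // The repeated mask uses indices 8 and above for V2's elements, so e.g.
+ // {0, 9, 2, 11} is squashed to the two-input v4f32-style mask {0, 5, 2, 7}.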
+ for (int i = 0; i < 4; ++i)
+ if (RepeatedMask[i] >= 8)
+ RepeatedMask[i] -= 4;
+ return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2,
+ DAG);
+ }
+
+ // If we have a single input shuffle with different shuffle patterns in the
+ // two 128-bit lanes use the variable mask to VPERMILPS.
+ if (isSingleInputShuffleMask(Mask)) {
+ SDValue VPermMask[8];
+ for (int i = 0; i < 8; ++i)
+ VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
+ : DAG.getConstant(Mask[i], MVT::i32);
+ if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
+ return DAG.getNode(
+ X86ISD::VPERMILPV, DL, MVT::v8f32, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask));
+
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8f32,
+ DAG.getNode(ISD::BUILD_VECTOR, DL,
+ MVT::v8i32, VPermMask)),
+ V1);
+
+ // Otherwise, fall back.
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
+ DAG);
+ }
+
+ // If we have AVX2 then we always want to lower with a blend because at v8 we
+ // can fully permute the elements.
+ if (Subtarget->hasAVX2())
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
+ Mask, DAG);
+
+ // Otherwise fall back on generic lowering.
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 8-lane 32-bit integer shuffles.
+///
+/// This routine is only called when we have AVX2 and thus a reasonable
+/// instruction set for v8i32 shuffling.
+static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!");
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8i32, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // If the shuffle mask is repeated in each 128-bit lane we can use more
+ // efficient instructions that mirror the shuffles across the two 128-bit
+ // lanes.
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask)) {
+ assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
+ getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
+ }
+
+ // If the shuffle patterns aren't repeated but it is a single input, directly
+ // generate a cross-lane VPERMD instruction.
+ if (isSingleInputShuffleMask(Mask)) {
+ SDValue VPermMask[8];
+ for (int i = 0; i < 8; ++i)
+ VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
+ : DAG.getConstant(Mask[i], MVT::i32);
+ return DAG.getNode(
+ X86ISD::VPERMV, DL, MVT::v8i32,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
+ }
+
+ // Otherwise fall back on generic blend lowering.
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
+ Mask, DAG);
+}
+
+/// \brief Handle lowering of 16-lane 16-bit integer shuffles.
+///
+/// This routine is only called when we have AVX2 and thus a reasonable
+/// instruction set for v16i16 shuffling.
+static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+ assert(Subtarget->hasAVX2() && "We can only lower v16i16 with AVX2!");
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v16i16, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // There are no generalized cross-lane shuffle operations available on i16
+ // element types.
+ if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
+ Mask, DAG);
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask,
+ // First 128-bit lane:
+ 0, 16, 1, 17, 2, 18, 3, 19,
+ // Second 128-bit lane:
+ 8, 24, 9, 25, 10, 26, 11, 27))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
+ if (isShuffleEquivalent(Mask,
+ // First 128-bit lane:
+ 4, 20, 5, 21, 6, 22, 7, 23,
+ // Second 128-bit lane:
+ 12, 28, 13, 29, 14, 30, 15, 31))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2);
+
+ if (isSingleInputShuffleMask(Mask)) {
+ SDValue PSHUFBMask[32];
+ for (int i = 0; i < 16; ++i) {
+ if (Mask[i] == -1) {
+ PSHUFBMask[2 * i] = PSHUFBMask[2 * i + 1] = DAG.getUNDEF(MVT::i8);
+ continue;
+ }
+
+ int M = i < 8 ? Mask[i] : Mask[i] - 8;
+ assert(M >= 0 && M < 8 && "Invalid single-input mask!");
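+ // Each 16-bit element expands to a pair of byte selects within its own
+ // 128-bit half, e.g. a lane-local element index of 3 becomes bytes {6, 7}.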
+ PSHUFBMask[2 * i] = DAG.getConstant(2 * M, MVT::i8);
+ PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, MVT::i8);
+ }
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v16i16,
+ DAG.getNode(
+ X86ISD::PSHUFB, DL, MVT::v32i8,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
+ }
+
+ // Otherwise fall back on generic lowering.
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 32-lane 8-bit integer shuffles.
+///
+/// This routine is only called when we have AVX2 and thus a reasonable
+/// instruction set for v32i8 shuffling.
+static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
+ assert(Subtarget->hasAVX2() && "We can only lower v32i8 with AVX2!");
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v32i8, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // There are no generalized cross-lane shuffle operations available on i8
+ // element types.
+ if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
+ return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2,
+ Mask, DAG);
+
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
+ Subtarget, DAG))
+ return Blend;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ // Note that these are repeated 128-bit lane unpacks, not unpacks across all
+ // 256-bit lanes.
+ if (isShuffleEquivalent(
+ Mask,
+ // First 128-bit lane:
+ 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
+ // Second 128-bit lane:
+ 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
+ if (isShuffleEquivalent(
+ Mask,
+ // First 128-bit lane:
+ 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
+ // Second 128-bit lane:
+ 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2);
+
+ if (isSingleInputShuffleMask(Mask)) {
+ SDValue PSHUFBMask[32];
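+ // PSHUFB indexes within each 128-bit lane, so byte indices referring to the
+ // high half (16..31) are rebased to 0..15 here; cross-lane masks were
+ // already rejected above.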
+ for (int i = 0; i < 32; ++i)
+ PSHUFBMask[i] =
+ Mask[i] < 0
+ ? DAG.getUNDEF(MVT::i8)
+ : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, MVT::i8);
+
+ return DAG.getNode(
+ X86ISD::PSHUFB, DL, MVT::v32i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask));
+ }
+
+ // Otherwise fall back on generic lowering.
+ return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
+}
+
+/// \brief High-level routine to lower various 256-bit x86 vector shuffles.
+///
+/// This routine either breaks down the specific type of a 256-bit x86 vector
+/// shuffle or splits it into two 128-bit shuffles and fuses the results back
+/// together based on the available instructions.
+static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ MVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+
+ // There is a really nice hard cut-over between AVX1 and AVX2 that means we can
+ // check for those subtargets here and avoid much of the subtarget querying in
+ // the per-vector-type lowering routines. With AVX1 we have essentially *zero*
+ // ability to manipulate a 256-bit vector with integer types. Since we'll use
+ // floating point types there eventually, just immediately cast everything to
+ // a float and operate entirely in that domain.
+ if (VT.isInteger() && !Subtarget->hasAVX2()) {
+ int ElementBits = VT.getScalarSizeInBits();
+ if (ElementBits < 32)
+ // No floating point type available, decompose into 128-bit vectors.
+ return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
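+ // E.g., v8i32 is handled as v8f32 and v4i64 as v4f64 below; v16i16 and
+ // v32i8 have no matching float element type and are split above.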
+
+ MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
+ VT.getVectorNumElements());
+ V1 = DAG.getNode(ISD::BITCAST, DL, FpVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, FpVT, V2);
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
+ }
+
+ switch (VT.SimpleTy) {
+ case MVT::v4f64:
+ return lowerV4F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v4i64:
+ return lowerV4I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8f32:
+ return lowerV8F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8i32:
+ return lowerV8I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16i16:
+ return lowerV16I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v32i8:
+ return lowerV32I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
+
+ default:
+ llvm_unreachable("Not a valid 256-bit x86 vector type!");
+ }
+}
+
+/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
+static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
+static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
+static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
+static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
+static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
+ assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v32i16, V1, V2, Mask, DAG);
+}
+
+/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
+static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
+ assert(Subtarget->hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
+
+ // FIXME: Implement direct support for this type!
+ return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
+}
+
+/// \brief High-level routine to lower various 512-bit x86 vector shuffles.
+///
+/// This routine either breaks down the specific type of a 512-bit x86 vector
+/// shuffle or splits it into two 256-bit shuffles and fuses the results back
+/// together based on the available instructions.
+static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ MVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Subtarget->hasAVX512() &&
+ "Cannot lower 512-bit vectors w/ basic ISA!");
+
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(VT.SimpleTy, DL, V1,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+
+ // Dispatch to each element type for lowering. If we don't have support for
+ // specific element type shuffles at 512 bits, immediately split them and
+ // lower them. Each lowering routine of a given type is allowed to assume that
+ // the requisite ISA extensions for that element type are available.
+ switch (VT.SimpleTy) {
+ case MVT::v8f64:
+ return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16f32:
+ return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8i64:
+ return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16i32:
+ return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v32i16:
+ if (Subtarget->hasBWI())
+ return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ break;
+ case MVT::v64i8:
+ if (Subtarget->hasBWI())
+ return lowerV64I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ break;
+
+ default:
+ llvm_unreachable("Not a valid 512-bit x86 vector type!");
+ }
+
+ // Otherwise fall back on splitting.
+ return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+}
+
/// \brief Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
@@ -7936,7 +10636,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
// but in some cases the first operand may be transformed to UNDEF.
// In this case we should just commute the node.
if (V1IsUndef)
- return CommuteVectorShuffle(SVOp, DAG);
+ return DAG.getCommutedVectorShuffle(*SVOp);
// Check for non-undef masks pointing at an undef vector and make the masks
// undef as well. This makes it easier to match the shuffle based solely on
@@ -7951,22 +10651,25 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
}
- // For integer vector shuffles, try to collapse them into a shuffle of fewer
- // lanes but wider integers. We cap this to not form integers larger than i64
- // but it might be interesting to form i128 integers to handle flipping the
- // low and high halves of AVX 256-bit vectors.
- if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
- areAdjacentMasksSequential(Mask)) {
- SmallVector<int, 8> NewMask;
- for (int i = 0, Size = Mask.size(); i < Size; i += 2)
- NewMask.push_back(Mask[i] / 2);
- MVT NewVT =
- MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2),
- VT.getVectorNumElements() / 2);
- V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask));
+ // Try to collapse shuffles into using a vector type with fewer elements but
+ // wider element types. We cap this to not form integers or floating point
+ // elements wider than 64 bits, but it might be interesting to form i128
+ // integers to handle flipping the low and high halves of AVX 256-bit vectors.
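+ // E.g., a v4i32 shuffle whose mask pairs up adjacent elements, such as
+ // {2, 3, 0, 1}, can be widened to a v2i64 shuffle with mask {1, 0}.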
+ SmallVector<int, 16> WidenedMask;
+ if (VT.getScalarSizeInBits() < 64 &&
+ canWidenShuffleElements(Mask, WidenedMask)) {
+ MVT NewEltVT = VT.isFloatingPoint()
+ ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
+ : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
+ MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
+ // Make sure that the new vector type is legal. For example, v2f64 isn't
+ // legal on SSE1.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
+ }
}
int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0;
@@ -7982,10 +10685,12 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
// V2. This allows us to match the shuffle pattern strictly on how many
// elements come from V1 without handling the symmetric cases.
if (NumV2Elements > NumV1Elements)
- return CommuteVectorShuffle(SVOp, DAG);
+ return DAG.getCommutedVectorShuffle(*SVOp);
// When the number of V1 and V2 elements are the same, try to minimize the
- // number of uses of V2 in the low half of the vector.
+ // number of uses of V2 in the low half of the vector. When that is tied,
+ // ensure that the sum of indices for V1 is equal to or lower than the sum
+ // of indices for V2.
if (NumV1Elements == NumV2Elements) {
int LowV1Elements = 0, LowV2Elements = 0;
for (int M : SVOp->getMask().slice(0, NumElements / 2))
@@ -7993,14 +10698,32 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
++LowV2Elements;
else if (M >= 0)
++LowV1Elements;
- if (LowV2Elements > LowV1Elements)
- return CommuteVectorShuffle(SVOp, DAG);
+ if (LowV2Elements > LowV1Elements) {
+ return DAG.getCommutedVectorShuffle(*SVOp);
+ } else if (LowV2Elements == LowV1Elements) {
+ int SumV1Indices = 0, SumV2Indices = 0;
+ for (int i = 0, Size = SVOp->getMask().size(); i < Size; ++i)
+ if (SVOp->getMask()[i] >= NumElements)
+ SumV2Indices += i;
+ else if (SVOp->getMask()[i] >= 0)
+ SumV1Indices += i;
+ if (SumV2Indices < SumV1Indices)
+ return DAG.getCommutedVectorShuffle(*SVOp);
+ }
}
// For each vector width, delegate to a specialized lowering routine.
if (VT.getSizeInBits() == 128)
return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+ if (VT.getSizeInBits() == 256)
+ return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+
+ // Force AVX-512 vectors to be scalarized for now.
+ // FIXME: Implement AVX-512 support!
+ if (VT.getSizeInBits() == 512)
+ return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+
llvm_unreachable("Unimplemented!");
}
@@ -9060,6 +11783,20 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
To = V2;
DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
Mask.begin();
+
+ // If we have 1 element from each vector, we have to check if we're
+ // changing V1's element's place. If so, we're done. Otherwise, we
+ // should assume we're changing V2's element's place and behave
+ // accordingly.
+ int FromV2 = std::count_if(Mask.begin(), Mask.end(), FromV2Predicate);
+ assert(DestIndex <= INT32_MAX && "truncated destination index");
+ if (FromV1 == FromV2 &&
+ static_cast<int>(DestIndex) == Mask[DestIndex] % 4) {
+ From = V2;
+ To = V1;
+ DestIndex =
+ std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
+ }
} else {
assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
"More than one element from V1 and from V2, or no elements from one "
@@ -9071,6 +11808,8 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
}
+ // Get an index into the source vector in the range [0,4) (the mask is
+ // in the range [0,8) because it can address V1 and V2)
unsigned SrcIndex = Mask[DestIndex] % 4;
if (MayFoldLoad(From)) {
// Trivial case, when From comes from a load and is only used by the
@@ -9155,37 +11894,6 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
return SDValue();
- // Simplify the operand as it's prepared to be fed into shuffle.
- unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
- if (V1.getOpcode() == ISD::BITCAST &&
- V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
- V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- V1.getOperand(0).getOperand(0)
- .getSimpleValueType().getSizeInBits() == SignificantBits) {
- // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
- SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
- ConstantSDNode *CIdx =
- dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
- // If it's foldable, i.e. normal load with single use, we will let code
- // selection to fold it. Otherwise, we will short the conversion sequence.
- if (CIdx && CIdx->getZExtValue() == 0 &&
- (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
- MVT FullVT = V.getSimpleValueType();
- MVT V1VT = V1.getSimpleValueType();
- if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) {
- // The "ext_vec_elt" node is wider than the result node.
- // In this case we should extract subvector from V.
- // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
- unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits();
- MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(),
- FullVT.getVectorNumElements()/Ratio);
- V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
- DAG.getIntPtrConstant(0));
- }
- V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V);
- }
- }
-
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
}
@@ -9278,7 +11986,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// but in some cases the first operand may be transformed to UNDEF.
// In this case we should just commute the node.
if (V1IsUndef)
- return CommuteVectorShuffle(SVOp, DAG);
+ return DAG.getCommutedVectorShuffle(*SVOp);
// Vector shuffle lowering takes 3 steps:
//
@@ -9335,7 +12043,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
+ return getTargetShuffleNode(X86ISD::VPERMILPI, dl, VT, V1, TargetMask,
DAG);
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
@@ -9347,6 +12055,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
getShufflePALIGNRImmediate(SVOp),
DAG);
+ if (isVALIGNMask(M, VT, Subtarget))
+ return getTargetShuffleNode(X86ISD::VALIGN, dl, VT, V1, V2,
+ getShuffleVALIGNImmediate(SVOp),
+ DAG);
+
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
@@ -9390,7 +12103,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ShouldXformToMOVHLPS(M, VT) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), M, VT))
- return CommuteVectorShuffle(SVOp, DAG);
+ return DAG.getCommutedVectorShuffle(*SVOp);
if (isShift) {
// No better options. Use a vshldq / vsrldq.
@@ -9462,7 +12175,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Normalize the node to match x86 shuffle ops if needed
if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true)))
- return CommuteVectorShuffle(SVOp, DAG);
+ return DAG.getCommutedVectorShuffle(*SVOp);
// The checks below are all present in isShuffleMaskLegal, but they are
// inlined here right now to enable us to directly emit target specific
@@ -9512,7 +12225,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
- return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
+ return getTargetShuffleNode(X86ISD::VPERMILPI, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
}
@@ -9631,9 +12344,10 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
return true;
}
-// Try to lower a vselect node into a simple blend instruction.
-static SDValue LowerVSELECTtoBlend(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+/// \brief Try to lower a VSELECT instruction to an immediate-controlled blend
+/// instruction.
+static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
@@ -9675,7 +12389,14 @@ static SDValue LowerVSELECTtoBlend(SDValue Op, const X86Subtarget *Subtarget,
}
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
- SDValue BlendOp = LowerVSELECTtoBlend(Op, Subtarget, DAG);
+ // A vselect where all conditions and data are constants can be optimized into
+ // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
+ return SDValue();
+
+ SDValue BlendOp = lowerVSELECTtoBLENDI(Op, Subtarget, DAG);
if (BlendOp.getNode())
return BlendOp;
@@ -9688,6 +12409,8 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
break;
case MVT::v8i16:
case MVT::v16i16:
+ if (Subtarget->hasBWI() && Subtarget->hasVLX())
+ break;
return SDValue();
}
@@ -9906,59 +12629,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
}
-static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- MVT EltVT = VT.getVectorElementType();
- SDLoc dl(Op);
-
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
-
- if (!VT.is128BitVector())
- return SDValue();
-
- if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
- isa<ConstantSDNode>(N2)) {
- unsigned Opc;
- if (VT == MVT::v8i16)
- Opc = X86ISD::PINSRW;
- else if (VT == MVT::v16i8)
- Opc = X86ISD::PINSRB;
- else
- Opc = X86ISD::PINSRB;
-
- // Transform it so it match pinsr{b,w} which expects a GR32 as its second
- // argument.
- if (N1.getValueType() != MVT::i32)
- N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
- if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
- return DAG.getNode(Opc, dl, VT, N0, N1, N2);
- }
-
- if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
- // Bits [7:6] of the constant are the source select. This will always be
- // zero here. The DAG Combiner may combine an extract_elt index into these
- // bits. For example (insert (extract, 3), 2) could be matched by putting
- // the '3' into bits [7:6] of X86ISD::INSERTPS.
- // Bits [5:4] of the constant are the destination select. This is the
- // value of the incoming immediate.
- // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
- // combine either bitwise AND or insert of float 0.0 to set these bits.
- N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
- // Create this as a scalar to vector..
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
- }
-
- if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
- // PINSR* works with constant index.
- return Op;
- }
- return SDValue();
-}
-
/// Insert one bit to mask vector, like v16i1 or v8i1.
/// AVX-512 feature.
SDValue
@@ -9993,11 +12663,12 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(MaxSift - IdxVal, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
-SDValue
-X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+
+SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
-
+
if (EltVT == MVT::i1)
return InsertBitToMaskVector(Op, DAG);
@@ -10005,20 +12676,20 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+ auto *N2C = cast<ConstantSDNode>(N2);
+ unsigned IdxVal = N2C->getZExtValue();
- // If this is a 256-bit vector result, first extract the 128-bit vector,
- // insert the element into the extracted half and then place it back.
+ // If the vector is wider than 128 bits, extract the 128-bit subvector, insert
+ // into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) {
- if (!isa<ConstantSDNode>(N2))
- return SDValue();
-
// Get the desired 128-bit vector half.
- unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired half.
- unsigned NumEltsIn128 = 128/EltVT.getSizeInBits();
- unsigned IdxIn128 = IdxVal - (IdxVal/NumEltsIn128) * NumEltsIn128;
+ unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
+ unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, MVT::i32));
@@ -10026,20 +12697,60 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
// Insert the changed part back to the 256-bit vector
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
}
+ assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
- if (Subtarget->hasSSE41())
- return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Subtarget->hasSSE41()) {
+ if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) {
+ unsigned Opc;
+ if (VT == MVT::v8i16) {
+ Opc = X86ISD::PINSRW;
+ } else {
+ assert(VT == MVT::v16i8);
+ Opc = X86ISD::PINSRB;
+ }
+
+ // Transform it so it matches pinsr{b,w} which expects a GR32 as its second
+ // argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(IdxVal);
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ }
+
+ if (EltVT == MVT::f32) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
+ N2 = DAG.getIntPtrConstant(IdxVal << 4);
+ // Create this as a scalar to vector.
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ }
+
+ if (EltVT == MVT::i32 || EltVT == MVT::i64) {
+ // PINSR* works with constant index.
+ return Op;
+ }
+ }
if (EltVT == MVT::i8)
return SDValue();
- if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
+ if (EltVT.getSizeInBits() == 16) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ N2 = DAG.getIntPtrConstant(IdxVal);
return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
}
return SDValue();
@@ -10352,6 +13063,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
MFI->setAdjustsStack(true);
+ MFI->setHasCalls(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
@@ -10585,7 +13297,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->is64Bit())
IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain,
IDX, MachinePointerInfo(), MVT::i32,
- false, false, 0);
+ false, false, false, 0);
else
IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
false, false, false, 0);
@@ -10669,10 +13381,18 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
+ SDLoc dl(Op);
- if (SrcVT.isVector())
+ if (SrcVT.isVector()) {
+ if (SrcVT.getVectorElementType() == MVT::i1) {
+ MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT,
+ Op.getOperand(0)));
+ }
return SDValue();
-
+ }
+
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
@@ -10685,7 +13405,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Op;
}
- SDLoc dl(Op);
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
@@ -10872,19 +13591,135 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
return Sub;
}
+static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // The algorithm is the following:
+ // #ifdef __SSE4_1__
+ // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
+ // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
+ // (uint4) 0x53000000, 0xaa);
+ // #else
+ // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
+ // uint4 hi = (v >> 16) | (uint4) 0x53000000;
+ // #endif
+ // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
+ // return (float4) lo + fhi;
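+ // Bit-ORing the low 16 bits into 0x4b000000 yields the float 2^23 + lo, and
+ // ORing the high 16 bits into 0x53000000 yields 2^39 + hi * 2^16; subtracting
+ // (2^39 + 2^23) from the latter and adding the former reconstructs
+ // hi * 2^16 + lo exactly.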
+
+ SDLoc DL(Op);
+ SDValue V = Op->getOperand(0);
+ EVT VecIntVT = V.getValueType();
+ bool Is128 = VecIntVT == MVT::v4i32;
+ EVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
+ // If we convert to something else than the supported type, e.g., to v4f64,
+ // abort early.
+ if (VecFloatVT != Op->getValueType(0))
+ return SDValue();
+
+ unsigned NumElts = VecIntVT.getVectorNumElements();
+ assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
+ "Unsupported custom type");
+ assert(NumElts <= 8 && "The size of the constant array must be fixed");
+
+ // In the #ifdef/#else code, we have in common:
+ // - The vector of constants:
+ // -- 0x4b000000
+ // -- 0x53000000
+ // - A shift:
+ // -- v >> 16
+
+ // Create the splat vector for 0x4b000000.
+ SDValue CstLow = DAG.getConstant(0x4b000000, MVT::i32);
+ SDValue CstLowArray[] = {CstLow, CstLow, CstLow, CstLow,
+ CstLow, CstLow, CstLow, CstLow};
+ SDValue VecCstLow = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
+ makeArrayRef(&CstLowArray[0], NumElts));
+ // Create the splat vector for 0x53000000.
+ SDValue CstHigh = DAG.getConstant(0x53000000, MVT::i32);
+ SDValue CstHighArray[] = {CstHigh, CstHigh, CstHigh, CstHigh,
+ CstHigh, CstHigh, CstHigh, CstHigh};
+ SDValue VecCstHigh = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
+ makeArrayRef(&CstHighArray[0], NumElts));
+
+ // Create the right shift.
+ SDValue CstShift = DAG.getConstant(16, MVT::i32);
+ SDValue CstShiftArray[] = {CstShift, CstShift, CstShift, CstShift,
+ CstShift, CstShift, CstShift, CstShift};
+ SDValue VecCstShift = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT,
+ makeArrayRef(&CstShiftArray[0], NumElts));
+ SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift);
+
+ SDValue Low, High;
+ if (Subtarget.hasSSE41()) {
+ EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
+ // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
+ SDValue VecCstLowBitcast =
+ DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstLow);
+ SDValue VecBitcast = DAG.getNode(ISD::BITCAST, DL, VecI16VT, V);
+ // Low will be bitcasted right away, so do not bother bitcasting back to its
+ // original type.
+ Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
+ VecCstLowBitcast, DAG.getConstant(0xaa, MVT::i32));
+ // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
+ // (uint4) 0x53000000, 0xaa);
+ SDValue VecCstHighBitcast =
+ DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstHigh);
+ SDValue VecShiftBitcast =
+ DAG.getNode(ISD::BITCAST, DL, VecI16VT, HighShift);
+ // High will be bitcasted right away, so do not bother bitcasting back to
+ // its original type.
+ High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
+ VecCstHighBitcast, DAG.getConstant(0xaa, MVT::i32));
+ } else {
+ SDValue CstMask = DAG.getConstant(0xffff, MVT::i32);
+ SDValue VecCstMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, CstMask,
+ CstMask, CstMask, CstMask);
+ // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
+ SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask);
+ Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow);
+
+ // uint4 hi = (v >> 16) | (uint4) 0x53000000;
+ High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh);
+ }
+
+ // Create the vector constant for -(0x1.0p39f + 0x1.0p23f).
+ SDValue CstFAdd = DAG.getConstantFP(
+ APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), MVT::f32);
+ SDValue CstFAddArray[] = {CstFAdd, CstFAdd, CstFAdd, CstFAdd,
+ CstFAdd, CstFAdd, CstFAdd, CstFAdd};
+ SDValue VecCstFAdd = DAG.getNode(ISD::BUILD_VECTOR, DL, VecFloatVT,
+ makeArrayRef(&CstFAddArray[0], NumElts));
+
+ // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
+ SDValue HighBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, High);
+ SDValue FHigh =
+ DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
+ // return (float4) lo + fhi;
+ SDValue LowBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, Low);
+ return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
+}
+
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
MVT SVT = N0.getSimpleValueType();
SDLoc dl(Op);
- assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 ||
- SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
- "Custom UINT_TO_FP is not supported!");
-
- MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
- return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
+ switch (SVT.SimpleTy) {
+ default:
+ llvm_unreachable("Custom UINT_TO_FP is not supported!");
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v8i8:
+ case MVT::v8i16: {
+ MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
+ }
+ case MVT::v4i32:
+ case MVT::v8i32:
+ return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget);
+ }
+ llvm_unreachable(nullptr);
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
@@ -10970,7 +13805,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
FudgePtr, MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, 4);
+ MVT::f32, false, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
@@ -11184,12 +14019,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::i1) {
assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
"Invalid scalar TRUNCATE operation");
- if (InVT == MVT::i32)
+ if (InVT.getSizeInBits() >= 32)
return SDValue();
- if (InVT.getSizeInBits() == 64)
- In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::i32, In);
- else if (InVT.getSizeInBits() < 32)
- In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
+ In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
return DAG.getNode(ISD::TRUNCATE, DL, VT, In);
}
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
@@ -11367,58 +14199,47 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
In, DAG.getUNDEF(SVT)));
}
-static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
- LLVMContext *Context = DAG.getContext();
- SDLoc dl(Op);
- MVT VT = Op.getSimpleValueType();
- MVT EltVT = VT;
- unsigned NumElts = VT == MVT::f64 ? 2 : 4;
- if (VT.isVector()) {
- EltVT = VT.getVectorElementType();
- NumElts = VT.getVectorNumElements();
- }
- Constant *C;
- if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
- APInt(64, ~(1ULL << 63))));
- else
- C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
- APInt(32, ~(1U << 31))));
- C = ConstantVector::getSplat(NumElts, C);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
- if (VT.isVector()) {
- MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::AND, dl, ANDVT,
- DAG.getNode(ISD::BITCAST, dl, ANDVT,
- Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
- }
- return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
-}
+/// The only differences between FABS and FNEG are the mask and the logic op.
+/// FNEG also has a folding opportunity for FNEG(FABS(x)).
+static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
+ assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
+ "Wrong opcode for lowering FABS or FNEG.");
+
+ bool IsFABS = (Op.getOpcode() == ISD::FABS);
+
+ // If this is a FABS and it has an FNEG user, bail out to fold the combination
+ // into an FNABS. We'll lower the FABS after that if it is still in use.
+ if (IsFABS)
+ for (SDNode *User : Op->uses())
+ if (User->getOpcode() == ISD::FNEG)
+ return Op;
+
+ SDValue Op0 = Op.getOperand(0);
+ bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
-static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) {
- LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ // Assume scalar op for initialization; update for vector if needed.
+ // Note that there are no scalar bitwise logical SSE/AVX instructions, so we
+ // generate a 16-byte vector constant and logic op even for the scalar case.
+ // Using a 16-byte mask allows folding the load of the mask with the logic
+ // op, so it can save ~4 bytes of code size.
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
+ // decide if we should generate a 16-byte constant mask when we only need 4 or
+ // 8 bytes for the scalar case.
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
}
- Constant *C;
- if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
- APInt(64, 1ULL << 63)));
- else
- C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
- APInt(32, 1U << 31)));
+
+ unsigned EltBits = EltVT.getSizeInBits();
+ LLVMContext *Context = DAG.getContext();
+ // For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
+ APInt MaskElt =
+ IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
+ Constant *C = ConstantInt::get(*Context, MaskElt);
C = ConstantVector::getSplat(NumElts, C);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
@@ -11426,16 +14247,24 @@ static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) {
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
+
if (VT.isVector()) {
- MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
+ // For a vector, cast operands to a vector type, perform the logic op,
+ // and cast the result back to the original value type.
+ MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+ SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
+ SDValue Operand = IsFNABS ?
+ DAG.getNode(ISD::BITCAST, dl, VecVT, Op0.getOperand(0)) :
+ DAG.getNode(ISD::BITCAST, dl, VecVT, Op0);
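+ // AND with the 0x7f.. mask clears the sign bit (FABS), OR with the 0x80..
+ // mask sets it (FNABS), and XOR with the 0x80.. mask flips it (FNEG).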
+ unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR;
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::XOR, dl, XORVT,
- DAG.getNode(ISD::BITCAST, dl, XORVT,
- Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
+ DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
}
-
- return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+
+ // If not vector, then scalar.
+ unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
+ SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
+ return DAG.getNode(BitOp, dl, VT, Operand, Mask);
}
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
@@ -11529,8 +14358,7 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
}
-// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
-//
+// Check whether an OR'd tree is PTEST-able.
static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
@@ -11938,6 +14766,66 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
+/// The minimum architected relative accuracy is 2^-12. We need one
+/// Newton-Raphson step to have a good float result (24 bits of precision).
+SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const {
+ // FIXME: We should use instruction latency models to calculate the cost of
+ // each potential sequence, but this is very hard to do reliably because
+ // at least Intel's Core* chips have variable timing based on the number of
+ // significant digits in the divisor and/or sqrt operand.
+ if (!Subtarget->useSqrtEst())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ // SSE1 has rsqrtss and rsqrtps.
+ // TODO: Add support for AVX512 (v16f32).
+ // It is likely not profitable to do this for f64 because a double-precision
+ // rsqrt estimate with refinement on x86 prior to FMA requires at least 16
+ // instructions: convert to single, rsqrtss, convert back to double, refine
+ // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
+ // along with FMA, this could be a throughput win.
+ if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+ RefinementSteps = 1;
+ UseOneConstNR = false;
+ return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
+}
+
+/// The minimum architected relative accuracy is 2^-12. We need one
+/// Newton-Raphson step to have a good float result (24 bits of precision).
+SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
+ DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const {
+ // FIXME: We should use instruction latency models to calculate the cost of
+ // each potential sequence, but this is very hard to do reliably because
+ // at least Intel's Core* chips have variable timing based on the number of
+ // significant digits in the divisor.
+ if (!Subtarget->useReciprocalEst())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
+ // TODO: Add support for AVX512 (v16f32).
+ // It is likely not profitable to do this for f64 because a double-precision
+ // reciprocal estimate with refinement on x86 prior to FMA requires
+ // 15 instructions: convert to single, rcpss, convert back to double, refine
+ // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
+ // along with FMA, this could be a throughput win.
+ if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+ RefinementSteps = ReciprocalEstimateRefinementSteps;
+ return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
+}
+
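Written out in scalar form, the single Newton-Raphson refinement step that the comments in both estimate hooks refer to (Est0 stands for the roughly 12-bit hardware estimate; one step approximately doubles the number of correct bits, which is why one iteration is enough for f32):

// One Newton-Raphson step for 1/sqrt(a): x1 = x0 * (1.5 - 0.5 * a * x0 * x0).
float refineRsqrt(float A, float Est0) {
  return Est0 * (1.5f - 0.5f * A * Est0 * Est0);
}

// One Newton-Raphson step for 1/a: x1 = x0 * (2 - a * x0).
float refineRecip(float A, float Est0) {
  return Est0 * (2.0f - A * Est0);
}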
static bool isAllOnes(SDValue V) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
return C && C->isAllOnesValue();
@@ -12097,7 +14985,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
+ assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 &&
Op.getValueType().getScalarType() == MVT::i1 &&
"Cannot set masked compare for this operation");
@@ -12211,11 +15099,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
EVT OpVT = Op1.getValueType();
if (Subtarget->hasAVX512()) {
if (Op1.getValueType().is512BitVector() ||
+ (Subtarget->hasBWI() && Subtarget->hasVLX()) ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
// In AVX-512 architecture setcc returns mask with i1 elements,
- // But there is no compare instruction for i8 and i16 elements.
+    // but there is no compare instruction for i8 and i16 elements in KNL.
// We are not talking about 512-bit operands in this case, these
// types are illegal.
if (MaskResult &&
@@ -12721,18 +15610,40 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
-static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
+ MVT VTElt = VT.getVectorElementType();
+ MVT InVTElt = InVT.getVectorElementType();
SDLoc dl(Op);
+ // SKX processor
+ if ((InVTElt == MVT::i1) &&
+ (((Subtarget->hasBWI() && Subtarget->hasVLX() &&
+ VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) ||
+
+ ((Subtarget->hasBWI() && VT.is512BitVector() &&
+ VTElt.getSizeInBits() <= 16)) ||
+
+ ((Subtarget->hasDQI() && Subtarget->hasVLX() &&
+ VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
+
+ ((Subtarget->hasDQI() && VT.is512BitVector() &&
+ VTElt.getSizeInBits() >= 32))))
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
unsigned int NumElts = VT.getVectorNumElements();
+
if (NumElts != 8 && NumElts != 16)
return SDValue();
- if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
+ if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
+ return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+ }
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
@@ -12760,7 +15671,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SDLoc dl(Op);
if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
- return LowerSIGN_EXTEND_AVX512(Op, DAG);
+ return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG);
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
(VT != MVT::v8i32 || InVT != MVT::v8i16) &&
@@ -12803,6 +15714,210 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
+// Lower vector extended loads using a shuffle. If SSSE3 is not available we
+// may emit an illegal shuffle but the expansion is still better than scalar
+// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
+// we'll emit a shuffle and an arithmetic shift.
+// TODO: It is possible to support ZExt by zeroing the undef values during
+// the shuffle phase or after the shuffle.
+static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT RegVT = Op.getSimpleValueType();
+ assert(RegVT.isVector() && "We only custom lower vector sext loads.");
+ assert(RegVT.isInteger() &&
+ "We only custom lower integer vector sext loads.");
+
+ // Nothing useful we can do without SSE2 shuffles.
+ assert(Subtarget->hasSSE2() && "We only custom lower sext loads with SSE2.");
+
+ LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
+ SDLoc dl(Ld);
+ EVT MemVT = Ld->getMemoryVT();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned RegSz = RegVT.getSizeInBits();
+
+ ISD::LoadExtType Ext = Ld->getExtensionType();
+
+ assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)
+ && "Only anyext and sext are currently implemented.");
+ assert(MemVT != RegVT && "Cannot extend to the same type");
+ assert(MemVT.isVector() && "Must load a vector from memory");
+
+ unsigned NumElems = RegVT.getVectorNumElements();
+ unsigned MemSz = MemVT.getSizeInBits();
+ assert(RegSz > MemSz && "Register size must be greater than the mem size");
+
+ if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256()) {
+ // The only way in which we have a legal 256-bit vector result but not the
+ // integer 256-bit operations needed to directly lower a sextload is if we
+ // have AVX1 but not AVX2. In that case, we can always emit a sextload to
+ // a 128-bit vector and a normal sign_extend to 256-bits that should get
+ // correctly legalized. We do this late to allow the canonical form of
+ // sextload to persist throughout the rest of the DAG combiner -- it wants
+ // to fold together any extensions it can, and so will fuse a sign_extend
+ // of an sextload into a sextload targeting a wider value.
+ SDValue Load;
+ if (MemSz == 128) {
+ // Just switch this to a normal load.
+ assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, "
+ "it must be a legal 128-bit vector "
+ "type!");
+ Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment());
+ } else {
+ assert(MemSz < 128 &&
+ "Can't extend a type wider than 128 bits to a 256 bit vector!");
+ // Do an sext load to a 128-bit vector type. We want to use the same
+ // number of elements, but elements half as wide. This will end up being
+ // recursively lowered by this routine, but will succeed as we definitely
+ // have all the necessary features if we're using AVX1.
+ EVT HalfEltVT =
+ EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2);
+ EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems);
+ Load =
+ DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), MemVT, Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
+ }
+
+ // Replace chain users with the new chain.
+ assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+
+ // Finally, do a normal sign-extend to the desired register.
+ return DAG.getSExtOrTrunc(Load, dl, RegVT);
+ }
+
+ // All sizes must be a power of two.
+ assert(isPowerOf2_32(RegSz * MemSz * NumElems) &&
+ "Non-power-of-two elements are not custom lowered!");
+
+ // Attempt to load the original value using scalar loads.
+ // Find the largest scalar type that divides the total loaded size.
+ MVT SclrLoadTy = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
+ SclrLoadTy = Tp;
+ }
+ }
+
+  // On 32-bit systems, we can't save 64-bit integers. Try bitcasting to F64.
+ if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
+ (64 <= MemSz))
+ SclrLoadTy = MVT::f64;
+
+ // Calculate the number of scalar loads that we need to perform
+ // in order to load our vector from memory.
+ unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+
+ assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
+ "Can only lower sext loads with a single scalar load!");
+
+ unsigned loadRegZize = RegSz;
+ if (Ext == ISD::SEXTLOAD && RegSz == 256)
+ loadRegZize /= 2;
+
+  // Represent our vector as a sequence of elements of the largest scalar
+  // type that we can load.
+ EVT LoadUnitVecVT = EVT::getVectorVT(
+ *DAG.getContext(), SclrLoadTy, loadRegZize / SclrLoadTy.getSizeInBits());
+
+ // Represent the data using the same element type that is stored in
+  // memory. In practice, we "widen" MemVT.
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ loadRegZize / MemVT.getScalarType().getSizeInBits());
+
+ assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
+ "Invalid vector type");
+
+ // We can't shuffle using an illegal type.
+ assert(TLI.isTypeLegal(WideVecVT) &&
+ "We only lower types that form legal widened vector types");
+
+ SmallVector<SDValue, 8> Chains;
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment =
+ DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, TLI.getPointerTy());
+ SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
+
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ // Perform a single load.
+ SDValue ScalarLoad =
+ DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
+ Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
+ Chains.push_back(ScalarLoad.getValue(1));
+    // Build the vector from its first element using SCALAR_TO_VECTOR in order
+    // to avoid another round of DAG combining.
+ if (i == 0)
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
+ else
+ Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
+ ScalarLoad, DAG.getIntPtrConstant(i));
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ }
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+
+ // Bitcast the loaded value to a vector of the original element type, in
+ // the size of the target vector type.
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
+ unsigned SizeRatio = RegSz / MemSz;
+
+ if (Ext == ISD::SEXTLOAD) {
+ // If we have SSE4.1, we can directly emit a VSEXT node.
+ if (Subtarget->hasSSE41()) {
+ SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
+ return Sext;
+ }
+
+ // Otherwise we'll shuffle the small elements in the high bits of the
+ // larger type and perform an arithmetic shift. If the shift is not legal
+ // it's better to scalarize.
+ assert(TLI.isOperationLegalOrCustom(ISD::SRA, RegVT) &&
+ "We can't implement a sext load without an arithmetic right shift!");
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i * SizeRatio + SizeRatio - 1] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(
+ WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
+
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+
+ // Build the arithmetic shift.
+ unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
+ MemVT.getVectorElementType().getSizeInBits();
+ Shuff =
+ DAG.getNode(ISD::SRA, dl, RegVT, Shuff, DAG.getConstant(Amt, RegVT));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
+ return Shuff;
+ }
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i * SizeRatio] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
+
+ // Bitcast to the requested type.
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
+ return Shuff;
+}
+
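A scalar sketch of the shuffle-plus-arithmetic-shift idea used for the SEXTLOAD path above, for the concrete case of i8 memory elements extended to i32 register elements (SizeRatio == 4):

#include <cstdint>

// Sign-extend one i8 value to i32 the way the lowering does it: place the
// byte in the top of the 32-bit lane (the "shuffle"), then arithmetic-shift
// it back down so the sign bit is replicated. Assumes two's complement.
int32_t sextViaShift(int8_t Narrow) {
  uint32_t Lane = uint32_t(uint8_t(Narrow)) << 24; // shuffle into high byte
  return int32_t(Lane) >> 24;                      // arithmetic right shift
}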
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
// from the AND / OR.
@@ -13108,7 +16223,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
}
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
-// Calls to _alloca is needed to probe the stack when allocating more than 4k
+// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
@@ -13143,7 +16258,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- const TargetFrameLowering &TFI = *DAG.getTarget().getFrameLowering();
+ const TargetFrameLowering &TFI = *DAG.getSubtarget().getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
@@ -13166,7 +16281,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
- EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
+ EVT SPTy = getPointerTy();
if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -13184,7 +16299,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
}
const TargetRegisterClass *AddrRegClass =
- getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+ getRegClassFor(getPointerTy());
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
@@ -13193,7 +16308,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops1, dl);
} else {
SDValue Flag;
- unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+ const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
Flag = Chain.getValue(1);
@@ -13201,8 +16316,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
@@ -13475,112 +16590,178 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
-static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
- SDLoc dl(Op);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- switch (IntNo) {
- default: return SDValue(); // Don't custom lower most intrinsics.
- // Comparison intrinsics.
- case Intrinsic::x86_sse_comieq_ss:
- case Intrinsic::x86_sse_comilt_ss:
- case Intrinsic::x86_sse_comile_ss:
- case Intrinsic::x86_sse_comigt_ss:
- case Intrinsic::x86_sse_comige_ss:
- case Intrinsic::x86_sse_comineq_ss:
- case Intrinsic::x86_sse_ucomieq_ss:
- case Intrinsic::x86_sse_ucomilt_ss:
- case Intrinsic::x86_sse_ucomile_ss:
- case Intrinsic::x86_sse_ucomigt_ss:
- case Intrinsic::x86_sse_ucomige_ss:
- case Intrinsic::x86_sse_ucomineq_ss:
- case Intrinsic::x86_sse2_comieq_sd:
- case Intrinsic::x86_sse2_comilt_sd:
- case Intrinsic::x86_sse2_comile_sd:
- case Intrinsic::x86_sse2_comigt_sd:
- case Intrinsic::x86_sse2_comige_sd:
- case Intrinsic::x86_sse2_comineq_sd:
- case Intrinsic::x86_sse2_ucomieq_sd:
- case Intrinsic::x86_sse2_ucomilt_sd:
- case Intrinsic::x86_sse2_ucomile_sd:
- case Intrinsic::x86_sse2_ucomigt_sd:
- case Intrinsic::x86_sse2_ucomige_sd:
- case Intrinsic::x86_sse2_ucomineq_sd: {
- unsigned Opc;
- ISD::CondCode CC;
+/// \brief Return (and \p Op, \p Mask) for compare instructions or
+/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
+/// necessary casting for \p Mask when lowering masking intrinsics.
+static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
+ SDValue PreservedSrc,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MVT::i1, VT.getVectorNumElements());
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+ SDLoc dl(Op);
+
+ assert(MaskVT.isSimple() && "invalid mask type");
+
+ if (isAllOnes(Mask))
+ return Op;
+
+  // In the case where MaskVT is v2i1 or v4i1, the lower 2 or 4 elements
+  // are extracted by EXTRACT_SUBVECTOR.
+ SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
+ DAG.getIntPtrConstant(0));
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case X86ISD::PCMPEQM:
+ case X86ISD::PCMPGTM:
+ case X86ISD::CMPM:
+ case X86ISD::CMPMU:
+ return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
+ }
+ if (PreservedSrc.getOpcode() == ISD::UNDEF)
+ PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
+}
+
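A scalar analogue of what getVectorMaskingNode produces, assuming an 8-lane vector guarded by an i8 lane mask (compare nodes are simply ANDed with the mask instead):

#include <array>
#include <cstdint>

// Per-lane select: result[i] = (Mask bit i) ? Op[i] : PreservedSrc[i].
template <typename T>
std::array<T, 8> maskedSelect(uint8_t Mask, const std::array<T, 8> &Op,
                              const std::array<T, 8> &PreservedSrc) {
  std::array<T, 8> R;
  for (int I = 0; I < 8; ++I)
    R[I] = ((Mask >> I) & 1) ? Op[I] : PreservedSrc[I];
  return R;
}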
+static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse_comieq_ss:
- case Intrinsic::x86_sse2_comieq_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETEQ;
- break;
- case Intrinsic::x86_sse_comilt_ss:
- case Intrinsic::x86_sse2_comilt_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETLT;
- break;
- case Intrinsic::x86_sse_comile_ss:
- case Intrinsic::x86_sse2_comile_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETLE;
- break;
- case Intrinsic::x86_sse_comigt_ss:
- case Intrinsic::x86_sse2_comigt_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETGT;
- break;
- case Intrinsic::x86_sse_comige_ss:
- case Intrinsic::x86_sse2_comige_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETGE;
- break;
- case Intrinsic::x86_sse_comineq_ss:
- case Intrinsic::x86_sse2_comineq_sd:
- Opc = X86ISD::COMI;
- CC = ISD::SETNE;
- break;
- case Intrinsic::x86_sse_ucomieq_ss:
- case Intrinsic::x86_sse2_ucomieq_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETEQ;
- break;
- case Intrinsic::x86_sse_ucomilt_ss:
- case Intrinsic::x86_sse2_ucomilt_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETLT;
- break;
- case Intrinsic::x86_sse_ucomile_ss:
- case Intrinsic::x86_sse2_ucomile_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETLE;
- break;
- case Intrinsic::x86_sse_ucomigt_ss:
- case Intrinsic::x86_sse2_ucomigt_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETGT;
- break;
- case Intrinsic::x86_sse_ucomige_ss:
- case Intrinsic::x86_sse2_ucomige_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETGE;
- break;
- case Intrinsic::x86_sse_ucomineq_ss:
- case Intrinsic::x86_sse2_ucomineq_sd:
- Opc = X86ISD::UCOMI;
- CC = ISD::SETNE;
- break;
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_mask_vfmadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfmadd_pd_512:
+ return X86ISD::FMADD;
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_mask_vfmsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfmsub_pd_512:
+ return X86ISD::FMSUB;
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
+ return X86ISD::FNMADD;
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
+ return X86ISD::FNMSUB;
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
+ return X86ISD::FMADDSUB;
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfmsubadd_pd_512:
+ return X86ISD::FMSUBADD;
}
+}
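For reference, the per-element arithmetic behind each opcode returned above (the lane-alternating forms are only summarized in a comment here):

// Scalar meaning of the fused ops selected by getOpcodeForFMAIntrinsic:
//   FMADD   : a * b + c
//   FMSUB   : a * b - c
//   FNMADD  : -(a * b) + c
//   FNMSUB  : -(a * b) - c
//   FMADDSUB / FMSUBADD : alternate between the add and subtract forms
//                         from one vector lane to the next.
float fmaddScalar(float A, float B, float C)  { return A * B + C; }
float fmsubScalar(float A, float B, float C)  { return A * B - C; }
float fnmaddScalar(float A, float B, float C) { return -(A * B) + C; }
float fnmsubScalar(float A, float B, float C) { return -(A * B) - C; }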
- SDValue LHS = Op.getOperand(1);
- SDValue RHS = Op.getOperand(2);
- unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
- assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
- SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond);
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
+ if (IntrData) {
+ switch(IntrData->Type) {
+ case INTR_TYPE_1OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
+ case INTR_TYPE_2OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ case INTR_TYPE_3OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ case INTR_TYPE_1OP_MASK_RM: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Src0 = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue RoundingMode = Op.getOperand(4);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
+ RoundingMode),
+ Mask, Src0, Subtarget, DAG);
+ }
+
+ case CMP_MASK:
+ case CMP_MASK_CC: {
+ // Comparison intrinsics with masks.
+ // Example of transformation:
+ // (i8 (int_x86_avx512_mask_pcmpeq_q_128
+ // (v2i64 %a), (v2i64 %b), (i8 %mask))) ->
+ // (i8 (bitcast
+ // (v8i1 (insert_subvector undef,
+ // (v2i1 (and (PCMPEQM %a, %b),
+ // (extract_subvector
+ // (v8i1 (bitcast %mask)), 0))), 0))))
+ EVT VT = Op.getOperand(1).getValueType();
+ EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorNumElements());
+ SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Mask.getValueType().getSizeInBits());
+ SDValue Cmp;
+ if (IntrData->Type == CMP_MASK_CC) {
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ } else {
+ assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2));
+ }
+ SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
+ DAG.getTargetConstant(0, MaskVT),
+ Subtarget, DAG);
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
+ DAG.getUNDEF(BitcastVT), CmpMask,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+ case COMI: { // Comparison intrinsics
+ ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
+ SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+ case VSHIFT:
+ return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
+ case VSHIFT_MASK:
+ return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG),
+                                  Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
+ default:
+ break;
+ }
}
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+
// Arithmetic intrinsics.
case Intrinsic::x86_sse2_pmulu_dq:
case Intrinsic::x86_avx2_pmulu_dq:
@@ -13602,128 +16783,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MULHS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
- // SSE2/AVX2 sub with unsigned saturation intrinsics
- case Intrinsic::x86_sse2_psubus_b:
- case Intrinsic::x86_sse2_psubus_w:
- case Intrinsic::x86_avx2_psubus_b:
- case Intrinsic::x86_avx2_psubus_w:
- return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
-
- // SSE3/AVX horizontal add/sub intrinsics
- case Intrinsic::x86_sse3_hadd_ps:
- case Intrinsic::x86_sse3_hadd_pd:
- case Intrinsic::x86_avx_hadd_ps_256:
- case Intrinsic::x86_avx_hadd_pd_256:
- case Intrinsic::x86_sse3_hsub_ps:
- case Intrinsic::x86_sse3_hsub_pd:
- case Intrinsic::x86_avx_hsub_ps_256:
- case Intrinsic::x86_avx_hsub_pd_256:
- case Intrinsic::x86_ssse3_phadd_w_128:
- case Intrinsic::x86_ssse3_phadd_d_128:
- case Intrinsic::x86_avx2_phadd_w:
- case Intrinsic::x86_avx2_phadd_d:
- case Intrinsic::x86_ssse3_phsub_w_128:
- case Intrinsic::x86_ssse3_phsub_d_128:
- case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse3_hadd_ps:
- case Intrinsic::x86_sse3_hadd_pd:
- case Intrinsic::x86_avx_hadd_ps_256:
- case Intrinsic::x86_avx_hadd_pd_256:
- Opcode = X86ISD::FHADD;
- break;
- case Intrinsic::x86_sse3_hsub_ps:
- case Intrinsic::x86_sse3_hsub_pd:
- case Intrinsic::x86_avx_hsub_ps_256:
- case Intrinsic::x86_avx_hsub_pd_256:
- Opcode = X86ISD::FHSUB;
- break;
- case Intrinsic::x86_ssse3_phadd_w_128:
- case Intrinsic::x86_ssse3_phadd_d_128:
- case Intrinsic::x86_avx2_phadd_w:
- case Intrinsic::x86_avx2_phadd_d:
- Opcode = X86ISD::HADD;
- break;
- case Intrinsic::x86_ssse3_phsub_w_128:
- case Intrinsic::x86_ssse3_phsub_d_128:
- case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d:
- Opcode = X86ISD::HSUB;
- break;
- }
- return DAG.getNode(Opcode, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
-
- // SSE2/SSE41/AVX2 integer max/min intrinsics.
- case Intrinsic::x86_sse2_pmaxu_b:
- case Intrinsic::x86_sse41_pmaxuw:
- case Intrinsic::x86_sse41_pmaxud:
- case Intrinsic::x86_avx2_pmaxu_b:
- case Intrinsic::x86_avx2_pmaxu_w:
- case Intrinsic::x86_avx2_pmaxu_d:
- case Intrinsic::x86_sse2_pminu_b:
- case Intrinsic::x86_sse41_pminuw:
- case Intrinsic::x86_sse41_pminud:
- case Intrinsic::x86_avx2_pminu_b:
- case Intrinsic::x86_avx2_pminu_w:
- case Intrinsic::x86_avx2_pminu_d:
- case Intrinsic::x86_sse41_pmaxsb:
- case Intrinsic::x86_sse2_pmaxs_w:
- case Intrinsic::x86_sse41_pmaxsd:
- case Intrinsic::x86_avx2_pmaxs_b:
- case Intrinsic::x86_avx2_pmaxs_w:
- case Intrinsic::x86_avx2_pmaxs_d:
- case Intrinsic::x86_sse41_pminsb:
- case Intrinsic::x86_sse2_pmins_w:
- case Intrinsic::x86_sse41_pminsd:
- case Intrinsic::x86_avx2_pmins_b:
- case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse2_pmaxu_b:
- case Intrinsic::x86_sse41_pmaxuw:
- case Intrinsic::x86_sse41_pmaxud:
- case Intrinsic::x86_avx2_pmaxu_b:
- case Intrinsic::x86_avx2_pmaxu_w:
- case Intrinsic::x86_avx2_pmaxu_d:
- Opcode = X86ISD::UMAX;
- break;
- case Intrinsic::x86_sse2_pminu_b:
- case Intrinsic::x86_sse41_pminuw:
- case Intrinsic::x86_sse41_pminud:
- case Intrinsic::x86_avx2_pminu_b:
- case Intrinsic::x86_avx2_pminu_w:
- case Intrinsic::x86_avx2_pminu_d:
- Opcode = X86ISD::UMIN;
- break;
- case Intrinsic::x86_sse41_pmaxsb:
- case Intrinsic::x86_sse2_pmaxs_w:
- case Intrinsic::x86_sse41_pmaxsd:
- case Intrinsic::x86_avx2_pmaxs_b:
- case Intrinsic::x86_avx2_pmaxs_w:
- case Intrinsic::x86_avx2_pmaxs_d:
- Opcode = X86ISD::SMAX;
- break;
- case Intrinsic::x86_sse41_pminsb:
- case Intrinsic::x86_sse2_pmins_w:
- case Intrinsic::x86_sse41_pminsd:
- case Intrinsic::x86_avx2_pmins_b:
- case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d:
- Opcode = X86ISD::SMIN;
- break;
- }
- return DAG.getNode(Opcode, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
-
// SSE/SSE2/AVX floating point max/min intrinsics.
case Intrinsic::x86_sse_max_ps:
case Intrinsic::x86_sse2_max_pd:
@@ -13828,17 +16887,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
- case Intrinsic::x86_sse41_insertps:
- return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case Intrinsic::x86_avx_vperm2f128_ps_256:
- case Intrinsic::x86_avx_vperm2f128_pd_256:
- case Intrinsic::x86_avx_vperm2f128_si_256:
- case Intrinsic::x86_avx2_vperm2i128:
- return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
@@ -13846,11 +16894,15 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
- case Intrinsic::x86_sse_sqrt_ps:
- case Intrinsic::x86_sse2_sqrt_pd:
- case Intrinsic::x86_avx_sqrt_ps_256:
- case Intrinsic::x86_avx_sqrt_pd_256:
- return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
+ case Intrinsic::x86_avx512_mask_valign_q_512:
+ case Intrinsic::x86_avx512_mask_valign_d_512:
+ // Vector source operands are swapped.
+ return getVectorMaskingNode(DAG.getNode(X86ISD::VALIGN, dl,
+ Op.getValueType(), Op.getOperand(2),
+ Op.getOperand(1),
+ Op.getOperand(3)),
+ Op.getOperand(5), Op.getOperand(4),
+ Subtarget, DAG);
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
@@ -13928,100 +16980,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // SSE/AVX shift intrinsics
- case Intrinsic::x86_sse2_psll_w:
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse2_psll_w:
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- Opcode = X86ISD::VSHL;
- break;
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- Opcode = X86ISD::VSRL;
- break;
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- Opcode = X86ISD::VSRA;
- break;
- }
- return DAG.getNode(Opcode, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
-
- // SSE/AVX immediate shift intrinsics
- case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_sse2_pslli_d:
- case Intrinsic::x86_sse2_pslli_q:
- case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_avx2_pslli_d:
- case Intrinsic::x86_avx2_pslli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d: {
- unsigned Opcode;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_sse2_pslli_d:
- case Intrinsic::x86_sse2_pslli_q:
- case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_avx2_pslli_d:
- case Intrinsic::x86_avx2_pslli_q:
- Opcode = X86ISD::VSHLI;
- break;
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- Opcode = X86ISD::VSRLI;
- break;
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- Opcode = X86ISD::VSRAI;
- break;
- }
- return getTargetVShiftNode(Opcode, dl, Op.getSimpleValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
- }
-
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
@@ -14098,6 +17056,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
+
+ case Intrinsic::x86_fma_mask_vfmadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfmadd_pd_512:
+ case Intrinsic::x86_fma_mask_vfmsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfmsub_pd_512:
+ case Intrinsic::x86_fma_mask_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfnmadd_pd_512:
+ case Intrinsic::x86_fma_mask_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfnmsub_pd_512:
+ case Intrinsic::x86_fma_mask_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_mask_vfmaddsub_pd_512:
+ case Intrinsic::x86_fma_mask_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_mask_vfmsubadd_pd_512: {
+ auto *SAE = cast<ConstantSDNode>(Op.getOperand(5));
+ if (SAE->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION)
+ return getVectorMaskingNode(DAG.getNode(getOpcodeForFMAIntrinsic(IntNo),
+ dl, Op.getValueType(),
+ Op.getOperand(1),
+ Op.getOperand(2),
+ Op.getOperand(3)),
+ Op.getOperand(4), Op.getOperand(1),
+ Subtarget, DAG);
+ else
+ return SDValue();
+ }
+
case Intrinsic::x86_fma_vfmadd_ps:
case Intrinsic::x86_fma_vfmadd_pd:
case Intrinsic::x86_fma_vfmsub_ps:
@@ -14122,74 +17106,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_fma_vfmaddsub_pd_256:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
case Intrinsic::x86_fma_vfmsubadd_pd_256:
- case Intrinsic::x86_fma_vfmadd_ps_512:
- case Intrinsic::x86_fma_vfmadd_pd_512:
- case Intrinsic::x86_fma_vfmsub_ps_512:
- case Intrinsic::x86_fma_vfmsub_pd_512:
- case Intrinsic::x86_fma_vfnmadd_ps_512:
- case Intrinsic::x86_fma_vfnmadd_pd_512:
- case Intrinsic::x86_fma_vfnmsub_ps_512:
- case Intrinsic::x86_fma_vfnmsub_pd_512:
- case Intrinsic::x86_fma_vfmaddsub_ps_512:
- case Intrinsic::x86_fma_vfmaddsub_pd_512:
- case Intrinsic::x86_fma_vfmsubadd_ps_512:
- case Intrinsic::x86_fma_vfmsubadd_pd_512: {
- unsigned Opc;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_fma_vfmadd_ps:
- case Intrinsic::x86_fma_vfmadd_pd:
- case Intrinsic::x86_fma_vfmadd_ps_256:
- case Intrinsic::x86_fma_vfmadd_pd_256:
- case Intrinsic::x86_fma_vfmadd_ps_512:
- case Intrinsic::x86_fma_vfmadd_pd_512:
- Opc = X86ISD::FMADD;
- break;
- case Intrinsic::x86_fma_vfmsub_ps:
- case Intrinsic::x86_fma_vfmsub_pd:
- case Intrinsic::x86_fma_vfmsub_ps_256:
- case Intrinsic::x86_fma_vfmsub_pd_256:
- case Intrinsic::x86_fma_vfmsub_ps_512:
- case Intrinsic::x86_fma_vfmsub_pd_512:
- Opc = X86ISD::FMSUB;
- break;
- case Intrinsic::x86_fma_vfnmadd_ps:
- case Intrinsic::x86_fma_vfnmadd_pd:
- case Intrinsic::x86_fma_vfnmadd_ps_256:
- case Intrinsic::x86_fma_vfnmadd_pd_256:
- case Intrinsic::x86_fma_vfnmadd_ps_512:
- case Intrinsic::x86_fma_vfnmadd_pd_512:
- Opc = X86ISD::FNMADD;
- break;
- case Intrinsic::x86_fma_vfnmsub_ps:
- case Intrinsic::x86_fma_vfnmsub_pd:
- case Intrinsic::x86_fma_vfnmsub_ps_256:
- case Intrinsic::x86_fma_vfnmsub_pd_256:
- case Intrinsic::x86_fma_vfnmsub_ps_512:
- case Intrinsic::x86_fma_vfnmsub_pd_512:
- Opc = X86ISD::FNMSUB;
- break;
- case Intrinsic::x86_fma_vfmaddsub_ps:
- case Intrinsic::x86_fma_vfmaddsub_pd:
- case Intrinsic::x86_fma_vfmaddsub_ps_256:
- case Intrinsic::x86_fma_vfmaddsub_pd_256:
- case Intrinsic::x86_fma_vfmaddsub_ps_512:
- case Intrinsic::x86_fma_vfmaddsub_pd_512:
- Opc = X86ISD::FMADDSUB;
- break;
- case Intrinsic::x86_fma_vfmsubadd_ps:
- case Intrinsic::x86_fma_vfmsubadd_pd:
- case Intrinsic::x86_fma_vfmsubadd_ps_256:
- case Intrinsic::x86_fma_vfmsubadd_pd_256:
- case Intrinsic::x86_fma_vfmsubadd_ps_512:
- case Intrinsic::x86_fma_vfmsubadd_pd_512:
- Opc = X86ISD::FMSUBADD;
- break;
- }
-
- return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3));
- }
+ return DAG.getNode(getOpcodeForFMAIntrinsic(IntNo), dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
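A rough sketch of the table-driven dispatch that replaces the removed switch cases. The row layout and the linear search below are illustrative only; the real lookup presumably lives next to getIntrinsicWithChain/getIntrinsicWithoutChain elsewhere in this patch and may be a sorted table with a binary search.

// Hypothetical shape of a per-intrinsic data row: the intrinsic number, the
// lowering kind (INTR_TYPE_2OP, CMP_MASK, COMI, VSHIFT, ...) and the target
// opcode(s) consumed by the lowering code.
struct IntrinsicRow {
  unsigned IntNo;
  unsigned Kind;
  unsigned Opc0, Opc1;
};

const IntrinsicRow *lookupIntrinsic(const IntrinsicRow *Table, unsigned N,
                                    unsigned IntNo) {
  for (unsigned I = 0; I != N; ++I) // illustrative linear scan
    if (Table[I].IntNo == IntNo)
      return &Table[I];
  return nullptr;
}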
@@ -14374,122 +17292,25 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, DL);
}
-enum IntrinsicType {
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST
-};
-
-struct IntrinsicData {
- IntrinsicData(IntrinsicType IType, unsigned IOpc0, unsigned IOpc1)
- :Type(IType), Opc0(IOpc0), Opc1(IOpc1) {}
- IntrinsicType Type;
- unsigned Opc0;
- unsigned Opc1;
-};
-
-std::map < unsigned, IntrinsicData> IntrMap;
-static void InitIntinsicsMap() {
- static bool Initialized = false;
- if (Initialized)
- return;
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512,
- IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512,
- IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpd_512,
- IntrinsicData(GATHER, X86::VGATHERQPDZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpd_512,
- IntrinsicData(GATHER, X86::VGATHERDPDZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dps_512,
- IntrinsicData(GATHER, X86::VGATHERDPSZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpi_512,
- IntrinsicData(GATHER, X86::VPGATHERQDZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpq_512,
- IntrinsicData(GATHER, X86::VPGATHERQQZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpi_512,
- IntrinsicData(GATHER, X86::VPGATHERDDZrm, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpq_512,
- IntrinsicData(GATHER, X86::VPGATHERDQZrm, 0)));
-
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qps_512,
- IntrinsicData(SCATTER, X86::VSCATTERQPSZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpd_512,
- IntrinsicData(SCATTER, X86::VSCATTERQPDZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpd_512,
- IntrinsicData(SCATTER, X86::VSCATTERDPDZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dps_512,
- IntrinsicData(SCATTER, X86::VSCATTERDPSZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpi_512,
- IntrinsicData(SCATTER, X86::VPSCATTERQDZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpq_512,
- IntrinsicData(SCATTER, X86::VPSCATTERQQZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpi_512,
- IntrinsicData(SCATTER, X86::VPSCATTERDDZmr, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpq_512,
- IntrinsicData(SCATTER, X86::VPSCATTERDQZmr, 0)));
-
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qps_512,
- IntrinsicData(PREFETCH, X86::VGATHERPF0QPSm,
- X86::VGATHERPF1QPSm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qpd_512,
- IntrinsicData(PREFETCH, X86::VGATHERPF0QPDm,
- X86::VGATHERPF1QPDm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dpd_512,
- IntrinsicData(PREFETCH, X86::VGATHERPF0DPDm,
- X86::VGATHERPF1DPDm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dps_512,
- IntrinsicData(PREFETCH, X86::VGATHERPF0DPSm,
- X86::VGATHERPF1DPSm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qps_512,
- IntrinsicData(PREFETCH, X86::VSCATTERPF0QPSm,
- X86::VSCATTERPF1QPSm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qpd_512,
- IntrinsicData(PREFETCH, X86::VSCATTERPF0QPDm,
- X86::VSCATTERPF1QPDm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dpd_512,
- IntrinsicData(PREFETCH, X86::VSCATTERPF0DPDm,
- X86::VSCATTERPF1DPDm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dps_512,
- IntrinsicData(PREFETCH, X86::VSCATTERPF0DPSm,
- X86::VSCATTERPF1DPSm)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_16,
- IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_32,
- IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_64,
- IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_16,
- IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_32,
- IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_64,
- IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_xtest,
- IntrinsicData(XTEST, X86ISD::XTEST, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdtsc,
- IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp,
- IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0)));
- IntrMap.insert(std::make_pair(Intrinsic::x86_rdpmc,
- IntrinsicData(RDPMC, X86ISD::RDPMC_DAG, 0)));
- Initialized = true;
-}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- InitIntinsicsMap();
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- std::map < unsigned, IntrinsicData>::const_iterator itr = IntrMap.find(IntNo);
- if (itr == IntrMap.end())
+
+ const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
+ if (!IntrData)
return SDValue();
SDLoc dl(Op);
- IntrinsicData Intr = itr->second;
- switch(Intr.Type) {
+ switch(IntrData->Type) {
+ default:
+ llvm_unreachable("Unknown Intrinsic Type");
+ break;
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
- SDValue Result = DAG.getNode(Intr.Opc0, dl, VTs, Op.getOperand(0));
+ SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from Rand, which is always 0, casted to i32.
@@ -14513,7 +17334,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getGatherNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
+ return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
Subtarget);
}
case SCATTER: {
@@ -14524,7 +17345,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getScatterNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
+ return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
}
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
@@ -14532,7 +17353,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
(HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
- unsigned Opcode = (HintVal ? Intr.Opc1 : Intr.Opc0);
+ unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
@@ -14543,7 +17364,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
// Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
case RDTSC: {
SmallVector<SDValue, 2> Results;
- getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget, Results);
+ getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// Read Performance Monitoring Counters.
@@ -14555,7 +17376,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
// XTEST intrinsics.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
- SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
+ SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86::COND_NE, MVT::i8),
InTrans);
@@ -14563,8 +17384,26 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
+ // ADC/ADCX/SBB
+ case ADX: {
+ SmallVector<SDValue, 2> Results;
+ SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
+ SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other);
+ SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
+ DAG.getConstant(-1, MVT::i8));
+ SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
+ Op.getOperand(4), GenCF.getValue(1));
+ SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
+ Op.getOperand(5), MachinePointerInfo(),
+ false, false, 0);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_B, MVT::i8),
+ Res.getValue(1));
+ Results.push_back(SetCC);
+ Results.push_back(Store);
+ return DAG.getMergeValues(Results, dl);
+ }
}
- llvm_unreachable("Unknown Intrinsic Type");
}
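The scalar behaviour the ADX case above models for an addcarry-style intrinsic: regenerate CF from the carry operand (adding -1 sets the carry iff the operand is non-zero), perform the add-with-carry, store the sum through the pointer operand, and return the new carry.

#include <cstdint>

// Scalar model: *Out = A + B + CarryIn; return the carry out of the 32-bit
// addition.
uint8_t addCarry32(uint8_t CarryIn, uint32_t A, uint32_t B, uint32_t *Out) {
  uint64_t Wide = uint64_t(A) + uint64_t(B) + (CarryIn ? 1 : 0);
  *Out = uint32_t(Wide);
  return uint8_t(Wide >> 32);
}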
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
@@ -14581,8 +17420,8 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT,
@@ -14603,8 +17442,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
@@ -14632,8 +17471,8 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName,
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
}
@@ -14644,8 +17483,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl (Op);
EVT PtrVT = getPointerTy();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
@@ -14692,7 +17531,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- const TargetRegisterInfo* TRI = DAG.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
if (Subtarget->is64Bit()) {
SDValue OutChains[6];
@@ -14856,7 +17695,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
@@ -15156,10 +17995,23 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
(VT == MVT::v8i32 && Subtarget->hasInt256()));
- // Get the high parts.
+ // PMULxD operations multiply each even value (starting at 0) of LHS with
+  // the related value of RHS and produce a widened result.
+ // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ //
+  // In other words, to have all the results, we need to perform two PMULxD:
+ // 1. one with the even values.
+ // 2. one with the odd values.
+  // To achieve #2, we need to place the odd values at an even position.
+ //
+ // Place the odd value at an even position (basically, shift all values 1
+ // step to the left):
const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
- SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
- SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
+ // <a|b|c|d> => <b|undef|d|undef>
+ SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
+ // <e|f|g|h> => <f|undef|h|undef>
+ SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
// Emit two multiplies, one for the lower 2 ints and one for the higher 2
// ints.
@@ -15167,10 +18019,14 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
unsigned Opcode =
(!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
+ // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
+ // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
+ // => <2 x i64> <bf|dh>
SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
+ DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
// Shuffle it back into the right order.
SDValue Highs, Lows;
@@ -15200,7 +18056,10 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
}
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows);
+ // The first result of MUL_LOHI is actually the low value, followed by the
+ // high value.
+ SDValue Ops[] = {Lows, Highs};
+ return DAG.getMergeValues(Ops, dl);
}
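The per-lane arithmetic that the two PMULxD operations recover, in scalar form, with the halves ordered the way MUL_LOHI expects them:

#include <cstdint>

// Full 32x32->64 unsigned multiply, split into the low and high 32-bit
// halves (low first, then high).
void mulLoHi32(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Prod = uint64_t(A) * uint64_t(B);
  Lo = uint32_t(Prod);
  Hi = uint32_t(Prod >> 32);
}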
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
@@ -15811,10 +18670,15 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
Cond = X86::COND_B;
break;
case ISD::SMULO:
- BaseOp = X86ISD::SMUL;
+ BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
Cond = X86::COND_O;
break;
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ if (N->getValueType(0) == MVT::i8) {
+ BaseOp = X86ISD::UMUL8;
+ Cond = X86::COND_O;
+ break;
+ }
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
MVT::i32);
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
@@ -15840,6 +18704,11 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
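The overflow condition the new 8-bit multiply nodes have to produce, in scalar form: the full 16-bit product is checked against the range of the 8-bit result.

#include <cstdint>

// Signed 8-bit multiply with overflow flag: overflow iff the exact product
// does not round-trip through int8_t (this is what X86::COND_O reports).
bool smulo8(int8_t A, int8_t B, int8_t &Res) {
  int16_t Prod = int16_t(A) * int16_t(B);
  Res = int8_t(Prod);
  return Prod != int16_t(Res);
}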
+// Sign extension of the low part of vector elements. This may be used either
+// when sign extend instructions are not available or if the vector element
+// sizes already match the sign-extended size. If the vector elements are in
+// their pre-extended size and sign extend instructions are available, that will
+// be handled by LowerSIGN_EXTEND.
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -15885,37 +18754,151 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
case MVT::v4i32:
case MVT::v8i16: {
SDValue Op0 = Op.getOperand(0);
- SDValue Op00 = Op0.getOperand(0);
- SDValue Tmp1;
- // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
- if (Op0.getOpcode() == ISD::BITCAST &&
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
- // (sext (vzext x)) -> (vsext x)
- Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
- if (Tmp1.getNode()) {
- EVT ExtraEltVT = ExtraVT.getVectorElementType();
- // This folding is only valid when the in-reg type is a vector of i8,
- // i16, or i32.
- if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
- ExtraEltVT == MVT::i32) {
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
- "This optimization is invalid without a VZEXT.");
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
- }
- Op0 = Tmp1;
- }
- }
- // If the above didn't work, then just use Shift-Left + Shift-Right.
- Tmp1 = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0, BitsDiff,
- DAG);
- return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Tmp1, BitsDiff,
+ // This is a sign extension of some low part of vector elements without
+ // changing the size of the vector elements themselves:
+ // Shift-Left + Shift-Right-Algebraic.
+ SDValue Shl = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0,
+ BitsDiff, DAG);
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Shl, BitsDiff,
DAG);
}
}
}
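The Shift-Left + Shift-Right-Algebraic pattern used above, in scalar form for sign-extending the low 16 bits of a 32-bit lane in place (BitsDiff == 16):

#include <cstdint>

// sext_inreg of the low 16 bits within a 32-bit value: shift the field to
// the top, then arithmetic-shift it back so the sign bit is replicated.
// Assumes two's complement and an arithmetic right shift on signed types.
int32_t sextInReg16(int32_t X) {
  return int32_t(uint32_t(X) << 16) >> 16;
}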
+/// Returns true if the operand type is exactly twice the native width, and
+/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
+/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
+/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
+bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
+ const X86Subtarget &Subtarget =
+ getTargetMachine().getSubtarget<X86Subtarget>();
+ unsigned OpWidth = MemType->getPrimitiveSizeInBits();
+
+ if (OpWidth == 64)
+ return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
+ else if (OpWidth == 128)
+ return Subtarget.hasCmpxchg16b();
+ else
+ return false;
+}
+
+bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+ return needsCmpXchgNb(SI->getValueOperand()->getType());
+}
+
+// Note: this turns large loads into lock cmpxchg8b/16b.
+// FIXME: On 32-bit x86, fild/movq might be faster than lock cmpxchg8b.
+bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+ auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
+ return needsCmpXchgNb(PTy->getElementType());
+}
+
+bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ const X86Subtarget &Subtarget =
+ getTargetMachine().getSubtarget<X86Subtarget>();
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+ const Type *MemType = AI->getType();
+
+ // If the operand is too big, we must see if cmpxchg8/16b is available
+ // and default to library calls otherwise.
+ if (MemType->getPrimitiveSizeInBits() > NativeWidth)
+ return needsCmpXchgNb(MemType);
+
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+ switch (Op) {
+ default:
+ llvm_unreachable("Unknown atomic operation");
+ case AtomicRMWInst::Xchg:
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ // It's better to use xadd, xsub or xchg for these in all cases.
+ return false;
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Xor:
+ // If the atomicrmw's result isn't actually used, we can just add a "lock"
+ // prefix to a normal instruction for these operations.
+ return !AI->use_empty();
+ case AtomicRMWInst::Nand:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ // These always require a non-trivial set of data operations on x86. We must
+ // use a cmpxchg loop.
+ return true;
+ }
+}
+
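// Illustrative sketch (editorial, not part of this change): the user-visible
// consequence of the Or/And/Xor rule above, shown with std::atomic. When the
// old value is ignored a single lock-prefixed instruction suffices and no IR
// expansion happens; when it is used, AtomicExpandPass rewrites the RMW into
// a cmpxchg loop.
#include <atomic>
void set_bit(std::atomic<unsigned> &Flags) {
  Flags.fetch_or(1u);        // result unused -> "lock or"
}
unsigned set_bit_and_observe(std::atomic<unsigned> &Flags) {
  return Flags.fetch_or(1u); // result used -> cmpxchg loop
}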
+static bool hasMFENCE(const X86Subtarget& Subtarget) {
+ // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+ // no-sse2). There isn't any reason to disable it if the target processor
+ // supports it.
+ return Subtarget.hasSSE2() || Subtarget.is64Bit();
+}
+
+LoadInst *
+X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
+ const X86Subtarget &Subtarget =
+ getTargetMachine().getSubtarget<X86Subtarget>();
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+ const Type *MemType = AI->getType();
+ // Accesses larger than the native width are turned into cmpxchg/libcalls, so
+ // there is no benefit in turning such RMWs into loads, and it is actually
+ // harmful as it introduces a mfence.
+ if (MemType->getPrimitiveSizeInBits() > NativeWidth)
+ return nullptr;
+
+ auto Builder = IRBuilder<>(AI);
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ auto SynchScope = AI->getSynchScope();
+ // We must restrict the ordering to avoid generating loads with Release or
+ // ReleaseAcquire orderings.
+ auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
+ auto Ptr = AI->getPointerOperand();
+
+ // Before the load we need a fence. Here is an example lifted from
+ // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
+ // is required:
+ // Thread 0:
+ // x.store(1, relaxed);
+ // r1 = y.fetch_add(0, release);
+ // Thread 1:
+ // y.fetch_add(42, acquire);
+ // r2 = x.load(relaxed);
+ // r1 = r2 = 0 is impossible, but becomes possible if the idempotent rmw is
+ // lowered to just a load without a fence. An mfence flushes the store buffer,
+ // making the optimization clearly correct.
+ // FIXME: the fence is required if isAtLeastRelease(Order), but it is not
+ // clear that it is otherwise; we might be able to be more aggressive on
+ // relaxed idempotent rmw. In practice, they do not look useful, so we don't
+ // try to be especially clever.
+ if (SynchScope == SingleThread) {
+ // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at
+ // the IR level, so we must wrap it in an intrinsic.
+ return nullptr;
+ } else if (hasMFENCE(Subtarget)) {
+ Function *MFence = llvm::Intrinsic::getDeclaration(M,
+ Intrinsic::x86_sse2_mfence);
+ Builder.CreateCall(MFence);
+ } else {
+ // FIXME: it might make sense to use a locked operation here but on a
+ // different cache-line to prevent cache-line bouncing. In practice it
+ // is probably a small win, and x86 processors without mfence are rare
+ // enough that we do not bother.
+ return nullptr;
+ }
+
+ // Finally we can emit the atomic load.
+ LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr,
+ AI->getType()->getPrimitiveSizeInBits());
+ Loaded->setAtomic(Order, SynchScope);
+ AI->replaceAllUsesWith(Loaded);
+ AI->eraseFromParent();
+ return Loaded;
+}
+
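// Illustrative sketch (editorial, not part of this change): the kind of source
// this hook targets. An RMW with an identity operand is really an ordered
// load, so on SSE2/x86-64 it now lowers to "mfence; mov" instead of a locked
// xadd of zero. The helper name is hypothetical.
#include <atomic>
unsigned read_via_idempotent_rmw(std::atomic<unsigned> &X) {
  return X.fetch_add(0u, std::memory_order_seq_cst);
}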
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -15927,10 +18910,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
- // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
- // no-sse2). There isn't any reason to disable it if the target processor
- // supports it.
- if (Subtarget->hasSSE2() || Subtarget->is64Bit())
+ if (hasMFENCE(*Subtarget))
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
@@ -16141,7 +19121,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());
Type *RetTy = isF64
- ? (Type*)StructType::get(ArgTy, ArgTy, NULL)
+ ? (Type*)StructType::get(ArgTy, ArgTy, nullptr)
: (Type*)VectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -16200,8 +19180,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
- case ISD::FABS: return LowerFABS(Op, DAG);
- case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
+ case ISD::FABS:
+ case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
@@ -16211,7 +19192,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, Subtarget, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
@@ -16252,29 +19233,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
-static void ReplaceATOMIC_LOAD(SDNode *Node,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) {
- SDLoc dl(Node);
- EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
-
- // Convert wide load -> cmpxchg8b/cmpxchg16b
- // FIXME: On 32-bit, load -> fild or movq would be more efficient
- // (The only way to get a 16-byte load is cmpxchg16b)
- // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
- SDValue Zero = DAG.getConstant(0, VT);
- SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
- SDValue Swap =
- DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs,
- Node->getOperand(0), Node->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
- Results.push_back(Swap.getValue(0));
- Results.push_back(Swap.getValue(2));
-}
-
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
@@ -16433,12 +19391,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD: {
// Delegate to generic TypeLegalization. Situations we can really handle
- // should have already been dealt with by X86AtomicExpand.cpp.
+ // should have already been dealt with by AtomicExpandPass.cpp.
break;
- case ISD::ATOMIC_LOAD: {
- ReplaceATOMIC_LOAD(N, Results, DAG);
- return;
}
case ISD::BITCAST: {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
@@ -16521,8 +19477,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
- case X86ISD::BLENDV: return "X86ISD::BLENDV";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
+ case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
@@ -16578,6 +19534,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
+ case X86ISD::SMUL8: return "X86ISD::SMUL8";
+ case X86ISD::UMUL8: return "X86ISD::UMUL8";
+ case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
+ case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
@@ -16593,6 +19553,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
+ case X86ISD::VALIGN: return "X86ISD::VALIGN";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
@@ -16612,7 +19573,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
- case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
+ case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
@@ -16848,8 +19809,11 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, SVT) ||
+ isMOVHLPSMask(M, SVT) ||
isSHUFPMask(M, SVT) ||
+ isSHUFPMask(M, SVT, /* Commuted */ true) ||
isPSHUFDMask(M, SVT) ||
+ isPSHUFDMask(M, SVT, /* SecondOperand */ true) ||
isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
isPALIGNRMask(M, SVT, Subtarget) ||
@@ -16857,7 +19821,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
- isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()));
+ isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()) ||
+ (Subtarget->hasSSE41() && isINSERTPSMask(M, SVT)));
}
bool
@@ -16875,7 +19840,9 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
return (isMOVLMask(Mask, SVT) ||
isCommutedMOVLMask(Mask, SVT, true) ||
isSHUFPMask(Mask, SVT) ||
- isSHUFPMask(Mask, SVT, /* Commuted */ true));
+ isSHUFPMask(Mask, SVT, /* Commuted */ true) ||
+ isBlendMask(Mask, SVT, Subtarget->hasSSE41(),
+ Subtarget->hasInt256()));
}
return false;
}
@@ -17073,7 +20040,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
// Machine Information
- const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MBB->getParent()->getSubtarget().getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
@@ -17329,7 +20296,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
- const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MBB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned CountReg = MI->getOperand(0).getReg();
@@ -17412,7 +20379,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -17438,7 +20405,8 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- const TargetRegisterInfo* TRI = BB->getParent()->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI =
+ BB->getParent()->getSubtarget().getRegisterInfo();
if (!MI->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
@@ -17477,17 +20445,20 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
- bool Is64Bit) const {
+X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
- unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
- unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
+ const bool Is64Bit = Subtarget->is64Bit();
+ const bool IsLP64 = Subtarget->isTarget64BitLP64();
+
+ const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
+ const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
// BB:
// ... [Till the alloca]
@@ -17511,14 +20482,14 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
- getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
+ getRegClassFor(getPointerTy());
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI->getOperand(1).getReg(),
- physSPReg = Is64Bit ? X86::RSP : X86::ESP;
+ physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
MachineFunction::iterator MBBIter = BB;
++MBBIter;
@@ -17534,9 +20505,9 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
- BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
+ BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
- BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
+ BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
@@ -17550,9 +20521,11 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
- const uint32_t *RegMask =
- MF->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
- if (Is64Bit) {
+ const uint32_t *RegMask = MF->getTarget()
+ .getSubtargetImpl()
+ ->getRegisterInfo()
+ ->getCallPreservedMask(CallingConv::C);
+ if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
@@ -17560,6 +20533,14 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
+ } else if (Is64Bit) {
+ BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
+ .addReg(sizeVReg);
+ BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EDI, RegState::Implicit)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
@@ -17575,7 +20556,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
.addImm(16);
BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
- .addReg(Is64Bit ? X86::RAX : X86::EAX);
+ .addReg(IsLP64 ? X86::RAX : X86::EAX);
BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
// Set up the CFG correctly.
@@ -17600,7 +20581,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(!Subtarget->isTargetMacho());
@@ -17633,8 +20614,10 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
.addReg(X86::RAX);
}
} else {
- const char *StackProbeSymbol =
- Subtarget->isTargetKnownWindowsMSVC() ? "_chkstk" : "_alloca";
+ const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() ||
+ Subtarget->isTargetWindowsItanium())
+ ? "_chkstk"
+ : "_alloca";
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
@@ -17657,8 +20640,8 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
MachineFunction *F = BB->getParent();
- const X86InstrInfo *TII
- = static_cast<const X86InstrInfo*>(F->getTarget().getInstrInfo());
+ const X86InstrInfo *TII =
+ static_cast<const X86InstrInfo *>(F->getSubtarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
@@ -17667,8 +20650,10 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
- const uint32_t *RegMask =
- F->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ const uint32_t *RegMask = F->getTarget()
+ .getSubtargetImpl()
+ ->getRegisterInfo()
+ ->getCallPreservedMask(CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -17713,7 +20698,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
@@ -17819,8 +20804,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
@@ -17850,7 +20835,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
@@ -17865,8 +20850,8 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo());
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
@@ -17965,6 +20950,11 @@ X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
case X86::VFNMSUBPSr213r: NewFMAOpc = X86::VFNMSUBPSr231r; break;
case X86::VFNMSUBSDr213r: NewFMAOpc = X86::VFNMSUBSDr231r; break;
case X86::VFNMSUBSSr213r: NewFMAOpc = X86::VFNMSUBSSr231r; break;
+ case X86::VFMADDSUBPDr213r: NewFMAOpc = X86::VFMADDSUBPDr231r; break;
+ case X86::VFMADDSUBPSr213r: NewFMAOpc = X86::VFMADDSUBPSr231r; break;
+ case X86::VFMSUBADDPDr213r: NewFMAOpc = X86::VFMSUBADDPDr231r; break;
+ case X86::VFMSUBADDPSr213r: NewFMAOpc = X86::VFMSUBADDPSr231r; break;
+
case X86::VFMADDPDr213rY: NewFMAOpc = X86::VFMADDPDr231rY; break;
case X86::VFMADDPSr213rY: NewFMAOpc = X86::VFMADDPSr231rY; break;
case X86::VFMSUBPDr213rY: NewFMAOpc = X86::VFMSUBPDr231rY; break;
@@ -17973,10 +20963,14 @@ X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
case X86::VFNMADDPSr213rY: NewFMAOpc = X86::VFNMADDPSr231rY; break;
case X86::VFNMSUBPDr213rY: NewFMAOpc = X86::VFNMSUBPDr231rY; break;
case X86::VFNMSUBPSr213rY: NewFMAOpc = X86::VFNMSUBPSr231rY; break;
+ case X86::VFMADDSUBPDr213rY: NewFMAOpc = X86::VFMADDSUBPDr231rY; break;
+ case X86::VFMADDSUBPSr213rY: NewFMAOpc = X86::VFMADDSUBPSr231rY; break;
+ case X86::VFMSUBADDPDr213rY: NewFMAOpc = X86::VFMSUBADDPDr231rY; break;
+ case X86::VFMSUBADDPSr213rY: NewFMAOpc = X86::VFMSUBADDPSr231rY; break;
default: llvm_unreachable("Unrecognized FMA variant.");
}
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc))
.addOperand(MI->getOperand(0))
@@ -18007,9 +21001,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
case X86::SEG_ALLOCA_32:
- return EmitLoweredSegAlloca(MI, BB, false);
case X86::SEG_ALLOCA_64:
- return EmitLoweredSegAlloca(MI, BB, true);
+ return EmitLoweredSegAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
@@ -18042,7 +21035,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
MachineFunction *F = BB->getParent();
- const TargetInstrInfo *TII = F->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = F->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// Change the floating point control register to use "round towards zero"
@@ -18126,7 +21119,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRM128MEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRM(MI, BB, BB->getParent()->getTarget().getInstrInfo());
+ return EmitPCMPSTRM(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
@@ -18139,15 +21132,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRIMEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRI(MI, BB, BB->getParent()->getTarget().getInstrInfo());
+ return EmitPCMPSTRI(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
- return EmitMonitor(MI, BB, BB->getParent()->getTarget().getInstrInfo(), Subtarget);
+ return EmitMonitor(MI, BB, BB->getParent()->getSubtarget().getInstrInfo(),
+ Subtarget);
// xbegin
case X86::XBEGIN:
- return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo());
+ return EmitXBegin(MI, BB, BB->getParent()->getSubtarget().getInstrInfo());
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
@@ -18183,6 +21177,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VFNMSUBPSr213r:
case X86::VFNMSUBSDr213r:
case X86::VFNMSUBSSr213r:
+ case X86::VFMADDSUBPDr213r:
+ case X86::VFMADDSUBPSr213r:
+ case X86::VFMSUBADDPDr213r:
+ case X86::VFMSUBADDPSr213r:
case X86::VFMADDPDr213rY:
case X86::VFMADDPSr213rY:
case X86::VFMSUBPDr213rY:
@@ -18191,6 +21189,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VFNMADDPSr213rY:
case X86::VFNMSUBPDr213rY:
case X86::VFNMSUBPSr213rY:
+ case X86::VFMADDSUBPDr213rY:
+ case X86::VFMADDSUBPSr213rY:
+ case X86::VFMSUBADDPDr213rY:
+ case X86::VFMSUBADDPSr213rY:
return emitFMA3Instr(MI, BB);
}
}
@@ -18420,6 +21422,329 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Combine an arbitrary chain of shuffles into a single instruction if
+/// possible.
+///
+/// This is the leaf of the recursive combine below. When we have found some
+/// chain of single-use x86 shuffle instructions and accumulated the combined
+/// shuffle mask represented by them, this will try to pattern match that mask
+/// into either a single instruction if there is a special purpose instruction
+/// for this operation, or into a PSHUFB instruction which is a fully general
+/// instruction but should only be used to replace chains over a certain depth.
+static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
+ int Depth, bool HasPSHUFB, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
+
+ // Find the operand that enters the chain. Note that multiple uses are OK
+ // here, we're not going to remove the operand we find.
+ SDValue Input = Op.getOperand(0);
+ while (Input.getOpcode() == ISD::BITCAST)
+ Input = Input.getOperand(0);
+
+ MVT VT = Input.getSimpleValueType();
+ MVT RootVT = Root.getSimpleValueType();
+ SDLoc DL(Root);
+
+ // Just remove no-op shuffle masks.
+ if (Mask.size() == 1) {
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Input),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // Use the float domain if the operand type is a floating point type.
+ bool FloatDomain = VT.isFloatingPoint();
+
+ // For floating point shuffles, we don't have free copies in the shuffle
+ // instructions or the ability to load as part of the instruction, so
+ // canonicalize their shuffles to UNPCK or MOV variants.
+ //
+ // Note that even with AVX we prefer the PSHUFD form of shuffle for integer
+ // vectors because it can have a load folded into it that UNPCK cannot. This
+ // doesn't preclude something switching to the shorter encoding post-RA.
+ if (FloatDomain) {
+ if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
+ bool Lo = Mask.equals(0, 0);
+ unsigned Shuffle;
+ MVT ShuffleVT;
+ // Check if we have SSE3 which will let us use MOVDDUP. That instruction
+ // is no slower than UNPCKLPD, but it can fold its input operand even when
+ // that operand is an unaligned load from memory.
+ if (Lo && Subtarget->hasSSE3()) {
+ Shuffle = X86ISD::MOVDDUP;
+ ShuffleVT = MVT::v2f64;
+ } else {
+ // We have MOVLHPS and MOVHLPS throughout SSE and they encode smaller
+ // than the UNPCK variants.
+ Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS;
+ ShuffleVT = MVT::v4f32;
+ }
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ if (Shuffle == X86ISD::MOVDDUP)
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
+ else
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+ if (Subtarget->hasSSE3() &&
+ (Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) {
+ bool Lo = Mask.equals(0, 0, 2, 2);
+ unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
+ MVT ShuffleVT = MVT::v4f32;
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+ if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
+ bool Lo = Mask.equals(0, 0, 1, 1);
+ unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ MVT ShuffleVT = MVT::v4f32;
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+ }
+
+ // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
+ // variants as none of these have single-instruction variants that are
+ // superior to the UNPCK formulation.
+ if (!FloatDomain &&
+ (Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) ||
+ Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) ||
+ Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) ||
+ Mask.equals(8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15,
+ 15))) {
+ bool Lo = Mask[0] == 0;
+ unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ if (Depth == 1 && Root->getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ MVT ShuffleVT;
+ switch (Mask.size()) {
+ case 8:
+ ShuffleVT = MVT::v8i16;
+ break;
+ case 16:
+ ShuffleVT = MVT::v16i8;
+ break;
+ default:
+ llvm_unreachable("Impossible mask size!");
+ };
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // Don't try to re-form single instruction chains under any circumstances now
+ // that we've done encoding canonicalization for them.
+ if (Depth < 2)
+ return false;
+
+ // If we have 3 or more shuffle instructions or a chain involving PSHUFB, we
+ // can replace them with a single PSHUFB instruction profitably. Intel's
+ // manuals suggest only using PSHUFB if doing so replaces 5 instructions, but
+ // in practice PSHUFB tends to be *very* fast so we're more aggressive.
+ if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
+ SmallVector<SDValue, 16> PSHUFBMask;
+ assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
+ int Ratio = 16 / Mask.size();
+ for (unsigned i = 0; i < 16; ++i) {
+ if (Mask[i / Ratio] == SM_SentinelUndef) {
+ PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
+ continue;
+ }
+ int M = Mask[i / Ratio] != SM_SentinelZero
+ ? Ratio * Mask[i / Ratio] + i % Ratio
+ : 255;
+ PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8));
+ }
+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input);
+ DCI.AddToWorklist(Op.getNode());
+ SDValue PSHUFBMaskOp =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask);
+ DCI.AddToWorklist(PSHUFBMaskOp.getNode());
+ Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp);
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ // Failed to find any combines.
+ return false;
+}
+
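// Illustrative sketch (editorial, not part of this change): a standalone
// version of the PSHUFB byte-mask construction above for a 4-element mask,
// where Ratio = 16 / 4. The helper name is hypothetical, and for brevity the
// sketch folds the separate zero/undef sentinels of the real code into 0xFF.
#include <array>
#include <cstdint>
std::array<uint8_t, 16> buildPSHUFBMask(const int Mask[4]) {
  std::array<uint8_t, 16> Bytes{};
  const int Ratio = 16 / 4;
  for (int i = 0; i < 16; ++i) {
    int M = Mask[i / Ratio];
    // Negative entries stand for zero/undef lanes; any byte with the high bit
    // set makes PSHUFB write a zero into that position.
    Bytes[i] = (M < 0) ? uint8_t(0xFF) : uint8_t(Ratio * M + i % Ratio);
  }
  return Bytes;
}
// E.g. Mask = {1, 0, 3, 3} yields {4,5,6,7, 0,1,2,3, 12,13,14,15, 12,13,14,15}.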
+/// \brief Fully generic combining of x86 shuffle instructions.
+///
+/// This should be the last combine run over the x86 shuffle instructions. Once
+/// they have been fully optimized, this will recursively consider all chains
+/// of single-use shuffle instructions, build a generic model of the cumulative
+/// shuffle operation, and check for simpler instructions which implement this
+/// operation. We use this primarily for two purposes:
+///
+/// 1) Collapse generic shuffles to specialized single instructions when
+/// equivalent. In most cases, this is just an encoding size win, but
+/// sometimes we will collapse multiple generic shuffles into a single
+/// special-purpose shuffle.
+/// 2) Look for sequences of shuffle instructions with 3 or more total
+/// instructions, and replace them with the slightly more expensive SSSE3
+/// PSHUFB instruction if available. We do this as the last combining step
+/// to ensure we avoid using PSHUFB if we can implement the shuffle with
+/// a suitable short sequence of other instructions. The PSHUFB will either
+/// use a register or have to read from memory and so is slightly (but only
+/// slightly) more expensive than the other shuffle instructions.
+///
+/// Because this is inherently a quadratic operation (for each shuffle in
+/// a chain, we recurse up the chain), the depth is limited to 8 instructions.
+/// This should never be an issue in practice as the shuffle lowering doesn't
+/// produce sequences of more than 8 instructions.
+///
+/// FIXME: We will currently miss some cases where the redundant shuffling
+/// would simplify under the threshold for PSHUFB formation because of
+/// combine-ordering. To fix this, we should do the redundant instruction
+/// combining in this recursive walk.
+static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
+ ArrayRef<int> RootMask,
+ int Depth, bool HasPSHUFB,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ // Bound the depth of our recursive combine because this is ultimately
+ // quadratic in nature.
+ if (Depth > 8)
+ return false;
+
+ // Directly rip through bitcasts to find the underlying operand.
+ while (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).hasOneUse())
+ Op = Op.getOperand(0);
+
+ MVT VT = Op.getSimpleValueType();
+ if (!VT.isVector())
+ return false; // Bail if we hit a non-vector.
+ // FIXME: This routine should be taught about 256-bit shuffles, or a 256-bit
+ // version should be added.
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ assert(Root.getSimpleValueType().isVector() &&
+ "Shuffles operate on vector types!");
+ assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() &&
+ "Can only combine shuffles of the same vector register size.");
+
+ if (!isTargetShuffle(Op.getOpcode()))
+ return false;
+ SmallVector<int, 16> OpMask;
+ bool IsUnary;
+ bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary);
+ // We only can combine unary shuffles which we can decode the mask for.
+ if (!HaveMask || !IsUnary)
+ return false;
+
+ assert(VT.getVectorNumElements() == OpMask.size() &&
+ "Different mask size from vector size!");
+ assert(((RootMask.size() > OpMask.size() &&
+ RootMask.size() % OpMask.size() == 0) ||
+ (OpMask.size() > RootMask.size() &&
+ OpMask.size() % RootMask.size() == 0) ||
+ OpMask.size() == RootMask.size()) &&
+ "The smaller number of elements must divide the larger.");
+ int RootRatio = std::max<int>(1, OpMask.size() / RootMask.size());
+ int OpRatio = std::max<int>(1, RootMask.size() / OpMask.size());
+ assert(((RootRatio == 1 && OpRatio == 1) ||
+ (RootRatio == 1) != (OpRatio == 1)) &&
+ "Must not have a ratio for both incoming and op masks!");
+
+ SmallVector<int, 16> Mask;
+ Mask.reserve(std::max(OpMask.size(), RootMask.size()));
+
+ // Merge this shuffle operation's mask into our accumulated mask. Note that
+ // this shuffle's mask will be the first applied to the input, followed by the
+ // root mask to get us all the way to the root value arrangement. The reason
+ // for this order is that we are recursing up the operation chain.
+ for (int i = 0, e = std::max(OpMask.size(), RootMask.size()); i < e; ++i) {
+ int RootIdx = i / RootRatio;
+ if (RootMask[RootIdx] < 0) {
+ // This is a zero or undef lane, we're done.
+ Mask.push_back(RootMask[RootIdx]);
+ continue;
+ }
+
+ int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
+ int OpIdx = RootMaskedIdx / OpRatio;
+ if (OpMask[OpIdx] < 0) {
+ // The incoming lanes are zero or undef, it doesn't matter which ones we
+ // are using.
+ Mask.push_back(OpMask[OpIdx]);
+ continue;
+ }
+
+ // Ok, we have non-zero lanes, map them through.
+ Mask.push_back(OpMask[OpIdx] * OpRatio +
+ RootMaskedIdx % OpRatio);
+ }
+
+ // See if we can recurse into the operand to combine more things.
+ switch (Op.getOpcode()) {
+ case X86ISD::PSHUFB:
+ HasPSHUFB = true;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ if (Op.getOperand(0).hasOneUse() &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ HasPSHUFB, DAG, DCI, Subtarget))
+ return true;
+ break;
+
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ assert(Op.getOperand(0) == Op.getOperand(1) && "We only combine unary shuffles!");
+ // We can't check for a single use; we have to check that this shuffle is
+ // the only user.
+ if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ HasPSHUFB, DAG, DCI, Subtarget))
+ return true;
+ break;
+ }
+
+ // Minor canonicalization of the accumulated shuffle mask to make it easier
+ // to match below. All this does is detect masks with sequential pairs of
+ // elements, and shrink them to the half-width mask. It does this in a loop
+ // so it will reduce the size of the mask to the minimal width mask which
+ // performs an equivalent shuffle.
+ SmallVector<int, 16> WidenedMask;
+ while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
+ Mask = std::move(WidenedMask);
+ WidenedMask.clear();
+ }
+
+ return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
+ Subtarget);
+}
+
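// Illustrative sketch (editorial, not part of this change): a standalone
// version of the mask-composition loop above. RootMask is the accumulated
// mask (applied last), OpMask belongs to the operand being folded in (applied
// first). The helper name is hypothetical.
#include <algorithm>
#include <vector>
std::vector<int> composeShuffleMasks(const std::vector<int> &RootMask,
                                     const std::vector<int> &OpMask) {
  int RootRatio = std::max<int>(1, OpMask.size() / RootMask.size());
  int OpRatio = std::max<int>(1, RootMask.size() / OpMask.size());
  std::vector<int> Mask;
  for (int i = 0, e = (int)std::max(OpMask.size(), RootMask.size()); i < e; ++i) {
    int RootIdx = i / RootRatio;
    if (RootMask[RootIdx] < 0) { Mask.push_back(RootMask[RootIdx]); continue; }
    int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
    int OpIdx = RootMaskedIdx / OpRatio;
    if (OpMask[OpIdx] < 0) { Mask.push_back(OpMask[OpIdx]); continue; }
    Mask.push_back(OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio);
  }
  return Mask;
}
// E.g. a PSHUFD root mask {2,3,0,1} over a PSHUFLW operand mask
// {1,0,2,3,4,5,6,7} composes to the v8i16 mask {4,5,6,7,1,0,2,3}.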
/// \brief Get the PSHUF-style mask from PSHUF node.
///
/// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
@@ -18452,19 +21777,23 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
/// We walk up the chain and look for a combinable shuffle, skipping over
/// shuffles that we could hoist this shuffle's transformation past without
/// altering anything.
-static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue
+combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
assert(N.getOpcode() == X86ISD::PSHUFD &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
- // Walk up a single-use chain looking for a combinable shuffle.
+ // Walk up a single-use chain looking for a combinable shuffle. Keep a stack
+ // of the shuffles in the chain so that we can form a fresh chain to replace
+ // this one.
+ SmallVector<SDValue, 8> Chain;
SDValue V = N.getOperand(0);
for (; V.hasOneUse(); V = V.getOperand(0)) {
switch (V.getOpcode()) {
default:
- return false; // Nothing combined!
+ return SDValue(); // Nothing combined!
case ISD::BITCAST:
// Skip bitcasts as we always know the type for the target specific
@@ -18480,8 +21809,9 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
// dword shuffle, and the high words are self-contained.
if (Mask[0] != 0 || Mask[1] != 1 ||
!(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
- return false;
+ return SDValue();
+ Chain.push_back(V);
continue;
case X86ISD::PSHUFHW:
@@ -18489,8 +21819,9 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
// dword shuffle, and the low words are self-contained.
if (Mask[2] != 2 || Mask[3] != 3 ||
!(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
- return false;
+ return SDValue();
+ Chain.push_back(V);
continue;
case X86ISD::UNPCKL:
@@ -18498,25 +21829,28 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
- return false;
+ return SDValue();
// Search for a half-shuffle which we can combine with.
unsigned CombineOp =
V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
if (V.getOperand(0) != V.getOperand(1) ||
!V->isOnlyUserOf(V.getOperand(0).getNode()))
- return false;
+ return SDValue();
+ Chain.push_back(V);
V = V.getOperand(0);
do {
switch (V.getOpcode()) {
default:
- return false; // Nothing to combine.
+ return SDValue(); // Nothing to combine.
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
if (V.getOpcode() == CombineOp)
break;
+ Chain.push_back(V);
+
// Fallthrough!
case ISD::BITCAST:
V = V.getOperand(0);
@@ -18532,10 +21866,7 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
if (!V.hasOneUse())
// We fell out of the loop without finding a viable combining instruction.
- return false;
-
- // Record the old value to use in RAUW-ing.
- SDValue Old = V;
+ return SDValue();
// Merge this node's mask and our incoming mask.
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
@@ -18544,20 +21875,34 @@ static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DAG));
- // It is possible that one of the combinable shuffles was completely absorbed
- // by the other, just replace it and revisit all users in that case.
- if (Old.getNode() == V.getNode()) {
- DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo=*/true);
- return true;
- }
+ // Rebuild the chain around this new shuffle.
+ while (!Chain.empty()) {
+ SDValue W = Chain.pop_back_val();
- // Replace N with its operand as we're going to combine that shuffle away.
- DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+ if (V.getValueType() != W.getOperand(0).getValueType())
+ V = DAG.getNode(ISD::BITCAST, DL, W.getOperand(0).getValueType(), V);
- // Replace the combinable shuffle with the combined one, updating all users
- // so that we re-evaluate the chain here.
- DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
- return true;
+ switch (W.getOpcode()) {
+ default:
+ llvm_unreachable("Only PSHUF and UNPCK instructions get here!");
+
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
+ break;
+
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
+ break;
+ }
+ }
+ if (V.getValueType() != N.getValueType())
+ V = DAG.getNode(ISD::BITCAST, DL, N.getValueType(), V);
+
+ // Return the new chain to replace N.
+ return V;
}
/// \brief Search for a combinable shuffle across a chain ending in pshuflw or pshufhw.
@@ -18593,26 +21938,6 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
// Other-half shuffles are no-ops.
continue;
-
- case X86ISD::PSHUFD: {
- // We can only handle pshufd if the half we are combining either stays in
- // its half, or switches to the other half. Bail if one of these isn't
- // true.
- SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
- int DOffset = CombineOpcode == X86ISD::PSHUFLW ? 0 : 2;
- if (!((VMask[DOffset + 0] < 2 && VMask[DOffset + 1] < 2) ||
- (VMask[DOffset + 0] >= 2 && VMask[DOffset + 1] >= 2)))
- return false;
-
- // Map the mask through the pshufd and keep walking up the chain.
- for (int i = 0; i < 4; ++i)
- Mask[i] = 2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2;
-
- // Switch halves if the pshufd does.
- CombineOpcode =
- VMask[DOffset + Mask[0] / 2] < 2 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
- continue;
- }
}
// Break out of the loop if we break out of the switch.
break;
@@ -18622,7 +21947,11 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
// We fell out of the loop without finding a viable combining instruction.
return false;
- // Record the old value to use in RAUW-ing.
+ // Combine away the bottom node as its shuffle will be accumulated into
+ // a preceding shuffle.
+ DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
+
+ // Record the old value.
SDValue Old = V;
// Merge this node's mask and our incoming mask (adjusted to account for all
@@ -18633,12 +21962,13 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DAG));
- // Replace N with its operand as we're going to combine that shuffle away.
- DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+ // Check that the shuffles didn't cancel each other out. If not, we need to
+ // combine to the new one.
+ if (Old != V)
+ // Replace the combinable shuffle with the combined one, updating all users
+ // so that we re-evaluate the chain here.
+ DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
- // Replace the combinable shuffle with the combined one, updating all users
- // so that we re-evaluate the chain here.
- DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
return true;
}
@@ -18679,13 +22009,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
return SDValue(); // We combined away this shuffle, so we're done.
// See if this reduces to a PSHUFD which is no more expensive and can
- // combine with more operations.
- if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
- areAdjacentMasksSequential(Mask)) {
- int DMask[] = {-1, -1, -1, -1};
+ // combine with more operations. Note that it has to at least flip the
+ // dwords as otherwise it would have been removed as a no-op.
+ if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) {
+ int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
- DMask[DOffset + 0] = DOffset + Mask[0] / 2;
- DMask[DOffset + 1] = DOffset + Mask[2] / 2;
+ DMask[DOffset + 0] = DOffset + 1;
+ DMask[DOffset + 1] = DOffset + 0;
V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
DCI.AddToWorklist(V.getNode());
V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
@@ -18738,8 +22068,8 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFD:
- if (combineRedundantDWordShuffle(N, Mask, DAG, DCI))
- return SDValue(); // We combined away this shuffle.
+ if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG, DCI))
+ return NewN;
break;
}
@@ -18747,6 +22077,61 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Try to combine a shuffle into a target-specific add-sub node.
+///
+/// We combine this directly on the abstract vector shuffle nodes so it is
+/// easier to generically match. We also insert dummy vector shuffle nodes for
+/// the operands which explicitly discard the lanes which are unused by this
+/// operation to try to flow through the rest of the combiner the fact that
+/// they're unused.
+static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // We only handle target-independent shuffles.
+ // FIXME: It would be easy and harmless to use the target shuffle mask
+ // extraction tool to support more.
+ if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
+ return SDValue();
+
+ auto *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+
+ // We require the first shuffle operand to be the SUB node, and the second to
+ // be the ADD node.
+ // FIXME: We should support the commuted patterns.
+ if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD)
+ return SDValue();
+
+ // If there are other uses of these operations we can't fold them.
+ if (!V1->hasOneUse() || !V2->hasOneUse())
+ return SDValue();
+
+ // Ensure that both operations have the same operands. Note that we can
+ // commute the FADD operands.
+ SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
+ if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
+ (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
+ return SDValue();
+
+ // We're looking for blends between FADD and FSUB nodes. We insist on these
+ // nodes being lined up in a specific expected pattern.
+ if (!(isShuffleEquivalent(Mask, 0, 3) ||
+ isShuffleEquivalent(Mask, 0, 5, 2, 7) ||
+ isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
+ return SDValue();
+
+ // Only specific types are legal at this point, assert so we notice if and
+ // when these change.
+ assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
+ VT == MVT::v4f64) &&
+ "Unknown vector type encountered!");
+
+ return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
+}
+
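// Illustrative sketch (editorial, not part of this change): per-lane semantics
// of the ADDSUB node formed above, shown for four float lanes. Even lanes take
// the subtraction and odd lanes the addition, which is exactly the blend the
// masks <0,3>, <0,5,2,7> and <0,9,2,11,4,13,6,15> describe.
void addsub_model(const float A[4], const float B[4], float R[4]) {
  for (int i = 0; i < 4; ++i)
    R[i] = (i % 2 == 0) ? A[i] - B[i] : A[i] + B[i];
}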
/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -18756,54 +22141,17 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- // Canonicalize shuffles that perform 'addsub' on packed float vectors
- // according to the rule:
- // (shuffle (FADD A, B), (FSUB A, B), Mask) ->
- // (shuffle (FSUB A, -B), (FADD A, -B), Mask)
- //
- // Where 'Mask' is:
- // <0,5,2,7> -- for v4f32 and v4f64 shuffles;
- // <0,3> -- for v2f64 shuffles;
- // <0,9,2,11,4,13,6,15> -- for v8f32 shuffles.
- //
- // This helps pattern-matching more SSE3/AVX ADDSUB instructions
- // during ISel stage.
- if (N->getOpcode() == ISD::VECTOR_SHUFFLE &&
- ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
- N0->getOpcode() == ISD::FADD && N1->getOpcode() == ISD::FSUB &&
- // Operands to the FADD and FSUB must be the same.
- ((N0->getOperand(0) == N1->getOperand(0) &&
- N0->getOperand(1) == N1->getOperand(1)) ||
- // FADD is commutable. See if by commuting the operands of the FADD
- // we would still be able to match the operands of the FSUB dag node.
- (N0->getOperand(1) == N1->getOperand(0) &&
- N0->getOperand(0) == N1->getOperand(1))) &&
- N0->getOperand(0)->getOpcode() != ISD::UNDEF &&
- N0->getOperand(1)->getOpcode() != ISD::UNDEF) {
-
- ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
- unsigned NumElts = VT.getVectorNumElements();
- ArrayRef<int> Mask = SV->getMask();
- bool CanFold = true;
-
- for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i)
- CanFold = Mask[i] == (int)((i & 1) ? i + NumElts : i);
-
- if (CanFold) {
- SDValue Op0 = N1->getOperand(0);
- SDValue Op1 = DAG.getNode(ISD::FNEG, dl, VT, N1->getOperand(1));
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, VT, Op0, Op1);
- SDValue Add = DAG.getNode(ISD::FADD, dl, VT, Op0, Op1);
- return DAG.getVectorShuffle(VT, dl, Sub, Add, Mask);
- }
- }
-
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
return SDValue();
+ // If we have legalized the vector types, look for blends of FADD and FSUB
+ // nodes that we can fuse into an ADDSUB node.
+ if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
+ if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
+ return AddSub;
+
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
if (Subtarget->hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
@@ -18880,6 +22228,18 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget);
if (Shuffle.getNode())
return Shuffle;
+
+ // Try recursively combining arbitrary sequences of x86 shuffle
+ // instructions into higher-order shuffles. We do this after combining
+ // specific PSHUF instruction sequences into their minimal form so that we
+ // can evaluate how many specialized shuffle instructions are involved in
+ // a particular chain.
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ if (combineX86ShufflesRecursively(SDValue(N, 0), SDValue(N, 0), NonceMask,
+ /*Depth*/ 1, /*HasPSHUFB*/ false, DAG,
+ DCI, Subtarget))
+ return SDValue(); // This routine will use CombineTo to replace N.
}
return SDValue();
@@ -18897,7 +22257,7 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
/// specific shuffle of a load can be folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
-/// shuffles have been customed lowered so we need to handle those here.
+/// shuffles have been custom lowered so we need to handle those here.
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
@@ -18909,20 +22269,20 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
- EVT VT = InVec.getValueType();
+ EVT OriginalVT = InVec.getValueType();
- bool HasShuffleIntoBitcast = false;
if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
- if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
+ if (BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
return SDValue();
InVec = InVec.getOperand(0);
- HasShuffleIntoBitcast = true;
}
+ EVT CurrentVT = InVec.getValueType();
+
if (!isTargetShuffle(InVec.getOpcode()))
return SDValue();
@@ -18932,12 +22292,12 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
- if (!getTargetShuffleMask(InVec.getNode(), VT.getSimpleVT(), ShuffleMask,
- UnaryShuffle))
+ if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
+ ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
@@ -18963,28 +22323,28 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
- if (HasShuffleIntoBitcast) {
- // If there's a bitcast before the shuffle, check if the load type and
- // alignment is valid.
- unsigned Align = LN0->getAlignment();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NewAlign = TLI.getDataLayout()->
- getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ EVT EltVT = N->getValueType(0);
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment are valid.
+ unsigned Align = LN0->getAlignment();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ EltVT.getTypeForEVT(*DAG.getContext()));
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
- return SDValue();
- }
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
+ return SDValue();
// All checks match so transform back to vector_shuffle so that DAG combiner
// can finish the job
SDLoc dl(N);
// Create a shuffle node, taking into account the case that it's a unary shuffle
- SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
- Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
+ SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT)
+ : InVec.getOperand(1);
+ Shuffle = DAG.getVectorShuffle(CurrentVT, dl,
InVec.getOperand(0), Shuffle,
&ShuffleMask[0]);
- Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ Shuffle = DAG.getNode(ISD::BITCAST, dl, OriginalVT, Shuffle);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
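
The fold above is essentially index arithmetic: extracting lane Elt from a target shuffle whose source was just loaded is the same as loading the lane the shuffle mask points at. A scalar model of the idea (a sketch, not the DAG code itself; it assumes the mask entry is in range and refers to the loaded operand):

    // Scalar model: extract(shuffle(load Mem, Mask), Elt) == Mem[Mask[Elt]].
    static int extractAfterShuffledLoad(const int *Mem, const int *Mask, int Elt) {
      return Mem[Mask[Elt]];
    }
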
@@ -19190,6 +22550,12 @@ TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
+ // A vselect where all conditions and data are constants can be optimized into
+ // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
+ if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
+ ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
+ return SDValue();
+
unsigned MaskValue = 0;
if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
return SDValue();
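
With a constant condition the vselect is nothing more than a per-lane choice, which is why it can be rewritten as a blend or a vector shuffle. A rough model of the mask construction, under the assumptions that an all-ones condition lane selects the true (LHS) operand and that shuffle indices 0..N-1 name LHS lanes while N..2N-1 name RHS lanes, matching the generic VECTOR_SHUFFLE convention:

    // Build a shuffle mask equivalent to a vselect with a constant condition.
    static void vselectToShuffleMask(const bool *CondLaneIsAllOnes,
                                     unsigned NumElems, int *MaskOut) {
      for (unsigned i = 0; i != NumElems; ++i)
        MaskOut[i] = CondLaneIsAllOnes[i] ? (int)i             // lane i of LHS
                                          : (int)(i + NumElems); // lane i of RHS
    }
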
@@ -19367,13 +22733,15 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1) {
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
- // lowering on AVX-512. In this case we convert it to
+ // lowering on KNL. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
- // The same situation for all 128 and 256-bit vectors of i8 and i16
+ // The same situation for all 128 and 256-bit vectors of i8 and i16.
+ // Since SKX, these selects have a proper lowering.
EVT OpVT = LHS.getValueType();
if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
(OpVT.getVectorElementType() == MVT::i8 ||
- OpVT.getVectorElementType() == MVT::i16)) {
+ OpVT.getVectorElementType() == MVT::i16) &&
+ !(Subtarget->hasBWI() && Subtarget->hasVLX())) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
@@ -19593,22 +22961,22 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
- // Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
- if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
- // Check if SETCC has already been promoted
- TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT &&
- // Check that condition value type matches vselect operand type
- CondVT == VT) {
-
+ // Simplify vector selection if condition value type matches vselect
+ // operand type
+ if (N->getOpcode() == ISD::VSELECT && CondVT == VT) {
assert(Cond.getValueType().isVector() &&
"vector select expects a vector selector!");
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
- if (!TValIsAllOnes && !FValIsAllZeros) {
- // Try invert the condition if true value is not all 1s and false value
- // is not all 0s.
+ // Try to invert the condition if the true value is not all 1s and the
+ // false value is not all 0s.
+ if (!TValIsAllOnes && !FValIsAllZeros &&
+ // Check if the selector will be produced by CMPP*/PCMP*
+ Cond.getOpcode() == ISD::SETCC &&
+ // Check if SETCC has already been promoted
+ TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT) {
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
@@ -19726,22 +23094,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// build_vector of constants. This will be taken care of in a later
// condition.
(TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
- VT != MVT::v8i16)) {
+ VT != MVT::v8i16) &&
+ // Don't optimize a vector of constants. Those are handled by the
+ // generic code, and all the bits must be properly set for the
+ // generic optimizer.
+ !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
return SDValue();
- // Check all uses of that condition operand to check whether it will be
- // consumed by non-BLEND instructions, which may depend on all bits are set
- // properly.
- for (SDNode::use_iterator I = Cond->use_begin(),
- E = Cond->use_end(); I != E; ++I)
- if (I->getOpcode() != ISD::VSELECT)
- // TODO: Add other opcodes eventually lowered into BLEND.
- return SDValue();
-
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
@@ -19749,8 +23112,45 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
- TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
- DCI.CommitTargetLoweringOpt(TLO);
+ TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
+ TLO)) {
+ // If we changed the computation somewhere in the DAG, this change will
+ // affect all users of Cond. Update all those users so that they no longer
+ // use the generic VSELECT; otherwise we may perform wrong optimizations,
+ // because we have changed what the vector boolean values are expected
+ // to hold.
+ if (Cond != TLO.Old) {
+ // Check all uses of the condition operand to see whether it will be
+ // consumed by non-BLEND instructions, which may depend on all bits being
+ // set properly.
+ for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
+ I != E; ++I)
+ if (I->getOpcode() != ISD::VSELECT)
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ return SDValue();
+
+ // Update all the users of the condition, before committing the change,
+ // so that the VSELECT optimizations that expect the correct vector
+ // boolean value will not be triggered.
+ for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
+ I != E; ++I)
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(*I, 0),
+ DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0),
+ Cond, I->getOperand(1), I->getOperand(2)));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return SDValue();
+ }
+ // At this point, only Cond is changed. Change the condition
+ // just for N to keep the opportunity to optimize all other
+ // users their own way.
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(N, 0),
+ DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0),
+ TLO.New, N->getOperand(1), N->getOperand(2)));
+ return SDValue();
+ }
}
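
The demanded mask used above keeps only the top bit of each condition element, because a BLENDV-style instruction reads nothing but the sign bit of every lane. A small standalone check of what that mask looks like, assuming only LLVM's APInt (which is what the code above uses):

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    int main() {
      const unsigned BitWidth = 8;  // i8 condition elements
      llvm::APInt DemandedMask = llvm::APInt::getHighBitsSet(BitWidth, 1);
      assert(DemandedMask == llvm::APInt(BitWidth, 0x80)); // only the sign bit
      return 0;
    }
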
// We should generate an X86ISD::BLENDI from a vselect if its argument
@@ -19764,7 +23164,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Iff we find this pattern and the build_vectors are built from
// constants, we translate the vselect into a shuffle_vector that we
// know will be matched by LowerVECTOR_SHUFFLEtoBlend.
- if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
+ if ((N->getOpcode() == ISD::VSELECT ||
+ N->getOpcode() == X86ISD::SHRUNKBLEND) &&
+ !DCI.isBeforeLegalize()) {
SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
if (Shuffle.getNode())
return Shuffle;
@@ -20830,7 +24232,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
EVT MemVT = Ld->getMemoryVT();
SDLoc dl(Ld);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned RegSz = RegVT.getSizeInBits();
// On Sandybridge unaligned 256bit loads are inefficient.
ISD::LoadExtType Ext = Ld->getExtensionType();
@@ -20866,153 +24267,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, NewVec, TF, true);
}
- // If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
- // expansion is still better than scalar code.
- // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
- // emit a shuffle and a arithmetic shift.
- // TODO: It is possible to support ZExt by zeroing the undef values
- // during the shuffle phase or after the shuffle.
- if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
- (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
- assert(MemVT != RegVT && "Cannot extend to the same type");
- assert(MemVT.isVector() && "Must load a vector from memory");
-
- unsigned NumElems = RegVT.getVectorNumElements();
- unsigned MemSz = MemVT.getSizeInBits();
- assert(RegSz > MemSz && "Register size must be greater than the mem size");
-
- if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
- return SDValue();
-
- // All sizes must be a power of two.
- if (!isPowerOf2_32(RegSz * MemSz * NumElems))
- return SDValue();
-
- // Attempt to load the original value using scalar loads.
- // Find the largest scalar type that divides the total loaded size.
- MVT SclrLoadTy = MVT::i8;
- for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
- tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
- MVT Tp = (MVT::SimpleValueType)tp;
- if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
- SclrLoadTy = Tp;
- }
- }
-
- // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
- if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
- (64 <= MemSz))
- SclrLoadTy = MVT::f64;
-
- // Calculate the number of scalar loads that we need to perform
- // in order to load our vector from memory.
- unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
- if (Ext == ISD::SEXTLOAD && NumLoads > 1)
- return SDValue();
-
- unsigned loadRegZize = RegSz;
- if (Ext == ISD::SEXTLOAD && RegSz == 256)
- loadRegZize /= 2;
-
- // Represent our vector as a sequence of elements which are the
- // largest scalar that we can load.
- EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
- loadRegZize/SclrLoadTy.getSizeInBits());
-
- // Represent the data using the same element type that is stored in
- // memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegZize/MemVT.getScalarType().getSizeInBits());
-
- assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
- "Invalid vector type");
-
- // We can't shuffle using an illegal type.
- if (!TLI.isTypeLegal(WideVecVT))
- return SDValue();
-
- SmallVector<SDValue, 8> Chains;
- SDValue Ptr = Ld->getBasePtr();
- SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8,
- TLI.getPointerTy());
- SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
-
- for (unsigned i = 0; i < NumLoads; ++i) {
- // Perform a single load.
- SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
- Ptr, Ld->getPointerInfo(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->isInvariant(), Ld->getAlignment());
- Chains.push_back(ScalarLoad.getValue(1));
- // Create the first element type using SCALAR_TO_VECTOR in order to avoid
- // another round of DAGCombining.
- if (i == 0)
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
- else
- Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
- ScalarLoad, DAG.getIntPtrConstant(i));
-
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- }
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
-
- // Bitcast the loaded value to a vector of the original element type, in
- // the size of the target vector type.
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
- unsigned SizeRatio = RegSz/MemSz;
-
- if (Ext == ISD::SEXTLOAD) {
- // If we have SSE4.1 we can directly emit a VSEXT node.
- if (Subtarget->hasSSE41()) {
- SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
- return DCI.CombineTo(N, Sext, TF, true);
- }
-
- // Otherwise we'll shuffle the small elements in the high bits of the
- // larger type and perform an arithmetic shift. If the shift is not legal
- // it's better to scalarize.
- if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
- return SDValue();
-
- // Redistribute the loaded elements into the different locations.
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(WideVecVT),
- &ShuffleVec[0]);
-
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
-
- // Build the arithmetic shift.
- unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
- MemVT.getVectorElementType().getSizeInBits();
- Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
- DAG.getConstant(Amt, RegVT));
-
- return DCI.CombineTo(N, Shuff, TF, true);
- }
-
- // Redistribute the loaded elements into the different locations.
- SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i*SizeRatio] = i;
-
- SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
- DAG.getUNDEF(WideVecVT),
- &ShuffleVec[0]);
-
- // Bitcast to the requested type.
- Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
- // Replace the original load with the new sequence
- // and return the new chain.
- return DCI.CombineTo(N, Shuff, TF, true);
- }
-
return SDValue();
}
@@ -21535,13 +24789,29 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
+ // (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
+ // This exposes the sext to the sdivrem lowering, so that it directly extends
+ // from AH (which we otherwise need to do contortions to access).
+ if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
+ N0.getValueType() == MVT::i8 && VT == MVT::i32) {
+ SDLoc dl(N);
+ SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
+ SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
+ N0.getOperand(0), N0.getOperand(1));
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
+ return R.getValue(1);
+ }
+
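
The motivation is the x86 8-bit IDIV, which leaves the quotient in AL and the remainder in AH; without this node, sign-extending the remainder first has to copy AH into a low 8-bit register. A C-level example of source that can give rise to this DAG shape (illustrative only; it assumes the optimizer has already narrowed the division to 8 bits):

    // Remainder of an 8-bit signed division, widened to int for the caller.
    // With SDIVREM8_SEXT_HREG this lowers to roughly: idiv; movsx eax, ah.
    int rem8(signed char x, signed char y) {
      signed char r = x % y;  // i8 sdivrem (after narrowing)
      return r;               // sext i8 -> i32
    }
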
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (!Subtarget->hasFp256())
return SDValue();
- EVT VT = N->getValueType(0);
if (VT.isVector() && VT.getSizeInBits() == 256) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())
@@ -21634,6 +24904,20 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
return R;
}
+ // (i8,i32 zext (udivrem (i8 x, i8 y)) ->
+ // (i8,i32 (udivrem_zext_hreg (i8 x, i8 y)
+ // This exposes the zext to the udivrem lowering, so that it directly extends
+ // from AH (which we otherwise need to do contortions to access).
+ if (N0.getOpcode() == ISD::UDIVREM &&
+ N0.getResNo() == 1 && N0.getValueType() == MVT::i8 &&
+ (VT == MVT::i32 || VT == MVT::i64)) {
+ SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
+ SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
+ N0.getOperand(0), N0.getOperand(1));
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
+ return R.getValue(1);
+ }
+
return SDValue();
}
@@ -21803,8 +25087,61 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
+ SelectionDAG &DAG) {
+ // Take advantage of vector comparisons producing 0 or -1 in each lane to
+ // optimize away the operation when it is applied to a constant.
+ //
+ // The general transformation is:
+ // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
+ // AND(VECTOR_CMP(x,y), constant2)
+ // constant2 = UNARYOP(constant)
+
+ // Early exit if this isn't a vector operation, the operand of the
+ // unary operation isn't a bitwise AND, or if the sizes of the operations
+ // aren't the same.
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
+ N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
+ VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
+ return SDValue();
+
+ // Now check that the other operand of the AND is a constant. We could
+ // make the transformation for non-constant splats as well, but it's unclear
+ // that would be a benefit as it would not eliminate any operations, just
+ // perform one more step in scalar code before moving to the vector unit.
+ if (BuildVectorSDNode *BV =
+ dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+ // Bail out if the vector isn't a constant.
+ if (!BV->isConstant())
+ return SDValue();
+
+ // Everything checks out. Build up the new and improved node.
+ SDLoc DL(N);
+ EVT IntVT = BV->getValueType(0);
+ // Create a new constant of the appropriate type for the transformed
+ // DAG.
+ SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
+ // The AND node needs bitcasts to/from an integer vector type around it.
+ SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
+ N->getOperand(0)->getOperand(0), MaskConst);
+ SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+ return Res;
+ }
+
+ return SDValue();
+}
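
Lane-wise, the rewrite is valid because the compare produces either all-zero or all-one bits, and the unary op (sint_to_fp here) maps an all-zero input to an all-zero bit pattern. A scalar model of the equivalence, sketched under that assumption:

    #include <cassert>

    // One lane: UNARYOP(AND(cmp, C)) versus the folded form, where UNARYOP is
    // int -> float conversion and the folded AND keeps the converted constant
    // when the compare is true and yields the all-zero pattern (+0.0f) otherwise.
    static float laneOriginal(bool CmpTrue, int C) {
      int Mask = CmpTrue ? -1 : 0;
      return static_cast<float>(Mask & C);
    }
    static float laneFolded(bool CmpTrue, int C) {
      return CmpTrue ? static_cast<float>(C) : 0.0f;
    }

    int main() {
      for (int C : {0, 1, -7, 42})
        for (bool T : {false, true})
          assert(laneOriginal(T, C) == laneFolded(T, C));
      return 0;
    }
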
+
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86TargetLowering *XTLI) {
+ // First try to optimize away the conversion entirely when it's
+ // conditionally from a constant. Vectors only.
+ SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
+ if (Res != SDValue())
+ return Res;
+
+ // Now move on to more general possibilities.
SDValue Op0 = N->getOperand(0);
EVT InVT = Op0->getValueType(0);
@@ -21950,18 +25287,68 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
/// performVZEXTCombine - Performs build vector combines
static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(N);
+ MVT VT = N->getSimpleValueType(0);
+ SDValue Op = N->getOperand(0);
+ MVT OpVT = Op.getSimpleValueType();
+ MVT OpEltVT = OpVT.getVectorElementType();
+ unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();
+
// (vzext (bitcast (vzext (x)) -> (vzext x)
- SDValue In = N->getOperand(0);
- while (In.getOpcode() == ISD::BITCAST)
- In = In.getOperand(0);
+ SDValue V = Op;
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
- if (In.getOpcode() != X86ISD::VZEXT)
- return SDValue();
+ if (V != Op && V.getOpcode() == X86ISD::VZEXT) {
+ MVT InnerVT = V.getSimpleValueType();
+ MVT InnerEltVT = InnerVT.getVectorElementType();
+
+ // If the element sizes match exactly, we can just do one larger vzext. This
+ // is always an exact type match as vzext operates on integer types.
+ if (OpEltVT == InnerEltVT) {
+ assert(OpVT == InnerVT && "Types must match for vzext!");
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0));
+ }
+
+ // The only other way we can combine them is if only a single element of the
+ // inner vzext is used in the input to the outer vzext.
+ if (InnerEltVT.getSizeInBits() < InputBits)
+ return SDValue();
+
+ // In this case, the inner vzext is completely dead because we're going to
+ // only look at bits inside of the low element. Just do the outer vzext on
+ // a bitcast of the input to the inner.
+ return DAG.getNode(X86ISD::VZEXT, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, OpVT, V));
+ }
+
+ // Check if we can bypass extracting and re-inserting an element of an input
+ // vector. Essentially:
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
+ SDValue ExtractedV = V.getOperand(0);
+ SDValue OrigV = ExtractedV.getOperand(0);
+ if (auto *ExtractIdx = dyn_cast<ConstantSDNode>(ExtractedV.getOperand(1)))
+ if (ExtractIdx->getZExtValue() == 0) {
+ MVT OrigVT = OrigV.getSimpleValueType();
+ // Extract a subvector if necessary...
+ if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
+ int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
+ OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
+ OrigVT.getVectorNumElements() / Ratio);
+ OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
+ DAG.getIntPtrConstant(0));
+ }
+ Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV);
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
+ }
+ }
- return DAG.getNode(X86ISD::VZEXT, SDLoc(N), N->getValueType(0),
- In.getOperand(0));
+ return SDValue();
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
@@ -21972,7 +25359,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
- case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ case ISD::SELECT:
+ case X86ISD::SHRUNKBLEND:
+ return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
@@ -22013,12 +25402,13 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::VPERMILP:
+ case X86ISD::VPERMILPI:
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
@@ -22668,14 +26058,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Constraint[5] == ')' &&
Constraint[6] == '}') {
- Res.first = X86::ST0+Constraint[4]-'0';
+ Res.first = X86::FP0+Constraint[4]-'0';
Res.second = &X86::RFP80RegClass;
return Res;
}
// GCC allows "st(0)" to be called just plain "st".
if (StringRef("{st}").equals_lower(Constraint)) {
- Res.first = X86::ST0;
+ Res.first = X86::FP0;
Res.second = &X86::RFP80RegClass;
return Res;
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index c8cdce7..7c6ffa2 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86ISELLOWERING_H
-#define X86ISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
+#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -187,12 +187,17 @@ namespace llvm {
/// PSIGN - Copy integer sign.
PSIGN,
- /// BLENDV - Blend where the selector is a register.
- BLENDV,
-
/// BLENDI - Blend where the selector is an immediate.
BLENDI,
+ /// SHRUNKBLEND - Blend where the condition has been shrunk.
+ /// This is used to emphasize that the condition mask is
+ /// no longer valid for generic VSELECT optimizations.
+ SHRUNKBLEND,
+
+ /// ADDSUB - Combined add and sub on an FP vector.
+ ADDSUB,
+
// SUBUS - Integer sub with unsigned saturation.
SUBUS,
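
ADDSUB, added above, models the SSE3/AVX ADDSUBPS/ADDSUBPD instructions (the "return AddSub" combine earlier in this patch produces it): subtract in the even lanes, add in the odd ones. A lane-wise sketch of that semantic, for illustration only:

    // Even lanes: a - b, odd lanes: a + b (the ADDSUBPS/ADDSUBPD behaviour).
    static void addsub(const float *A, const float *B, float *R, unsigned N) {
      for (unsigned i = 0; i != N; ++i)
        R[i] = (i & 1) ? A[i] + B[i] : A[i] - B[i];
    }
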
@@ -301,6 +306,13 @@ namespace llvm {
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
+ // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS
+ SMUL8, UMUL8,
+
+ // 8-bit divrem that zero-extend the high result (AH).
+ UDIVREM8_ZEXT_HREG,
+ SDIVREM8_SEXT_HREG,
+
// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,
@@ -320,7 +332,10 @@ namespace llvm {
// Several flavors of instructions with vector shuffle behaviors.
PACKSS,
PACKUS,
+ // Intra-lane alignr
PALIGNR,
+ // AVX512 inter-lane alignr
+ VALIGN,
PSHUFD,
PSHUFHW,
PSHUFLW,
@@ -337,7 +352,8 @@ namespace llvm {
MOVSS,
UNPCKL,
UNPCKH,
- VPERMILP,
+ VPERMILPV,
+ VPERMILPI,
VPERMV,
VPERMV3,
VPERMIV3,
@@ -350,9 +366,9 @@ namespace llvm {
VINSERT,
VEXTRACT,
- // PMULUDQ - Vector multiply packed unsigned doubleword integers
+ // Vector multiply packed unsigned doubleword integers
PMULUDQ,
- // PMULUDQ - Vector multiply packed signed doubleword integers
+ // Vector multiply packed signed doubleword integers
PMULDQ,
// FMA nodes
@@ -363,20 +379,19 @@ namespace llvm {
FMADDSUB,
FMSUBADD,
- // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
- // according to %al. An operator is needed so that this can be expanded
- // with control flow.
+ // Save xmm argument registers to the stack, according to %al. An operator
+ // is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
- // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+ // Windows's _chkstk call to do stack probing.
WIN_ALLOCA,
- // SEG_ALLOCA - For allocating variable amounts of stack space when using
+ // For allocating variable amounts of stack space when using
// segmented stacks. Check if the current stacklet has enough space, and
// falls back to heap allocation if not.
SEG_ALLOCA,
- // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
+ // Windows's _ftol2 runtime routine to do fptoui.
WIN_FTOL,
// Memory barrier
@@ -385,38 +400,40 @@ namespace llvm {
SFENCE,
LFENCE,
- // FNSTSW16r - Store FP status word into i16 register.
+ // Store FP status word into i16 register.
FNSTSW16r,
- // SAHF - Store contents of %ah into %eflags.
+ // Store contents of %ah into %eflags.
SAHF,
- // RDRAND - Get a random integer and indicate whether it is valid in CF.
+ // Get a random integer and indicate whether it is valid in CF.
RDRAND,
- // RDSEED - Get a NIST SP800-90B & C compliant random integer and
+ // Get a NIST SP800-90B & C compliant random integer and
// indicate whether it is valid in CF.
RDSEED,
- // PCMP*STRI
PCMPISTRI,
PCMPESTRI,
- // XTEST - Test if in transactional execution.
+ // Test if in transactional execution.
XTEST,
- // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
+ // ERI instructions
+ RSQRT28, RCP28, EXP2,
+
+ // Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,
- // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ // Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
- // FNSTCW16m - Store FP control world into i16 memory.
+ // Store the FP control word into i16 memory.
FNSTCW16m,
- /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
/// has two inputs (token chain and address) and two outputs (int value
@@ -425,7 +442,7 @@ namespace llvm {
FP_TO_INT32_IN_MEM,
FP_TO_INT64_IN_MEM,
- /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// This instruction implements SINT_TO_FP with the
/// integer source in memory and FP reg result. This corresponds to the
/// X86::FILD*m instructions. It has three inputs (token chain, address,
/// and source type) and two outputs (FP value and token chain). FILD_FLAG
@@ -433,19 +450,19 @@ namespace llvm {
FILD,
FILD_FLAG,
- /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This instruction implements an extending load to FP stack slots.
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
/// operand, ptr to load from, and a ValueType node indicating the type
/// to load to.
FLD,
- /// FST - This instruction implements a truncating store to FP stack
+ /// This instruction implements a truncating store to FP stack
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
/// chain operand, value to store, address, and a ValueType to store it
/// as.
FST,
- /// VAARG_64 - This instruction grabs the address of the next argument
+ /// This instruction grabs the address of the next argument
/// from a va_list. (reads and modifies the va_list in memory)
VAARG_64
@@ -457,67 +474,76 @@ namespace llvm {
/// Define some predicates that are used for node matching.
namespace X86 {
- /// isVEXTRACT128Index - Return true if the specified
+ /// Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions.
bool isVEXTRACT128Index(SDNode *N);
- /// isVINSERT128Index - Return true if the specified
+ /// Return true if the specified
/// INSERT_SUBVECTOR operand specifies a subvector insert that is
/// suitable for input to VINSERTF128, VINSERTI128 instructions.
bool isVINSERT128Index(SDNode *N);
- /// isVEXTRACT256Index - Return true if the specified
+ /// Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions.
bool isVEXTRACT256Index(SDNode *N);
- /// isVINSERT256Index - Return true if the specified
+ /// Return true if the specified
/// INSERT_SUBVECTOR operand specifies a subvector insert that is
/// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions.
bool isVINSERT256Index(SDNode *N);
- /// getExtractVEXTRACT128Immediate - Return the appropriate
+ /// Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF128, VEXTRACTI128 instructions.
unsigned getExtractVEXTRACT128Immediate(SDNode *N);
- /// getInsertVINSERT128Immediate - Return the appropriate
+ /// Return the appropriate
/// immediate to insert at the specified INSERT_SUBVECTOR index
/// with VINSERTF128, VINSERT128 instructions.
unsigned getInsertVINSERT128Immediate(SDNode *N);
- /// getExtractVEXTRACT256Immediate - Return the appropriate
+ /// Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF64X4, VEXTRACTI64x4 instructions.
unsigned getExtractVEXTRACT256Immediate(SDNode *N);
- /// getInsertVINSERT256Immediate - Return the appropriate
+ /// Return the appropriate
/// immediate to insert at the specified INSERT_SUBVECTOR index
/// with VINSERTF64x4, VINSERTI64x4 instructions.
unsigned getInsertVINSERT256Immediate(SDNode *N);
- /// isZeroNode - Returns true if Elt is a constant zero or a floating point
- /// constant +0.0.
+ /// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);
- /// isOffsetSuitableForCodeModel - Returns true of the given offset can be
+ /// Returns true if the given offset can be
/// fit into displacement field of the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement = true);
- /// isCalleePop - Determines whether the callee is required to pop its
+ /// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool TailCallOpt);
+
+ /// AVX512 static rounding constants. These need to match the values in
+ /// avx512fintrin.h.
+ enum STATIC_ROUNDING {
+ TO_NEAREST_INT = 0,
+ TO_NEG_INF = 1,
+ TO_POS_INF = 2,
+ TO_ZERO = 3,
+ CUR_DIRECTION = 4
+ };
}
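
These values are the same immediates user code passes through the _MM_FROUND_* macros, so a mismatch would silently change rounding behaviour. A compile-time sanity-check sketch, assuming the macro values from the standard x86 intrinsic headers (_MM_FROUND_* comes from <smmintrin.h> and is reused by avx512fintrin.h):

    #include <smmintrin.h>

    static_assert(_MM_FROUND_TO_NEAREST_INT == 0, "TO_NEAREST_INT");
    static_assert(_MM_FROUND_TO_NEG_INF     == 1, "TO_NEG_INF");
    static_assert(_MM_FROUND_TO_POS_INF     == 2, "TO_POS_INF");
    static_assert(_MM_FROUND_TO_ZERO        == 3, "TO_ZERO");
    static_assert(_MM_FROUND_CUR_DIRECTION  == 4, "CUR_DIRECTION");
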
//===--------------------------------------------------------------------===//
- // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ // X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
public:
- explicit X86TargetLowering(X86TargetMachine &TM);
+ explicit X86TargetLowering(const X86TargetMachine &TM);
unsigned getJumpTableEncoding() const override;
@@ -528,21 +554,20 @@ namespace llvm {
const MachineBasicBlock *MBB, unsigned uid,
MCContext &Ctx) const override;
- /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
- /// jumptable.
+ /// Returns relocation base for the given PIC jumptable.
SDValue getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const override;
const MCExpr *
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const override;
- /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
unsigned getByValTypeAlignment(Type *Ty) const override;
- /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means the destination alignment can
/// satisfy any constraint. Similarly if SrcAlign is zero it
@@ -557,7 +582,7 @@ namespace llvm {
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const override;
- /// isSafeMemOpType - Returns true if it's safe to use load / store of the
+ /// Returns true if it's safe to use load / store of the
/// specified type to expand memcpy / memset inline. This is mostly true
/// for all types except for some special cases. For example, on X86
/// targets without SSE2 f64 load / store are done with fldl / fstpl which
@@ -565,17 +590,17 @@ namespace llvm {
/// legal as the hook is used before type legalization.
bool isSafeMemOpType(MVT VT) const override;
- /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+ /// Returns true if the target allows
/// unaligned memory accesses of the specified type. Returns whether it
/// is "fast" by reference in the second argument.
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
bool *Fast) const override;
- /// LowerOperation - Provide custom lowering hooks for some operations.
+ /// Provide custom lowering hooks for some operations.
///
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
@@ -584,13 +609,13 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- /// isTypeDesirableForOp - Return true if the target has native support for
+ /// Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
/// instruction encodings are longer and some i16 instructions are slow.
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
- /// isTypeDesirable - Return true if the target has native support for the
+ /// Return true if the target has native support for the
/// specified value type and it is 'desirable' to use the type. e.g. On x86
/// i16 is legal, but undesirable since i16 instruction encodings are longer
/// and some i16 instructions are slow.
@@ -601,24 +626,21 @@ namespace llvm {
MachineBasicBlock *MBB) const override;
- /// getTargetNodeName - This method returns the name of a target specific
- /// DAG node.
+ /// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
- /// getSetCCResultType - Return the value type to use for ISD::SETCC.
+ /// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
- /// computeKnownBitsForTargetNode - Determine which of the bits specified
- /// in Mask are known to be either zero or one and return them in the
- /// KnownZero/KnownOne bitsets.
+ /// Determine which of the bits specified in Mask are known to be either
+ /// zero or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
- // operation that are sign bits.
+ /// Determine the number of bits in the operation that are sign bits.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
const SelectionDAG &DAG,
unsigned Depth) const override;
@@ -641,16 +663,15 @@ namespace llvm {
const char *LowerXConstraint(EVT ConstraintVT) const override;
- /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
- /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
- /// true it means one of the asm constraint of the inline asm instruction
- /// being processed is 'm'.
+ /// Lower the specified operand into the Ops vector. If it is invalid, don't
+ /// add anything to Ops. If hasMemory is true it means one of the asm
+ /// constraints of the inline asm instruction being processed is 'm'.
void LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
/// register. This should only be used for C_Register constraints. On
/// error, this returns a register number of 0.
@@ -658,17 +679,17 @@ namespace llvm {
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const override;
- /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
- /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
/// compare a register against the immediate without having to materialize
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
- /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// Return true if the specified immediate is legal
/// add immediate, that is the target has add instructions which can
/// add a register and the immediate without having to materialize
/// the immediate into a register.
@@ -683,7 +704,7 @@ namespace llvm {
bool isVectorShiftByScalarCheap(Type *Ty) const override;
- /// isTruncateFree - Return true if it's free to truncate a value of
+ /// Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
/// register EAX to i16 by referencing its sub-register AX.
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
@@ -691,7 +712,7 @@ namespace llvm {
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
- /// isZExtFree - Return true if any actual instruction that defines a
+ /// Return true if any actual instruction that defines a
/// value of type Ty1 implicit zero-extends the value to Ty2 in the result
/// register. This does not necessarily include registers defined in
/// unknown ways, such as incoming arguments, or copies from unknown
@@ -703,37 +724,35 @@ namespace llvm {
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
+ /// Return true if an FMA operation is faster than a pair of fmul and fadd
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this
+ /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
- /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
- /// isFPImmLegal - Returns true if the target can instruction select the
+ /// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
- /// isShuffleMaskLegal - Targets can use this to indicate that they only
- /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
- /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
- /// values are assumed to be legal.
+ /// Targets can use this to indicate that they only support *some*
+ /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
+ /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
+ /// be legal.
bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const override;
- /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
- /// used by Targets can use this to indicate if there is a suitable
- /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
- /// pool entry.
+ /// Similar to isShuffleMaskLegal. Targets can use this to
+ /// indicate if there is a suitable VECTOR_SHUFFLE that can be used to
+ /// replace a VAND with a constant pool entry.
bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const override;
- /// ShouldShrinkFPConstant - If true, then instruction selection should
+ /// If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
/// in order to save space and / or reduce runtime.
bool ShouldShrinkFPConstant(EVT VT) const override {
@@ -747,19 +766,18 @@ namespace llvm {
return Subtarget;
}
- /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
- /// computed in an SSE register, not on the X87 floating point stack.
+ /// Return true if the specified scalar FP type is computed in an SSE
+ /// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
- /// for fptoui.
+ /// Return true if the target uses the MSVC _ftol2 routine for fptoui.
bool isTargetFTOL() const;
- /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
- /// used for fptoui to the given type.
+ /// Return true if the MSVC _ftol2 routine should be used for fptoui to the
+ /// given type.
bool isIntegerTypeFTOL(EVT VT) const {
return isTargetFTOL() && VT == MVT::i64;
}
@@ -776,15 +794,14 @@ namespace llvm {
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
- /// createFastISel - This method returns a target specific FastISel object,
+ /// This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
- /// getStackCookieLocation - Return true if the target stores stack
- /// protector cookies at a fixed offset in some non-standard address
- /// space, and populates the address space and offset as
- /// appropriate.
+ /// Return true if the target stores stack protector cookies at a fixed
+ /// offset in some non-standard address space, and populates the address
+ /// space and offset as appropriate.
bool getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const override;
@@ -796,6 +813,7 @@ namespace llvm {
/// \brief Reset the operation actions based on target options.
void resetOperationActions() override;
+ bool useLoadStackGuardNode() const override;
/// \brief Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
@@ -804,7 +822,7 @@ namespace llvm {
findRepresentativeClass(MVT VT) const override;
private:
- /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
const DataLayout *TD;
@@ -813,17 +831,16 @@ namespace llvm {
/// the operation actions unless we have to.
TargetOptions TO;
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
- /// floating point ops.
+ /// Select between SSE or x87 floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
bool X86ScalarSSEf32;
bool X86ScalarSSEf64;
- /// LegalFPImmediates - A list of legal fp immediates.
+ /// A list of legal FP immediates.
std::vector<APFloat> LegalFPImmediates;
- /// addLegalFPImmediate - Indicate that this x86 target can instruction
+ /// Indicate that this x86 target can instruction
/// select the specified FP immediate natively.
void addLegalFPImmediate(const APFloat& Imm) {
LegalFPImmediates.push_back(Imm);
@@ -847,9 +864,8 @@ namespace llvm {
// Call lowering helpers.
- /// IsEligibleForTailCallOptimization - Check whether the call is eligible
- /// for tail call optimization. Targets which want to do tail call
- /// optimization should implement this function.
+ /// Check whether the call is eligible for tail call optimization. Targets
+ /// that want to do tail call optimization should implement this function.
bool IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
@@ -936,7 +952,7 @@ namespace llvm {
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
- MVT getTypeForExtArgOrReturn(MVT VT,
+ EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const override;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
@@ -946,6 +962,15 @@ namespace llvm {
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
+ bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
+ bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+ bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+ LoadInst *
+ lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
+
+ bool needsCmpXchgNb(const Type *MemType) const;
+
/// Utility function to emit atomic-load-arith operations (and, or, xor,
/// nand, max, min, umax, umin). It takes the corresponding instruction to
/// expand, the associated machine basic block, and the associated X86
@@ -975,8 +1000,7 @@ namespace llvm {
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
- MachineBasicBlock *BB,
- bool Is64Bit) const;
+ MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
@@ -1005,6 +1029,15 @@ namespace llvm {
/// Convert a comparison if required by the subtarget.
SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+
+ /// Use rsqrt* to speed up sqrt calculations.
+ SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps,
+ bool &UseOneConstNR) const override;
+
+ /// Use rcp* to speed up fdiv calculations.
+ SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+ unsigned &RefinementSteps) const override;
};
namespace X86 {
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 41e900e..b188cd5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1,19 +1,277 @@
+// Group template arguments that can be derived from the vector type (EltNum x
+// EltVT). These are things like the register class for the writemask, etc.
+// The idea is to pass one of these as the template argument rather than the
+// individual arguments.
+class X86VectorVTInfo<int numelts, ValueType EltVT, RegisterClass rc,
+ string suffix = ""> {
+ RegisterClass RC = rc;
+ int NumElts = numelts;
+
+ // Corresponding mask register class.
+ RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
+
+ // Corresponding write-mask register class.
+ RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
+
+ // The GPR register class that can hold the write mask. Use GR8 for fewer
+ // than 8 elements. Use shift-right and equal to work around the lack of
+ // !lt in tablegen.
+ RegisterClass MRC =
+ !cast<RegisterClass>("GR" #
+ !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
+
+ // Suffix used in the instruction mnemonic.
+ string Suffix = suffix;
+
+ string VTName = "v" # NumElts # EltVT;
+
+ // The vector VT.
+ ValueType VT = !cast<ValueType>(VTName);
+
+ string EltTypeName = !cast<string>(EltVT);
+ // Size of the element type in bits, e.g. 32 for v16i32.
+ string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
+ int EltSize = EltVT.Size;
+
+ // "i" for integer types and "f" for floating-point types
+ string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
+
+ // Size of RC in bits, e.g. 512 for VR512.
+ int Size = VT.Size;
+
+ // The corresponding memory operand, e.g. i512mem for VR512.
+ X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
+ X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
+
+ // Load patterns
+ // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
+ // due to load promotion during legalization
+ PatFrag LdFrag = !cast<PatFrag>("load" #
+ !if (!eq (TypeVariantName, "i"),
+ !if (!eq (Size, 128), "v2i64",
+ !if (!eq (Size, 256), "v4i64",
+ VTName)), VTName));
+ PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
+
+ // Load patterns used for memory operands. We only have this defined in
+ // case of i64 element types for sub-512 integer vectors. For now, keep
+ // MemOpFrag undefined in these cases.
+ PatFrag MemOpFrag =
+ !if (!eq (TypeVariantName, "f"), !cast<PatFrag>("memop" # VTName),
+ !if (!eq (EltTypeName, "i64"), !cast<PatFrag>("memop" # VTName),
+ !if (!eq (VTName, "v16i32"), !cast<PatFrag>("memop" # VTName), ?)));
+
+ // The corresponding float type, e.g. v16f32 for v16i32
+ // Note: For EltSize < 32, FloatVT is illegal and TableGen
+ // fails to compile, so we choose FloatVT = VT
+ ValueType FloatVT = !cast<ValueType>(
+ !if (!eq (!srl(EltSize,5),0),
+ VTName,
+ !if (!eq(TypeVariantName, "i"),
+ "v" # NumElts # "f" # EltSize,
+ VTName)));
+
+ // The string to specify embedded broadcast in assembly.
+ string BroadcastStr = "{1to" # NumElts # "}";
+
+ // 8-bit compressed displacement tuple/subvector format. This is only
+ // defined for NumElts <= 8.
+ CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
+ !cast<CD8VForm>("CD8VT" # NumElts), ?);
+
+ SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
+ !if (!eq (Size, 256), sub_ymm, ?));
+
+ Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
+ !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
+ SSEPackedInt));
+
+ // A vector type of the same width with element type i32. This is used to
+ // create the canonical constant zero node ImmAllZerosV.
+ ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
+ dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
+}
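
One non-obvious piece above is the MRC computation: TableGen (at this point) has no !lt operator, so "NumElts < 8" is spelled as "!srl(NumElts, 3) == 0". A C++ model of that selection, just to make the trick explicit (not generated code):

    // Pick the GPR width that can hold an NumElts-bit write mask; anything
    // below 8 elements still rounds up to GR8.
    static unsigned maskGPRBits(unsigned NumElts) {
      return (NumElts >> 3) == 0 ? 8 : NumElts;  // 2,4 -> 8; 8,16,32,64 -> themselves
    }
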
+
+def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
+def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
+def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
+def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
+def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
+def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
+
+// "x" in v32i8x_info means RC = VR256X
+def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
+def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
+def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
+def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
+def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
+def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
+
+def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
+def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
+def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
+def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
+def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
+def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
+
+class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
+ X86VectorVTInfo i128> {
+ X86VectorVTInfo info512 = i512;
+ X86VectorVTInfo info256 = i256;
+ X86VectorVTInfo info128 = i128;
+}
+
+def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
+ v16i8x_info>;
+def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
+ v8i16x_info>;
+def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
+ v4i32x_info>;
+def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
+ v2i64x_info>;
+def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
+ v4f32x_info>;
+def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
+ v2f64x_info>;
+
+// This multiclass generates the masking variants from the non-masking
+// variant. It only provides the assembly pieces for the masking variants.
+// It assumes custom ISel patterns for masking which can be provided as
+// template arguments.
+multiclass AVX512_maskable_custom<bits<8> O, Format F,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ list<dag> Pattern,
+ list<dag> MaskingPattern,
+ list<dag> ZeroMaskingPattern,
+ string Round = "",
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
+ def NAME: AVX512<O, F, Outs, Ins,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"#
+ "$dst "#Round#", "#IntelSrcAsm#"}",
+ Pattern, itin>;
+
+ // Prefer over VMOV*rrk Pat<>
+ let AddedComplexity = 20 in
+ def NAME#k: AVX512<O, F, Outs, MaskingIns,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}"#Round#"|"#
+ "$dst {${mask}}"#Round#", "#IntelSrcAsm#"}",
+ MaskingPattern, itin>,
+ EVEX_K {
+ // In case of the 3src subclass this is overridden with a let.
+ string Constraints = MaskingConstraint;
+ }
+ let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
+ def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}"#Round#"|"#
+ "$dst {${mask}} {z}"#Round#", "#IntelSrcAsm#"}",
+ ZeroMaskingPattern,
+ itin>,
+ EVEX_KZ;
+}
+
+
+// Common base class of AVX512_maskable and AVX512_maskable_3src.
+multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskingRHS,
+ string Round = "",
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
+ AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst, MaskingRHS)],
+ [(set _.RC:$dst,
+ (vselect _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
+ Round, MaskingConstraint, NoItinerary, IsCommutable>;
+
+// This multiclass generates the unconditional/non-masking, the masking and
+// the zero-masking variant of the instruction. In the masking case, the
+// preserved vector elements come from a new dummy input operand tied to $dst.
+multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, string Round = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
+ AVX512_maskable_common<O, F, _, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (vselect _.KRCWM:$mask, RHS, _.RC:$src0), Round,
+ "$src0 = $dst", itin, IsCommutable>;
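+
+// A minimal, hypothetical use (names and opcode made up for exposition): a
+// simple binary op over v16f32 could be declared as
+//   defm VFOOPS : AVX512_maskable<0x58, MRMSrcReg, v16f32_info,
+//                   (outs VR512:$dst), (ins VR512:$src1, VR512:$src2), "vfoops",
+//                   "$src2, $src1", "$src1, $src2",
+//                   (v16f32 (fadd VR512:$src1, VR512:$src2))>, EVEX_4V, EVEX_V512;
+// and the $src0/$mask operands plus the "$src0 = $dst" tie are supplied by the
+// multiclass itself.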
+
+// Similar to AVX512_maskable but in this case one of the source operands
+// ($src1) is already tied to $dst so we just use that for the preserved
+// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
+// $src1.
+multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS> :
+ AVX512_maskable_common<O, F, _, Outs,
+ !con((ins _.RC:$src1), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
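+
+// For exposition, with a hypothetical NonTiedIns of (ins VR512:$src2,
+// VR512:$src3) the generated input dags become
+//   plain variant:        (ins $src1, $src2, $src3)
+//   masked/zero-masked:   (ins $src1, $mask, $src2, $src3)
+// and the masked result is (vselect $mask, RHS, $src1), so elements not
+// selected by the mask keep the value of the tied $src1 operand.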
+
+
+multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ list<dag> Pattern> :
+ AVX512_maskable_custom<O, F, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], "",
+ "$src0 = $dst">;
+
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX512] in {
- def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
- def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
- def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
+ def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
+ def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
+ def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
+ def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
+ def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
+ def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
+ def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
- def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
+ def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
+ def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
+ def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
+ def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
- def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
+ def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
+ def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
- def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
+ def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
+ def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
+ def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
+ def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
+ def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
+ def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
+ def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
+ def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
+ def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
+ def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
@@ -99,120 +357,92 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
-// -- 32x8 form --
-let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
-def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
- "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512;
-let mayLoad = 1 in
-def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
- "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
-}
-
-// -- 64x4 fp form --
-let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
-def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
- "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W;
-let mayLoad = 1 in
-def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
- "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
+
+multiclass vinsert_for_size_no_alt<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm> {
+ let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+ def rr : AVX512AIi8<Opcode, MRMSrcReg, (outs VR512:$dst),
+ (ins VR512:$src1, From.RC:$src2, i8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts #
+ "\t{$src3, $src2, $src1, $dst|"
+ "$dst, $src1, $src2, $src3}",
+ [(set To.RC:$dst, (vinsert_insert:$src3 (To.VT VR512:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm)))]>,
+ EVEX_4V, EVEX_V512;
+
+ let mayLoad = 1 in
+ def rm : AVX512AIi8<Opcode, MRMSrcMem, (outs VR512:$dst),
+ (ins VR512:$src1, From.MemOp:$src2, i8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts #
+ "\t{$src3, $src2, $src1, $dst|"
+ "$dst, $src1, $src2, $src3}",
+ []>,
+ EVEX_4V, EVEX_V512, EVEX_CD8<From.EltSize, From.CD8TupleForm>;
+ }
}
-// -- 32x4 integer form --
-let hasSideEffects = 0 in {
-def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
- "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512;
-let mayLoad = 1 in
-def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
- "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
+multiclass vinsert_for_size<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
+ PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm> :
+ vinsert_for_size_no_alt<Opcode, From, To,
+ vinsert_insert, INSERT_get_vinsert_imm> {
+ // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for
+ // vinserti32x4. Only add this if 64x2 and friends are not supported
+ // natively via AVX512DQ.
+ let Predicates = [NoDQI] in
+ def : Pat<(vinsert_insert:$ins
+ (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)),
+ (AltTo.VT (!cast<Instruction>(NAME # From.EltSize # "x4rr")
+ VR512:$src1, From.RC:$src2,
+ (INSERT_get_vinsert_imm VR512:$ins)))>;
}
-let hasSideEffects = 0 in {
-// -- 64x4 form --
-def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
- "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W;
-let mayLoad = 1 in
-def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
- "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
-}
-
-def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
- (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
- (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
- (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
- (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-
-def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
- (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
- (bc_v4i32 (loadv2i64 addr:$src2)),
- (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
- (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
- (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR512:$ins))>;
-
-def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
- (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
- (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
- (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
- (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-
-def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
- (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
- (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
- (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
-def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
- (bc_v8i32 (loadv4i64 addr:$src2)),
- (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
- (INSERT_get_vinsert256_imm VR512:$ins))>;
+multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
+ ValueType EltVT64, int Opcode256> {
+ defm NAME # "32x4" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ vinsert128_insert,
+ INSERT_get_vinsert128_imm>;
+ let Predicates = [HasDQI] in
+ defm NAME # "64x2" : vinsert_for_size_no_alt<Opcode128,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ vinsert128_insert,
+ INSERT_get_vinsert128_imm>, VEX_W;
+ defm NAME # "64x4" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 8, EltVT32, VR256>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert,
+ INSERT_get_vinsert256_imm>, VEX_W;
+ let Predicates = [HasDQI] in
+ defm NAME # "32x8" : vinsert_for_size_no_alt<Opcode256,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert,
+ INSERT_get_vinsert256_imm>;
+}
+
+defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
+defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
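+
+// Taken together, the two defms above expand to VINSERTF32x4rr/rm,
+// VINSERTF64x4rr/rm, VINSERTI32x4rr/rm and VINSERTI64x4rr/rm, plus the
+// DQI-only VINSERT{F,I}64x2 and VINSERT{F,I}32x8 variants.
+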
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
+ (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
+ (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
@@ -221,106 +451,90 @@ def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---
-let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
-// -- 32x4 form --
-def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
- (ins VR512:$src1, i8imm:$src2),
- "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512;
-def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
- (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
- "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
-
-// -- 64x4 form --
-def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
- (ins VR512:$src1, i8imm:$src2),
- "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, VEX_W;
-let mayStore = 1 in
-def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
- (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
- "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
+
+multiclass vextract_for_size<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
+ PatFrag vextract_extract,
+ SDNodeXForm EXTRACT_get_vextract_imm> {
+ let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+ defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
+ (ins VR512:$src1, i8imm:$idx),
+ "vextract" # To.EltTypeName # "x4",
+ "$idx, $src1", "$src1, $idx",
+ [(set To.RC:$dst, (vextract_extract:$idx (From.VT VR512:$src1),
+ (iPTR imm)))]>,
+ AVX512AIi8Base, EVEX, EVEX_V512;
+ let mayStore = 1 in
+ def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
+ (ins To.MemOp:$dst, VR512:$src1, i8imm:$src2),
+ "vextract" # To.EltTypeName # "x4\t{$src2, $src1, $dst|"
+ "$dst, $src1, $src2}",
+ []>, EVEX, EVEX_V512, EVEX_CD8<To.EltSize, CD8VT4>;
+ }
+
+ // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for
+ // vextracti32x4
+ def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)),
+ (AltTo.VT (!cast<Instruction>(NAME # To.EltSize # "x4rr")
+ VR512:$src1,
+ (EXTRACT_get_vextract_imm To.RC:$ext)))>;
+
+ // A 128/256-bit subvector extract from the first 512-bit vector position is
+ // a subregister copy that needs no instruction.
+ def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))),
+ (To.VT
+ (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>;
+
+ // And for the alternative types.
+ def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
+ (AltTo.VT
+ (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
+
+ // Intrinsic call with masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
+ (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+ VR512:$src1, imm:$idx)>;
+
+ // Intrinsic call with zero-masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
+ (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+ VR512:$src1, imm:$idx)>;
+
+ // Intrinsic call without masking.
+ def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+ "x4_512")
+ VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
+ (!cast<Instruction>(NAME # To.EltSize # "x4rr")
+ VR512:$src1, imm:$idx)>;
}
-let hasSideEffects = 0 in {
-// -- 32x4 form --
-def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
- (ins VR512:$src1, i8imm:$src2),
- "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512;
-def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
- (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
- "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
-
-// -- 64x4 form --
-def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
- (ins VR512:$src1, i8imm:$src2),
- "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, VEX_W;
-let mayStore = 1 in
-def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
- (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
- "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
-}
-
-def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
- (v4f32 (VEXTRACTF32x4rr VR512:$src1,
- (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext VR512:$src1, (iPTR imm)),
- (v4i32 (VEXTRACTF32x4rr VR512:$src1,
- (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
- (v2f64 (VEXTRACTF32x4rr VR512:$src1,
- (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
- (v2i64 (VEXTRACTI32x4rr VR512:$src1,
- (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
-
-
-def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
- (v8f32 (VEXTRACTF64x4rr VR512:$src1,
- (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
- (v8i32 (VEXTRACTI64x4rr VR512:$src1,
- (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
- (v4f64 (VEXTRACTF64x4rr VR512:$src1,
- (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
- (v4i64 (VEXTRACTI64x4rr VR512:$src1,
- (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
-
-// A 256-bit subvector extract from the first 512-bit vector position
-// is a subregister copy that needs no instruction.
-def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
- (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
-def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
- (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
-def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
- (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
-def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
- (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
-
-// zmm -> xmm
-def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
- (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
-def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
- (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
-def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
- (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
-def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
- (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
+multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
+ ValueType EltVT64, int Opcode64> {
+ defm NAME # "32x4" : vextract_for_size<Opcode32,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ vextract128_extract,
+ EXTRACT_get_vextract128_imm>;
+ defm NAME # "64x4" : vextract_for_size<Opcode64,
+ X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ X86VectorVTInfo< 8, EltVT32, VR256>,
+ vextract256_extract,
+ EXTRACT_get_vextract256_imm>, VEX_W;
+}
+defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
+defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
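+
+// Likewise, these defms expand to the VEXTRACT{F,I}32x4 and VEXTRACT{F,I}64x4
+// families; since the register form goes through AVX512_maskable_in_asm, each
+// also gets rrk/rrkz assembler variants, which is what the masked intrinsic
+// patterns in vextract_for_size refer to.
+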
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
@@ -352,13 +566,13 @@ def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
- (ins VR128X:$src1, u32u8imm:$src2),
+ (ins VR128X:$src1, i32i8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX;
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
- (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
+ (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
@@ -366,36 +580,57 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
-multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass DestRC,
- RegisterClass SrcRC, X86MemOperand x86memop> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
+multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ ValueType svt, X86VectorVTInfo _> {
+ defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
+ "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
+ T8PD, EVEX;
+
+ let mayLoad = 1 in {
+ defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src),
+ "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
+ (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
+ T8PD, EVEX;
+ }
}
+
+multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
+ AVX512VLVectorVTInfo _> {
+ defm Z : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+ EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z256 : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
+ EVEX_V256;
+ }
+}
+
let ExeDomain = SSEPackedSingle in {
- defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
- VR128X, f32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+ let Predicates = [HasVLX] in {
+ defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
+ v4f32, v4f32x_info>, EVEX_V128,
+ EVEX_CD8<32, CD8VT1>;
+ }
}
let ExeDomain = SSEPackedDouble in {
- defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
- VR128X, f64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
}
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSZrm addr:$src)>;
+ (VBROADCASTSSZm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDZrm addr:$src)>;
+ (VBROADCASTSDZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
- (VBROADCASTSSZrm addr:$src)>;
+ (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
- (VBROADCASTSDZrm addr:$src)>;
+ (VBROADCASTSDZm addr:$src)>;
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
RegisterClass SrcRC, RegisterClass KRC> {
@@ -503,22 +738,27 @@ def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
(VPBROADCASTQZrr VR128X:$src)>;
-def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
- (VBROADCASTSSZrr VR128X:$src)>;
-def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
- (VBROADCASTSDZrr VR128X:$src)>;
+def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
+ (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
+def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
+ (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
+
+def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
+ (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
+def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
+ (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
- (VBROADCASTSSZrr VR128X:$src)>;
+ (VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
- (VBROADCASTSDZrr VR128X:$src)>;
+ (VBROADCASTSDZr VR128X:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
- (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+ (VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
- (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+ (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
let Predicates = [HasAVX512] in {
@@ -532,48 +772,91 @@ def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
//---
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass DstRC, RegisterClass KRC,
- ValueType OpVT, ValueType SrcVT> {
-def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
+ RegisterClass KRC> {
+let Predicates = [HasCDI] in
+def Zrr : AVX512XS8I<opc, MRMSrcReg, (outs VR512:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
+ []>, EVEX, EVEX_V512;
+
+let Predicates = [HasCDI, HasVLX] in {
+def Z128rr : AVX512XS8I<opc, MRMSrcReg, (outs VR128:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ []>, EVEX, EVEX_V128;
+def Z256rr : AVX512XS8I<opc, MRMSrcReg, (outs VR256:$dst), (ins KRC:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ []>, EVEX, EVEX_V256;
+}
}
let Predicates = [HasCDI] in {
-defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
- VK16, v16i32, v16i1>, EVEX_V512;
-defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
- VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
+ VK16>;
+defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
+ VK8>, VEX_W;
}
//===----------------------------------------------------------------------===//
// AVX-512 - VPERM
//
// -- immediate form --
-multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT> {
- def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
+multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
+ def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
+ [(set _.RC:$dst,
+ (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
EVEX;
- def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
+ def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.MemOp:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst,
- (OpVT (OpNode (mem_frag addr:$src1),
- (i8 imm:$src2))))]>, EVEX;
+ [(set _.RC:$dst,
+ (_.VT (OpNode (_.MemOpFrag addr:$src1),
+ (i8 imm:$src2))))]>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VF>;
+}
+}
+
+multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
+ X86VectorVTInfo Ctrl> :
+ avx512_perm_imm<OpcImm, "vpermil" # _.Suffix, X86VPermilpi, _> {
+ let ExeDomain = _.ExeDomain in {
+ def rr : AVX5128I<OpcVar, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2),
+ !strconcat("vpermil" # _.Suffix,
+ " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (_.VT (X86VPermilpv _.RC:$src1,
+ (Ctrl.VT Ctrl.RC:$src2))))]>,
+ EVEX_4V;
+ def rm : AVX5128I<OpcVar, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, Ctrl.MemOp:$src2),
+ !strconcat("vpermil" # _.Suffix,
+ " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (_.VT (X86VPermilpv _.RC:$src1,
+ (Ctrl.VT (Ctrl.MemOpFrag addr:$src2)))))]>,
+ EVEX_4V;
+ }
}
-defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
- i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
- f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", X86VPermi, v8i64_info>,
+ EVEX_V512, VEX_W;
+defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", X86VPermi, v8f64_info>,
+ EVEX_V512, VEX_W;
+
+defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
+ EVEX_V512;
+defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
+ EVEX_V512, VEX_W;
+
+def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPSZri VR512:$src1, imm:$imm)>;
+def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPDZri VR512:$src1, imm:$imm)>;
// -- VPERM - register form --
multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -834,98 +1117,295 @@ defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
XD, VEX_W;
}
-multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
- SDNode OpNode, ValueType vt> {
+multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
def rr : AVX512BI<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ let mayLoad = 1 in
def rm : AVX512BI<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2)))],
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ def rrk : AVX512BI<opc, MRMSrcReg,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
+ IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+ let mayLoad = 1 in
+ def rmk : AVX512BI<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert
+ (_.LdFrag addr:$src2))))))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
-defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
- memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
- memopv8i64, X86pcmpeqm, v8i64>, T8PD, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> :
+ avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
+ let mayLoad = 1 in {
+ def rmb : AVX512BI<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
+ "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+ def rmbk : AVX512BI<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
+ _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr,
+ "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+ }
+}
-defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
- memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
- memopv8i64, X86pcmpgtm, v8i64>, T8PD, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
+multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
+ Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
+ avx512vl_i8_info, HasBWI>,
+ EVEX_CD8<8, CD8VF>;
+
+defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
+ avx512vl_i16_info, HasBWI>,
+ EVEX_CD8<16, CD8VF>;
+
+defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
+ avx512vl_i32_info, HasAVX512>,
+ EVEX_CD8<32, CD8VF>;
+
+defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
+ avx512vl_i64_info, HasAVX512>,
+ T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
+ avx512vl_i8_info, HasBWI>,
+ EVEX_CD8<8, CD8VF>;
+
+defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
+ avx512vl_i16_info, HasBWI>,
+ EVEX_CD8<16, CD8VF>;
+
+defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
+ avx512vl_i32_info, HasAVX512>,
+ EVEX_CD8<32, CD8VF>;
+
+defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
+ avx512vl_i64_info, HasAVX512>,
+ T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPGTDZrr
+ (COPY_TO_REGCLASS (VPCMPGTDZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
- (COPY_TO_REGCLASS (VPCMPEQDZrr
+ (COPY_TO_REGCLASS (VPCMPEQDZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
-multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
- RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
- SDNode OpNode, ValueType vt, Operand CC, string Suffix> {
+multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
+ X86VectorVTInfo _> {
def rri : AVX512AIi8<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ let mayLoad = 1 in
def rmi : AVX512AIi8<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
- imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ def rrik : AVX512AIi8<opc, MRMSrcReg,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
+ AVXCC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc)))],
+ IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+ let mayLoad = 1 in
+ def rmik : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
+ AVXCC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (bitconvert (_.LdFrag addr:$src2))),
+ imm:$cc)))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
+
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- !strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
+ "$dst, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
+ "$dst, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
- (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
+ i8imm:$cc),
!strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
+ "\t{$cc, $src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
- def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- !strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc),
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
+ i8imm:$cc),
!strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
+ "\t{$cc, $src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
}
-defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem, memopv16i32,
- X86cmpm, v16i32, AVXCC, "d">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem, memopv16i32,
- X86cmpmu, v16i32, AVXCC, "ud">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
+ X86VectorVTInfo _> :
+ avx512_icmp_cc<opc, Suffix, OpNode, _> {
+ let mayLoad = 1 in {
+ def rmib : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
+ AVXCC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
+ "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+ def rmibk : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
+ _.ScalarMemOp:$src2, AVXCC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
+ [(set _.KRC:$dst, (and _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+ imm:$cc)))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+ }
+
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1, hasSideEffects = 0 in {
+ def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
+ i8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
+ "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+ def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
+ _.ScalarMemOp:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+ }
+}
+
+multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
+ defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
+ }
+}
+
+multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
+ HasBWI>, EVEX_CD8<8, CD8VF>;
+defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
+ HasBWI>, EVEX_CD8<8, CD8VF>;
-defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem, memopv8i64,
- X86cmpm, v8i64, AVXCC, "q">,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem, memopv8i64,
- X86cmpmu, v8i64, AVXCC, "uq">,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
+ HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
+ HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
+
+defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
+ HasAVX512>, EVEX_CD8<32, CD8VF>;
+defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
+ HasAVX512>, EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
+ HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
+ HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
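+
+// Each compare defm above fans out over Z/Z256/Z128 widths (the narrower two
+// only with VLX) and, per width, over rri/rmi/rrik/rmik variants plus the
+// "_alt" assembler-only forms taking an explicit immediate; the d/q element
+// sizes additionally get the broadcast rmib/rmibk forms. For example, a masked
+// 128-bit broadcast compare against a dword splat is VPCMPDZ128rmibk.
+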
// avx512_cmp_packed - compare packed instructions
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
@@ -1015,14 +1495,14 @@ def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
- ValueType vt, X86MemOperand x86memop> {
+ ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
let mayLoad = 1 in
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (vt (load addr:$src)))]>;
+ [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
let mayStore = 1 in
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
@@ -1040,32 +1520,82 @@ multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
}
}
-let Predicates = [HasAVX512] in {
- defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
- VEX, PS;
- defm KMOVW : avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
+let Predicates = [HasDQI] in
+ defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
+ i8mem>,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
+ VEX, PD;
+
+let Predicates = [HasAVX512] in
+ defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
+ i16mem>,
+ avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
+
+let Predicates = [HasBWI] in {
+ defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
+ i32mem>, VEX, PD, VEX_W;
+ defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
+ VEX, XD;
}
+let Predicates = [HasBWI] in {
+ defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
+ i64mem>, VEX, PS, VEX_W;
+ defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
+ VEX, XD, VEX_W;
+}
+
+// GR from/to mask register
+let Predicates = [HasDQI] in {
+ def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
+ (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
+ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
+ (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
+}
let Predicates = [HasAVX512] in {
- // GR16 from/to 16-bit mask
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
(KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
(EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
+}
+let Predicates = [HasBWI] in {
+ def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
+ def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
+}
+let Predicates = [HasBWI] in {
+ def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
+ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
+}
- // Store kreg in memory
- def : Pat<(store (v16i1 VK16:$src), addr:$dst),
+// Load/store kreg
+let Predicates = [HasDQI] in {
+ def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
+ (KMOVBmk addr:$dst, VK8:$src)>;
+}
+let Predicates = [HasAVX512] in {
+ def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
-
- def : Pat<(store VK8:$src, addr:$dst),
+ def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
-
def : Pat<(i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
-
- def : Pat<(v8i1 (load addr:$src)),
+ def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
+}
+let Predicates = [HasBWI] in {
+ def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
+ (KMOVDmk addr:$dst, VK32:$src)>;
+}
+let Predicates = [HasBWI] in {
+ def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
+ (KMOVQmk addr:$dst, VK64:$src)>;
+}
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(i1 (trunc (i64 GR64:$src))),
+ (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
+ (i32 1))), VK1)>;
def : Pat<(i1 (trunc (i32 GR32:$src))),
(COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;
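+
+// In the truncation patterns above, the AND with 1 keeps only the low bit of
+// the GPR before KMOVW moves it into a mask register, so the resulting VK1
+// value is exactly the truncated bit.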
@@ -1078,7 +1608,7 @@ let Predicates = [HasAVX512] in {
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
VK1)>;
-
+
def : Pat<(i32 (zext VK1:$src)),
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
def : Pat<(i8 (zext VK1:$src)),
@@ -1097,6 +1627,14 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, VK8)>;
}
+let Predicates = [HasBWI] in {
+ def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK32)>;
+ def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK64)>;
+}
+
+
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
// GR from/to 8-bit mask without native support
@@ -1113,26 +1651,38 @@ let Predicates = [HasAVX512] in {
(COPY_TO_REGCLASS VK16:$src, VK1)>;
def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK1)>;
-
+}
+let Predicates = [HasBWI] in {
+ def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK32:$src, VK1)>;
+ def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
+ (COPY_TO_REGCLASS VK64:$src, VK1)>;
}
// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
- RegisterClass KRC, SDPatternOperator OpNode> {
- let Predicates = [HasAVX512] in
+ RegisterClass KRC, SDPatternOperator OpNode,
+ Predicate prd> {
+ let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (OpNode KRC:$src))]>;
}
-multiclass avx512_mask_unop_w<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode> {
- defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX, PS;
+multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode> {
+ defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
+ HasDQI>, VEX, PD;
+ defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
+ HasAVX512>, VEX, PS;
+ defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
+ HasBWI>, VEX, PD, VEX_W;
+ defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
+ HasBWI>, VEX, PS, VEX_W;
}
-defm KNOT : avx512_mask_unop_w<0x44, "knot", not>;
+defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
multiclass avx512_mask_unop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -1143,43 +1693,60 @@ multiclass avx512_mask_unop_int<string IntName, string InstName> {
}
defm : avx512_mask_unop_int<"knot", "KNOT">;
+let Predicates = [HasDQI] in
+def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
+let Predicates = [HasAVX512] in
def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
+let Predicates = [HasBWI] in
+def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
+let Predicates = [HasBWI] in
+def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
+
+// KNL does not support KMOVB; an 8-bit mask is promoted to a 16-bit mask.
+let Predicates = [HasAVX512] in {
def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
-// With AVX-512, 8-bit mask is promoted to 16-bit mask.
def : Pat<(not VK8:$src),
(COPY_TO_REGCLASS
(KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
+}
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
- RegisterClass KRC, SDPatternOperator OpNode> {
- let Predicates = [HasAVX512] in
+ RegisterClass KRC, SDPatternOperator OpNode,
+ Predicate prd> {
+ let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
" \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
}
-multiclass avx512_mask_binop_w<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode> {
- defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX_4V, VEX_L, PS;
+multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode> {
+ defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
+ HasDQI>, VEX_4V, VEX_L, PD;
+ defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
+ HasAVX512>, VEX_4V, VEX_L, PS;
+ defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
+ HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
+ defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
+ HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
}
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
let isCommutable = 1 in {
- defm KAND : avx512_mask_binop_w<0x41, "kand", and>;
- let isCommutable = 0 in
- defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>;
- defm KOR : avx512_mask_binop_w<0x45, "kor", or>;
- defm KXNOR : avx512_mask_binop_w<0x46, "kxnor", xnor>;
- defm KXOR : avx512_mask_binop_w<0x47, "kxor", xor>;
+ defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
+ defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
+ defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
+ defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
}
+let isCommutable = 0 in
+ defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
def : Pat<(xor VK1:$src1, VK1:$src2),
(COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
@@ -1325,6 +1892,17 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+let Predicates = [HasVLX] in {
+ def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
+ def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
+ def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
+ def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
+}
+
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
@@ -1334,104 +1912,176 @@ def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
// AVX-512 - Aligned and unaligned load and store
//
-multiclass avx512_load<bits<8> opc, RegisterClass RC, RegisterClass KRC,
- X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d,
- ValueType vt, bit IsReMaterializable = 1> {
+multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
+ RegisterClass KRC, RegisterClass RC,
+ ValueType vt, ValueType zvt, X86MemOperand memop,
+ Domain d, bit IsReMaterializable = 1> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"), [], d>,
- EVEX;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+ d>, EVEX;
def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
- !strconcat(asm,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [], d>, EVEX, EVEX_KZ;
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
}
- let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
- def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
- [(set (vt RC:$dst), (ld_frag addr:$src))], d>, EVEX;
- let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
- def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(asm,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
- EVEX, EVEX_K;
- let mayLoad = 1 in
- def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, x86memop:$src2),
- !strconcat(asm,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- [], d>, EVEX, EVEX_K;
+ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
+ SchedRW = [WriteLoad] in
+ def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
+ d>, EVEX;
+
+ let AddedComplexity = 20 in {
+ let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
+ let hasSideEffects = 0 in
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src0, KRC:$mask, RC:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src1}"),
+ [(set RC:$dst, (vt (vselect KRC:$mask,
+ (vt RC:$src1),
+ (vt RC:$src0))))],
+ d>, EVEX, EVEX_K;
+ let mayLoad = 1, SchedRW = [WriteLoad] in
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src0, KRC:$mask, memop:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src1}"),
+ [(set RC:$dst, (vt
+ (vselect KRC:$mask,
+ (vt (bitconvert (ld_frag addr:$src1))),
+ (vt RC:$src0))))],
+ d>, EVEX, EVEX_K;
+ }
+ let mayLoad = 1, SchedRW = [WriteLoad] in
+ def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
+ "${dst} {${mask}} {z}, $src}"),
+ [(set RC:$dst, (vt
+ (vselect KRC:$mask,
+ (vt (bitconvert (ld_frag addr:$src))),
+ (vt (bitconvert (zvt immAllZerosV))))))],
+ d>, EVEX, EVEX_KZ;
+ }
+}
+
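// In short, every load built by this multiclass comes in three flavors
// (a sketch, assuming the standard EVEX masking syntax):
//   rm    vmovaps (%rdi), %zmm0            - plain load
//   rmk   vmovaps (%rdi), %zmm0 {%k1}      - merge: unselected lanes keep $src0
//   rmkz  vmovaps (%rdi), %zmm0 {%k1} {z}  - zero: unselected lanes are cleared
// matching the vselect patterns above (mask ? loaded value : $src0 / zero).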
+multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
+ string elty, string elsz, string vsz512,
+ string vsz256, string vsz128, Domain d,
+ Predicate prd, bit IsReMaterializable = 1> {
+ let Predicates = [prd] in
+ defm Z : avx512_load<opc, OpcodeStr,
+ !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
+ !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
+ !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
+ !cast<X86MemOperand>(elty##"512mem"), d,
+ IsReMaterializable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_load<opc, OpcodeStr,
+ !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
+ "v"##vsz256##elty##elsz, "v4i64")),
+ !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
+ !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
+ !cast<X86MemOperand>(elty##"256mem"), d,
+ IsReMaterializable>, EVEX_V256;
+
+ defm Z128 : avx512_load<opc, OpcodeStr,
+ !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
+ "v"##vsz128##elty##elsz, "v2i64")),
+ !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
+ !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
+ !cast<X86MemOperand>(elty##"128mem"), d,
+ IsReMaterializable>, EVEX_V128;
}
- let mayLoad = 1 in
- def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, x86memop:$src2),
- !strconcat(asm,
- " \t{$src2, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src2}"),
- [], d>, EVEX, EVEX_KZ;
}
-multiclass avx512_store<bits<8> opc, RegisterClass RC, RegisterClass KRC,
- X86MemOperand x86memop, PatFrag store_frag,
- string asm, Domain d, ValueType vt> {
+
+multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
+ ValueType OpVT, RegisterClass KRC, RegisterClass RC,
+ X86MemOperand memop, Domain d> {
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"), [], d>,
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
EVEX;
let Constraints = "$src1 = $dst" in
- def alt_rrk : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(asm,
- " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
+ def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
EVEX, EVEX_K;
- def alt_rrkz : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
+ def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
(ins KRC:$mask, RC:$src),
- !strconcat(asm,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[], d>, EVEX, EVEX_KZ;
}
let mayStore = 1 in {
- def mr : AVX512PI<opc, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
- !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
- [(store_frag (vt RC:$src), addr:$dst)], d>, EVEX;
+ def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
- (ins x86memop:$dst, KRC:$mask, RC:$src),
- !strconcat(asm,
- " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ (ins memop:$dst, KRC:$mask, RC:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[], d>, EVEX, EVEX_K;
- def mrkz : AVX512PI<opc, MRMDestMem, (outs),
- (ins x86memop:$dst, KRC:$mask, RC:$src),
- !strconcat(asm,
- " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [], d>, EVEX, EVEX_KZ;
}
}
-defm VMOVAPSZ : avx512_load<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
- "vmovaps", SSEPackedSingle, v16f32>,
- avx512_store<0x29, VR512, VK16WM, f512mem, alignedstore512,
- "vmovaps", SSEPackedSingle, v16f32>,
- PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVAPDZ : avx512_load<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
- "vmovapd", SSEPackedDouble, v8f64>,
- avx512_store<0x29, VR512, VK8WM, f512mem, alignedstore512,
- "vmovapd", SSEPackedDouble, v8f64>,
- PD, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
-defm VMOVUPSZ : avx512_load<0x10, VR512, VK16WM, f512mem, loadv16f32,
- "vmovups", SSEPackedSingle, v16f32>,
- avx512_store<0x11, VR512, VK16WM, f512mem, store,
- "vmovups", SSEPackedSingle, v16f32>,
- PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVUPDZ : avx512_load<0x10, VR512, VK8WM, f512mem, loadv8f64,
- "vmovupd", SSEPackedDouble, v8f64, 0>,
- avx512_store<0x11, VR512, VK8WM, f512mem, store,
- "vmovupd", SSEPackedDouble, v8f64>,
- PD, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
+ string st_suff_512, string st_suff_256,
+ string st_suff_128, string elty, string elsz,
+ string vsz512, string vsz256, string vsz128,
+ Domain d, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
+ !cast<ValueType>("v"##vsz512##elty##elsz),
+ !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
+ !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
+ !cast<ValueType>("v"##vsz256##elty##elsz),
+ !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
+ !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
+
+ defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
+ !cast<ValueType>("v"##vsz128##elty##elsz),
+ !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
+ !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
+ }
+}
+
+defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
+ "16", "8", "4", SSEPackedSingle, HasAVX512>,
+ avx512_store_vl<0x29, "vmovaps", "alignedstore",
+ "512", "256", "", "f", "32", "16", "8", "4",
+ SSEPackedSingle, HasAVX512>,
+ PS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
+ "8", "4", "2", SSEPackedDouble, HasAVX512>,
+ avx512_store_vl<0x29, "vmovapd", "alignedstore",
+ "512", "256", "", "f", "64", "8", "4", "2",
+ SSEPackedDouble, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
+ "16", "8", "4", SSEPackedSingle, HasAVX512>,
+ avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
+ "16", "8", "4", SSEPackedSingle, HasAVX512>,
+ PS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
+ "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
+ avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
+ "8", "4", "2", SSEPackedDouble, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
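// How the string-based !cast glue resolves for one of the instantiations
// above (a sketch; the names are inferred from the arguments, not quoted from
// TableGen output). For the 512-bit VMOVAPS form:
//   ld_pat##"v"##vsz512##elty##elsz  ->  alignedloadv16f32
//   "VK"##vsz512##"WM"               ->  VK16WM
//   elty##"512mem"                   ->  f512mem
//   "v"##vsz512##elty##elsz          ->  v16f32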
def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
- (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
+ (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
(VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
@@ -1447,75 +2097,80 @@ def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
-defm VMOVDQA32: avx512_load<0x6F, VR512, VK16WM, i512mem, alignedloadv16i32,
- "vmovdqa32", SSEPackedInt, v16i32>,
- avx512_store<0x7F, VR512, VK16WM, i512mem, alignedstore512,
- "vmovdqa32", SSEPackedInt, v16i32>,
- PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQA64: avx512_load<0x6F, VR512, VK8WM, i512mem, alignedloadv8i64,
- "vmovdqa64", SSEPackedInt, v8i64>,
- avx512_store<0x7F, VR512, VK8WM, i512mem, alignedstore512,
- "vmovdqa64", SSEPackedInt, v8i64>,
- PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VMOVDQU32: avx512_load<0x6F, VR512, VK16WM, i512mem, load,
- "vmovdqu32", SSEPackedInt, v16i32>,
- avx512_store<0x7F, VR512, VK16WM, i512mem, store,
- "vmovdqu32", SSEPackedInt, v16i32>,
- XS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64: avx512_load<0x6F, VR512, VK8WM, i512mem, load,
- "vmovdqu64", SSEPackedInt, v8i64>,
- avx512_store<0x7F, VR512, VK8WM, i512mem, store,
- "vmovdqu64", SSEPackedInt, v8i64>,
- XS, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
+ "16", "8", "4", SSEPackedInt, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
+ "512", "256", "", "i", "32", "16", "8", "4",
+ SSEPackedInt, HasAVX512>,
+ PD, EVEX_CD8<32, CD8VF>;
+
+defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
+ "8", "4", "2", SSEPackedInt, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
+ "512", "256", "", "i", "64", "8", "4", "2",
+ SSEPackedInt, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
+ "64", "32", "16", SSEPackedInt, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
+ "i", "8", "64", "32", "16", SSEPackedInt,
+ HasBWI>, XD, EVEX_CD8<8, CD8VF>;
+
+defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
+ "32", "16", "8", SSEPackedInt, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
+ "i", "16", "32", "16", "8", SSEPackedInt,
+ HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
+
+defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
+ "16", "8", "4", SSEPackedInt, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
+ "i", "32", "16", "8", "4", SSEPackedInt,
+ HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
+ "8", "4", "2", SSEPackedInt, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
+ "i", "64", "8", "4", "2", SSEPackedInt,
+ HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
(v16i32 immAllZerosV), GR16:$mask)),
- (VMOVDQU32rmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
+ (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
- (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
- (VMOVDQU64rmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
+ (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
+ (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
- GR16:$mask),
- (VMOVDQU32mrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
+ GR16:$mask),
+ (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
VR512:$src)>;
def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
- GR8:$mask),
- (VMOVDQU64mrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
+ GR8:$mask),
+ (VMOVDQU64Zmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
let AddedComplexity = 20 in {
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
- (bc_v8i64 (v16i32 immAllZerosV)))),
- (VMOVDQU64rrkz VK8WM:$mask, VR512:$src)>;
+ (bc_v8i64 (v16i32 immAllZerosV)))),
+ (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
- (v8i64 VR512:$src))),
- (VMOVDQU64rrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
+ (v8i64 VR512:$src))),
+ (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
(v16i32 immAllZerosV))),
- (VMOVDQU32rrkz VK16WM:$mask, VR512:$src)>;
+ (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
- (v16i32 VR512:$src))),
- (VMOVDQU32rrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
-
-def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
- (v16f32 VR512:$src2))),
- (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
-def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1),
- (v8f64 VR512:$src2))),
- (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
-def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1),
- (v16i32 VR512:$src2))),
- (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
-def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
- (v8i64 VR512:$src2))),
- (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
+ (v16i32 VR512:$src))),
+ (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
+
// Move Int Doubleword to Packed Double Int
//
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
@@ -1641,10 +2296,16 @@ multiclass avx512_move_scalar <string asm, RegisterClass RC,
!strconcat(asm, " \t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
EVEX, VEX_LIG;
+ let mayStore = 1 in {
def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(asm, " \t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
EVEX, VEX_LIG;
+ def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
+ !strconcat(asm, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
+ [], IIC_SSE_MOV_S_MR>,
+ EVEX, VEX_LIG, EVEX_K;
+ } // mayStore
} //hasSideEffects = 0
}
@@ -1664,6 +2325,10 @@ def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
+ (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+
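// With the new mrk form, the pattern above lets a masked scalar store such as
//   vmovss %xmm0, (%rdi) {%k1}
// be selected for int_x86_avx512_mask_store_ss (assembly shown only as a
// sketch of the intended use).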
// For the disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
@@ -1882,136 +2547,201 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteLoad] in {
+ def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V512,
+ EVEX_CD8<64, CD8VF>;
+
+ let Predicates = [HasAVX512, HasVLX] in {
+ def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
+ (ins i256mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}", [],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V256,
+ EVEX_CD8<64, CD8VF>;
+
+ def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
+ (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}", [],
+ SSEPackedInt>, EVEX, T8PD, EVEX_V128,
+ EVEX_CD8<64, CD8VF>;
+ }
+}
+
+multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
+ ValueType OpVT, RegisterClass RC, X86MemOperand memop,
+ Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
+ let SchedRW = [WriteStore], mayStore = 1,
+ AddedComplexity = 400 in
+ def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
+}
-def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
- (ins i512mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR512:$dst,
- (int_x86_avx512_movntdqa addr:$src))]>,
- EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-// Prefer non-temporal over temporal versions
-let AddedComplexity = 400, SchedRW = [WriteStore] in {
-
-def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
- (ins f512mem:$dst, VR512:$src),
- "vmovntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v16f32 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
- (ins f512mem:$dst, VR512:$src),
- "vmovntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f64 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-
-def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
- (ins i512mem:$dst, VR512:$src),
- "vmovntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8i64 VR512:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>,
- EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
+ string elty, string elsz, string vsz512,
+ string vsz256, string vsz128, Domain d,
+ Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
+ let Predicates = [prd] in
+ defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
+ !cast<X86MemOperand>(elty##"512mem"), d, itin>,
+ EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
+ !cast<X86MemOperand>(elty##"256mem"), d, itin>,
+ EVEX_V256;
+
+ defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
+ !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
+ !cast<X86MemOperand>(elty##"128mem"), d, itin>,
+ EVEX_V128;
+ }
}
+defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
+ "i", "64", "8", "4", "2", SSEPackedInt,
+ HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
+
+defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
+ "f", "64", "8", "4", "2", SSEPackedDouble,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
+ "f", "32", "16", "8", "4", SSEPackedSingle,
+ HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
+
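// A sketch of what the three grouped defms above produce (store-only mr forms;
// names assumed from the Z/Z256/Z128 suffixes):
//   VMOVNTPSZmr             512-bit, under HasAVX512
//   VMOVNTPSZ256mr/Z128mr   256/128-bit, additionally gated on HasVLX
// and likewise for VMOVNTPD and VMOVNTDQ.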
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass KRC,
- RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, PatFrag scalar_mfrag,
- X86MemOperand x86scalar_mop, string BrdcstStr,
- OpndItins itins, bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
- itins.rr>, EVEX_4V;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
- RC:$src0)))],
- itins.rr>, EVEX_4V, EVEX_K;
- def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
- "|$dst {${mask}} {z}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
- (OpVT immAllZerosV))))],
- itins.rr>, EVEX_4V, EVEX_KZ;
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable = 0> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ "", itins.rr, IsCommutable>,
+ AVX512BIBase, EVEX_4V;
+
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src2)))),
+ "", itins.rm>,
+ AVX512BIBase, EVEX_4V;
+}
+
+multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable = 0> :
+ avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1,
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))),
+ "", itins.rm>,
+ AVX512BIBase, EVEX_4V, EVEX_B;
+}
+
+multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, OpndItins itins,
+ Predicate prd, bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ IsCommutable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ IsCommutable>, EVEX_V128;
}
+}
- let mayLoad = 1 in {
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
- itins.rm>, EVEX_4V;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
- RC:$src0)))],
- itins.rm>, EVEX_4V, EVEX_K;
- def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
- (OpVT immAllZerosV))))],
- itins.rm>, EVEX_4V, EVEX_KZ;
- }
- def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
- itins.rm>, EVEX_4V, EVEX_B;
- let AddedComplexity = 30 in {
- let Constraints = "$src0 = $dst" in
- def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
- BrdcstStr, "}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1),
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
- RC:$src0)))],
- itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
- def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [(set RC:$dst, (OpVT (vselect KRC:$mask,
- (OpNode (OpVT RC:$src1),
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
- (OpVT immAllZerosV))))],
- itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
- }
+multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, OpndItins itins,
+ Predicate prd, bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ IsCommutable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ IsCommutable>, EVEX_V128;
}
}
+multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ itins, prd, IsCommutable>,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+ itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
+ itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
+ itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>;
+}
+
+multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
+ SDNode OpNode, OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+
+ defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+}
+
+multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
+ SDNode OpNode, OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+
+ defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr, OpNode, itins, prd,
+ IsCommutable>;
+}
+
+multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
+ bits<8> opc_d, bits<8> opc_q,
+ string OpcodeStr, SDNode OpNode,
+ OpndItins itins, bit IsCommutable = 0> {
+ defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
+ itins, HasAVX512, IsCommutable>,
+ avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
+ itins, HasBWI, IsCommutable>;
+}
+
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
@@ -2069,25 +2799,16 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
}
}
-defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
-
-defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
+ SSE_INTALU_ITINS_P, 1>;
+defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
+ SSE_INTALU_ITINS_P, 0>;
+defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
+ SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
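// A sketch of the expansion of one grouped integer defm (instruction names
// inferred from the Z/Z256/Z128 and rr/rm/rmb suffixes; illustrative only):
//   defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
//                                       SSE_INTALU_ITINS_P, 1>;
//     --> D/Q forms under HasAVX512 (e.g. VPADDDZrr, VPADDDZrmb, VPADDQZ256rr)
//         and B/W forms under HasBWI (e.g. VPADDBZ128rr, VPADDWZrm), with the
//         128/256-bit variants additionally requiring HasVLX.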
defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
@@ -2108,41 +2829,33 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
(VPMULDQZrr VR512:$src1, VR512:$src2)>;
-defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_INTALU_ITINS_P, 1>,
- T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 0>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
+defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
(v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
@@ -2255,48 +2968,18 @@ multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedSingle in
-defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
- memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-let ExeDomain = SSEPackedDouble in
-defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
- memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
- VEX_W, EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
- (VPERMILPSZri VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
- (VPERMILPDZri VR512:$src1, imm:$imm)>;
-
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
-defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
- i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
- i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
- memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
- SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
- memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
+defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -2324,118 +3007,58 @@ defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
}
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass KRC,
- RegisterClass RC, ValueType vt,
- X86MemOperand x86memop, PatFrag mem_frag,
- X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr,
- Domain d, OpndItins itins, bit commutable> {
- let isCommutable = commutable in {
- def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
- EVEX_4V;
-
- def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
- [], itins.rr, d>, EVEX_4V, EVEX_K;
-
- def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rr, d>, EVEX_4V, EVEX_KZ;
- }
-
+ X86VectorVTInfo _, bit IsCommutable> {
+ defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, EVEX_4V;
let mayLoad = 1 in {
- def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>, EVEX_4V;
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))>,
+ EVEX_4V, EVEX_B;
+ }//let mayLoad = 1
+}
- def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
- itins.rm, d>, EVEX_4V, EVEX_B;
-
- def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rm, d>, EVEX_4V, EVEX_K;
-
- def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rm, d>, EVEX_4V, EVEX_KZ;
-
- def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
- " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
- [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
-
- def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
- " \t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit IsCommutable = 0> {
+ defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
+ IsCommutable>, EVEX_V512, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
+ IsCommutable>, EVEX_V512, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
+ IsCommutable>, EVEX_V128, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
+ IsCommutable>, EVEX_V256, PS,
+ EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
+ IsCommutable>, EVEX_V128, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
+ IsCommutable>, EVEX_V256, PD, VEX_W,
+ EVEX_CD8<64, CD8VF>;
}
}
-defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 1>,
- EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 1>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
- SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-
-defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 0>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
- SSE_ALU_ITINS_P.d, 0>,
- EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>;
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>;
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>;
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>;
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>;
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>;
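// A sketch of one instantiation of the consolidated FP multiclass (names
// inferred from the PS/PD and Z/Z128/Z256 suffixes above):
//   defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>;
//     --> 512-bit VADDPSZ*/VADDPDZ* forms, plus VADDPSZ128*/VADDPSZ256* and
//         VADDPDZ128*/VADDPDZ256* when HasVLX, each with rr, rm and rmb
//         (broadcast) variants from avx512_fp_packed.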
def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
(v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
@@ -2502,29 +3125,17 @@ def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
- string OpcodeStr, SDNode OpNode, RegisterClass RC,
- ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
- RegisterClass KRC> {
- def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
- (ins RC:$src1, i8imm:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
- SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
- def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
- def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
- !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode (mem_frag addr:$src1),
- (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
- def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
- (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+ string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i8imm:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+ defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src1, i8imm:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode (_.MemOpFrag addr:$src1), (i8 imm:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
}
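// Each ri/mi pair above now goes through AVX512_maskable, so a single defm
// such as VPSRLDZ yields unmasked, merge-masked and zero-masked immediate
// shifts (roughly VPSRLDZri, VPSRLDZrik, VPSRLDZrikz plus the mi memory
// counterparts; exact suffixes assumed from the maskable helper).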
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -2555,42 +3166,42 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ v16i32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
EVEX_CD8<32, CD8VQ>;
defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
EVEX_CD8<64, CD8VQ>, VEX_W;
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
+ v16i32_info>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
EVEX_CD8<32, CD8VQ>;
defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
EVEX_CD8<64, CD8VQ>, VEX_W;
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
- VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ v16i32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
EVEX_CD8<32, CD8VQ>;
defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
- VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ v8i64_info>, EVEX_V512,
EVEX_CD8<64, CD8VF>, VEX_W;
defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
@@ -2713,155 +3324,133 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
+
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr, SDNode OpNode, ValueType OpVT> {
- def r: AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
- !strconcat(OpcodeStr," \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst, (OpVT(OpNode RC:$src1, RC:$src2, RC:$src3)))]>;
+// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
+multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDPatternOperator OpNode = null_frag> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ AVX512FMA3Base;
let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, _.MemOp:$src3),
!strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3))))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
- !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
- ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (_.MemOpFrag addr:$src3))))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, _.ScalarMemOp:$src3),
+ !strconcat(OpcodeStr, " \t{${src3}", _.BroadcastStr,
+ ", $src2, $dst|$dst, $src2, ${src3}", _.BroadcastStr, "}"),
+ [(set _.RC:$dst, (OpNode _.RC:$src1, _.RC:$src2,
+ (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))]>, EVEX_B;
}
} // Constraints = "$src1 = $dst"
+multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, X86VectorVTInfo VTI,
+ SDPatternOperator OpNode> {
+ defm v213 : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
+ VTI, OpNode>,
+ EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
+
+ defm v231 : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
+ VTI>,
+ EVEX_V512, EVEX_CD8<VTI.EltSize, CD8VF>;
+}
+
let ExeDomain = SSEPackedSingle in {
- defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmaddsub, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsubadd, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
+ defm VFMADDPSZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
+ v16f32_info, X86Fmadd>;
+ defm VFMSUBPSZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
+ v16f32_info, X86Fmsub>;
+ defm VFMADDSUBPSZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
+ v16f32_info, X86Fmaddsub>;
+ defm VFMSUBADDPSZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
+ v16f32_info, X86Fmsubadd>;
+ defm VFNMADDPSZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
+ v16f32_info, X86Fnmadd>;
+ defm VFNMSUBPSZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
+ v16f32_info, X86Fnmsub>;
}
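// A sketch of what one grouped FMA defm above expands to (form names assumed
// from avx512_fma3p_forms):
//   defm VFMADDPSZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd", v16f32_info, X86Fmadd>;
//     --> a v213 set (asm "vfmadd213ps", with ISel patterns via X86Fmadd) and
//         a v231 set (asm "vfmadd231ps", defined with null_frag, so no ISel
//         patterns are attached to it).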
let ExeDomain = SSEPackedDouble in {
- defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmadd, v8f64>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+ defm VFMADDPDZ : avx512_fma3p_forms<0xA8, 0xB8, "vfmadd",
+ v8f64_info, X86Fmadd>, VEX_W;
+ defm VFMSUBPDZ : avx512_fma3p_forms<0xAA, 0xBA, "vfmsub",
+ v8f64_info, X86Fmsub>, VEX_W;
+ defm VFMADDSUBPDZ : avx512_fma3p_forms<0xA6, 0xB6, "vfmaddsub",
+ v8f64_info, X86Fmaddsub>, VEX_W;
+ defm VFMSUBADDPDZ : avx512_fma3p_forms<0xA7, 0xB7, "vfmsubadd",
+ v8f64_info, X86Fmsubadd>, VEX_W;
+ defm VFNMADDPDZ : avx512_fma3p_forms<0xAC, 0xBC, "vfnmadd",
+ v8f64_info, X86Fnmadd>, VEX_W;
+ defm VFNMSUBPDZ : avx512_fma3p_forms<0xAE, 0xBE, "vfnmsub",
+ v8f64_info, X86Fnmsub>, VEX_W;
}
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
- string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
let mayLoad = 1 in
- def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src3, x86memop:$src2),
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
!strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
- def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
- ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
- [(set RC:$dst, (OpNode RC:$src1,
- (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.MemOpFrag addr:$src2),
+ _.RC:$src3)))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr, " \t{${src2}", _.BroadcastStr,
+ ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
+ [(set _.RC:$dst,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))),
+ _.RC:$src3))]>, EVEX_B;
}
} // Constraints = "$src1 = $dst"
let ExeDomain = SSEPackedSingle in {
- defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmaddsub, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fmsubadd, v16f32>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmadd, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
- defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
- memopv16f32, f32mem, loadf32, "{1to16}",
- X86Fnmsub, v16f32>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
+ defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", X86Fmadd,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", X86Fmsub,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", X86Fmaddsub,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", X86Fmsubadd,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", X86Fnmadd,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", X86Fnmsub,
+ v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmadd, v8f64>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
- defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
- defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
- memopv8f64, f64mem, loadf64, "{1to8}",
- X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VF>;
+ defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", X86Fmadd,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", X86Fmsub,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", X86Fmaddsub,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", X86Fmsubadd,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", X86Fnmadd,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", X86Fnmsub,
+ v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
// Scalar FMA
@@ -3482,26 +4071,49 @@ def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, ValueType OpVt> {
- def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr,
- " \t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
- EVEX;
- def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
- EVEX;
-}
-defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
- memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
- memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
- memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
- memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+ X86VectorVTInfo _> {
+ defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD;
+ defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ (OpNode (_.FloatVT
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ EVEX, T8PD, EVEX_B;
+ }
+}
+
+multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v4f32x_info>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v8f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
+}
+
+defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
+defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
@@ -3573,93 +4185,63 @@ def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
(COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
-multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop> {
- let hasSideEffects = 0, Predicates = [HasERI] in {
- def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr,
- " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr,
- " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
- []>, EVEX, EVEX_B;
- def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- }
-}
-defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
- (VRSQRT28PSZrb VR512:$src)>;
-def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
- (VRSQRT28PDZrb VR512:$src)>;
-
-def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
- (VRCP28PSZrb VR512:$src)>;
-def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
- (VRCP28PDZrb VR512:$src)>;
-
-multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Intrinsic V16F32Int, Intrinsic V8F64Int,
- OpndItins itins_s, OpndItins itins_d> {
- def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
- EVEX, EVEX_V512;
- let mayLoad = 1 in
- def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst,
- (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
- itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode OpNode> {
- def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
- EVEX, EVEX_V512;
+ defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
- let mayLoad = 1 in
- def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (OpNode
- (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
- itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+ defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr,
+ "$src", "$src",
+ (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
-let isCodeGenOnly = 1 in {
- def PSZr_Int : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V16F32Int VR512:$src))]>,
- EVEX, EVEX_V512;
- def PSZm_Int : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst,
- (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- def PDZr_Int : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V8F64Int VR512:$src))]>,
- EVEX, EVEX_V512, VEX_W;
- def PDZm_Int : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr,
- "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>,
- EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-} // isCodeGenOnly = 1
+ defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (bitconvert (_.LdFrag addr:$src))), (i32 FROUND_CURRENT))>;
+
+ defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))),
+ (i32 FROUND_CURRENT))>, EVEX_B;
+}
+
+multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
+ EVEX_CD8<32, CD8VF>;
+ defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
+ VEX_W, EVEX_CD8<32, CD8VF>;
+}
+
+let Predicates = [HasERI], hasSideEffects = 0 in {
+
+ defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD;
+ defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD;
+ defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD;
+}
+
+multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _>{
+ defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (_.FloatVT (OpNode _.RC:$src))>, EVEX;
+ let mayLoad = 1 in {
+ defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (OpNode (_.FloatVT
+ (bitconvert (_.LdFrag addr:$src))))>, EVEX;
+
+ defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr,
+ "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
+ (OpNode (_.FloatVT
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ EVEX, EVEX_B;
+ }
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
@@ -3723,15 +4305,45 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
}
}
+multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ v16f32_info>,
+ EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ v8f64_info>,
+ EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX] in {
+ defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v4f32x_info>,
+ EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, v8f32x_info>,
+ EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v2f64x_info>,
+ EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, v4f64x_info>,
+ EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ }
+}
+
+defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>;
defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
- SSE_SQRTSS, SSE_SQRTSD>,
- avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
- int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512,
- SSE_SQRTPS, SSE_SQRTPD>;
+ SSE_SQRTSS, SSE_SQRTSD>;
let Predicates = [HasAVX512] in {
+ def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
+ (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
+ (VSQRTPSZr VR512:$src1)>;
+ def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
+ (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
+ (VSQRTPDZr VR512:$src1)>;
+
def : Pat<(f32 (fsqrt FR32X:$src)),
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -4301,33 +4913,29 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
(memopv8i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-multiclass avx512_alignr<string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop> {
- def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$src3),
- !strconcat(OpcodeStr,
- " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, EVEX_4V;
+multiclass avx512_valign<X86VectorVTInfo _> {
+ defm rri : AVX512_maskable<0x03, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
+ "valign"##_.Suffix,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
+ (i8 imm:$src3)))>,
+ AVX512AIi8Base, EVEX_4V;
+
+ // Also match valign of packed floats.
+ def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
+ (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
+
let mayLoad = 1 in
- def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$src3),
- !strconcat(OpcodeStr,
- " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
+ !strconcat("valign"##_.Suffix,
+ " \t{$src3, $src2, $src1, $dst|"
+ "$dst, $src1, $src2, $src3}"),
[]>, EVEX_4V;
}
-defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
+defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
@@ -4525,3 +5133,32 @@ def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
(MOV8mr addr:$dst, GR8:$src)>;
+multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
+def rr : AVX512XS8I<opc, MRMDestReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
+ !strconcat(OpcodeStr##Vec.Suffix, " \t{$src, $dst|$dst, $src}"),
+ [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
+}
+
+multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
+ string OpcodeStr, Predicate prd> {
+let Predicates = [prd] in
+ defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
+ defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ }
+}
+
+multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
+ defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
+ HasBWI>;
+ defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
+ HasBWI>, VEX_W;
+ defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
+ HasDQI>;
+ defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
+ HasDQI>, VEX_W;
+}
+
+defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f2574cc..25e1e80 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1355,49 +1355,57 @@ let Predicates = [HasBMI2] in {
//===----------------------------------------------------------------------===//
// ADCX Instruction
//
-let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+let hasSideEffects = 0, Defs = [EFLAGS], Uses = [EFLAGS],
+ Constraints = "$src0 = $dst", AddedComplexity = 10 in {
let SchedRW = [WriteALU] in {
- def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "adcx{l}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_NONMEM>, T8PD;
-
- def ADCX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "adcx{q}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_NONMEM>, T8PD, Requires<[In64BitMode]>;
+ def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
+ (ins GR32:$src0, GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS,
+ (X86adc_flag GR32:$src0, GR32:$src, EFLAGS))],
+ IIC_BIN_CARRY_NONMEM>, T8PD, Requires<[HasADX]>;
+ def ADCX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src0, GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS,
+ (X86adc_flag GR64:$src0, GR64:$src, EFLAGS))],
+ IIC_BIN_CARRY_NONMEM>, T8PD, Requires<[HasADX, In64BitMode]>;
} // SchedRW
let mayLoad = 1, SchedRW = [WriteALULd] in {
- def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "adcx{l}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_MEM>, T8PD;
-
- def ADCX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "adcx{q}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_MEM>, T8PD, Requires<[In64BitMode]>;
+ def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src0, i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS,
+ (X86adc_flag GR32:$src0, (loadi32 addr:$src), EFLAGS))],
+ IIC_BIN_CARRY_MEM>, T8PD, Requires<[HasADX]>;
+
+ def ADCX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src0, i64mem:$src), "adcx{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS,
+ (X86adc_flag GR64:$src0, (loadi64 addr:$src), EFLAGS))],
+ IIC_BIN_CARRY_MEM>, T8PD, Requires<[HasADX, In64BitMode]>;
}
}
//===----------------------------------------------------------------------===//
// ADOX Instruction
//
-let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+let hasSideEffects = 0, Defs = [EFLAGS], Uses = [EFLAGS] in {
let SchedRW = [WriteALU] in {
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"adox{l}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_NONMEM>, T8XS;
+ [], IIC_BIN_NONMEM>, T8XS, Requires<[HasADX]>;
def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"adox{q}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_NONMEM>, T8XS, Requires<[In64BitMode]>;
+ [], IIC_BIN_NONMEM>, T8XS, Requires<[HasADX, In64BitMode]>;
} // SchedRW
let mayLoad = 1, SchedRW = [WriteALULd] in {
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"adox{l}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_MEM>, T8XS;
+ [], IIC_BIN_MEM>, T8XS, Requires<[HasADX]>;
def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"adox{q}\t{$src, $dst|$dst, $src}",
- [], IIC_BIN_MEM>, T8XS, Requires<[In64BitMode]>;
+ [], IIC_BIN_MEM>, T8XS, Requires<[HasADX, In64BitMode]>;
}
}
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index e421f8c..2056056 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -21,8 +21,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86INSTRBUILDER_H
-#define X86INSTRBUILDER_H
+#ifndef LLVM_LIB_TARGET_X86_X86INSTRBUILDER_H
+#define LLVM_LIB_TARGET_X86_X86INSTRBUILDER_H
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index ca4f608..117b6ff 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -46,11 +46,11 @@ let Defs = [ESP, EFLAGS], Uses = [ESP] in {
def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
"#ADJCALLSTACKDOWN",
[(X86callseq_start timm:$amt)]>,
- Requires<[Not64BitMode]>;
+ Requires<[NotLP64]>;
def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
"#ADJCALLSTACKUP",
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[Not64BitMode]>;
+ Requires<[NotLP64]>;
}
// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
@@ -62,11 +62,11 @@ let Defs = [RSP, EFLAGS], Uses = [RSP] in {
def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
"#ADJCALLSTACKDOWN",
[(X86callseq_start timm:$amt)]>,
- Requires<[In64BitMode]>;
+ Requires<[IsLP64]>;
def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
"#ADJCALLSTACKUP",
[(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[In64BitMode]>;
+ Requires<[IsLP64]>;
}
@@ -118,7 +118,7 @@ def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
"# variable sized alloca for segmented stacks",
[(set GR32:$dst,
(X86SegAlloca GR32:$size))]>,
- Requires<[Not64BitMode]>;
+ Requires<[NotLP64]>;
let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
@@ -214,6 +214,8 @@ let isPseudo = 1 in {
"#SEH_PushFrame $mode", []>;
def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
"#SEH_EndPrologue", []>;
+ def SEH_Epilogue : I<0, Pseudo, (outs), (ins),
+ "#SEH_Epilogue", []>;
}
//===----------------------------------------------------------------------===//
@@ -407,7 +409,8 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
+ ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
@@ -426,7 +429,8 @@ def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
+ ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
@@ -747,18 +751,88 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
TB, LOCK;
-def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
- "#ACQUIRE_MOV PSEUDO!",
- [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
-def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
- [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
-def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
- [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
-def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
- [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
+/* The following multiclass tries to make sure that in code like
+ * x.store (immediate op x.load(acquire), release)
+ * an operation directly on memory is generated instead of wasting a register.
+ * It is not automatic as atomic_store/load are only lowered to MOV instructions
+ * extremely late to prevent them from being accidentally reordered in the backend
+ * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
+ */
+multiclass RELEASE_BINOP_MI<string op> {
+ def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
+ "#RELEASE_BINOP PSEUDO!",
+ [(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
+ (atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
+ // NAME#16 is not generated as 16-bit arithmetic instructions are considered
+ // costly and avoided as far as possible by this backend anyway
+ def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
+ "#RELEASE_BINOP PSEUDO!",
+ [(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
+ (atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
+ def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "#RELEASE_BINOP PSEUDO!",
+ [(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
+ (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
+}
+defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
+defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
+defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
+defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
+// Note: we don't deal with sub, because subtractions of constants are
+// optimized into additions before this code can run
+
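
A minimal sketch of the source-level pattern the RELEASE_BINOP/RELEASE_UNOP pseudos above are aimed at, assuming a plain std::atomic<int> named counter (the variable name and the exact instruction selected are illustrative, not taken from the patch):

    // Illustrative only: the shape of code these pseudo-instructions target.
    #include <atomic>

    std::atomic<int> counter;

    void bump() {
      // On x86 the acquire load and the release store are both plain MOVs,
      // so the point of the pseudos above is to select one memory-destination
      // add (e.g. "addl $5, counter(%rip)") instead of load/modify/store
      // through a scratch register.
      counter.store(counter.load(std::memory_order_acquire) + 5,
                    std::memory_order_release);
    }
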
+multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
+ def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
+ "#RELEASE_UNOP PSEUDO!",
+ [(atomic_store_8 addr:$dst, dag8)]>;
+ def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
+ "#RELEASE_UNOP PSEUDO!",
+ [(atomic_store_16 addr:$dst, dag16)]>;
+ def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
+ "#RELEASE_UNOP PSEUDO!",
+ [(atomic_store_32 addr:$dst, dag32)]>;
+ def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
+ "#RELEASE_UNOP PSEUDO!",
+ [(atomic_store_64 addr:$dst, dag64)]>;
+}
+
+defm RELEASE_INC : RELEASE_UNOP<
+ (add (atomic_load_8 addr:$dst), (i8 1)),
+ (add (atomic_load_16 addr:$dst), (i16 1)),
+ (add (atomic_load_32 addr:$dst), (i32 1)),
+ (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
+defm RELEASE_DEC : RELEASE_UNOP<
+ (add (atomic_load_8 addr:$dst), (i8 -1)),
+ (add (atomic_load_16 addr:$dst), (i16 -1)),
+ (add (atomic_load_32 addr:$dst), (i32 -1)),
+ (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
+/*
+TODO: These don't work because the type inference of TableGen fails.
+TODO: find a way to fix it.
+defm RELEASE_NEG : RELEASE_UNOP<
+ (ineg (atomic_load_8 addr:$dst)),
+ (ineg (atomic_load_16 addr:$dst)),
+ (ineg (atomic_load_32 addr:$dst)),
+ (ineg (atomic_load_64 addr:$dst))>;
+defm RELEASE_NOT : RELEASE_UNOP<
+ (not (atomic_load_8 addr:$dst)),
+ (not (atomic_load_16 addr:$dst)),
+ (not (atomic_load_32 addr:$dst)),
+ (not (atomic_load_64 addr:$dst))>;
+*/
+
+def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
+ "#RELEASE_MOV PSEUDO !",
+ [(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
+def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
+ "#RELEASE_MOV PSEUDO !",
+ [(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
+def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
+ "#RELEASE_MOV PSEUDO !",
+ [(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
+def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "#RELEASE_MOV PSEUDO !",
+ [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
"#RELEASE_MOV PSEUDO!",
@@ -773,11 +847,22 @@ def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
"#RELEASE_MOV PSEUDO!",
[(atomic_store_64 addr:$dst, GR64:$src)]>;
+def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR8:$dst, (atomic_load_8 addr:$src))]>;
+def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR16:$dst, (atomic_load_16 addr:$src))]>;
+def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR32:$dst, (atomic_load_32 addr:$src))]>;
+def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
+ "#ACQUIRE_MOV PSEUDO!",
+ [(set GR64:$dst, (atomic_load_64 addr:$src))]>;
//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions.
//===----------------------------------------------------------------------===//
-
// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
// instruction selection into a branch sequence.
let Uses = [EFLAGS], usesCustomInserter = 1 in {
@@ -1106,6 +1191,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != ISD::AssertSext &&
N->getOpcode() != X86ISD::CMOV;
}]>;
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 6be6a1f..b38129a 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -97,13 +97,23 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
[], IIC_MOVZX>, TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
[], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
+
+def MOVSX32_NOREXrr8 : I<0xBE, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ [], IIC_MOVSX>, TB, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVSX32_NOREXrm8 : I<0xBE, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ [], IIC_MOVSX>, TB, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 4ad7b7e..d9f173e 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -114,9 +114,6 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).
-// Pseudo Instruction for FP stack return values.
-def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>;
-
// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index cc30266..fe4ead1 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -36,20 +36,21 @@ def MRM6m : Format<30>; def MRM7m : Format<31>;
def MRM_C0 : Format<32>; def MRM_C1 : Format<33>; def MRM_C2 : Format<34>;
def MRM_C3 : Format<35>; def MRM_C4 : Format<36>; def MRM_C8 : Format<37>;
def MRM_C9 : Format<38>; def MRM_CA : Format<39>; def MRM_CB : Format<40>;
-def MRM_D0 : Format<41>; def MRM_D1 : Format<42>; def MRM_D4 : Format<43>;
-def MRM_D5 : Format<44>; def MRM_D6 : Format<45>; def MRM_D8 : Format<46>;
-def MRM_D9 : Format<47>; def MRM_DA : Format<48>; def MRM_DB : Format<49>;
-def MRM_DC : Format<50>; def MRM_DD : Format<51>; def MRM_DE : Format<52>;
-def MRM_DF : Format<53>; def MRM_E0 : Format<54>; def MRM_E1 : Format<55>;
-def MRM_E2 : Format<56>; def MRM_E3 : Format<57>; def MRM_E4 : Format<58>;
-def MRM_E5 : Format<59>; def MRM_E8 : Format<60>; def MRM_E9 : Format<61>;
-def MRM_EA : Format<62>; def MRM_EB : Format<63>; def MRM_EC : Format<64>;
-def MRM_ED : Format<65>; def MRM_EE : Format<66>; def MRM_F0 : Format<67>;
-def MRM_F1 : Format<68>; def MRM_F2 : Format<69>; def MRM_F3 : Format<70>;
-def MRM_F4 : Format<71>; def MRM_F5 : Format<72>; def MRM_F6 : Format<73>;
-def MRM_F7 : Format<74>; def MRM_F8 : Format<75>; def MRM_F9 : Format<76>;
-def MRM_FA : Format<77>; def MRM_FB : Format<78>; def MRM_FC : Format<79>;
-def MRM_FD : Format<80>; def MRM_FE : Format<81>; def MRM_FF : Format<82>;
+def MRM_CF : Format<41>; def MRM_D0 : Format<42>; def MRM_D1 : Format<43>;
+def MRM_D4 : Format<44>; def MRM_D5 : Format<45>; def MRM_D6 : Format<46>;
+def MRM_D7 : Format<47>; def MRM_D8 : Format<48>; def MRM_D9 : Format<49>;
+def MRM_DA : Format<50>; def MRM_DB : Format<51>; def MRM_DC : Format<52>;
+def MRM_DD : Format<53>; def MRM_DE : Format<54>; def MRM_DF : Format<55>;
+def MRM_E0 : Format<56>; def MRM_E1 : Format<57>; def MRM_E2 : Format<58>;
+def MRM_E3 : Format<59>; def MRM_E4 : Format<60>; def MRM_E5 : Format<61>;
+def MRM_E8 : Format<62>; def MRM_E9 : Format<63>; def MRM_EA : Format<64>;
+def MRM_EB : Format<65>; def MRM_EC : Format<66>; def MRM_ED : Format<67>;
+def MRM_EE : Format<68>; def MRM_F0 : Format<69>; def MRM_F1 : Format<70>;
+def MRM_F2 : Format<71>; def MRM_F3 : Format<72>; def MRM_F4 : Format<73>;
+def MRM_F5 : Format<74>; def MRM_F6 : Format<75>; def MRM_F7 : Format<76>;
+def MRM_F8 : Format<77>; def MRM_F9 : Format<78>; def MRM_FA : Format<79>;
+def MRM_FB : Format<80>; def MRM_FC : Format<81>; def MRM_FD : Format<82>;
+def MRM_FE : Format<83>; def MRM_FF : Format<84>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -100,6 +101,7 @@ def CD8VF : CD8VForm<0>; // v := VL
def CD8VH : CD8VForm<1>; // v := VL/2
def CD8VQ : CD8VForm<2>; // v := VL/4
def CD8VO : CD8VForm<3>; // v := VL/8
+// The tuple (subvector) forms.
def CD8VT1 : CD8VForm<4>; // v := 1
def CD8VT2 : CD8VForm<5>; // v := 2
def CD8VT4 : CD8VForm<6>; // v := 4
@@ -184,13 +186,16 @@ class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
+class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
+class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
+
+// Specify AVX512 8-bit compressed displacement encoding based on the vector
+// element size in bits (8, 16, 32, 64) and the CDisp8 form.
class EVEX_CD8<int esize, CD8VForm form> {
- bits<2> EVEX_CD8E = !if(!eq(esize, 8), 0b00,
- !if(!eq(esize, 16), 0b01,
- !if(!eq(esize, 32), 0b10,
- !if(!eq(esize, 64), 0b11, ?))));
- bits<3> EVEX_CD8V = form.Value;
+ int CD8_EltSize = !srl(esize, 3);
+ bits<3> CD8_Form = form.Value;
}
+
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
class MemOp4 { bit hasMemOp4Prefix = 1; }
class XOP { Encoding OpEnc = EncXOP; }
@@ -253,12 +258,32 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasEVEX_Z = 0; // Does this inst set the EVEX_Z field?
bit hasEVEX_L2 = 0; // Does this inst set the EVEX_L2 field?
bit hasEVEX_B = 0; // Does this inst set the EVEX_B field?
- bits<2> EVEX_CD8E = 0; // Compressed disp8 form - element-size.
- bits<3> EVEX_CD8V = 0; // Compressed disp8 form - vector-width.
+ bits<3> CD8_Form = 0; // Compressed disp8 form - vector-width.
+ // Declare it int rather than bits<4> so that all bits are defined when
+ // assigning to bits<7>.
+ int CD8_EltSize = 0; // Compressed disp8 form - element-size in bytes.
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands
bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.
+ bits<2> EVEX_LL;
+ let EVEX_LL{0} = hasVEX_L;
+ let EVEX_LL{1} = hasEVEX_L2;
+ // Vector size in bytes.
+ bits<7> VectSize = !shl(16, EVEX_LL);
+
+ // The scaling factor for AVX512's compressed displacement is either
+ // - the size of a power-of-two number of elements or
+ // - the size of a single element for broadcasts or
+ // - the total vector size divided by a power-of-two number.
+ // Possible values are: 0 (non-AVX512 inst), 1, 2, 4, 8, 16, 32 and 64.
+ bits<7> CD8_Scale = !if (!eq (OpEnc.Value, EncEVEX.Value),
+ !if (CD8_Form{2},
+ !shl(CD8_EltSize, CD8_Form{1-0}),
+ !if (hasEVEX_B,
+ CD8_EltSize,
+ !srl(VectSize, CD8_Form{1-0}))), 0);
+
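
To make the !if chain above concrete, here is a small C++ re-derivation of the same disp8 scaling rules; this is a sketch only, the function name and parameters are invented, and the form encoding follows the CD8V* Format defs earlier in this file (0-3 for the full/half/quarter/eighth-vector forms, 4-7 for the tuple forms):

    // Illustrative re-derivation of CD8_Scale; not an LLVM API.
    #include <cstdio>

    unsigned cd8Scale(unsigned eltSizeBytes, unsigned vectSizeBytes,
                      unsigned form, bool broadcast, bool isEVEX) {
      if (!isEVEX)
        return 0;                            // non-AVX512 instruction
      if (form & 4)                          // CD8VT1/T2/T4/T8: N elements
        return eltSizeBytes << (form & 3);
      if (broadcast)                         // EVEX.B: a single element
        return eltSizeBytes;
      return vectSizeBytes >> (form & 3);    // CD8VF/VH/VQ/VO: VL, VL/2, VL/4, VL/8
    }

    int main() {
      // 512-bit op on 32-bit elements, full-vector form: disp8 is scaled by 64.
      std::printf("%u\n", cd8Scale(4, 64, /*CD8VF*/ 0, false, true));
      // The {1to16} broadcast form of the same op: disp8 is scaled by 4.
      std::printf("%u\n", cd8Scale(4, 64, /*CD8VF*/ 0, true, true));
      return 0;
    }
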
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
@@ -283,11 +308,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{45} = hasEVEX_Z;
let TSFlags{46} = hasEVEX_L2;
let TSFlags{47} = hasEVEX_B;
- let TSFlags{49-48} = EVEX_CD8E;
- let TSFlags{52-50} = EVEX_CD8V;
- let TSFlags{53} = has3DNow0F0FOpcode;
- let TSFlags{54} = hasMemOp4Prefix;
- let TSFlags{55} = hasEVEX_RC;
+ // If we run out of TSFlags bits, it's possible to encode this in 3 bits.
+ let TSFlags{54-48} = CD8_Scale;
+ let TSFlags{55} = has3DNow0F0FOpcode;
+ let TSFlags{56} = hasMemOp4Prefix;
+ let TSFlags{57} = hasEVEX_RC;
}
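
Given the bit positions assigned just above (CD8_Scale in bits 54-48, followed by the three single-bit flags), a consumer of TSFlags could recover the fields roughly as follows; the helper names are made up for illustration and are not the real X86II accessors:

    // Illustrative decode of the TSFlags layout shown above.
    #include <cstdint>

    inline unsigned getCD8Scale(uint64_t TSFlags) { return (TSFlags >> 48) & 0x7F; }
    inline bool has3DNow0F0F(uint64_t TSFlags)    { return (TSFlags >> 55) & 1; }
    inline bool hasMemOp4(uint64_t TSFlags)       { return (TSFlags >> 56) & 1; }
    inline bool hasEVEXRC(uint64_t TSFlags)       { return (TSFlags >> 57) & 1; }
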
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -690,14 +715,25 @@ class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
+class AVX512BIBase : PD {
+ Domain ExeDomain = SSEPackedInt;
+}
class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
+class AVX512BIi8Base : PD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
+class AVX512AIi8Base : TAPD {
+ Domain ExeDomain = SSEPackedInt;
+ ImmType ImmT = Imm8;
+}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>,
@@ -720,6 +756,11 @@ class AVX512FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8PD,
EVEX_4V, Requires<[HasAVX512]>;
+class AVX512FMA3Base : T8PD, EVEX_4V;
+
+class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, Requires<[HasAVX512]>;
// AES Instruction Templates:
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 6f0fa94..1c7215c 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -83,7 +83,7 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW",
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
def X86insertps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
- SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+ SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
@@ -188,6 +188,8 @@ def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>;
+def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
@@ -197,12 +199,15 @@ def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
+ SDTCisVec<0>, SDTCisInt<2>]>;
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
+def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -231,10 +236,11 @@ def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
-def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
-def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
-def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
+def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
+def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
+def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
+def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
+def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
@@ -247,6 +253,9 @@ def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
[SDTCisVec<1>, SDTCisPtrTy<2>]>, []>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
+
+def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
+
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
@@ -254,6 +263,10 @@ def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
+def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
+def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
+
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
SDTCisVT<4, i8>]>;
@@ -311,6 +324,8 @@ def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
// 512-bit load pattern fragments
def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
+def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
+def loadv32i16 : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
@@ -509,7 +524,9 @@ def I8Imm : SDNodeXForm<imm, [{
}]>;
def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>;
-def FROUND_CURRENT : ImmLeaf<i32, [{ return Imm == 4; }]>;
+def FROUND_CURRENT : ImmLeaf<i32, [{
+ return Imm == X86::STATIC_ROUNDING::CUR_DIRECTION;
+}]>;
// BYTE_imm - Transform bit immediates into byte immediates.
def BYTE_imm : SDNodeXForm<imm, [{
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 0d3afc4..7f87bdd 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -100,8 +101,8 @@ void X86InstrInfo::anchor() {}
X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
: X86GenInstrInfo(
- (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
- (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+ (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
+ (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
Subtarget(STI), RI(STI) {
static const X86OpTblEntry OpTbl2Addr[] = {
@@ -377,7 +378,39 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
{ X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
// AVX-512 foldable instructions
- { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }
+ { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
+ // AVX-512 foldable instructions (256-bit versions)
+ { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
+ // AVX-512 foldable instructions (128-bit versions)
+ { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -415,6 +448,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
+ { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -493,6 +529,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
{ X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
{ X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
+ { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
+ { X86::VCVTPD2DQrr, X86::VCVTPD2DQXrm, 0 },
+ { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
{ X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
@@ -526,6 +567,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
// AVX 256-bit foldable instructions
+ { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
+ { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
+ { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
+ { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
+ { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
@@ -533,6 +579,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
{ X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
{ X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
// AVX2 foldable instructions
{ X86::VPABSBrr256, X86::VPABSBrm256, 0 },
@@ -541,13 +594,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
{ X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
{ X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
- { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
- { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
- { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
// BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
{ X86::BEXTR32rr, X86::BEXTR32rm, 0 },
@@ -601,18 +647,46 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable instructions
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
- { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 },
- { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
- { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
- { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
+ { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
+ { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
+ { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
+ { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
+ { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
+ { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
+ { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
+ { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
+ // AVX-512 foldable instructions (256-bit versions)
+ { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
+ { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
+ { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
+ { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
+ { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
+ { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
+ { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
+ { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
+ { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
+  // AVX-512 foldable instructions (128-bit versions)
+ { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
+ { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
+ { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
+ { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
+ { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
+ { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
+ { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
+ { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
// AES foldable instructions
{ X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
{ X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
{ X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 },
- { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 },
+ { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -869,8 +943,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
- { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
@@ -1543,8 +1615,11 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::VMOVAPSrm:
case X86::VMOVAPDrm:
case X86::VMOVDQArm:
+ case X86::VMOVUPSYrm:
case X86::VMOVAPSYrm:
+ case X86::VMOVUPDYrm:
case X86::VMOVAPDYrm:
+ case X86::VMOVDQUYrm:
case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
@@ -1572,8 +1647,11 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::VMOVAPSmr:
case X86::VMOVAPDmr:
case X86::VMOVDQAmr:
+ case X86::VMOVUPSYmr:
case X86::VMOVAPSYmr:
+ case X86::VMOVUPDYmr:
case X86::VMOVAPDYmr:
+ case X86::VMOVDQUYmr:
case X86::VMOVDQAYmr:
case X86::VMOVUPSZmr:
case X86::VMOVAPSZmr:
@@ -2078,34 +2156,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
- case X86::SHUFPSrri: {
- assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
- if (!Subtarget.hasSSE2()) return nullptr;
-
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- if (B != C) return nullptr;
- unsigned M = MI->getOperand(3).getImm();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addOperand(Dest).addOperand(Src).addImm(M);
- break;
- }
- case X86::SHUFPDrri: {
- assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
- if (!Subtarget.hasSSE2()) return nullptr;
-
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- if (B != C) return nullptr;
- unsigned M = MI->getOperand(3).getImm();
-
- // Convert to PSHUFD mask.
- M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
-
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addOperand(Dest).addOperand(Src).addImm(M);
- break;
- }
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
@@ -2387,6 +2437,42 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI->getOperand(3).setImm(Size-Amt);
return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
+ case X86::BLENDPDrri:
+ case X86::BLENDPSrri:
+ case X86::PBLENDWrri:
+ case X86::VBLENDPDrri:
+ case X86::VBLENDPSrri:
+ case X86::VBLENDPDYrri:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDrri:
+ case X86::VPBLENDWrri:
+ case X86::VPBLENDDYrri:
+ case X86::VPBLENDWYrri:{
+ unsigned Mask;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::BLENDPDrri: Mask = 0x03; break;
+ case X86::BLENDPSrri: Mask = 0x0F; break;
+ case X86::PBLENDWrri: Mask = 0xFF; break;
+ case X86::VBLENDPDrri: Mask = 0x03; break;
+ case X86::VBLENDPSrri: Mask = 0x0F; break;
+ case X86::VBLENDPDYrri: Mask = 0x0F; break;
+ case X86::VBLENDPSYrri: Mask = 0xFF; break;
+ case X86::VPBLENDDrri: Mask = 0x0F; break;
+ case X86::VPBLENDWrri: Mask = 0xFF; break;
+ case X86::VPBLENDDYrri: Mask = 0xFF; break;
+ case X86::VPBLENDWYrri: Mask = 0xFF; break;
+ }
+ // Only the least significant bits of Imm are used.
+ unsigned Imm = MI->getOperand(3).getImm() & Mask;
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->getOperand(3).setImm(Mask ^ Imm);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
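
The mask-XOR update above has a simple worked form: swapping the two blend sources is the same as inverting the immediate within the bits the instruction actually reads. A small sketch, with the function name and sample values invented for illustration:

    // Worked example of the immediate update used when commuting a blend.
    #include <cassert>

    // Lanes that took the first source now take the second and vice versa;
    // bits the instruction never reads are cleared.
    unsigned commuteBlendImm(unsigned Imm, unsigned Mask) {
      return Mask ^ (Imm & Mask);
    }

    int main() {
      // BLENDPS reads four immediate bits (Mask = 0x0F): 0b0110 takes lanes 1
      // and 2 from the second source, so after the swap it must become 0b1001.
      assert(commuteBlendImm(0x6, 0x0F) == 0x9);
      assert(commuteBlendImm(0x3, 0x0F) == 0xC);
      return 0;
    }
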
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
@@ -2471,6 +2557,20 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
switch (MI->getOpcode()) {
+ case X86::BLENDPDrri:
+ case X86::BLENDPSrri:
+ case X86::PBLENDWrri:
+ case X86::VBLENDPDrri:
+ case X86::VBLENDPSrri:
+ case X86::VBLENDPDYrri:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDrri:
+ case X86::VPBLENDDYrri:
+ case X86::VPBLENDWrri:
+ case X86::VPBLENDWYrri:
+ SrcOpIdx1 = 1;
+ SrcOpIdx2 = 2;
+ return true;
case X86::VFMADDPDr231r:
case X86::VFMADDPSr231r:
case X86::VFMADDSDr231r:
@@ -3067,6 +3167,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
inline static bool MaskRegClassContains(unsigned Reg) {
return X86::VK8RegClass.contains(Reg) ||
X86::VK16RegClass.contains(Reg) ||
+ X86::VK32RegClass.contains(Reg) ||
+ X86::VK64RegClass.contains(Reg) ||
X86::VK1RegClass.contains(Reg);
}
static
@@ -3143,7 +3245,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Moving EFLAGS to / from another register requires a push and a pop.
// Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index. See X86FrameLowering.cpp - colobbersTheStack.
+ // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
if (SrcReg == X86::EFLAGS) {
if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
@@ -3287,9 +3389,11 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned =
- (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
+ bool isAligned = (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -3324,9 +3428,11 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned =
- (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
+ bool isAligned = (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
@@ -3868,10 +3974,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
 /// operand at the use. We fold the load instructions if the load defines a virtual
/// register, the virtual register is used once in the same BB, and the
/// instructions in-between do not load or store, and have no side effects.
-MachineInstr* X86InstrInfo::
-optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
- unsigned &FoldAsLoadDefReg,
- MachineInstr *&DefMI) const {
+MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
if (FoldAsLoadDefReg == 0)
return nullptr;
// To be conservative, if there exists another load, clear the load candidate.
@@ -3887,55 +3993,35 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
if (!DefMI->isSafeToMove(this, nullptr, SawStore))
return nullptr;
- // We try to commute MI if possible.
- unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
- for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
- // Collect information about virtual register operands of MI.
- unsigned SrcOperandId = 0;
- bool FoundSrcOperand = false;
- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg != FoldAsLoadDefReg)
- continue;
- // Do not fold if we have a subreg use or a def or multiple uses.
- if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
- return nullptr;
-
- SrcOperandId = i;
- FoundSrcOperand = true;
- }
- if (!FoundSrcOperand) return nullptr;
-
- // Check whether we can fold the def into SrcOperandId.
- SmallVector<unsigned, 8> Ops;
- Ops.push_back(SrcOperandId);
- MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
- if (FoldMI) {
- FoldAsLoadDefReg = 0;
- return FoldMI;
- }
-
- if (Idx == 1) {
- // MI was changed but it didn't help, commute it back!
- commuteInstruction(MI, false);
+ // Collect information about virtual register operands of MI.
+ unsigned SrcOperandId = 0;
+ bool FoundSrcOperand = false;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != FoldAsLoadDefReg)
+ continue;
+ // Do not fold if we have a subreg use or a def or multiple uses.
+ if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
return nullptr;
- }
- // Check whether we can commute MI and enable folding.
- if (MI->isCommutable()) {
- MachineInstr *NewMI = commuteInstruction(MI, false);
- // Unable to commute.
- if (!NewMI) return nullptr;
- if (NewMI != MI) {
- // New instruction. It doesn't need to be kept.
- NewMI->eraseFromParent();
- return nullptr;
- }
- }
+ SrcOperandId = i;
+ FoundSrcOperand = true;
+ }
+ if (!FoundSrcOperand)
+ return nullptr;
+
+ // Check whether we can fold the def into SrcOperandId.
+ SmallVector<unsigned, 8> Ops;
+ Ops.push_back(SrcOperandId);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ if (FoldMI) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
}
+
return nullptr;
}
@@ -3961,6 +4047,28 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
return true;
}
+// LoadStackGuard has so far only been implemented for 64-bit MachO. A
+// different code sequence is needed for other targets.
+static void expandLoadStackGuard(MachineInstrBuilder &MIB,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ unsigned Reg = MIB->getOperand(0).getReg();
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->
+ getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8);
+ MachineBasicBlock::iterator I = MIB.getInstr();
+
+ BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
+ .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
+ .addMemOperand(MMO);
+ MIB->setDebugLoc(DL);
+ MIB->setDesc(TII.get(X86::MOV64rm));
+ MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
+}
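The two loads built here amount to an address fetch through the GOT followed by a dereference. Roughly, at the C level (the __stack_chk_guard symbol name is the conventional one and is only assumed here for illustration; the real lowering builds the two MOV64rm instructions above):

  // Illustrative only: first resolve the guard variable's address (the
  // RIP-relative GOTPCREL load), then read the guard value (the second MOV64rm).
  extern "C" void *__stack_chk_guard;

  static inline void *loadStackGuardValue() {
    void **GuardAddr = &__stack_chk_guard;
    return *GuardAddr;
  }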
+
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = Subtarget.hasAVX();
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
@@ -3995,6 +4103,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
case X86::KSET1B:
case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
+ case TargetOpcode::LOAD_STACK_GUARD:
+ expandLoadStackGuard(MIB, *this);
+ return true;
}
return false;
}
@@ -4070,7 +4181,8 @@ MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Align) const {
+ unsigned Size, unsigned Align,
+ bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
@@ -4138,8 +4250,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
return nullptr;
// If this is a 64-bit load, but the spill slot is 32, then we can do
- // a 32-bit load which is implicitly zero-extended. This likely is due
- // to liveintervalanalysis remat'ing a load from stack slot.
+ // a 32-bit load which is implicitly zero-extended. This likely is
+ // due to live interval analysis remat'ing a load from stack slot.
if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
return nullptr;
Opcode = X86::MOV32rm;
@@ -4158,8 +4270,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// to a 32-bit one.
unsigned DstReg = NewMI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
- NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
- X86::sub_32bit));
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
else
NewMI->getOperand(0).setSubReg(X86::sub_32bit);
}
@@ -4167,6 +4278,65 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
}
+ // If the instruction and target operand are commutable, commute the
+ // instruction and try again.
+ if (AllowCommute) {
+ unsigned OriginalOpIdx = i, CommuteOpIdx1, CommuteOpIdx2;
+ if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
+ bool HasDef = MI->getDesc().getNumDefs();
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg();
+ unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg();
+ bool Tied0 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
+ bool Tied1 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
+
+      // If either of the commutable operands is tied to the destination,
+      // then we cannot commute + fold.
+ if ((HasDef && Reg0 == Reg1 && Tied0) ||
+ (HasDef && Reg0 == Reg2 && Tied1))
+ return nullptr;
+
+ if ((CommuteOpIdx1 == OriginalOpIdx) ||
+ (CommuteOpIdx2 == OriginalOpIdx)) {
+ MachineInstr *CommutedMI = commuteInstruction(MI, false);
+ if (!CommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (CommutedMI != MI) {
+ // New instruction. We can't fold from this.
+ CommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Attempt to fold with the commuted version of the instruction.
+ unsigned CommuteOp =
+ (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
+ NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align,
+ /*AllowCommute=*/false);
+ if (NewMI)
+ return NewMI;
+
+ // Folding failed again - undo the commute before returning.
+ MachineInstr *UncommutedMI = commuteInstruction(MI, false);
+ if (!UncommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (UncommutedMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ UncommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+        // Return here to prevent a duplicate fuse failure report.
+ return nullptr;
+ }
+ }
+ }
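The control flow above follows a try/commute/retry/undo shape: if folding the commuted operand also fails, the instruction is commuted back so the caller never observes a modified MI. A generic sketch of that idiom (illustrative only, not an LLVM interface):

  // TryFold returns a non-null result on success; CommuteInPlace toggles the
  // operand order in place and returns false if the commute is impossible.
  template <typename ResultT, typename TryFoldFn, typename CommuteFn>
  ResultT *foldPossiblyCommuted(TryFoldFn TryFold, CommuteFn CommuteInPlace) {
    if (ResultT *Folded = TryFold())
      return Folded;
    if (!CommuteInPlace())
      return nullptr;
    if (ResultT *Folded = TryFold())
      return Folded;
    CommuteInPlace(); // undo, leaving the instruction as it was found
    return nullptr;
  }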
+
// No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
@@ -4350,8 +4520,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
- Alignment = std::min(
- Alignment, MF.getTarget().getFrameLowering()->getStackAlignment());
+ Alignment = std::min(Alignment, MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -4374,7 +4546,27 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
SmallVector<MachineOperand,4> MOs;
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ Size, Alignment, /*AllowCommute=*/true);
+}
+
+static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
+ const MachineFunction &MF) {
+ unsigned Opc = LoadMI.getOpcode();
+ unsigned RegSize =
+ MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
+
+ if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+    // These instructions only load 32 bits; we can't fold them if the
+    // destination register is wider than 32 bits (4 bytes).
+ return true;
+
+ if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+    // These instructions only load 64 bits; we can't fold them if the
+    // destination register is wider than 64 bits (8 bytes).
+ return true;
+
+ return false;
}
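The new helper encodes a simple size comparison: a scalar load that writes only the low 4 or 8 bytes cannot be folded when the consumer's register class is wider than that. A trivial restatement of the check (illustrative values, not LLVM code):

  // MOVSS/VMOVSS define 4 bytes, MOVSD/VMOVSD define 8; folding is only safe
  // when the loaded width covers the destination register class.
  static bool loadCoversRegClass(unsigned LoadedBytes, unsigned RegClassBytes) {
    return LoadedBytes >= RegClassBytes; // 4-byte MOVSS into a 16-byte XMM fails
  }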
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -4384,8 +4576,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
- if (isLoadFromStackSlot(LoadMI, FrameIndex))
+ if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ if (isPartialRegisterLoad(*LoadMI, MF))
+ return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ }
// Check switch flag
if (NoFusing) return nullptr;
@@ -4496,19 +4691,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
default: {
- if ((LoadMI->getOpcode() == X86::MOVSSrm ||
- LoadMI->getOpcode() == X86::VMOVSSrm) &&
- MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
- > 4)
- // These instructions only load 32 bits, we can't fold them if the
- // destination register is wider than 32 bits (4 bytes).
- return nullptr;
- if ((LoadMI->getOpcode() == X86::MOVSDrm ||
- LoadMI->getOpcode() == X86::VMOVSDrm) &&
- MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
- > 8)
- // These instructions only load 64 bits, we can't fold them if the
- // destination register is wider than 64 bits (8 bytes).
+ if (isPartialRegisterLoad(*LoadMI, MF))
return nullptr;
// Folding a normal load. Just copy the load's address operands.
@@ -4517,7 +4700,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
}
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ /*Size=*/0, Alignment, /*AllowCommute=*/true);
}
@@ -5299,16 +5483,32 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+// This code must remain in sync with getJumpInstrTableEntryBound in this class!
+// In particular, getJumpInstrTableEntryBound must always return an upper bound
+// on the encoding lengths of the instructions generated by
+// getUnconditionalBranch and getTrap.
void X86InstrInfo::getUnconditionalBranch(
MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
Branch.setOpcode(X86::JMP_4);
Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
}
+// This code must remain in sync with getJumpInstrTableEntryBound in this class!
+// In particular, getJumpInstrTableEntryBound must always return an upper bound
+// on the encoding lengths of the instructions generated by
+// getUnconditionalBranch and getTrap.
void X86InstrInfo::getTrap(MCInst &MI) const {
MI.setOpcode(X86::TRAP);
}
+// See getTrap and getUnconditionalBranch for conditions on the value returned
+// by this function.
+unsigned X86InstrInfo::getJumpInstrTableEntryBound() const {
+  // 5 bytes suffice: JMP_4 Symbol@PLT uses 1 byte (E9) for the JMP_4 and 4
+ // bytes for the symbol offset. And TRAP is ud2, which is two bytes (0F 0B).
+ return 5;
+}
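The 5-byte figure is simply the larger of the two encodings named in the comment; a compile-time restatement (assuming C++14 for constexpr std::max):

  #include <algorithm>

  // JMP rel32 = 1 opcode byte (0xE9) + 4 displacement bytes; ud2 = 0x0F 0x0B.
  constexpr unsigned JmpRel32Bytes = 1 + 4;
  constexpr unsigned TrapBytes = 2;
  static_assert(std::max(JmpRel32Bytes, TrapBytes) == 5,
                "a 5-byte table entry holds either instruction");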
+
bool X86InstrInfo::isHighLatencyDef(int opc) const {
switch (opc) {
default: return false;
@@ -5351,10 +5551,10 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VSQRTSSm:
case X86::VSQRTSSm_Int:
case X86::VSQRTSSr:
- case X86::VSQRTPDZrm:
- case X86::VSQRTPDZrr:
- case X86::VSQRTPSZrm:
- case X86::VSQRTPSZrr:
+ case X86::VSQRTPDZm:
+ case X86::VSQRTPDZr:
+ case X86::VSQRTPSZm:
+ case X86::VSQRTPSZr:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
case X86::VSQRTSDZr:
@@ -5426,7 +5626,7 @@ namespace {
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
unsigned PC;
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
@@ -5524,7 +5724,7 @@ namespace {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF->getTarget());
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to RAX/EAX.
MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
@@ -5545,7 +5745,7 @@ namespace {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF->getTarget());
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index c177e3a..57b1958 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86INSTRUCTIONINFO_H
-#define X86INSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_X86_X86INSTRINFO_H
+#define LLVM_LIB_TARGET_X86_X86INSTRINFO_H
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86RegisterInfo.h"
@@ -404,7 +404,8 @@ public:
MachineInstr* MI,
unsigned OpNum,
const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Alignment) const;
+ unsigned Size, unsigned Alignment,
+ bool AllowCommute) const;
void
getUnconditionalBranch(MCInst &Branch,
@@ -412,6 +413,8 @@ public:
void getTrap(MCInst &MI) const override;
+ unsigned getJumpInstrTableEntryBound() const override;
+
bool isHighLatencyDef(int opc) const override;
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index e7b532c..3dbf819 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -551,11 +551,6 @@ class ImmSExtAsmOperandClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
-class ImmZExtAsmOperandClass : AsmOperandClass {
- let SuperClasses = [ImmAsmOperand];
- let RenderMethod = "addImmOperands";
-}
-
def X86GR32orGR64AsmOperand : AsmOperandClass {
let Name = "GR32orGR64";
}
@@ -568,6 +563,7 @@ def AVX512RC : Operand<i32> {
let PrintMethod = "printRoundingControl";
let OperandType = "OPERAND_IMMEDIATE";
}
+
// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
@@ -595,12 +591,6 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti32i8";
}
-// [0, 0x000000FF]
-def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
- let Name = "ImmZExtu32u8";
-}
-
-
// [0, 0x0000007F] |
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
@@ -620,11 +610,6 @@ def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
let OperandType = "OPERAND_IMMEDIATE";
}
-// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
-def u32u8imm : Operand<i32> {
- let ParserMatchClass = ImmZExtu32u8AsmOperand;
- let OperandType = "OPERAND_IMMEDIATE";
-}
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64> {
@@ -708,6 +693,7 @@ def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def NoSSE41 : Predicate<"!Subtarget->hasSSE41()">;
def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
@@ -719,10 +705,16 @@ def HasAVX512 : Predicate<"Subtarget->hasAVX512()">,
AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">;
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
-def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
+def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasPFI : Predicate<"Subtarget->hasPFI()">;
def HasERI : Predicate<"Subtarget->hasERI()">;
+def HasDQI : Predicate<"Subtarget->hasDQI()">;
+def NoDQI : Predicate<"!Subtarget->hasDQI()">;
+def HasBWI : Predicate<"Subtarget->hasBWI()">;
+def HasVLX : Predicate<"Subtarget->hasVLX()">,
+ AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
+def NoVLX : Predicate<"!Subtarget->hasVLX()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
@@ -744,8 +736,10 @@ def HasHLE : Predicate<"Subtarget->hasHLE()">;
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
+def HasSGX : Predicate<"Subtarget->hasSGX()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasSMAP : Predicate<"Subtarget->hasSMAP()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
@@ -754,6 +748,8 @@ def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<"Mode64Bit", "64-bit mode">;
+def IsLP64 : Predicate<"Subtarget->isTarget64BitLP64()">;
+def NotLP64 : Predicate<"!Subtarget->isTarget64BitLP64()">;
def In16BitMode : Predicate<"Subtarget->is16Bit()">,
AssemblerPredicate<"Mode16Bit", "16-bit mode">;
def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
@@ -2396,6 +2392,7 @@ include "X86InstrVMX.td"
include "X86InstrSVM.td"
include "X86InstrTSX.td"
+include "X86InstrSGX.td"
// System instructions.
include "X86InstrSystem.td"
@@ -2514,7 +2511,7 @@ def : MnemonicAlias<"fldcww", "fldcw", "att">;
def : MnemonicAlias<"fnstcww", "fnstcw", "att">;
def : MnemonicAlias<"fnstsww", "fnstsw", "att">;
def : MnemonicAlias<"fucomip", "fucompi", "att">;
-def : MnemonicAlias<"fwait", "wait", "att">;
+def : MnemonicAlias<"fwait", "wait">;
class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond,
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index ecf80a1..9001fba 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -38,12 +38,17 @@ def MMX_PHADDSUBD : OpndItins<
>;
}
+let Sched = WriteVecLogic in
+def MMX_INTALU_ITINS_VECLOGICSCHED : OpndItins<
+ IIC_MMX_ALU_RR, IIC_MMX_ALU_RM
+>;
+
let Sched = WriteVecIMul in
def MMX_PMUL_ITINS : OpndItins<
IIC_MMX_PMUL, IIC_MMX_PMUL
>;
-let Sched = WriteVecALU in {
+let Sched = WriteVecIMul in {
def MMX_PSADBW_ITINS : OpndItins<
IIC_MMX_PSADBW, IIC_MMX_PSADBW
>;
@@ -167,12 +172,14 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
+ Sched<[WriteShuffle]>;
def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
- (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -192,11 +199,11 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
(ins DstRC:$src1, SrcRC:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- NoItinerary, d>;
+ NoItinerary, d>, Sched<[WriteCvtI2F]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- NoItinerary, d>;
+ NoItinerary, d>, Sched<[WriteCvtI2FLd]>;
}
//===----------------------------------------------------------------------===//
@@ -427,13 +434,13 @@ let Constraints = "$src1 = $dst" in
// Logical Instructions
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
- MMX_INTALU_ITINS, 1>;
+ MMX_INTALU_ITINS_VECLOGICSCHED, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
- MMX_INTALU_ITINS, 1>;
+ MMX_INTALU_ITINS_VECLOGICSCHED, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
- MMX_INTALU_ITINS, 1>;
+ MMX_INTALU_ITINS_VECLOGICSCHED, 1>;
defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
- MMX_INTALU_ITINS>;
+ MMX_INTALU_ITINS_VECLOGICSCHED>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
diff --git a/lib/Target/X86/X86InstrSGX.td b/lib/Target/X86/X86InstrSGX.td
new file mode 100644
index 0000000..47c5dc5
--- /dev/null
+++ b/lib/Target/X86/X86InstrSGX.td
@@ -0,0 +1,24 @@
+//===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel SGX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SGX instructions
+
+// ENCLS - Execute an Enclave System Function of Specified Leaf Number
+def ENCLS : I<0x01, MRM_CF, (outs), (ins),
+ "encls", []>, TB, Requires<[HasSGX]>;
+
+// ENCLU - Execute an Enclave User Function of Specified Leaf Number
+def ENCLU : I<0x01, MRM_D7, (outs), (ins),
+ "enclu", []>, TB, Requires<[HasSGX]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f9a5ae1..cc896f0 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -181,6 +181,7 @@ def SSE_MPSADBW_ITINS : OpndItins<
IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
>;
+let Sched = WriteVecIMul in
def SSE_PMULLD_ITINS : OpndItins<
IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
>;
@@ -218,11 +219,21 @@ def DEFAULT_ITINS_BLENDSCHED : OpndItins<
IIC_ALU_NONMEM, IIC_ALU_MEM
>;
+let Sched = WriteVarBlend in
+def DEFAULT_ITINS_VARBLENDSCHED : OpndItins<
+ IIC_ALU_NONMEM, IIC_ALU_MEM
+>;
+
let Sched = WriteFBlend in
def SSE_INTALU_ITINS_FBLEND_P : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
+let Sched = WriteBlend in
+def SSE_INTALU_ITINS_BLEND_P : OpndItins<
+ IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
+>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
@@ -601,29 +612,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
// Patterns
let Predicates = [UseAVX] in {
- let AddedComplexity = 15 in {
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVS{S,D} to the lower bits.
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
-
- // Move low f32 and clear high bits.
- def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4f32 (V_SET0)),
- (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
- def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (VMOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
- }
-
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
@@ -659,31 +647,10 @@ let Predicates = [UseAVX] in {
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
- def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0),
- (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
- sub_xmm)>;
- def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0),
- (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
- sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
- // Move low f64 and clear high bits.
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2f64 (V_SET0)),
- (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;
-
- def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (VMOVSDrr (v2i64 (V_SET0)),
- (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;
-
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
@@ -734,7 +701,6 @@ let Predicates = [UseAVX] in {
(EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
sub_xmm)>;
-
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
@@ -750,7 +716,7 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseSSE1] in {
- let AddedComplexity = 15 in {
+ let Predicates = [NoSSE41], AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
@@ -784,7 +750,7 @@ let Predicates = [UseSSE1] in {
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 15 in {
+ let Predicates = [NoSSE41], AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSD to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
@@ -854,6 +820,7 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
Sched<[WriteLoad]>;
}
+let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
PS, VEX;
@@ -879,20 +846,26 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
PD, VEX, VEX_L;
+}
+
+let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
PS;
-defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
- PD;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
PS;
+}
+let Predicates = [UseSSE2] in {
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
PD;
+}
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStore], Predicates = [HasAVX, NoVLX] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -1006,7 +979,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
- SchedRW = [WriteMove] in {
+ SchedRW = [WriteFShuffle] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1036,7 +1009,7 @@ let Predicates = [UseSSE2] in
(MOVUPDmr addr:$dst, VR128:$src)>;
// Use vmovaps/vmovups for AVX integer load/store.
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
// 128-bit load/store
def : Pat<(alignedloadv2i64 addr:$src),
(VMOVAPSrm addr:$src)>;
@@ -1251,6 +1224,9 @@ let Predicates = [HasAVX] in {
(VMOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(VMOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
@@ -1298,6 +1274,9 @@ let Predicates = [UseSSE2] in {
(MOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
@@ -1360,6 +1339,11 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
+ // Also handle an i64 load because that may get selected as a faster way to
+ // load the data.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
}
let Predicates = [UseSSE1] in {
@@ -1380,6 +1364,11 @@ let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
+ // Also handle an i64 load because that may get selected as a faster way to
+ // load the data.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
}
//===----------------------------------------------------------------------===//
@@ -2577,18 +2566,17 @@ def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
- Domain d, bit IsConvertibleToThreeAddress = 0> {
+ Domain d> {
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
- let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
- def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
- [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
- Sched<[WriteFShuffle]>;
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteFShuffle]>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2607,10 +2595,10 @@ defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>, PS;
+ memopv4f32, SSEPackedSingle>, PS;
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>, PD;
+ memopv2f64, SSEPackedDouble>, PD;
}
let Predicates = [HasAVX] in {
@@ -3136,7 +3124,6 @@ let Predicates = [UseSSE1] in {
let Predicates = [UseSSE2] in {
// SSE2 patterns to select scalar double-precision fp arithmetic instructions
-
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
(f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))))),
@@ -3156,10 +3143,10 @@ let Predicates = [UseSSE2] in {
}
let Predicates = [UseSSE41] in {
- // If the subtarget has SSE4.1 but not AVX, the vector insert
- // instruction is lowered into a X86insertps rather than a X86Movss.
- // When selecting SSE scalar single-precision fp arithmetic instructions,
- // make sure that we correctly match the X86insertps.
+ // If the subtarget has SSE4.1 but not AVX, the vector insert instruction is
+ // lowered into a X86insertps or a X86Blendi rather than a X86Movss. When
+ // selecting SSE scalar single-precision fp arithmetic instructions, make
+ // sure that we correctly match them.
def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
@@ -3177,6 +3164,57 @@ let Predicates = [UseSSE41] in {
(fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fadd
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fsub
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fmul
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fdiv
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
let Predicates = [HasAVX] in {
@@ -3215,6 +3253,57 @@ let Predicates = [HasAVX] in {
(fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv
+ (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))), (i8 1))),
+ (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (i8 1))),
+ (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fadd
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fsub
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fmul
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector (fdiv
+ (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
+ (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
// Patterns used to select SSE scalar fp arithmetic instructions from
@@ -3269,6 +3358,49 @@ let Predicates = [UseSSE2] in {
(DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
}
+let Predicates = [UseSSE41] in {
+ // With SSE4.1 we may see these operations using X86Blendi rather than
+ // X86Movs{s,d}.
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+
+ def : Pat<(v2f64 (X86Blendi (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (ADDSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (SUBSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (MULSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (DIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+}
+
let Predicates = [HasAVX] in {
// The following patterns select AVX Scalar single/double precision fp
// arithmetic instructions from a packed single precision fp instruction
@@ -3298,6 +3430,46 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
(fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
(VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+
+ // Also handle X86Blendi-based patterns.
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+ (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
+ (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+ (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
+
+ def : Pat<(v2f64 (X86Blendi (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (VADDSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (VMULSDrr_Int v2f64:$dst, v2f64:$src)>;
+ def : Pat<(v2f64 (X86Blendi (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)),
+ (v2f64 VR128:$dst), (i8 2))),
+ (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>;
}
/// Unop Arithmetic
@@ -3326,6 +3498,16 @@ def SSE_SQRTSD : OpndItins<
>;
}
+let Sched = WriteFRsqrt in {
+def SSE_RSQRTPS : OpndItins<
+ IIC_SSE_RSQRTPS_RR, IIC_SSE_RSQRTPS_RM
+>;
+
+def SSE_RSQRTSS : OpndItins<
+ IIC_SSE_RSQRTSS_RR, IIC_SSE_RSQRTSS_RM
+>;
+}
+
let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
@@ -3604,10 +3786,10 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>,
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>,
sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
- int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>;
+ int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>;
defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
@@ -3686,6 +3868,7 @@ let Predicates = [UseSSE1] in {
let AddedComplexity = 400 in { // Prefer non-temporal versions
let SchedRW = [WriteStore] in {
+let Predicates = [HasAVX, NoVLX] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3726,6 +3909,7 @@ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)],
IIC_SSE_MOVNT>, VEX, VEX_L;
+}
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3755,6 +3939,14 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVNTPSmr addr:$dst, VR128:$src)>;
+}
+
+def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+ (MOVNTPSmr addr:$dst, VR128:$src)>;
+
} // AddedComplexity
//===----------------------------------------------------------------------===//
@@ -5277,6 +5469,13 @@ let Predicates = [HasAVX] in {
(VMOVDDUPYrr VR256:$src)>;
}
+let Predicates = [UseAVX, OptForSize] in {
+ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>;
+}
+
let Predicates = [UseSSE3] in {
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
@@ -5357,56 +5556,34 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
// Patterns used to select 'addsub' instructions.
let Predicates = [HasAVX] in {
- // Constant 170 corresponds to the binary mask '10101010'.
- // When used as a blend mask, it allows selecting eight elements from two
- // input vectors as follow:
- // - Even-numbered values in the destination are copied from
- // the corresponding elements in the first input vector;
- // - Odd-numbered values in the destination are copied from
- // the corresponding elements in the second input vector.
-
- def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
- (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
- (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
-
- // Constant 10 corresponds to the binary mask '1010'.
- // In the two pattens below, constant 10 is used as a blend mask to select
- // - the 1st and 3rd element from the first input vector (the 'fsub' node);
- // - the 2nd and 4th element from the second input vector (the 'fadd' node).
-
- def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
- (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
- (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
- def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
- (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
- (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
- (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
(VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
- (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
- (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
- (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
+ (VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
+ (VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
+
+ def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))),
+ (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
+ def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 (memop addr:$rhs)))),
+ (VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>;
+ def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))),
+ (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
+ def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 (memop addr:$rhs)))),
+ (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>;
}
let Predicates = [UseSSE3] in {
- // Constant 10 corresponds to the binary mask '1010'.
- // In the pattern below, it is used as a blend mask to select:
- // - the 1st and 3rd element from the first input vector (the fsub node);
- // - the 2nd and 4th element from the second input vector (the fadd node).
-
- def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
- (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
(ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
-
- def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
- (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
- (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
- def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
- (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
+ (ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
+ (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;
}
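For reference, the X86Addsub node these patterns now match has the same value semantics the deleted blend-mask patterns spelled out by hand: even lanes subtract, odd lanes add. A small sketch of that semantics (illustrative only):

  #include <array>
  #include <cstddef>

  // result[i] = a[i] - b[i] for even i, a[i] + b[i] for odd i (ADDSUBPS/PD).
  template <std::size_t N>
  std::array<float, N> addsub(const std::array<float, N> &A,
                              const std::array<float, N> &B) {
    std::array<float, N> R{};
    for (std::size_t I = 0; I != N; ++I)
      R[I] = (I % 2 == 0) ? A[I] - B[I] : A[I] + B[I];
    return R;
  }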
//===---------------------------------------------------------------------===//
@@ -6692,7 +6869,7 @@ let Constraints = "$src1 = $dst" in
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -6701,7 +6878,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
Sched<[WriteFShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
+ (ins VR128:$src1, f32mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
@@ -7308,7 +7485,7 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasAVX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
- memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
+ memopv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
VEX_4V;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
@@ -7316,7 +7493,7 @@ let Predicates = [HasAVX] in {
}
let Predicates = [HasAVX2] in {
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
- memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
+ memopv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
VEX_4V, VEX_L;
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
@@ -7337,7 +7514,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -7346,7 +7523,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
Sched<[itins.Sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -7360,31 +7537,33 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
- let ExeDomain = SSEPackedSingle in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, loadv4f32, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, loadv8f32,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
- }
- let ExeDomain = SSEPackedDouble in {
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, loadv2f64, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256,VR256, loadv4f64,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
- }
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ VR128, loadv2i64, i128mem, 0,
+ DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
+ }
+
+ let ExeDomain = SSEPackedSingle in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, loadv4f32, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, loadv8f32,
+ f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
+ VEX_4V, VEX_L;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, loadv2f64, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256,VR256, loadv4f64,
+ f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
+ VEX_4V, VEX_L;
+ }
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
VR128, loadv2i64, i128mem, 0,
DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
- defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
- }
+
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, loadv4f32, f128mem, 0,
@@ -7412,6 +7591,10 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv2i64, i128mem,
+ 1, SSE_MPSADBW_ITINS>;
+ }
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
VR128, memopv4f32, f128mem,
@@ -7422,11 +7605,7 @@ let Constraints = "$src1 = $dst" in {
1, SSE_INTALU_ITINS_FBLEND_P>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
VR128, memopv2i64, i128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
- defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem,
- 1, SSE_MPSADBW_ITINS>;
- }
+ 1, SSE_INTALU_ITINS_BLEND_P>;
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv4f32, f128mem, 1,
@@ -7545,6 +7724,57 @@ let Predicates = [HasAVX2] in {
(VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
+// Patterns
+let Predicates = [UseAVX] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128 and then doing a
+ // MOVS{S,D} to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (VBLENDPSrri (v4i32 (V_SET0)), VR128:$src, (i8 1))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
+ }
+
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0),
+ (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0),
+ (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
+ sub_xmm)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
+
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+ (VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>;
+}
+
+let Predicates = [UseSSE41] in {
+ // With SSE41 we can use blends for these patterns.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (BLENDPDrri (v2f64 (V_SET0)), VR128:$src, (i8 1))>;
+}
+
+
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
@@ -7555,7 +7785,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr,
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src2),
@@ -7564,18 +7794,21 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
[(set VR128:$dst,
(IntId VR128:$src1,
(bitconvert (mem_frag addr:$src2)), XMM0))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
- int_x86_sse41_blendvpd>;
+ int_x86_sse41_blendvpd,
+ DEFAULT_ITINS_FBLENDSCHED>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
- int_x86_sse41_blendvps>;
+ int_x86_sse41_blendvps,
+ DEFAULT_ITINS_FBLENDSCHED>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
- int_x86_sse41_pblendvb>;
+ int_x86_sse41_pblendvb,
+ DEFAULT_ITINS_VARBLENDSCHED>;
// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
@@ -8393,13 +8626,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX,
+ [(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffle]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX,
+ (vt (X86VPermilpi (memop addr:$src1), (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffleLd]>;
}
@@ -8417,19 +8650,37 @@ let ExeDomain = SSEPackedDouble in {
}
let Predicates = [HasAVX] in {
-def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
+ (VPERMILPSYrr VR256:$src1, VR256:$src2)>;
+def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
+ (VPERMILPSYrm VR256:$src1, addr:$src2)>;
+def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (v4i64 VR256:$src2))),
+ (VPERMILPDYrr VR256:$src1, VR256:$src2)>;
+def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (loadv4i64 addr:$src2))),
+ (VPERMILPDYrm VR256:$src1, addr:$src2)>;
+
+def : Pat<(v8i32 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (loadv4i64 addr:$src1)),
+def : Pat<(v8i32 (X86VPermilpi (bc_v8i32 (loadv4i64 addr:$src1)),
(i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilp (loadv4i64 addr:$src1), (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilpi (loadv4i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
-def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (v4i32 VR128:$src2))),
+ (VPERMILPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))),
+ (VPERMILPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (v2i64 VR128:$src2))),
+ (VPERMILPDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (loadv2i64 addr:$src2))),
+ (VPERMILPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2i64 (X86VPermilpi VR128:$src1, (i8 imm:$imm))),
(VPERMILPDri VR128:$src1, imm:$imm)>;
-def : Pat<(v2i64 (X86VPermilp (loadv2i64 addr:$src1), (i8 imm:$imm))),
+def : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDmi addr:$src1, imm:$imm)>;
}
@@ -8540,15 +8791,15 @@ let Predicates = [HasF16C] in {
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasF16C] in {
- def : Pat<(f32_to_f16 FR32:$src),
+ def : Pat<(fp_to_f16 FR32:$src),
(i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
(COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
- def : Pat<(f16_to_f32 GR16:$src),
+ def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
(COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >;
- def : Pat<(f16_to_f32 (i16 (f32_to_f16 FR32:$src))),
+ def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
(VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
}
@@ -8563,13 +8814,13 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
Sched<[WriteBlend]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
@@ -8578,12 +8829,10 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}
-let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
VR128, loadv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
VR256, loadv4i64, i256mem>, VEX_L;
-}
def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
imm:$mask)),
@@ -8675,6 +8924,27 @@ let Predicates = [HasAVX2] in {
def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
(VBROADCASTSDYrr VR128:$src)>;
+ // Provide aliases for broadcast from the same register class that
+ // automatically does the extract.
+ def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
+ (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
+ (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
+ (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
+ (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
+ (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
+ sub_xmm)))>;
+ def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))),
+ (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src),
+ sub_xmm)))>;
+
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
@@ -8756,6 +9026,9 @@ let Predicates = [HasAVX] in {
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
}
+
+ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
//===----------------------------------------------------------------------===//
@@ -8763,14 +9036,14 @@ let Predicates = [HasAVX] in {
//
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- ValueType OpVT> {
+ ValueType OpVT, X86FoldableSchedWrite Sched> {
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
- Sched<[WriteFShuffle256]>, VEX_4V, VEX_L;
+ Sched<[Sched]>, VEX_4V, VEX_L;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
@@ -8778,22 +9051,22 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
[(set VR256:$dst,
(OpVT (X86VPermv VR256:$src1,
(bitconvert (mem_frag addr:$src2)))))]>,
- Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
+ Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32>;
+defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- ValueType OpVT> {
+ ValueType OpVT, X86FoldableSchedWrite Sched> {
def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
- Sched<[WriteShuffle256]>, VEX, VEX_L;
+ Sched<[Sched]>, VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
@@ -8801,12 +9074,14 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
(i8 imm:$src2))))]>,
- Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX, VEX_L;
+ Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
}
-defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64>, VEX_W;
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
+ WriteShuffle256>, VEX_W;
let ExeDomain = SSEPackedDouble in
-defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64>, VEX_W;
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
+ WriteFShuffle256>, VEX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 5402780..8cabdd0 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -462,11 +462,7 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
- def CPUID32 : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB,
- Requires<[Not64BitMode]>;
-let Defs = [RAX, RBX, RCX, RDX], Uses = [RAX, RCX] in
- def CPUID64 : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB,
- Requires<[In64BitMode]>;
+ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -479,10 +475,10 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
-let Defs = [RDX, RAX], Uses = [RCX] in
+let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
-let Uses = [RDX, RAX, RCX] in
+let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
let Uses = [RDX, RAX] in {
@@ -563,7 +559,7 @@ def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
//===----------------------------------------------------------------------===//
// SMAP Instruction
-let Defs = [EFLAGS], Uses = [EFLAGS] in {
+let Predicates = [HasSMAP], Defs = [EFLAGS] in {
def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
new file mode 100644
index 0000000..d252f72
--- /dev/null
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -0,0 +1,320 @@
+//===-- X86IntrinsicsInfo.h - X86 Intrinsics ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the details for lowering X86 intrinsics
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86INTRINSICSINFO_H
+#define LLVM_LIB_TARGET_X86_X86INTRINSICSINFO_H
+
+namespace llvm {
+
+enum IntrinsicType {
+ INTR_NO_TYPE,
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
+ CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
+ INTR_TYPE_1OP_MASK_RM
+};
+
+struct IntrinsicData {
+
+ unsigned Id;
+ IntrinsicType Type;
+ unsigned Opc0;
+ unsigned Opc1;
+
+ bool operator<(const IntrinsicData &RHS) const {
+ return Id < RHS.Id;
+ }
+ bool operator==(const IntrinsicData &RHS) const {
+ return RHS.Id == Id;
+ }
+};
+
+#define X86_INTRINSIC_DATA(id, type, op0, op1) \
+ { Intrinsic::x86_##id, type, op0, op1 }
+
+/*
+ * IntrinsicsWithChain - the table should be sorted by Intrinsic ID - in
+ * alphabetical order.
+ */
+static const IntrinsicData IntrinsicsWithChain[] = {
+ X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
+
+ X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
+
+ X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
+ X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
+ X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH,
+ X86::VGATHERPF0DPSm, X86::VGATHERPF1DPSm),
+ X86_INTRINSIC_DATA(avx512_gatherpf_qpd_512, PREFETCH,
+ X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm),
+ X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
+ X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
+
+ X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
+
+ X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH,
+ X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH,
+ X86::VSCATTERPF0DPSm, X86::VSCATTERPF1DPSm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH,
+ X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm),
+ X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH,
+ X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm),
+
+ X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
+ X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
+ X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
+ X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0),
+ X86_INTRINSIC_DATA(rdseed_16, RDSEED, X86ISD::RDSEED, 0),
+ X86_INTRINSIC_DATA(rdseed_32, RDSEED, X86ISD::RDSEED, 0),
+ X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
+ X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
+ X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
+
+ X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
+ X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
+ X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
+};
+
+/*
+ * Find Intrinsic data by intrinsic ID.
+ * Return nullptr if the intrinsic is not defined in the table.
+ */
+static const IntrinsicData* getIntrinsicWithChain(unsigned IntNo) {
+
+ IntrinsicData IntrinsicToFind = {IntNo, INTR_NO_TYPE, 0, 0 };
+ const IntrinsicData *Data = std::lower_bound(std::begin(IntrinsicsWithChain),
+ std::end(IntrinsicsWithChain),
+ IntrinsicToFind);
+ if (Data != std::end(IntrinsicsWithChain) && *Data == IntrinsicToFind)
+ return Data;
+ return nullptr;
+}
+
+/*
+ * IntrinsicsWithoutChain - the table should be sorted by Intrinsic ID - in
+ * alphabetical order.
+ */
+static const IntrinsicData IntrinsicsWithoutChain[] = {
+ X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_d_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_128, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_256, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_128, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_256, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_128, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_256, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_128, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_256, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_b_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_128, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_256, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_d_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_128, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_256, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_q_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_d_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_d_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_d_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_q_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_q_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_q_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
+ X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
+ X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(avx_hsub_ps_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(avx_sqrt_pd_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx_sqrt_ps_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(sse2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(sse2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(sse2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(sse2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(sse2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(sse2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(sse2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(sse2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(sse2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(sse2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(sse2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(sse2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(sse2_sqrt_pd, INTR_TYPE_1OP, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(sse2_ucomile_sd, COMI, X86ISD::UCOMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(sse2_ucomilt_sd, COMI, X86ISD::UCOMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(sse2_ucomineq_sd, COMI, X86ISD::UCOMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse3_hadd_pd, INTR_TYPE_2OP, X86ISD::FHADD, 0),
+ X86_INTRINSIC_DATA(sse3_hadd_ps, INTR_TYPE_2OP, X86ISD::FHADD, 0),
+ X86_INTRINSIC_DATA(sse3_hsub_pd, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, X86ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, X86ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, X86ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(sse_comile_ss, COMI, X86ISD::COMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse_sqrt_ps, INTR_TYPE_1OP, ISD::FSQRT, 0),
+ X86_INTRINSIC_DATA(sse_ucomieq_ss, COMI, X86ISD::UCOMI, ISD::SETEQ),
+ X86_INTRINSIC_DATA(sse_ucomige_ss, COMI, X86ISD::UCOMI, ISD::SETGE),
+ X86_INTRINSIC_DATA(sse_ucomigt_ss, COMI, X86ISD::UCOMI, ISD::SETGT),
+ X86_INTRINSIC_DATA(sse_ucomile_ss, COMI, X86ISD::UCOMI, ISD::SETLE),
+ X86_INTRINSIC_DATA(sse_ucomilt_ss, COMI, X86ISD::UCOMI, ISD::SETLT),
+ X86_INTRINSIC_DATA(sse_ucomineq_ss, COMI, X86ISD::UCOMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
+ X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(ssse3_phsub_w_128, INTR_TYPE_2OP, X86ISD::HSUB, 0)
+};
+
+/*
+ * Retrieve data for Intrinsic without chain.
+ * Return nullptr if intrinsic is not defined in the table.
+ */
+static const IntrinsicData* getIntrinsicWithoutChain(unsigned IntNo) {
+ IntrinsicData IntrinsicToFind = { IntNo, INTR_NO_TYPE, 0, 0 };
+ const IntrinsicData *Data = std::lower_bound(std::begin(IntrinsicsWithoutChain),
+ std::end(IntrinsicsWithoutChain),
+ IntrinsicToFind);
+ if (Data != std::end(IntrinsicsWithoutChain) && *Data == IntrinsicToFind)
+ return Data;
+ return nullptr;
+}
+
+static void verifyIntrinsicTables() {
+ assert(std::is_sorted(std::begin(IntrinsicsWithoutChain),
+ std::end(IntrinsicsWithoutChain)) &&
+ std::is_sorted(std::begin(IntrinsicsWithChain),
+ std::end(IntrinsicsWithChain)) &&
+ "Intrinsic data tables should be sorted by Intrinsic ID");
+}
+
+} // End llvm namespace
+
+#endif
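
For reference, the lookup scheme in this new header is a plain sorted-array binary search keyed on the intrinsic ID: std::lower_bound locates the first entry whose Id is not less than the query, and the hit is accepted only when the Id matches exactly; otherwise nullptr is returned and the caller falls back to its existing handling. Below is a minimal, self-contained C++ sketch of the same pattern; the IDs, opcodes, and names in it are illustrative stand-ins, not the real Intrinsic:: or X86ISD enums.

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>

// Hypothetical stand-ins for the intrinsic ID and the lowering opcode; the
// real table stores Intrinsic::x86_* IDs and X86ISD/X86 machine opcodes.
struct Entry {
  unsigned Id;      // lookup key; the table must stay sorted on this field
  unsigned Opcode;  // what the intrinsic lowers to
  bool operator<(const Entry &RHS) const { return Id < RHS.Id; }
  bool operator==(const Entry &RHS) const { return Id == RHS.Id; }
};

// Kept sorted by Id, mirroring IntrinsicsWithChain/IntrinsicsWithoutChain.
static const Entry Table[] = { {10, 100}, {17, 200}, {42, 300} };

static const Entry *lookup(unsigned Id) {
  Entry Key = {Id, 0};
  const Entry *I = std::lower_bound(std::begin(Table), std::end(Table), Key);
  if (I != std::end(Table) && *I == Key)
    return I;        // exact Id match: this intrinsic is in the table
  return nullptr;    // unknown Id: caller handles the intrinsic elsewhere
}

int main() {
  // Same invariant verifyIntrinsicTables() asserts for the real tables.
  assert(std::is_sorted(std::begin(Table), std::end(Table)));
  if (const Entry *E = lookup(17))
    std::printf("intrinsic 17 lowers to opcode %u\n", E->Opcode);
  assert(lookup(11) == nullptr);  // not in the table -> nullptr
  return 0;
}

Because std::lower_bound is only valid on sorted input, any new X86_INTRINSIC_DATA entry has to be inserted at its alphabetical position, which is exactly the invariant verifyIntrinsicTables() asserts.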
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
deleted file mode 100644
index a082c4f..0000000
--- a/lib/Target/X86/X86JITInfo.cpp
+++ /dev/null
@@ -1,588 +0,0 @@
-//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the X86 target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86JITInfo.h"
-#include "X86Relocations.h"
-#include "X86Subtarget.h"
-#include "X86TargetMachine.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Valgrind.h"
-#include <cstdlib>
-#include <cstring>
-using namespace llvm;
-
-#define DEBUG_TYPE "jit"
-
-// Determine the platform we're running on
-#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
-# define X86_64_JIT
-#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
-# define X86_32_JIT
-#endif
-
-void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- unsigned char *OldByte = (unsigned char *)Old;
- *OldByte++ = 0xE9; // Emit JMP opcode.
- unsigned *OldWord = (unsigned *)OldByte;
- unsigned NewAddr = (intptr_t)New;
- unsigned OldAddr = (intptr_t)OldWord;
- *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
-
- // X86 doesn't need to invalidate the processor cache, so just invalidate
- // Valgrind's cache directly.
- sys::ValgrindDiscardTranslations(Old, 5);
-}
-
-
-/// JITCompilerFunction - This contains the address of the JIT function used to
-/// compile a function lazily.
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-
-// Get the ASMPREFIX for the current host. This is often '_'.
-#ifndef __USER_LABEL_PREFIX__
-#define __USER_LABEL_PREFIX__
-#endif
-#define GETASMPREFIX2(X) #X
-#define GETASMPREFIX(X) GETASMPREFIX2(X)
-#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
-
-// For ELF targets, use a .size and .type directive, to let tools
-// know the extent of functions defined in assembler.
-#if defined(__ELF__)
-# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
-# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
-#else
-# define SIZE(sym)
-# define TYPE_FUNCTION(sym)
-#endif
-
-// Provide a convenient way for disabling usage of CFI directives.
-// This is needed for old/broken assemblers (for example, gas on
-// Darwin is pretty old and doesn't support these directives)
-#if defined(__APPLE__)
-# define CFI(x)
-#else
-// FIXME: Disable this until we really want to use it. Also, we will
-// need to add some workarounds for compilers, which support
-// only subset of these directives.
-# define CFI(x)
-#endif
-
-// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional
-// callee saved registers, for the fastcc calling convention.
-extern "C" {
-#if defined(X86_64_JIT)
-# ifndef _MSC_VER
- // No need to save EAX/EDX for X86-64.
- void X86CompilationCallback(void);
- asm(
- ".text\n"
- ".align 8\n"
- ".globl " ASMPREFIX "X86CompilationCallback\n"
- TYPE_FUNCTION(X86CompilationCallback)
- ASMPREFIX "X86CompilationCallback:\n"
- CFI(".cfi_startproc\n")
- // Save RBP
- "pushq %rbp\n"
- CFI(".cfi_def_cfa_offset 16\n")
- CFI(".cfi_offset %rbp, -16\n")
- // Save RSP
- "movq %rsp, %rbp\n"
- CFI(".cfi_def_cfa_register %rbp\n")
- // Save all int arg registers
- "pushq %rdi\n"
- CFI(".cfi_rel_offset %rdi, 0\n")
- "pushq %rsi\n"
- CFI(".cfi_rel_offset %rsi, 8\n")
- "pushq %rdx\n"
- CFI(".cfi_rel_offset %rdx, 16\n")
- "pushq %rcx\n"
- CFI(".cfi_rel_offset %rcx, 24\n")
- "pushq %r8\n"
- CFI(".cfi_rel_offset %r8, 32\n")
- "pushq %r9\n"
- CFI(".cfi_rel_offset %r9, 40\n")
- // Align stack on 16-byte boundary. ESP might not be properly aligned
- // (8 byte) if this is called from an indirect stub.
- "andq $-16, %rsp\n"
- // Save all XMM arg registers
- "subq $128, %rsp\n"
- "movaps %xmm0, (%rsp)\n"
- "movaps %xmm1, 16(%rsp)\n"
- "movaps %xmm2, 32(%rsp)\n"
- "movaps %xmm3, 48(%rsp)\n"
- "movaps %xmm4, 64(%rsp)\n"
- "movaps %xmm5, 80(%rsp)\n"
- "movaps %xmm6, 96(%rsp)\n"
- "movaps %xmm7, 112(%rsp)\n"
- // JIT callee
-#if defined(_WIN64) || defined(__CYGWIN__)
- "subq $32, %rsp\n"
- "movq %rbp, %rcx\n" // Pass prev frame and return address
- "movq 8(%rbp), %rdx\n"
- "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
- "addq $32, %rsp\n"
-#else
- "movq %rbp, %rdi\n" // Pass prev frame and return address
- "movq 8(%rbp), %rsi\n"
- "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
-#endif
- // Restore all XMM arg registers
- "movaps 112(%rsp), %xmm7\n"
- "movaps 96(%rsp), %xmm6\n"
- "movaps 80(%rsp), %xmm5\n"
- "movaps 64(%rsp), %xmm4\n"
- "movaps 48(%rsp), %xmm3\n"
- "movaps 32(%rsp), %xmm2\n"
- "movaps 16(%rsp), %xmm1\n"
- "movaps (%rsp), %xmm0\n"
- // Restore RSP
- "movq %rbp, %rsp\n"
- CFI(".cfi_def_cfa_register %rsp\n")
- // Restore all int arg registers
- "subq $48, %rsp\n"
- CFI(".cfi_adjust_cfa_offset 48\n")
- "popq %r9\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %r9\n")
- "popq %r8\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %r8\n")
- "popq %rcx\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rcx\n")
- "popq %rdx\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rdx\n")
- "popq %rsi\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rsi\n")
- "popq %rdi\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rdi\n")
- // Restore RBP
- "popq %rbp\n"
- CFI(".cfi_adjust_cfa_offset -8\n")
- CFI(".cfi_restore %rbp\n")
- "ret\n"
- CFI(".cfi_endproc\n")
- SIZE(X86CompilationCallback)
- );
-# else
- // No inline assembler support on this platform. The routine is in external
- // file.
- void X86CompilationCallback();
-
-# endif
-#elif defined (X86_32_JIT)
-# ifndef _MSC_VER
- void X86CompilationCallback(void);
- asm(
- ".text\n"
- ".align 8\n"
- ".globl " ASMPREFIX "X86CompilationCallback\n"
- TYPE_FUNCTION(X86CompilationCallback)
- ASMPREFIX "X86CompilationCallback:\n"
- CFI(".cfi_startproc\n")
- "pushl %ebp\n"
- CFI(".cfi_def_cfa_offset 8\n")
- CFI(".cfi_offset %ebp, -8\n")
- "movl %esp, %ebp\n" // Standard prologue
- CFI(".cfi_def_cfa_register %ebp\n")
- "pushl %eax\n"
- CFI(".cfi_rel_offset %eax, 0\n")
- "pushl %edx\n" // Save EAX/EDX/ECX
- CFI(".cfi_rel_offset %edx, 4\n")
- "pushl %ecx\n"
- CFI(".cfi_rel_offset %ecx, 8\n")
-# if defined(__APPLE__)
- "andl $-16, %esp\n" // Align ESP on 16-byte boundary
-# endif
- "subl $16, %esp\n"
- "movl 4(%ebp), %eax\n" // Pass prev frame and return address
- "movl %eax, 4(%esp)\n"
- "movl %ebp, (%esp)\n"
- "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
- "movl %ebp, %esp\n" // Restore ESP
- CFI(".cfi_def_cfa_register %esp\n")
- "subl $12, %esp\n"
- CFI(".cfi_adjust_cfa_offset 12\n")
- "popl %ecx\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %ecx\n")
- "popl %edx\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %edx\n")
- "popl %eax\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %eax\n")
- "popl %ebp\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %ebp\n")
- "ret\n"
- CFI(".cfi_endproc\n")
- SIZE(X86CompilationCallback)
- );
-
- // Same as X86CompilationCallback but also saves XMM argument registers.
- void X86CompilationCallback_SSE(void);
- asm(
- ".text\n"
- ".align 8\n"
- ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
- TYPE_FUNCTION(X86CompilationCallback_SSE)
- ASMPREFIX "X86CompilationCallback_SSE:\n"
- CFI(".cfi_startproc\n")
- "pushl %ebp\n"
- CFI(".cfi_def_cfa_offset 8\n")
- CFI(".cfi_offset %ebp, -8\n")
- "movl %esp, %ebp\n" // Standard prologue
- CFI(".cfi_def_cfa_register %ebp\n")
- "pushl %eax\n"
- CFI(".cfi_rel_offset %eax, 0\n")
- "pushl %edx\n" // Save EAX/EDX/ECX
- CFI(".cfi_rel_offset %edx, 4\n")
- "pushl %ecx\n"
- CFI(".cfi_rel_offset %ecx, 8\n")
- "andl $-16, %esp\n" // Align ESP on 16-byte boundary
- // Save all XMM arg registers
- "subl $64, %esp\n"
- // FIXME: provide frame move information for xmm registers.
- // This can be tricky, because CFA register is ebp (unaligned)
- // and we need to produce offsets relative to it.
- "movaps %xmm0, (%esp)\n"
- "movaps %xmm1, 16(%esp)\n"
- "movaps %xmm2, 32(%esp)\n"
- "movaps %xmm3, 48(%esp)\n"
- "subl $16, %esp\n"
- "movl 4(%ebp), %eax\n" // Pass prev frame and return address
- "movl %eax, 4(%esp)\n"
- "movl %ebp, (%esp)\n"
- "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
- "addl $16, %esp\n"
- "movaps 48(%esp), %xmm3\n"
- CFI(".cfi_restore %xmm3\n")
- "movaps 32(%esp), %xmm2\n"
- CFI(".cfi_restore %xmm2\n")
- "movaps 16(%esp), %xmm1\n"
- CFI(".cfi_restore %xmm1\n")
- "movaps (%esp), %xmm0\n"
- CFI(".cfi_restore %xmm0\n")
- "movl %ebp, %esp\n" // Restore ESP
- CFI(".cfi_def_cfa_register esp\n")
- "subl $12, %esp\n"
- CFI(".cfi_adjust_cfa_offset 12\n")
- "popl %ecx\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %ecx\n")
- "popl %edx\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %edx\n")
- "popl %eax\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %eax\n")
- "popl %ebp\n"
- CFI(".cfi_adjust_cfa_offset -4\n")
- CFI(".cfi_restore %ebp\n")
- "ret\n"
- CFI(".cfi_endproc\n")
- SIZE(X86CompilationCallback_SSE)
- );
-# else
- void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
-
- _declspec(naked) void X86CompilationCallback(void) {
- __asm {
- push ebp
- mov ebp, esp
- push eax
- push edx
- push ecx
- and esp, -16
- sub esp, 16
- mov eax, dword ptr [ebp+4]
- mov dword ptr [esp+4], eax
- mov dword ptr [esp], ebp
- call LLVMX86CompilationCallback2
- mov esp, ebp
- sub esp, 12
- pop ecx
- pop edx
- pop eax
- pop ebp
- ret
- }
- }
-
-# endif // _MSC_VER
-
-#else // Not an i386 host
- void X86CompilationCallback() {
- llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!");
- }
-#endif
-}
-
-/// This is the target-specific function invoked by the
-/// function stub when we did not know the real target of a call. This function
-/// must locate the start of the stub or call site and pass it into the JIT
-/// compiler function.
-extern "C" {
-LLVM_ATTRIBUTE_USED // Referenced from inline asm.
-LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr,
- intptr_t RetAddr) {
- intptr_t *RetAddrLoc = &StackPtr[1];
- // We are reading raw stack data here. Tell MemorySanitizer that it is
- // sufficiently initialized.
- __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc));
- assert(*RetAddrLoc == RetAddr &&
- "Could not find return address on the stack!");
-
- // It's a stub if there is an interrupt marker after the call.
- bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE;
-
- // The call instruction should have pushed the return value onto the stack...
-#if defined (X86_64_JIT)
- RetAddr--; // Backtrack to the reference itself...
-#else
- RetAddr -= 4; // Backtrack to the reference itself...
-#endif
-
-#if 0
- DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
- << " ESP=" << (void*)StackPtr
- << ": Resolving call to function: "
- << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
-#endif
-
- // Sanity check to make sure this really is a call instruction.
-#if defined (X86_64_JIT)
- assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!");
- assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!");
-#else
- assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
-#endif
-
- intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
-
- // Rewrite the call target... so that we don't end up here every time we
- // execute the call.
-#if defined (X86_64_JIT)
- assert(isStub &&
- "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
- " the call instruction varies too much.");
-#else
- *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
-#endif
-
- if (isStub) {
- // If this is a stub, rewrite the call into an unconditional branch
- // instruction so that two return addresses are not pushed onto the stack
- // when the requested function finally gets called. This also makes the
- // 0xCE byte (interrupt) dead, so the marker doesn't effect anything.
-#if defined (X86_64_JIT)
- // If the target address is within 32-bit range of the stub, use a
- // PC-relative branch instead of loading the actual address. (This is
- // considerably shorter than the 64-bit immediate load already there.)
- // We assume here intptr_t is 64 bits.
- intptr_t diff = NewVal-RetAddr+7;
- if (diff >= -2147483648LL && diff <= 2147483647LL) {
- *(unsigned char*)(RetAddr-0xc) = 0xE9;
- *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff;
- } else {
- *(intptr_t *)(RetAddr - 0xa) = NewVal;
- ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
- }
- sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd);
-#else
- ((unsigned char*)RetAddr)[-1] = 0xE9;
- sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5);
-#endif
- }
-
- // Change the return address to reexecute the call instruction...
-#if defined (X86_64_JIT)
- *RetAddrLoc -= 0xd;
-#else
- *RetAddrLoc -= 5;
-#endif
-}
-}
-
-TargetJITInfo::LazyResolverFn
-X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
- TsanIgnoreWritesBegin();
- JITCompilerFunction = F;
- TsanIgnoreWritesEnd();
-
-#if defined (X86_32_JIT) && !defined (_MSC_VER)
-#if defined(__SSE__)
- // SSE Callback should be called for SSE-enabled LLVM.
- return X86CompilationCallback_SSE;
-#else
- if (useSSE)
- return X86CompilationCallback_SSE;
-#endif
-#endif
-
- return X86CompilationCallback;
-}
-
-X86JITInfo::X86JITInfo(bool UseSSE) {
- useSSE = UseSSE;
- useGOT = 0;
- TLSOffset = nullptr;
-}
-
-void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
- JITCodeEmitter &JCE) {
-#if defined (X86_64_JIT)
- const unsigned Alignment = 8;
- uint8_t Buffer[8];
- uint8_t *Cur = Buffer;
- MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr);
- MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32));
-#else
- const unsigned Alignment = 4;
- uint8_t Buffer[4];
- uint8_t *Cur = Buffer;
- MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr);
-#endif
- return JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment);
-}
-
-TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
- // The 64-bit stub contains:
- // movabs r10 <- 8-byte-target-address # 10 bytes
- // call|jmp *r10 # 3 bytes
- // The 32-bit stub contains a 5-byte call|jmp.
- // If the stub is a call to the compilation callback, an extra byte is added
- // to mark it as a stub.
- StubLayout Result = {14, 4};
- return Result;
-}
-
-void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
- JITCodeEmitter &JCE) {
- // Note, we cast to intptr_t here to silence a -pedantic warning that
- // complains about casting a function pointer to a normal pointer.
-#if defined (X86_32_JIT) && !defined (_MSC_VER)
- bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
- Target != (void*)(intptr_t)X86CompilationCallback_SSE);
-#else
- bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback;
-#endif
- JCE.emitAlignment(4);
- void *Result = (void*)JCE.getCurrentPCValue();
- if (NotCC) {
-#if defined (X86_64_JIT)
- JCE.emitByte(0x49); // REX prefix
- JCE.emitByte(0xB8+2); // movabsq r10
- JCE.emitWordLE((unsigned)(intptr_t)Target);
- JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
- JCE.emitByte(0x41); // REX prefix
- JCE.emitByte(0xFF); // jmpq *r10
- JCE.emitByte(2 | (4 << 3) | (3 << 6));
-#else
- JCE.emitByte(0xE9);
- JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
-#endif
- return Result;
- }
-
-#if defined (X86_64_JIT)
- JCE.emitByte(0x49); // REX prefix
- JCE.emitByte(0xB8+2); // movabsq r10
- JCE.emitWordLE((unsigned)(intptr_t)Target);
- JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32));
- JCE.emitByte(0x41); // REX prefix
- JCE.emitByte(0xFF); // callq *r10
- JCE.emitByte(2 | (2 << 3) | (3 << 6));
-#else
- JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
-
- JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4);
-#endif
-
- // This used to use 0xCD, but that value is used by JITMemoryManager to
- // initialize the buffer with garbage, which means it may follow a
- // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929.
- JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
- return Result;
-}
-
-/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
-/// specific basic block.
-uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
-#if defined(X86_64_JIT)
- return BB - Entry;
-#else
- return BB - PICBase;
-#endif
-}
-
-template<typename T> static void addUnaligned(void *Pos, T Delta) {
- T Value;
- std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos),
- sizeof(T));
- Value += Delta;
- std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value),
- sizeof(T));
-}
-
-/// relocate - Before the JIT can run a block of code that has been emitted,
-/// it must rewrite the code to contain the actual addresses of any
-/// referenced global symbols.
-void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
- intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
- switch ((X86::RelocationType)MR->getRelocationType()) {
- case X86::reloc_pcrel_word: {
- // PC relative relocation, add the relocated value to the value already in
- // memory, after we adjust it for where the PC is.
- ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
- addUnaligned<unsigned>(RelocPos, ResultPtr);
- break;
- }
- case X86::reloc_picrel_word: {
- // PIC base relative relocation, add the relocated value to the value
- // already in memory, after we adjust it for where the PIC base is.
- ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
- addUnaligned<unsigned>(RelocPos, ResultPtr);
- break;
- }
- case X86::reloc_absolute_word:
- case X86::reloc_absolute_word_sext:
- // Absolute relocation, just add the relocated value to the value already
- // in memory.
- addUnaligned<unsigned>(RelocPos, ResultPtr);
- break;
- case X86::reloc_absolute_dword:
- addUnaligned<intptr_t>(RelocPos, ResultPtr);
- break;
- }
- }
-}
-
-char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
-#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER)
- TLSOffset -= size;
- return TLSOffset;
-#else
- llvm_unreachable("Cannot allocate thread local storage on this arch!");
-#endif
-}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
deleted file mode 100644
index 564343f..0000000
--- a/lib/Target/X86/X86JITInfo.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===-- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the X86 implementation of the TargetJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86JITINFO_H
-#define X86JITINFO_H
-
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetJITInfo.h"
-
-namespace llvm {
- class X86Subtarget;
-
- class X86JITInfo : public TargetJITInfo {
- uintptr_t PICBase;
- char *TLSOffset;
- bool useSSE;
- public:
- explicit X86JITInfo(bool UseSSE);
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
-
- /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
- /// to emit an indirect symbol which contains the address of the specified
- /// ptr.
- void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
- JITCodeEmitter &JCE) override;
-
- // getStubLayout - Returns the size and alignment of the largest call stub
- // on X86.
- StubLayout getStubLayout() override;
-
- /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
- /// small native function that simply calls the function at the specified
- /// address.
- void *emitFunctionStub(const Function* F, void *Target,
- JITCodeEmitter &JCE) override;
-
- /// getPICJumpTableEntry - Returns the value of the jumptable entry for the
- /// specific basic block.
- uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) override;
-
- /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
-
- /// relocate - Before the JIT can run a block of code that has been emitted,
- /// it must rewrite the code to contain the actual addresses of any
- /// referenced global symbols.
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) override;
-
- /// allocateThreadLocalMemory - Each target has its own way of
- /// handling thread local variables. This method returns a value only
- /// meaningful to the target.
- char* allocateThreadLocalMemory(size_t size) override;
-
- /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
- /// PIC jumptable entry.
- void setPICBase(uintptr_t Base) { PICBase = Base; }
- uintptr_t getPICBase() const { return PICBase; }
- };
-}
-
-#endif
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 2bd70a9..4e0d594 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -16,20 +16,25 @@
#include "X86RegisterInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
+#include "Utils/X86ShuffleDecode.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
@@ -58,6 +63,53 @@ private:
} // end anonymous namespace
+// Emit a minimal sequence of nops spanning NumBytes bytes.
+static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
+ const MCSubtargetInfo &STI);
+
+namespace llvm {
+ X86AsmPrinter::StackMapShadowTracker::StackMapShadowTracker(TargetMachine &TM)
+ : TM(TM), InShadow(false), RequiredShadowSize(0), CurrentShadowSize(0) {}
+
+ X86AsmPrinter::StackMapShadowTracker::~StackMapShadowTracker() {}
+
+ void
+ X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &MF) {
+ CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
+ *TM.getSubtargetImpl()->getInstrInfo(),
+ *TM.getSubtargetImpl()->getRegisterInfo(), *TM.getSubtargetImpl(),
+ MF.getContext()));
+ }
+
+ void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ if (InShadow) {
+ SmallString<256> Code;
+ SmallVector<MCFixup, 4> Fixups;
+ raw_svector_ostream VecOS(Code);
+ CodeEmitter->EncodeInstruction(Inst, VecOS, Fixups, STI);
+ VecOS.flush();
+ CurrentShadowSize += Code.size();
+ if (CurrentShadowSize >= RequiredShadowSize)
+ InShadow = false; // The shadow is big enough. Stop counting.
+ }
+ }
+
+ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
+ MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
+ if (InShadow && CurrentShadowSize < RequiredShadowSize) {
+ InShadow = false;
+ EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
+ TM.getSubtarget<X86Subtarget>().is64Bit(), STI);
+ }
+ }
+
+ void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
+ OutStreamer.EmitInstruction(Inst, getSubtargetInfo());
+ SMShadowTracker.count(Inst, getSubtargetInfo());
+ }
+} // end llvm namespace
+
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
: Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()),
@@ -72,7 +124,7 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
SmallString<128> Name;
@@ -212,7 +264,8 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
Ctx);
- if (MO.isJTI() && MAI.hasSetDirective()) {
+ if (MO.isJTI()) {
+ assert(MAI.doesSetDirectiveSuppressesReloc());
// If .set directive is supported, use it to reduce the number of
// relocations the assembler will generate for differences between
// local labels. This is only safe when the symbols are in the same
@@ -531,14 +584,38 @@ ReSimplify:
// Atomic load and store require a separate pseudo-inst because Acquire
// implies mayStore and Release implies mayLoad; fix these to regular MOV
// instructions here
- case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
- case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
- case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
- case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
- case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
- case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
- case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
- case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
+ case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
+ case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
+ case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
+ case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
+ case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
+ case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
+ case X86::RELEASE_MOV8mi: OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
+ case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
+ case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
+ case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
+ case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
+ case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
+ case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
+ case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
+ case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
+ case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
+ case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
+ case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
+ case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
+ case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
+ case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
+ case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
+ case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
+ case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
+ case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
+ case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify;
+ case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
+ case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
+ case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
+ case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify;
// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for
@@ -602,10 +679,8 @@ ReSimplify:
}
}
-static void LowerTlsAddr(MCStreamer &OutStreamer,
- X86MCInstLower &MCInstLowering,
- const MachineInstr &MI,
- const MCSubtargetInfo& STI) {
+void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
MI.getOpcode() == X86::TLS_base_addr64;
@@ -615,7 +690,7 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCContext &context = OutStreamer.getContext();
if (needsPadding)
- OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI);
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
@@ -662,12 +737,12 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
LEA.addOperand(MCOperand::CreateReg(0)); // seg
}
- OutStreamer.EmitInstruction(LEA, STI);
+ EmitAndCountInstruction(LEA);
if (needsPadding) {
- OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI);
- OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI);
- OutStreamer.EmitInstruction(MCInstBuilder(X86::REX64_PREFIX), STI);
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
}
StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
@@ -677,9 +752,9 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCSymbolRefExpr::VK_PLT,
context);
- OutStreamer.EmitInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
- : X86::CALLpcrel32)
- .addExpr(tlsRef), STI);
+ EmitAndCountInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
+ : X86::CALLpcrel32)
+ .addExpr(tlsRef));
}
/// \brief Emit the optimal amount of multi-byte nops on X86.
@@ -725,10 +800,9 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSu
break;
case X86::NOOPL:
case X86::NOOPW:
- OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg).addImm(ScaleVal)
- .addReg(IndexReg)
- .addImm(Displacement)
- .addReg(SegmentReg), STI);
+ OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg)
+ .addImm(ScaleVal).addReg(IndexReg)
+ .addImm(Displacement).addReg(SegmentReg), STI);
break;
}
} // while (NumBytes)
@@ -736,22 +810,20 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSu
// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
-static void LowerSTACKMAP(MCStreamer &OS, StackMaps &SM,
- const MachineInstr &MI, bool Is64Bit, const MCSubtargetInfo& STI) {
- unsigned NumBytes = MI.getOperand(1).getImm();
+void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
+ SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
SM.recordStackMap(MI);
- // Emit padding.
- // FIXME: These nops ensure that the stackmap's shadow is covered by
- // instructions from the same basic block, but the nops should not be
- // necessary if instructions from the same block follow the stackmap.
- EmitNops(OS, NumBytes, Is64Bit, STI);
+ unsigned NumShadowBytes = MI.getOperand(1).getImm();
+ SMShadowTracker.reset(NumShadowBytes);
}
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
-static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM,
- const MachineInstr &MI, bool Is64Bit, const MCSubtargetInfo& STI) {
- assert(Is64Bit && "Patchpoint currently only supports X86-64");
+void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI) {
+ assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
+
+ SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+
SM.recordPatchPoint(MI);
PatchPointOpers opers(&MI);
@@ -766,22 +838,111 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM,
EncodedBytes = 13;
else
EncodedBytes = 12;
- OS.EmitInstruction(MCInstBuilder(X86::MOV64ri).addReg(ScratchReg)
- .addImm(CallTarget), STI);
- OS.EmitInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg), STI);
+ EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri).addReg(ScratchReg)
+ .addImm(CallTarget));
+ EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
}
// Emit padding.
unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
- EmitNops(OS, NumBytes - EncodedBytes, Is64Bit, STI);
+ EmitNops(OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
+ getSubtargetInfo());
+}
+
+// Returns instruction preceding MBBI in MachineFunction.
+// If MBBI is the first instruction of the first basic block, returns null.
+static MachineBasicBlock::const_iterator
+PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
+ const MachineBasicBlock *MBB = MBBI->getParent();
+ while (MBBI == MBB->begin()) {
+ if (MBB == MBB->getParent()->begin())
+ return nullptr;
+ MBB = MBB->getPrevNode();
+ MBBI = MBB->end();
+ }
+ return --MBBI;
+}
+
+static const Constant *getConstantFromPool(const MachineInstr &MI,
+ const MachineOperand &Op) {
+ if (!Op.isCPI())
+ return nullptr;
+
+ ArrayRef<MachineConstantPoolEntry> Constants =
+ MI.getParent()->getParent()->getConstantPool()->getConstants();
+ const MachineConstantPoolEntry &ConstantEntry =
+ Constants[Op.getIndex()];
+
+ // Bail if this is a machine constant pool entry; we won't be able to dig out
+ // anything useful.
+ if (ConstantEntry.isMachineConstantPoolEntry())
+ return nullptr;
+
+ auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal);
+ assert((!C || ConstantEntry.getType() == C->getType()) &&
+ "Expected a constant of the same type!");
+ return C;
+}
+
+static std::string getShuffleComment(const MachineOperand &DstOp,
+ const MachineOperand &SrcOp,
+ ArrayRef<int> Mask) {
+ std::string Comment;
+
+ // Compute the name for a register. This is really goofy because we have
+ // multiple instruction printers that could (in theory) use different
+ // names. Fortunately most people use the ATT style (outside of Windows)
+ // and they actually agree on register naming here. Ultimately, this is
+ // a comment, and so it's OK if it isn't perfect.
+ auto GetRegisterName = [](unsigned RegNum) -> StringRef {
+ return X86ATTInstPrinter::getRegisterName(RegNum);
+ };
+
+ StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
+ StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem";
+
+ raw_string_ostream CS(Comment);
+ CS << DstName << " = ";
+ bool NeedComma = false;
+ bool InSrc = false;
+ for (int M : Mask) {
+ // Wrap up any prior entry...
+ if (M == SM_SentinelZero && InSrc) {
+ InSrc = false;
+ CS << "]";
+ }
+ if (NeedComma)
+ CS << ",";
+ else
+ NeedComma = true;
+
+ // Print this shuffle...
+ if (M == SM_SentinelZero) {
+ CS << "zero";
+ } else {
+ if (!InSrc) {
+ InSrc = true;
+ CS << SrcName << "[";
+ }
+ if (M == SM_SentinelUndef)
+ CS << "u";
+ else
+ CS << M;
+ }
+ }
+ if (InSrc)
+ CS << "]";
+ CS.flush();
+
+ return Comment;
}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
- const X86RegisterInfo *RI =
- static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
+ const X86RegisterInfo *RI = static_cast<const X86RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
@@ -812,7 +973,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TLS_addr64:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
- return LowerTlsAddr(OutStreamer, MCInstLowering, *MI, getSubtargetInfo());
+ return LowerTlsAddr(MCInstLowering, *MI);
case X86::MOVPC32r: {
// This is a pseudo op for a two instruction sequence with a label, which
@@ -825,15 +986,15 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::CALLpcrel32)
+ EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32)
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
// popl $reg
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::POP32r)
- .addReg(MI->getOperand(0).getReg()));
+ EmitAndCountInstruction(MCInstBuilder(X86::POP32r)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
@@ -863,7 +1024,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
DotExpr, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::ADD32ri)
+ EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(DotExpr));
@@ -871,21 +1032,21 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(OutStreamer, SM, *MI, Subtarget->is64Bit(), getSubtargetInfo());
+ return LowerSTACKMAP(*MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit(), getSubtargetInfo());
+ return LowerPATCHPOINT(*MI);
case X86::MORESTACK_RET:
- EmitToStreamer(OutStreamer, MCInstBuilder(getRetOpcode(*Subtarget)));
+ EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
return;
case X86::MORESTACK_RET_RESTORE_R10:
// Return, then restore R10.
- EmitToStreamer(OutStreamer, MCInstBuilder(getRetOpcode(*Subtarget)));
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::MOV64rr)
- .addReg(X86::R10)
- .addReg(X86::RAX));
+ EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
+ EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(X86::R10)
+ .addReg(X86::RAX));
return;
case X86::SEH_PushReg:
@@ -918,9 +1079,151 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::SEH_EndPrologue:
OutStreamer.EmitWinCFIEndProlog();
return;
+
+ case X86::SEH_Epilogue: {
+ MachineBasicBlock::const_iterator MBBI(MI);
+ // Check if preceded by a call and emit nop if so.
+ for (MBBI = PrevCrossBBInst(MBBI); MBBI; MBBI = PrevCrossBBInst(MBBI)) {
+ // Conservatively assume that pseudo instructions don't emit code and keep
+ // looking for a call. We may emit an unnecessary nop in some cases.
+ if (!MBBI->isPseudo()) {
+ if (MBBI->isCall())
+ EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
+ break;
+ }
+ }
+ return;
+ }
+
+ // Lower PSHUFB and VPERMILP normally but add a comment if we can find
+ // a constant shuffle mask. We won't be able to do this at the MC layer
+ // because the mask isn't an immediate.
+ case X86::PSHUFBrm:
+ case X86::VPSHUFBrm:
+ case X86::VPSHUFBYrm: {
+ if (!OutStreamer.isVerboseAsm())
+ break;
+ assert(MI->getNumOperands() > 5 &&
+ "We should always have at least 5 operands!");
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ const MachineOperand &MaskOp = MI->getOperand(5);
+
+ if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ SmallVector<int, 16> Mask;
+ DecodePSHUFBMask(C, Mask);
+ if (!Mask.empty())
+ OutStreamer.AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ }
+ break;
+ }
+ case X86::VPERMILPSrm:
+ case X86::VPERMILPDrm:
+ case X86::VPERMILPSYrm:
+ case X86::VPERMILPDYrm: {
+ if (!OutStreamer.isVerboseAsm())
+ break;
+ assert(MI->getNumOperands() > 5 &&
+ "We should always have at least 5 operands!");
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ const MachineOperand &MaskOp = MI->getOperand(5);
+
+ if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ SmallVector<int, 16> Mask;
+ DecodeVPERMILPMask(C, Mask);
+ if (!Mask.empty())
+ OutStreamer.AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ }
+ break;
+ }
+
+ // For loads from a constant pool to a vector register, print the constant
+ // loaded.
+ case X86::MOVAPDrm:
+ case X86::VMOVAPDrm:
+ case X86::VMOVAPDYrm:
+ case X86::MOVUPDrm:
+ case X86::VMOVUPDrm:
+ case X86::VMOVUPDYrm:
+ case X86::MOVAPSrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVAPSYrm:
+ case X86::MOVUPSrm:
+ case X86::VMOVUPSrm:
+ case X86::VMOVUPSYrm:
+ case X86::MOVDQArm:
+ case X86::VMOVDQArm:
+ case X86::VMOVDQAYrm:
+ case X86::MOVDQUrm:
+ case X86::VMOVDQUrm:
+ case X86::VMOVDQUYrm:
+ if (!OutStreamer.isVerboseAsm())
+ break;
+ if (MI->getNumOperands() > 4)
+ if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ CS << "[";
+ for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
+ if (i != 0)
+ CS << ",";
+ if (CDS->getElementType()->isIntegerTy())
+ CS << CDS->getElementAsInteger(i);
+ else if (CDS->getElementType()->isFloatTy())
+ CS << CDS->getElementAsFloat(i);
+ else if (CDS->getElementType()->isDoubleTy())
+ CS << CDS->getElementAsDouble(i);
+ else
+ CS << "?";
+ }
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
+ } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ CS << "<";
+ for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
+ if (i != 0)
+ CS << ",";
+ Constant *COp = CV->getOperand(i);
+ if (isa<UndefValue>(COp)) {
+ CS << "u";
+ } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
+ CS << CI->getZExtValue();
+ } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
+ SmallString<32> Str;
+ CF->getValueAPF().toString(Str);
+ CS << Str;
+ } else {
+ CS << "?";
+ }
+ }
+ CS << ">";
+ OutStreamer.AddComment(CS.str());
+ }
+ }
+ break;
}
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
+
+ // Stackmap shadows cannot include branch targets, so we can count the bytes
+ // in a call towards the shadow, but must ensure that no thread returns
+ // into the stackmap shadow. The only way to achieve this is if the call
+ // is at the end of the shadow.
+ if (MI->isCall()) {
+ // Count the size of the call towards the shadow
+ SMShadowTracker.count(TmpInst, getSubtargetInfo());
+ // Then flush the shadow so that we fill with nops before the call, not
+ // after it.
+ SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo());
+ // Then emit the call
+ OutStreamer.EmitInstruction(TmpInst, getSubtargetInfo());
+ return;
+ }
+
+ EmitAndCountInstruction(TmpInst);
}
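
The shadow bookkeeping above reduces to a small state machine: a STACKMAP arms the tracker with a byte budget, every later instruction in the block is measured and subtracted from that budget, and any label or call that could become a branch target first flushes the remainder as nops. A minimal standalone sketch of that accounting follows; it is illustrative only, and EncodedSize below is a stand-in for measuring an MCInst with the MCCodeEmitter as the patch does.

#include <cstdio>

struct ShadowTracker {
  bool InShadow = false;
  unsigned Required = 0, Current = 0;

  // Called when a STACKMAP is lowered: arm the tracker with its shadow size.
  void reset(unsigned RequiredBytes) {
    InShadow = true;
    Required = RequiredBytes;
    Current = 0;
  }

  // Called for every instruction emitted afterwards, with its encoded size.
  void count(unsigned EncodedSize) {
    if (!InShadow)
      return;
    Current += EncodedSize;
    if (Current >= Required)
      InShadow = false; // Shadow already covered; stop counting.
  }

  // Called before a label or a call: how many nop bytes are still owed?
  unsigned padding() {
    if (!InShadow || Current >= Required)
      return 0;
    InShadow = false;
    return Required - Current;
  }
};

int main() {
  ShadowTracker T;
  T.reset(8);  // stackmap requests an 8-byte shadow
  T.count(3);  // a 3-byte instruction follows in the same block
  std::printf("nop bytes needed: %u\n", T.padding()); // prints 5
  return 0;
}

In the patch the same flush happens just before each call, so the nops land before the call rather than after it while the call's own bytes still count toward the shadow.
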
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 78d20ce..79a51b3 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -11,10 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86MACHINEFUNCTIONINFO_H
-#define X86MACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include <vector>
namespace llvm {
@@ -70,6 +72,22 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
unsigned NumLocalDynamics;
public:
+ /// Describes a register that needs to be forwarded from the prologue to a
+ /// musttail call.
+ struct Forward {
+ Forward(unsigned VReg, MCPhysReg PReg, MVT VT)
+ : VReg(VReg), PReg(PReg), VT(VT) {}
+ unsigned VReg;
+ MCPhysReg PReg;
+ MVT VT;
+ };
+
+private:
+ /// ForwardedMustTailRegParms - A list of virtual and physical registers
+ /// that must be forwarded to every musttail call.
+ std::vector<Forward> ForwardedMustTailRegParms;
+
+public:
X86MachineFunctionInfo() : ForceFramePointer(false),
CalleeSavedFrameSize(0),
BytesToPopOnReturn(0),
@@ -138,6 +156,9 @@ public:
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+ std::vector<Forward> &getForwardedMustTailRegParms() {
+ return ForwardedMustTailRegParms;
+ }
};
} // End llvm namespace
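
The Forward records added here are just (virtual register, physical register, type) triples captured in the prologue so that every musttail call can restore the argument registers it must pass through. Stripped of MachineIR details, the pattern is record-and-replay; the sketch below uses plain integers as stand-ins for register numbers and is not LLVM API.

#include <cstdio>
#include <vector>

// Illustrative stand-in for the Forward record: the real one pairs a virtual
// register with an MCPhysReg and an MVT.
struct Forward { unsigned VReg, PReg; };

int main() {
  std::vector<Forward> Forwards;

  // Prologue: copy each incoming argument register into a fresh virtual
  // register so intervening code cannot clobber the value.
  const unsigned ArgRegs[] = {5, 6, 7}; // pretend RDI/RSI/RDX
  unsigned NextVReg = 100;
  for (unsigned PReg : ArgRegs)
    Forwards.push_back({NextVReg++, PReg});

  // Every musttail call site: copy the saved values back into their original
  // physical registers immediately before the tail call.
  for (const Forward &F : Forwards)
    std::printf("copy p%u <- v%u before the musttail call\n", F.PReg, F.VReg);
  return 0;
}
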
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 6639875..adc05b2 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -105,7 +105,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
if (!TM->getSubtarget<X86Subtarget>().padShortFunctions())
return false;
- TII = TM->getInstrInfo();
+ TII = TM->getSubtargetImpl()->getInstrInfo();
// Search through basic blocks and mark the ones that have early returns
ReturnBBs.clear();
@@ -195,7 +195,8 @@ bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
return true;
}
- CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI);
+ CyclesToEnd += TII->getInstrLatency(
+ TM->getSubtargetImpl()->getInstrItineraryData(), MI);
}
VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
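
The change in this file only affects where the itinerary data is fetched from; the pass itself still walks each block accumulating per-instruction latencies until it either reaches a return (a candidate for padding) or crosses the threshold. A hedged, self-contained sketch of that cost walk, with a made-up latency table and threshold rather than the pass's real values:

#include <cstdio>
#include <vector>

// Each entry is one instruction: its latency in cycles and whether it returns.
struct Inst { unsigned Latency; bool IsReturn; };

// Accumulate latencies until a return or the threshold is reached. Returns
// true if a return was found first, i.e. the block is a padding candidate.
static bool cyclesUntilReturn(const std::vector<Inst> &Block,
                              unsigned Threshold, unsigned &Cycles) {
  Cycles = 0;
  for (const Inst &I : Block) {
    if (I.IsReturn)
      return true;
    Cycles += I.Latency;
    if (Cycles >= Threshold)
      return false; // Already long enough; no padding needed.
  }
  return false;
}

int main() {
  const unsigned Threshold = 4;
  std::vector<Inst> Block = {{1, false}, {2, false}, {0, true}};
  unsigned Cycles;
  if (cyclesUntilReturn(Block, Threshold, Cycles))
    std::printf("pad %u cycles of nops before the return\n",
                Threshold - Cycles);
  return 0;
}
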
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index e8a7e84..a4a366d 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -68,8 +68,10 @@ X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI)
if (Is64Bit) {
SlotSize = 8;
- StackPtr = X86::RSP;
- FramePtr = X86::RBP;
+ StackPtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ?
+ X86::RSP : X86::ESP;
+ FramePtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ?
+ X86::RBP : X86::EBP;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
@@ -120,7 +122,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass*
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
// Don't allow super-classes of GR8_NOREX. This class is only used after
- // extrating sub_8bit_hi sub-registers. The H sub-registers cannot be copied
+ // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
// to the full GR8 register class in 64-bit mode, so we cannot allow the
// register class inflation.
//
@@ -196,7 +198,7 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
@@ -324,7 +326,7 @@ X86RegisterInfo::getNoPreservedMask() const {
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
// Set the stack-pointer register and its aliases as reserved.
for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
@@ -441,7 +443,8 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ MF.getSubtarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -456,13 +459,9 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (Reg == FramePtr && TFI->hasFP(MF)) {
- FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
- return true;
- }
- return false;
+ // Since X86 defines assignCalleeSavedSpillSlots, which always returns true,
+ // this function is neither used nor tested.
+ llvm_unreachable("Unused function on X86. Otherwise need a test case.");
}
void
@@ -473,7 +472,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr;
@@ -488,6 +487,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else
BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+ // For LEA64_32r when BasePtr is 32 bits (X32), we can use the full-size
+ // 64-bit register as the source operand; the semantics are the same and the
+ // destination is 32 bits. This saves one byte per lea since the 0x67 prefix
+ // is avoided.
+ if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
+ BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false);
+
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
@@ -526,7 +531,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
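
The LEA64_32r tweak above rests on an encoding detail: in 64-bit mode an address formed from a 32-bit base register needs the 0x67 address-size prefix, while the same address formed from the 64-bit super-register does not, and the 32-bit destination is truncated identically either way. A toy mapping from a 32-bit GPR name to its 64-bit super-register (a hypothetical helper, not getX86SubSuperRegister) illustrates the substitution:

#include <cstdio>
#include <map>
#include <string>

// Hypothetical stand-in for getX86SubSuperRegister(Reg, MVT::i64):
// map a 32-bit GPR to its 64-bit super-register.
static std::string to64BitSuperReg(const std::string &Reg32) {
  static const std::map<std::string, std::string> Super = {
      {"eax", "rax"}, {"ebx", "rbx"}, {"ecx", "rcx"}, {"edx", "rdx"},
      {"esp", "rsp"}, {"ebp", "rbp"}, {"esi", "rsi"}, {"edi", "rdi"}};
  auto It = Super.find(Reg32);
  return It == Super.end() ? Reg32 : It->second;
}

int main() {
  // "lea eax, [ebp+8]" needs a 0x67 prefix in 64-bit mode;
  // "lea eax, [rbp+8]" computes the same 32-bit result without it.
  std::printf("base %s -> %s (drops the 0x67 prefix)\n",
              "ebp", to64BitSuperReg("ebp").c_str());
  return 0;
}
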
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 74efd1f..cc0a7b2 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86REGISTERINFO_H
-#define X86REGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_X86_X86REGISTERINFO_H
+#define LLVM_LIB_TARGET_X86_X86REGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 33c402b..311a717 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -166,6 +166,7 @@ def FP3 : X86Reg<"fp3", 0>;
def FP4 : X86Reg<"fp4", 0>;
def FP5 : X86Reg<"fp5", 0>;
def FP6 : X86Reg<"fp6", 0>;
+def FP7 : X86Reg<"fp7", 0>;
// XMM Registers, used by the various SSE instruction set extensions.
def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
@@ -234,22 +235,18 @@ let SubRegIndices = [sub_ymm] in {
def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>;
def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>;
-class STRegister<string n, bits<16> Enc, list<Register> A> : X86Reg<n, Enc> {
- let Aliases = A;
-}
-
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
// way both kinds can be live without exceeding the stack depth. ST registers
// are only live around inline assembly.
-def ST0 : STRegister<"st(0)", 0, []>, DwarfRegNum<[33, 12, 11]>;
-def ST1 : STRegister<"st(1)", 1, [FP6]>, DwarfRegNum<[34, 13, 12]>;
-def ST2 : STRegister<"st(2)", 2, [FP5]>, DwarfRegNum<[35, 14, 13]>;
-def ST3 : STRegister<"st(3)", 3, [FP4]>, DwarfRegNum<[36, 15, 14]>;
-def ST4 : STRegister<"st(4)", 4, [FP3]>, DwarfRegNum<[37, 16, 15]>;
-def ST5 : STRegister<"st(5)", 5, [FP2]>, DwarfRegNum<[38, 17, 16]>;
-def ST6 : STRegister<"st(6)", 6, [FP1]>, DwarfRegNum<[39, 18, 17]>;
-def ST7 : STRegister<"st(7)", 7, [FP0]>, DwarfRegNum<[40, 19, 18]>;
+def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
+def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
+def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
+def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
+def ST4 : X86Reg<"st(4)", 4>, DwarfRegNum<[37, 16, 15]>;
+def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>;
+def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
+def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
// Floating-point status word
def FPSW : X86Reg<"fpsw", 0>;
@@ -449,7 +446,7 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
}
// AVX-512 vector/mask registers.
-def VR512 : RegisterClass<"X86", [v16f32, v8f64, v16i32, v8i64], 512,
+def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512,
(sequence "ZMM%u", 0, 31)>;
// Scalar AVX-512 floating point registers.
@@ -463,13 +460,19 @@ def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
256, (sequence "YMM%u", 0, 31)>;
-// The size of the all masked registers is 16 bit because we have only one
-// KMOVW istruction that can store this register in memory, and it writes 2 bytes
-def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)>;
-def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK1)> {let Size = 16;}
+// Mask registers
+def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
+def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
+def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
+def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
+def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
+def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;}
+def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
+def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
-def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
-
+def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
+def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
+def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
deleted file mode 100644
index 0333056..0000000
--- a/lib/Target/X86/X86Relocations.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- X86Relocations.h - X86 Code Relocations -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the X86 target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86RELOCATIONS_H
-#define X86RELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace X86 {
- /// RelocationType - An enum for the x86 relocation codes. Note that
- /// the terminology here doesn't follow x86 convention - word means
- /// 32-bit and dword means 64-bit. The relocations will be treated
- /// by JIT or ObjectCode emitters, this is transparent to the x86 code
- /// emitter but JIT and ObjectCode will treat them differently
- enum RelocationType {
- /// reloc_pcrel_word - PC relative relocation, add the relocated value to
- /// the value already in memory, after we adjust it for where the PC is.
- reloc_pcrel_word = 0,
-
- /// reloc_picrel_word - PIC base relative relocation, add the relocated
- /// value to the value already in memory, after we adjust it for where the
- /// PIC base is.
- reloc_picrel_word = 1,
-
- /// reloc_absolute_word - absolute relocation, just add the relocated
- /// value to the value already in memory.
- reloc_absolute_word = 2,
-
- /// reloc_absolute_word_sext - absolute relocation, just add the relocated
- /// value to the value already in memory. In object files, it represents a
- /// value which must be sign-extended when resolving the relocation.
- reloc_absolute_word_sext = 3,
-
- /// reloc_absolute_dword - absolute relocation, just add the relocated
- /// value to the value already in memory.
- reloc_absolute_dword = 4
- };
- }
-}
-
-#endif
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 6966d61..73a3230 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -48,13 +48,17 @@ def HWPort6 : ProcResource<1>;
def HWPort7 : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
+def HWPort01 : ProcResGroup<[HWPort0, HWPort1]>;
def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
+def HWPort04 : ProcResGroup<[HWPort0, HWPort4]>;
def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
-def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>;
+def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>;
def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
def HWPort16 : ProcResGroup<[HWPort1, HWPort6]>;
+def HWPort56 : ProcResGroup<[HWPort5, HWPort6]>;
def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
+def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
// 60 Entry Unified Scheduler
@@ -125,6 +129,7 @@ defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
defm : HWWriteResPair<WriteFRcp, HWPort0, 5>;
+defm : HWWriteResPair<WriteFRsqrt, HWPort0, 5>;
defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
@@ -261,4 +266,1882 @@ def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
def : WriteRes<WriteNop, []>;
+
+//================ Exceptions ================//
+
+//-- Specific Scheduling Models --//
+
+// Starting with P0.
+def WriteP0 : SchedWriteRes<[HWPort0]>;
+
+def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+
+def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+
+def WriteP01 : SchedWriteRes<[HWPort01]>;
+
+def Write2P01 : SchedWriteRes<[HWPort01]> {
+ let NumMicroOps = 2;
+}
+def Write3P01 : SchedWriteRes<[HWPort01]> {
+ let NumMicroOps = 3;
+}
+
+def WriteP015 : SchedWriteRes<[HWPort015]>;
+
+def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> {
+ let NumMicroOps = 2;
+}
+def WriteP06 : SchedWriteRes<[HWPort06]>;
+
+def Write2P06 : SchedWriteRes<[HWPort06]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+
+def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+
+def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
+ let NumMicroOps = 2;
+}
+
+def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+
+def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> {
+ let Latency = 6;
+ let ResourceCycles = [2, 1];
+}
+
+def Write5P0156 : SchedWriteRes<[HWPort0156]> {
+ let NumMicroOps = 5;
+ let ResourceCycles = [5];
+}
+
+def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 2, 1];
+}
+
+def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
+ let Latency = 1;
+ let ResourceCycles = [2, 2, 1];
+}
+
+def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
+ let Latency = 1;
+ let ResourceCycles = [3, 2, 1];
+}
+
+// Starting with P1.
+def WriteP1 : SchedWriteRes<[HWPort1]>;
+
+def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
+ let NumMicroOps = 2;
+}
+def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> {
+ let Latency = 3;
+}
+def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> {
+ let Latency = 7;
+}
+
+def Write2P1 : SchedWriteRes<[HWPort1]> {
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def Write2P1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def WriteP15 : SchedWriteRes<[HWPort15]>;
+def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> {
+ let Latency = 4;
+}
+
+def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+
+def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+
+def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+
+def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+
+// Starting with P2.
+def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> {
+ let Latency = 1;
+ let ResourceCycles = [2, 1];
+}
+
+// Starting with P5.
+def WriteP5 : SchedWriteRes<[HWPort5]>;
+def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+
+// Notation:
+// - r: register.
+// - mm: 64 bit mmx register.
+// - x: 128 bit xmm register.
+// - (x)mm: mmx or xmm register.
+// - y: 256 bit ymm register.
+// - v: any vector register.
+// - m: memory.
+
+//=== Integer Instructions ===//
+//-- Move instructions --//
+
+// MOV.
+// r16,m.
+def : InstRW<[WriteALULd], (instregex "MOV16rm")>;
+
+// MOVSX, MOVZX.
+// r,m.
+def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
+
+// CMOVcc.
+// r,r.
+def : InstRW<[Write2P0156_Lat2],
+ (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;
+// r,m.
+def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
+ (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;
+
+// XCHG.
+// r,r.
+def WriteXCHG : SchedWriteRes<[HWPort0156]> {
+ let Latency = 2;
+ let ResourceCycles = [3];
+}
+
+def : InstRW<[WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
+
+// r,m.
+def WriteXCHGrm : SchedWriteRes<[]> {
+ let Latency = 21;
+ let NumMicroOps = 8;
+}
+def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>;
+
+// XLAT.
+def WriteXLAT : SchedWriteRes<[]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteXLAT], (instregex "XLAT")>;
+
+// PUSH.
+// m.
+def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>;
+
+// PUSHF.
+def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>;
+
+// PUSHA.
+def WritePushA : SchedWriteRes<[]> {
+ let NumMicroOps = 19;
+}
+def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>;
+
+// POP.
+// m.
+def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>;
+
+// POPF.
+def WritePopF : SchedWriteRes<[]> {
+ let NumMicroOps = 9;
+}
+def : InstRW<[WritePopF], (instregex "POPF(16|32)")>;
+
+// POPA.
+def WritePopA : SchedWriteRes<[]> {
+ let NumMicroOps = 18;
+}
+def : InstRW<[WritePopA], (instregex "POPA(16|32)")>;
+
+// LAHF SAHF.
+def : InstRW<[WriteP06], (instregex "(S|L)AHF")>;
+
+// BSWAP.
+// r32.
+def WriteBSwap32 : SchedWriteRes<[HWPort15]>;
+def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>;
+
+// r64.
+def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>;
+
+// MOVBE.
+// r16,m16 / r64,m64.
+def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>;
+
+// r32, m32.
+def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>;
+
+// m16,r16.
+def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>;
+
+// m32,r32.
+def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>;
+
+// m64,r64.
+def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>;
+
+//-- Arithmetic instructions --//
+
+// ADD SUB.
+// m,r/i.
+def : InstRW<[Write2P0156_2P237_P4],
+ (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
+ "(ADD|SUB)(8|16|32|64)mi8", "(ADD|SUB)64mi32")>;
+
+// ADC SBB.
+// r,r/i.
+def : InstRW<[Write2P0156_Lat2], (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
+ "(ADC|SBB)(16|32|64)ri8",
+ "(ADC|SBB)64ri32",
+ "(ADC|SBB)(8|16|32|64)rr_REV")>;
+
+// r,m.
+def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd], (instregex "(ADC|SBB)(8|16|32|64)rm")>;
+
+// m,r/i.
+def : InstRW<[Write3P0156_2P237_P4],
+ (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
+ "(ADC|SBB)(16|32|64)mi8",
+ "(ADC|SBB)64mi32")>;
+
+// INC DEC NOT NEG.
+// m.
+def : InstRW<[WriteP0156_2P237_P4],
+ (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
+ "(INC|DEC)64(16|32)m")>;
+
+// MUL IMUL.
+// r16.
+def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>;
+
+// m16.
+def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>;
+
+// r32.
+def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>;
+
+// m32.
+def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>;
+
+// r64.
+def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>;
+
+// m64.
+def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>;
+
+// r16,r16.
+def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;
+
+// r16,m16.
+def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>;
+
+// MULX.
+// r32,r32,r32.
+def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteMulX32], (instregex "MULX32rr")>;
+
+// r32,r32,m32.
+def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1];
+}
+def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>;
+
+// r64,r64,r64.
+def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteMulX64], (instregex "MULX64rr")>;
+
+// r64,r64,m64.
+def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>;
+
+// DIV.
+// r8.
+def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 22;
+ let NumMicroOps = 9;
+}
+def : InstRW<[WriteDiv8], (instregex "DIV8r")>;
+
+// r16.
+def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 23;
+ let NumMicroOps = 10;
+}
+def : InstRW<[WriteDiv16], (instregex "DIV16r")>;
+
+// r32.
+def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 22;
+ let NumMicroOps = 10;
+}
+def : InstRW<[WriteDiv32], (instregex "DIV32r")>;
+
+// r64.
+def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 32;
+ let NumMicroOps = 36;
+}
+def : InstRW<[WriteDiv64], (instregex "DIV64r")>;
+
+// IDIV.
+// r8.
+def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 23;
+ let NumMicroOps = 9;
+}
+def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>;
+
+// r16.
+def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 23;
+ let NumMicroOps = 10;
+}
+def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>;
+
+// r32.
+def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 22;
+ let NumMicroOps = 9;
+}
+def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;
+
+// r64.
+def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+ let Latency = 39;
+ let NumMicroOps = 59;
+}
+def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;
+
+//-- Logic instructions --//
+
+// AND OR XOR.
+// m,r/i.
+def : InstRW<[Write2P0156_2P237_P4],
+ (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
+ "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
+
+// SHR SHL SAR.
+// m,i.
+def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
+
+// r,cl.
+def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>;
+
+// m,cl.
+def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> {
+ let NumMicroOps = 6;
+ let ResourceCycles = [3, 2, 1];
+}
+def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>;
+
+// ROR ROL.
+// r,1.
+def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>;
+
+// m,i.
+def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 2, 1];
+}
+def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>;
+
+// r,cl.
+def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>;
+
+// m,cl.
+def WriteRotateRMWCL : SchedWriteRes<[]> {
+ let NumMicroOps = 6;
+}
+def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>;
+
+// RCR RCL.
+// r,1.
+def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>;
+
+// m,1.
+def WriteRCm1 : SchedWriteRes<[]> {
+ let NumMicroOps = 6;
+}
+def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>;
+
+// r,i.
+def WriteRCri : SchedWriteRes<[HWPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+}
+def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;
+
+// m,i.
+def WriteRCmi : SchedWriteRes<[]> {
+ let NumMicroOps = 11;
+}
+def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;
+
+// SHRD SHLD.
+// r,r,i.
+def WriteShDrr : SchedWriteRes<[HWPort1]> {
+ let Latency = 3;
+}
+def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>;
+
+// m,r,i.
+def WriteShDmr : SchedWriteRes<[]> {
+ let NumMicroOps = 5;
+}
+def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>;
+
+// r,r,cl.
+def WriteShlDCL : SchedWriteRes<[HWPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>;
+
+// r,r,cl.
+def WriteShrDCL : SchedWriteRes<[HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>;
+
+// m,r,cl.
+def WriteShDmrCL : SchedWriteRes<[]> {
+ let NumMicroOps = 7;
+}
+def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>;
+
+// BT.
+// r,r/i.
+def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;
+
+// m,r.
+def WriteBTmr : SchedWriteRes<[]> {
+ let NumMicroOps = 10;
+}
+def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>;
+
+// m,i.
+def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
+
+// BTR BTS BTC.
+// r,r,i.
+def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
+
+// m,r.
+def WriteBTRSCmr : SchedWriteRes<[]> {
+ let NumMicroOps = 11;
+}
+def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;
+
+// m,i.
+def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>;
+
+// BSF BSR.
+// r,r.
+def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>;
+// r,m.
+def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>;
+
+// SETcc.
+// r.
+def : InstRW<[WriteShift],
+ (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
+// m.
+def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteSetCCm],
+ (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;
+
+// CLD STD.
+def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>;
+
+// LZCNT TZCNT.
+// r,r.
+def : InstRW<[WriteP1_Lat3], (instregex "(L|T)ZCNT(16|32|64)rr")>;
+// r,m.
+def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|T)ZCNT(16|32|64)rm")>;
+
+// ANDN.
+// r,r.
+def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>;
+// r,m.
+def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>;
+
+// BLSI BLSMSK BLSR.
+// r,r.
+def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>;
+// r,m.
+def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
+
+// BEXTR.
+// r,r,r.
+def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>;
+// r,m,r.
+def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>;
+
+// BZHI.
+// r,r,r.
+def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>;
+// r,m,r.
+def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>;
+
+// PDEP PEXT.
+// r,r,r.
+def : InstRW<[WriteP1_Lat3], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
+// r,m,r.
+def : InstRW<[WriteP1_Lat3Ld], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
+
+//-- Control transfer instructions --//
+
+// J(E|R)CXZ.
+def WriteJCXZ : SchedWriteRes<[HWPort0156, HWPort6]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>;
+
+// LOOP.
+def WriteLOOP : SchedWriteRes<[]> {
+ let NumMicroOps = 7;
+}
+def : InstRW<[WriteLOOP], (instregex "LOOP")>;
+
+// LOOP(N)E
+def WriteLOOPE : SchedWriteRes<[]> {
+ let NumMicroOps = 11;
+}
+def : InstRW<[WriteLOOPE], (instregex "LOOPE", "LOOPNE")>;
+
+// CALL.
+// r.
+def WriteCALLr : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteCALLr], (instregex "CALL(16|32)r")>;
+
+// m.
+def WriteCALLm : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteCALLm], (instregex "CALL(16|32)m")>;
+
+// RET.
+def WriteRET : SchedWriteRes<[HWPort237, HWPort6]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)")>;
+
+// i.
+def WriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1];
+}
+def : InstRW<[WriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>;
+
+// BOUND.
+// r,m.
+def WriteBOUND : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteBOUND], (instregex "BOUNDS(16|32)rm")>;
+
+// INTO.
+def WriteINTO : SchedWriteRes<[]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteINTO], (instregex "INTO")>;
+
+//-- String instructions --//
+
+// LODSB/W.
+def : InstRW<[Write2P0156_P23], (instregex "LODS(B|W)")>;
+
+// LODSD/Q.
+def : InstRW<[WriteP0156_P23], (instregex "LODS(L|Q)")>;
+
+// STOS.
+def WriteSTOS : SchedWriteRes<[HWPort23, HWPort0156, HWPort4]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteSTOS], (instregex "STOS(B|L|Q|W)")>;
+
+// MOVS.
+def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 1, 2];
+}
+def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>;
+
+// SCAS.
+def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>;
+
+// CMPS.
+def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2, 3];
+}
+def : InstRW<[WriteCMPS], (instregex "CMPS(B|L|Q|W)")>;
+
+//-- Synchronization instructions --//
+
+// XADD.
+def WriteXADD : SchedWriteRes<[]> {
+ let NumMicroOps = 5;
+}
+def : InstRW<[WriteXADD], (instregex "XADD(8|16|32|64)rm")>;
+
+// CMPXCHG.
+def WriteCMPXCHG : SchedWriteRes<[]> {
+ let NumMicroOps = 6;
+}
+def : InstRW<[WriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
+
+// CMPXCHG8B.
+def WriteCMPXCHG8B : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteCMPXCHG8B], (instregex "CMPXCHG8B")>;
+
+// CMPXCHG16B.
+def WriteCMPXCHG16B : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteCMPXCHG16B], (instregex "CMPXCHG16B")>;
+
+//-- Other --//
+
+// PAUSE.
+def WritePAUSE : SchedWriteRes<[HWPort05, HWPort6]> {
+ let NumMicroOps = 5;
+ let ResourceCycles = [1, 3];
+}
+def : InstRW<[WritePAUSE], (instregex "PAUSE")>;
+
+// LEAVE.
+def : InstRW<[Write2P0156_P23], (instregex "LEAVE")>;
+
+// XGETBV.
+def WriteXGETBV : SchedWriteRes<[]> {
+ let NumMicroOps = 8;
+}
+def : InstRW<[WriteXGETBV], (instregex "XGETBV")>;
+
+// RDTSC.
+def WriteRDTSC : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteRDTSC], (instregex "RDTSC")>;
+
+// RDPMC.
+def WriteRDPMC : SchedWriteRes<[]> {
+ let NumMicroOps = 34;
+}
+def : InstRW<[WriteRDPMC], (instregex "RDPMC")>;
+
+// RDRAND.
+def WriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
+ let NumMicroOps = 17;
+ let ResourceCycles = [1, 16];
+}
+def : InstRW<[WriteRDRAND], (instregex "RDRAND(16|32|64)r")>;
+
+//=== Floating Point x87 Instructions ===//
+//-- Move instructions --//
+
+// FLD.
+// m80.
+def : InstRW<[WriteP01], (instregex "LD_Frr")>;
+
+def WriteLD_F80m : SchedWriteRes<[HWPort01, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 2];
+}
+def : InstRW<[WriteLD_F80m], (instregex "LD_F80m")>;
+
+// FBLD.
+// m80.
+def WriteFBLD : SchedWriteRes<[]> {
+ let Latency = 47;
+ let NumMicroOps = 43;
+}
+def : InstRW<[WriteFBLD], (instregex "FBLDm")>;
+
+// FST(P).
+// r.
+def : InstRW<[WriteP01], (instregex "ST_(F|FP)rr")>;
+
+// m80.
+def WriteST_FP80m : SchedWriteRes<[HWPort0156, HWPort23, HWPort4]> {
+ let NumMicroOps = 7;
+ let ResourceCycles = [3, 2, 2];
+}
+def : InstRW<[WriteST_FP80m], (instregex "ST_FP80m")>;
+
+// FBSTP.
+// m80.
+def WriteFBSTP : SchedWriteRes<[]> {
+ let NumMicroOps = 226;
+}
+def : InstRW<[WriteFBSTP], (instregex "FBSTPm")>;
+
+// FXCHG.
+def : InstRW<[WriteNop], (instregex "XCH_F")>;
+
+// FILD.
+def WriteFILD : SchedWriteRes<[HWPort01, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteFILD], (instregex "ILD_F(16|32|64)m")>;
+
+// FIST(P) FISTTP.
+def WriteFIST : SchedWriteRes<[HWPort1, HWPort23, HWPort4]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteFIST], (instregex "IST_(F|FP)(16|32)m")>;
+
+// FLDZ.
+def : InstRW<[WriteP01], (instregex "LD_F0")>;
+
+// FLD1.
+def : InstRW<[Write2P01], (instregex "LD_F1")>;
+
+// FLDPI FLDL2E etc.
+def : InstRW<[Write2P01], (instregex "FLDPI", "FLDL2(T|E)" "FLDL(G|N)2")>;
+
+// FCMOVcc.
+def WriteFCMOVcc : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteFCMOVcc], (instregex "CMOV(B|BE|P|NB|NBE|NE|NP)_F")>;
+
+// FNSTSW.
+// AX.
+def WriteFNSTSW : SchedWriteRes<[HWPort0, HWPort0156]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[WriteFNSTSW], (instregex "FNSTSW16r")>;
+
+// m16.
+def WriteFNSTSWm : SchedWriteRes<[HWPort0, HWPort4, HWPort237]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteFNSTSWm], (instregex "FNSTSWm")>;
+
+// FLDCW.
+def WriteFLDCW : SchedWriteRes<[HWPort01, HWPort23, HWPort6]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteFLDCW], (instregex "FLDCW16m")>;
+
+// FNSTCW.
+def WriteFNSTCW : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[WriteFNSTCW], (instregex "FNSTCW16m")>;
+
+// FINCSTP FDECSTP.
+def : InstRW<[WriteP01], (instregex "FINCSTP", "FDECSTP")>;
+
+// FFREE.
+def : InstRW<[WriteP01], (instregex "FFREE")>;
+
+// FNSAVE.
+def WriteFNSAVE : SchedWriteRes<[]> {
+ let NumMicroOps = 147;
+}
+def : InstRW<[WriteFNSAVE], (instregex "FSAVEm")>;
+
+// FRSTOR.
+def WriteFRSTOR : SchedWriteRes<[]> {
+ let NumMicroOps = 90;
+}
+def : InstRW<[WriteFRSTOR], (instregex "FRSTORm")>;
+
+//-- Arithmetic instructions --//
+
+// FABS.
+def : InstRW<[WriteP0], (instregex "ABS_F")>;
+
+// FCHS.
+def : InstRW<[WriteP0], (instregex "CHS_F")>;
+
+// FCOM(P) FUCOM(P).
+// r.
+def : InstRW<[WriteP1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr",
+ "UCOM_FPr")>;
+// m.
+def : InstRW<[WriteP1_P23], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>;
+
+// FCOMPP FUCOMPP.
+// r.
+def : InstRW<[Write2P01], (instregex "FCOMPP", "UCOM_FPPr")>;
+
+// FCOMI(P) FUCOMI(P).
+// r.
+def : InstRW<[Write3P01], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr",
+ "UCOM_FIPr")>;
+
+// FICOM(P).
+def : InstRW<[Write2P1_P23], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>;
+
+// FTST.
+def : InstRW<[WriteP1], (instregex "TST_F")>;
+
+// FXAM.
+def : InstRW<[Write2P1], (instregex "FXAM")>;
+
+// FPREM.
+def WriteFPREM : SchedWriteRes<[]> {
+ let Latency = 19;
+ let NumMicroOps = 28;
+}
+def : InstRW<[WriteFPREM], (instregex "FPREM")>;
+
+// FPREM1.
+def WriteFPREM1 : SchedWriteRes<[]> {
+ let Latency = 27;
+ let NumMicroOps = 41;
+}
+def : InstRW<[WriteFPREM1], (instregex "FPREM1")>;
+
+// FRNDINT.
+def WriteFRNDINT : SchedWriteRes<[]> {
+ let Latency = 11;
+ let NumMicroOps = 17;
+}
+def : InstRW<[WriteFRNDINT], (instregex "FRNDINT")>;
+
+//-- Math instructions --//
+
+// FSCALE.
+def WriteFSCALE : SchedWriteRes<[]> {
+ let Latency = 75; // 49-125
+ let NumMicroOps = 50; // 25-75
+}
+def : InstRW<[WriteFSCALE], (instregex "FSCALE")>;
+
+// FXTRACT.
+def WriteFXTRACT : SchedWriteRes<[]> {
+ let Latency = 15;
+ let NumMicroOps = 17;
+}
+def : InstRW<[WriteFXTRACT], (instregex "FXTRACT")>;
+
+//-- Other instructions --//
+
+// FNOP.
+def : InstRW<[WriteP01], (instregex "FNOP")>;
+
+// WAIT.
+def : InstRW<[Write2P01], (instregex "WAIT")>;
+
+// FNCLEX.
+def : InstRW<[Write5P0156], (instregex "FNCLEX")>;
+
+// FNINIT.
+def WriteFNINIT : SchedWriteRes<[]> {
+ let NumMicroOps = 26;
+}
+def : InstRW<[WriteFNINIT], (instregex "FNINIT")>;
+
+//=== Integer MMX and XMM Instructions ===//
+//-- Move instructions --//
+
+// MOVD.
+// r32/64 <- (x)mm.
+def : InstRW<[WriteP0], (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr",
+ "VMOVPDI2DIrr", "MOVPDI2DIrr")>;
+
+// (x)mm <- r32/64.
+def : InstRW<[WriteP5], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr",
+ "VMOVDI2PDIrr", "MOVDI2PDIrr")>;
+
+// MOVQ.
+// r64 <- (x)mm.
+def : InstRW<[WriteP0], (instregex "VMOVPQIto64rr")>;
+
+// (x)mm <- r64.
+def : InstRW<[WriteP5], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;
+
+// (x)mm <- (x)mm.
+def : InstRW<[WriteP015], (instregex "MMX_MOVQ64rr")>;
+
+// (V)MOVDQA/U.
+// x <- x.
+def : InstRW<[WriteP015], (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr",
+ "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV",
+ "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>;
+
+// MOVDQ2Q.
+def : InstRW<[WriteP01_P5], (instregex "MMX_MOVDQ2Qrr")>;
+
+// MOVQ2DQ.
+def : InstRW<[WriteP015], (instregex "MMX_MOVQ2DQrr")>;
+
+
+// PACKSSWB/DW.
+// mm <- mm.
+def WriteMMXPACKSSrr : SchedWriteRes<[HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def : InstRW<[WriteMMXPACKSSrr], (instregex "MMX_PACKSSDWirr",
+ "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
+
+// mm <- m64.
+def WriteMMXPACKSSrm : SchedWriteRes<[HWPort23, HWPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 3];
+}
+def : InstRW<[WriteMMXPACKSSrm], (instregex "MMX_PACKSSDWirm",
+ "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;
+
+// VPMOVSX/ZX BW BD BQ DW DQ.
+// y <- x.
+def WriteVPMOVSX : SchedWriteRes<[HWPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+}
+def : InstRW<[WriteVPMOVSX], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;
+
+// PBLENDW.
+// x,x,i / v,v,v,i
+def WritePBLENDWr : SchedWriteRes<[HWPort5]>;
+def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>;
+
+// x,m,i / v,v,m,i
+def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> {
+ let NumMicroOps = 2;
+ let Latency = 4;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>;
+
+// VPBLENDD.
+// v,v,v,i.
+def WriteVPBLENDDr : SchedWriteRes<[HWPort015]>;
+def : InstRW<[WriteVPBLENDDr], (instregex "VPBLENDD(Y?)rri")>;
+
+// v,v,m,i
+def WriteVPBLENDDm : SchedWriteRes<[HWPort015, HWPort23]> {
+ let NumMicroOps = 2;
+ let Latency = 4;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteVPBLENDDm, ReadAfterLd], (instregex "VPBLENDD(Y?)rmi")>;
+
+// MASKMOVQ.
+def WriteMASKMOVQ : SchedWriteRes<[HWPort0, HWPort4, HWPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 2];
+}
+def : InstRW<[WriteMASKMOVQ], (instregex "MMX_MASKMOVQ(64)?")>;
+
+// MASKMOVDQU.
+def WriteMASKMOVDQU : SchedWriteRes<[HWPort04, HWPort56, HWPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 10;
+ let ResourceCycles = [4, 2, 4];
+}
+def : InstRW<[WriteMASKMOVDQU], (instregex "(V?)MASKMOVDQU(64)?")>;
+
+// VPMASKMOV D/Q.
+// v,v,m.
+def WriteVPMASKMOVr : SchedWriteRes<[HWPort5, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVPMASKMOVr, ReadAfterLd],
+ (instregex "VPMASKMOV(D|Q)(Y?)rm")>;
+
+// m, v,v.
+def WriteVPMASKMOVm : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteVPMASKMOVm], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
+
+// PMOVMSKB.
+def WritePMOVMSKB : SchedWriteRes<[HWPort0]> {
+ let Latency = 3;
+}
+def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>;
+
+// PEXTR B/W/D/Q.
+// r32,x,i.
+def WritePEXTRr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>;
+
+// m8,x,i.
+def WritePEXTRm : SchedWriteRes<[HWPort23, HWPort4, HWPort5]> {
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>;
+
+// VPBROADCAST B/W.
+// x, m8/16.
+def WriteVPBROADCAST128Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteVPBROADCAST128Ld, ReadAfterLd],
+ (instregex "VPBROADCAST(B|W)rm")>;
+
+// y, m8/16
+def WriteVPBROADCAST256Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteVPBROADCAST256Ld, ReadAfterLd],
+ (instregex "VPBROADCAST(B|W)Yrm")>;
+
+// VPGATHERDD.
+// x.
+def WriteVPGATHERDD128 : SchedWriteRes<[]> {
+ let NumMicroOps = 20;
+}
+def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>;
+
+// y.
+def WriteVPGATHERDD256 : SchedWriteRes<[]> {
+ let NumMicroOps = 34;
+}
+def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>;
+
+// VPGATHERQD.
+// x.
+def WriteVPGATHERQD128 : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>;
+
+// y.
+def WriteVPGATHERQD256 : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>;
+
+// VPGATHERDQ.
+// x.
+def WriteVPGATHERDQ128 : SchedWriteRes<[]> {
+ let NumMicroOps = 12;
+}
+def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>;
+
+// y.
+def WriteVPGATHERDQ256 : SchedWriteRes<[]> {
+ let NumMicroOps = 20;
+}
+def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>;
+
+// VPGATHERQQ.
+// x.
+def WriteVPGATHERQQ128 : SchedWriteRes<[]> {
+ let NumMicroOps = 14;
+}
+def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>;
+
+// y.
+def WriteVPGATHERQQ256 : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>;
+
+//-- Arithmetic instructions --//
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WritePHADDSUBr], (instregex "MMX_PHADD(W?)rr64",
+ "MMX_PHADDSWrr64",
+ "MMX_PHSUB(W|D)rr64",
+ "MMX_PHSUBSWrr64",
+ "(V?)PH(ADD|SUB)(W|D)(Y?)rr",
+ "(V?)PH(ADD|SUB)SWrr(256)?")>;
+
+// v <- v,m.
+def WritePHADDSUBm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2, 1];
+}
+def : InstRW<[WritePHADDSUBm, ReadAfterLd],
+ (instregex "MMX_PHADD(W?)rm64",
+ "MMX_PHADDSWrm64",
+ "MMX_PHSUB(W|D)rm64",
+ "MMX_PHSUBSWrm64",
+ "(V?)PH(ADD|SUB)(W|D)(Y?)rm",
+ "(V?)PH(ADD|SUB)SWrm(128|256)?")>;
+
+// PCMPGTQ.
+// v <- v,v.
+def WritePCMPGTQr : SchedWriteRes<[HWPort0]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+}
+def : InstRW<[WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
+
+// v <- v,m.
+def WritePCMPGTQm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePCMPGTQm, ReadAfterLd], (instregex "(V?)PCMPGTQ(Y?)rm")>;
+
+// PMULLD.
+// x,x / y,y,y.
+def WritePMULLDr : SchedWriteRes<[HWPort0]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : InstRW<[WritePMULLDr], (instregex "(V?)PMULLD(Y?)rr")>;
+
+// x,m / y,y,m.
+def WritePMULLDm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WritePMULLDm, ReadAfterLd], (instregex "(V?)PMULLD(Y?)rm")>;
+
+//-- Logic instructions --//
+
+// PTEST.
+// v,v.
+def WritePTESTr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rr")>;
+
+// v,m.
+def WritePTESTm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rm")>;
+
+// PSLL,PSRL,PSRA W/D/Q.
+// x,x / v,v,x.
+def WritePShift : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>;
+
+// PSLL,PSRL DQ.
+def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>;
+
+//-- Other --//
+
+// EMMS.
+def WriteEMMS : SchedWriteRes<[]> {
+ let Latency = 13;
+ let NumMicroOps = 31;
+}
+def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>;
+
+//=== Floating Point XMM and YMM Instructions ===//
+//-- Move instructions --//
+
+// MOVMSKP S/D.
+// r32 <- x.
+def WriteMOVMSKPr : SchedWriteRes<[HWPort0]> {
+ let Latency = 3;
+}
+def : InstRW<[WriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)rr")>;
+
+// r32 <- y.
+def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]> {
+ let Latency = 2;
+}
+def : InstRW<[WriteVMOVMSKPYr], (instregex "VMOVMSKP(S|D)Yrr")>;
+
+// VPERM2F128.
+def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>;
+def : InstRW<[WriteFShuffle256Ld, ReadAfterLd], (instregex "VPERM2F128rm")>;
+
+// BLENDVP S/D.
+def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>;
+def : InstRW<[WriteFVarBlendLd, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>;
+
+// VBROADCASTF128.
+def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>;
+
+// EXTRACTPS.
+// r32,x,i.
+def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
+
+// m32,x,i.
+def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
+
+// VEXTRACTF128.
+// x,y,i.
+def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>;
+
+// m128,y,i.
+def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>;
+
+// VINSERTF128.
+// y,y,x,i.
+def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>;
+
+// y,y,m128,i.
+def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteFShuffle256, ReadAfterLd], (instregex "VINSERTF128rm")>;
+
+// VMASKMOVP S/D.
+// v,v,m.
+def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>;
+
+// m128,x,x.
+def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>;
+
+// m256,y,y.
+def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;
+
+// VGATHERDPS.
+// x.
+def WriteVGATHERDPS128 : SchedWriteRes<[]> {
+ let NumMicroOps = 20;
+}
+def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>;
+
+// y.
+def WriteVGATHERDPS256 : SchedWriteRes<[]> {
+ let NumMicroOps = 34;
+}
+def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>;
+
+// VGATHERQPS.
+// x.
+def WriteVGATHERQPS128 : SchedWriteRes<[]> {
+ let NumMicroOps = 15;
+}
+def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>;
+
+// y.
+def WriteVGATHERQPS256 : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>;
+
+// VGATHERDPD.
+// x.
+def WriteVGATHERDPD128 : SchedWriteRes<[]> {
+ let NumMicroOps = 12;
+}
+def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>;
+
+// y.
+def WriteVGATHERDPD256 : SchedWriteRes<[]> {
+ let NumMicroOps = 20;
+}
+def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>;
+
+// VGATHERQPD.
+// x.
+def WriteVGATHERQPD128 : SchedWriteRes<[]> {
+ let NumMicroOps = 14;
+}
+def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>;
+
+// y.
+def WriteVGATHERQPD256 : SchedWriteRes<[]> {
+ let NumMicroOps = 22;
+}
+def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>;
+
+//-- Conversion instructions --//
+
+// CVTPD2PS.
+// x,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>;
+
+// x,m128.
+def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>;
+
+// x,y.
+def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>;
+
+// x,m256.
+def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>;
+
+// CVTSD2SS.
+// x,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>;
+
+// x,m64.
+def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>;
+
+// CVTPS2PD.
+// x,x.
+def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>;
+
+// x,m64.
+// y,m128.
+def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>;
+
+// y,x.
+def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>;
+
+// CVTSS2SD.
+// x,x.
+def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>;
+
+// x,m32.
+def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>;
+
+// CVTDQ2PD.
+// x,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>;
+
+// y,x.
+def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>;
+
+// CVT(T)PD2DQ.
+// x,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>;
+// x,m128.
+def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>;
+// x,y.
+def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>;
+// x,m256.
+def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>;
+
+// CVT(T)PS2PI.
+// mm,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>;
+
+// CVTPI2PD.
+// x,mm.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>;
+
+// CVT(T)PD2PI.
+// mm,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>;
+
+// CVTSI2SS.
+// x,r32.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>;
+
+// CVT(T)SS2SI.
+// r32,x.
+def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>;
+// r32,m32.
+def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>;
+
+// CVTSI2SD.
+// x,r32/64.
+def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SS(64)?rr")>;
+
+// CVTSD2SI.
+// r32/64
+def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>;
+// r32,m32.
+def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>;
+
+// VCVTPS2PH.
+// x,v,i.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>;
+// m,v,i.
+def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>;
+
+// VCVTPH2PS.
+// v,x.
+def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>;
+
+//-- Arithmetic instructions --//
+
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteHADDSUBPr], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>;
+
+// x,m / v,v,m.
+def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 2, 1];
+}
+def : InstRW<[WriteHADDSUBPm], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>;
+
+// MUL SS/SD PS/PD.
+// x,x / v,v,v.
+def WriteMULr : SchedWriteRes<[HWPort01]> {
+ let Latency = 5;
+}
+def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>;
+
+// x,m / v,v,m.
+def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>;
+
+// VDIVPS.
+// y,y,y.
+def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 19; // 18-21 cycles.
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>;
+
+// y,y,m256.
+def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 23; // 18-21 + 4 cycles.
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>;
+
+// VDIVPD.
+// y,y,y.
+def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 27; // 19-35 cycles.
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>;
+
+// y,y,m256.
+def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 31; // 19-35 + 4 cycles.
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>;
+
+// VRCPPS.
+// y,y.
+def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>;
+
+// y,m256.
+def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>;
+
+// ROUND SS/SD PS/PD.
+// v,v,i.
+def WriteROUNDr : SchedWriteRes<[HWPort1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>;
+
+// v,m,i.
+def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>;
+
+// DPPS.
+// x,x,i / v,v,v,i.
+def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>;
+
+// x,m,i / v,v,m,i.
+def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> {
+ let Latency = 18;
+ let NumMicroOps = 6;
+ let ResourceCycles = [2, 1, 1, 1, 1];
+}
+def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>;
+
+// DPPD.
+// x,x,i.
+def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>;
+
+// x,m,i.
+def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>;
+
+// VFMADD.
+// v,v,v.
+def WriteFMADDr : SchedWriteRes<[HWPort01]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+}
+def : InstRW<[WriteFMADDr],
+ (instregex
+ // 3p forms.
+ "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
+ // 3s forms.
+ "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)r",
+ // 4s/4s_int forms.
+ "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
+ // 4p forms.
+ "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;
+
+// v,v,m.
+def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteFMADDm],
+ (instregex
+ // 3p forms.
+ "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
+ // 3s forms.
+ "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)m",
+ // 4s/4s_int forms.
+ "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
+ // 4p forms.
+ "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;
+
+//-- Math instructions --//
+
+// VSQRTPS.
+// y,y.
+def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 19;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>;
+
+// y,m256.
+def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 23;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>;
+
+// VSQRTPD.
+// y,y.
+def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 28;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>;
+
+// y,m256.
+def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 32;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>;
+
+// RSQRT SS/PS.
+// x,x.
+def WriteRSQRTr : SchedWriteRes<[HWPort0]> {
+ let Latency = 5;
+}
+def : InstRW<[WriteRSQRTr], (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>;
+
+// x,m128.
+def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 1];
+}
+def : InstRW<[WriteRSQRTm], (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>;
+
+// RSQRTPS 256.
+// y,y.
+def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2, 1];
+}
+def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>;
+
+// y,m256.
+def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>;
+
+//-- Logic instructions --//
+
+// AND, ANDN, OR, XOR PS/PD.
+// x,x / v,v,v.
+def : InstRW<[WriteP5], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>;
+// x,m / v,v,m.
+def : InstRW<[WriteP5Ld, ReadAfterLd],
+ (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>;
+
+//-- Other instructions --//
+
+// VZEROUPPER.
+def WriteVZEROUPPER : SchedWriteRes<[]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[WriteVZEROUPPER], (instregex "VZEROUPPER")>;
+
+// VZEROALL.
+def WriteVZEROALL : SchedWriteRes<[]> {
+ let NumMicroOps = 12;
+}
+def : InstRW<[WriteVZEROALL], (instregex "VZEROALL")>;
+
+// LDMXCSR.
+def WriteLDMXCSR : SchedWriteRes<[HWPort0, HWPort6, HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1, 1, 1];
+}
+def : InstRW<[WriteLDMXCSR], (instregex "(V)?LDMXCSR")>;
+
+// STMXCSR.
+def WriteSTMXCSR : SchedWriteRes<[HWPort0, HWPort4, HWPort6, HWPort237]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1, 1, 1, 1];
+}
+def : InstRW<[WriteSTMXCSR], (instregex "(V)?STMXCSR")>;
+
} // SchedModel
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 83f0534..eca65c2 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -117,6 +117,7 @@ defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
+defm : SBWriteResPair<WriteFRsqrt, SBPort0, 5>;
defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 25c5a6b..a261356 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -63,12 +63,13 @@ def WriteZero : SchedWrite;
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
-defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
-defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
-defm WriteFDiv : X86SchedWritePair; // Floating point division.
-defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
-defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
-defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
+defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
+defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
@@ -314,6 +315,11 @@ def IIC_SSE_SQRTPD_RM : InstrItinClass;
def IIC_SSE_SQRTSD_RR : InstrItinClass;
def IIC_SSE_SQRTSD_RM : InstrItinClass;
+def IIC_SSE_RSQRTPS_RR : InstrItinClass;
+def IIC_SSE_RSQRTPS_RM : InstrItinClass;
+def IIC_SSE_RSQRTSS_RR : InstrItinClass;
+def IIC_SSE_RSQRTSS_RM : InstrItinClass;
+
def IIC_SSE_RCPP_RR : InstrItinClass;
def IIC_SSE_RCPP_RM : InstrItinClass;
def IIC_SSE_RCPS_RR : InstrItinClass;
@@ -633,9 +639,12 @@ def GenericModel : SchedMachineModel {
let MicroOpBufferSize = 32;
let LoadLatency = 4;
let HighLatency = 10;
+ let PostRAScheduler = 0;
}
include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
include "X86ScheduleSLM.td"
+include "X86ScheduleBtVer2.td"
+
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 3256ee7..4c559c9 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -224,6 +224,11 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<62, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RSQRTPS_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RSQRTPS_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RSQRTSS_RR, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_RSQRTSS_RM, [InstrStage<4, [Port0]>] >,
+
InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >,
@@ -538,6 +543,7 @@ def AtomModel : SchedMachineModel {
// On the Atom, the throughput for taken branches is 2 cycles. For small
// simple loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
+ let PostRAScheduler = 1;
let Itineraries = AtomItineraries;
}
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
new file mode 100644
index 0000000..ce1ece3
--- /dev/null
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -0,0 +1,341 @@
+//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for AMD btver2 (Jaguar) to support
+// instruction scheduling and other instruction cost heuristics. It is based on
+// the AMD Software Optimization Guide for AMD Family 16h Processors and its
+// instruction latency appendix.
+//
+//===----------------------------------------------------------------------===//
+
+def BtVer2Model : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and btver2 can
+ // decode 2 instructions per cycle.
+ let IssueWidth = 2;
+ let MicroOpBufferSize = 64; // Retire Control Unit
+ let LoadLatency = 5; // FPU load latency (worst case; integer loads take 3 cycles)
+ let HighLatency = 25;
+ let MispredictPenalty = 14; // Minimum branch misprediction penalty
+ let PostRAScheduler = 1;
+
+ // FIXME: SSE4/AVX is unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
+ let CompleteModel = 0;
+}
+
+let SchedModel = BtVer2Model in {
+
+// Jaguar can issue up to 6 micro-ops in one cycle
+def JALU0 : ProcResource<1>; // Integer Pipe0: integer ALU0 (also handles FP->INT jam)
+def JALU1 : ProcResource<1>; // Integer Pipe1: integer ALU1/MUL/DIV
+def JLAGU : ProcResource<1>; // Integer Pipe2: LAGU
+def JSAGU : ProcResource<1>; // Integer Pipe3: SAGU (also handles 3-operand LEA)
+def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA
+def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
+
+// Any pipe - FIXME we need this until we can discriminate between int/fpu load/store/moves properly
+def JAny : ProcResGroup<[JALU0, JALU1, JLAGU, JSAGU, JFPU0, JFPU1]>;
+
+// Integer Pipe Scheduler
+def JALU01 : ProcResGroup<[JALU0, JALU1]> {
+ let BufferSize=20;
+}
+
+// AGU Pipe Scheduler
+def JLSAGU : ProcResGroup<[JLAGU, JSAGU]> {
+ let BufferSize=12;
+}
+
+// Fpu Pipe Scheduler
+def JFPU01 : ProcResGroup<[JFPU0, JFPU1]> {
+ let BufferSize=18;
+}
+
+def JDiv : ProcResource<1>; // integer division
+def JMul : ProcResource<1>; // integer multiplication
+def JVALU0 : ProcResource<1>; // vector integer
+def JVALU1 : ProcResource<1>; // vector integer
+def JVIMUL : ProcResource<1>; // vector integer multiplication
+def JSTC : ProcResource<1>; // vector store/convert
+def JFPM : ProcResource<1>; // FP multiplication
+def JFPA : ProcResource<1>; // FP addition
+
+// Integer loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 3>;
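+// (Illustration only, not additional model data: with this ReadAdvance, a
+// micro-fused register-memory ALU op can have its register operand written up
+// to 3 cycles after dispatch without increasing its latency, since the load
+// part occupies those cycles anyway.)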
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when dispatched by the schedulers.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // The register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on JLAGU and adds 3 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [JLAGU, ExePort]> {
+ let Latency = !add(Lat, 3);
+ }
+}
+
+multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // The register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on JLAGU and adds 5 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [JLAGU, ExePort]> {
+ let Latency = !add(Lat, 5);
+ }
+}
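+
+// For illustration only (a sketch of the expansion, not additional model data):
+// a use such as "defm : JWriteResIntPair<WriteALU, JALU01, 1>" below is roughly
+// equivalent to the two records
+//   def : WriteRes<WriteALU,   [JALU01]>        { let Latency = 1; }
+//   def : WriteRes<WriteALULd, [JLAGU, JALU01]> { let Latency = 4; }
+// where WriteALULd is the folded-load variant; JWriteResFpuPair differs only in
+// adding 5 cycles (the FPU load latency) instead of 3.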
+
+// A folded store needs a cycle on the SAGU for the store data.
+def : WriteRes<WriteRMW, [JSAGU]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : JWriteResIntPair<WriteALU, JALU01, 1>;
+defm : JWriteResIntPair<WriteIMul, JALU1, 3>;
+
+def : WriteRes<WriteIMulH, [JALU1]> {
+ let Latency = 6;
+ let ResourceCycles = [4];
+}
+
+// FIXME 8/16 bit divisions
+def : WriteRes<WriteIDiv, [JALU1, JDiv]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 25];
+}
+def : WriteRes<WriteIDivLd, [JALU1, JLAGU, JDiv]> {
+ let Latency = 41;
+ let ResourceCycles = [1, 1, 25];
+}
+
+// This is for simple LEAs with one or two input operands.
+// FIXME: SAGU 3-operand LEA
+def : WriteRes<WriteLEA, [JALU01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : JWriteResIntPair<WriteShift, JALU01, 1>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+// FIXME: Split x86 and SSE load/store/moves
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteStore, [JSAGU]>;
+def : WriteRes<WriteMove, [JAny]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteZero, []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : JWriteResIntPair<WriteJump, JALU01, 1>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+// FIXME: should we bother splitting JFPU pipe + unit stages for fast instructions?
+// FIXME: Double precision latencies
+// FIXME: SS vs PS latencies
+// FIXME: ymm latencies
+////////////////////////////////////////////////////////////////////////////////
+
+defm : JWriteResFpuPair<WriteFAdd, JFPU0, 3>;
+defm : JWriteResFpuPair<WriteFMul, JFPU1, 2>;
+defm : JWriteResFpuPair<WriteFRcp, JFPU1, 2>;
+defm : JWriteResFpuPair<WriteFRsqrt, JFPU1, 2>;
+defm : JWriteResFpuPair<WriteFShuffle, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteFBlend, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteFShuffle256, JFPU01, 1>;
+
+def : WriteRes<WriteFSqrt, [JFPU1, JLAGU, JFPM]> {
+ let Latency = 21;
+ let ResourceCycles = [1, 1, 21];
+}
+def : WriteRes<WriteFSqrtLd, [JFPU1, JLAGU, JFPM]> {
+ let Latency = 26;
+ let ResourceCycles = [1, 1, 21];
+}
+
+def : WriteRes<WriteFDiv, [JFPU1, JLAGU, JFPM]> {
+ let Latency = 19;
+ let ResourceCycles = [1, 1, 19];
+}
+def : WriteRes<WriteFDivLd, [JFPU1, JLAGU, JFPM]> {
+ let Latency = 24;
+ let ResourceCycles = [1, 1, 19];
+}
+
+// FIXME: integer pipes
+defm : JWriteResFpuPair<WriteCvtF2I, JFPU1, 3>; // Float -> Integer.
+defm : JWriteResFpuPair<WriteCvtI2F, JFPU1, 3>; // Integer -> Float.
+defm : JWriteResFpuPair<WriteCvtF2F, JFPU1, 3>; // Float -> Float size conversion.
+
+def : WriteRes<WriteFVarBlend, [JFPU01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteFVarBlendLd, [JLAGU, JFPU01]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 2];
+}
+
+// Vector integer operations.
+defm : JWriteResFpuPair<WriteVecALU, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteVecShift, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteVecIMul, JFPU0, 2>;
+defm : JWriteResFpuPair<WriteShuffle, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteBlend, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteVecLogic, JFPU01, 1>;
+defm : JWriteResFpuPair<WriteShuffle256, JFPU01, 1>;
+
+def : WriteRes<WriteVarBlend, [JFPU01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteVarBlendLd, [JLAGU, JFPU01]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 2];
+}
+
+// FIXME: why do we need to define an AVX2 resource on a CPU that doesn't have AVX2?
+def : WriteRes<WriteVarVecShift, [JFPU01]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteVarVecShiftLd, [JLAGU, JFPU01]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1];
+}
+
+def : WriteRes<WriteMPSAD, [JFPU0]> {
+ let Latency = 3;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteMPSADLd, [JLAGU, JFPU0]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 2];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// String instructions.
+// Packed Compare Implicit Length Strings, Return Mask
+// FIXME: approximate latencies + pipe dependencies
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WritePCmpIStrM, [JFPU01]> {
+ let Latency = 7;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WritePCmpIStrMLd, [JLAGU, JFPU01]> {
+ let Latency = 12;
+ let ResourceCycles = [1, 2];
+}
+
+// Packed Compare Explicit Length Strings, Return Mask
+def : WriteRes<WritePCmpEStrM, [JFPU01]> {
+ let Latency = 13;
+ let ResourceCycles = [5];
+}
+def : WriteRes<WritePCmpEStrMLd, [JLAGU, JFPU01]> {
+ let Latency = 18;
+ let ResourceCycles = [1, 5];
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+def : WriteRes<WritePCmpIStrI, [JFPU01]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WritePCmpIStrILd, [JLAGU, JFPU01]> {
+ let Latency = 11;
+ let ResourceCycles = [1, 2];
+}
+
+// Packed Compare Explicit Length Strings, Return Index
+def : WriteRes<WritePCmpEStrI, [JFPU01]> {
+ let Latency = 13;
+ let ResourceCycles = [5];
+}
+def : WriteRes<WritePCmpEStrILd, [JLAGU, JFPU01]> {
+ let Latency = 18;
+ let ResourceCycles = [1, 5];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// AES Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteAESDecEnc, [JFPU01, JVIMUL]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 1];
+}
+def : WriteRes<WriteAESDecEncLd, [JFPU01, JLAGU, JVIMUL]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 1, 1];
+}
+
+def : WriteRes<WriteAESIMC, [JVIMUL]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteAESIMCLd, [JLAGU, JVIMUL]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 1];
+}
+
+def : WriteRes<WriteAESKeyGen, [JVIMUL]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteAESKeyGenLd, [JLAGU, JVIMUL]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 1];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Carry-less multiplication instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteCLMul, [JVIMUL]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteCLMulLd, [JLAGU, JVIMUL]> {
+ let Latency = 7;
+ let ResourceCycles = [1, 1];
+}
+
+// FIXME: pipe for system/microcode?
+def : WriteRes<WriteSystem, [JAny]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [JAny]> { let Latency = 100; }
+def : WriteRes<WriteFence, [JSAGU]>;
+def : WriteRes<WriteNop, []>;
+} // SchedModel
+
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index 823d101..f95d4fa 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -19,6 +19,7 @@ def SLMModel : SchedMachineModel {
let MicroOpBufferSize = 32; // Based on the reorder buffer.
let LoadLatency = 3;
let MispredictPenalty = 10;
+ let PostRAScheduler = 1;
// For small loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
@@ -100,6 +101,7 @@ def : WriteRes<WriteIDivLd, [MEC_RSV, IEC_RSV01, SMDivider]> {
// Scalar and vector floating point.
defm : SMWriteResPair<WriteFAdd, FPC_RSV1, 3>;
defm : SMWriteResPair<WriteFRcp, FPC_RSV0, 5>;
+defm : SMWriteResPair<WriteFRsqrt, FPC_RSV0, 5>;
defm : SMWriteResPair<WriteFSqrt, FPC_RSV0, 15>;
defm : SMWriteResPair<WriteCvtF2I, FPC_RSV01, 4>;
defm : SMWriteResPair<WriteCvtI2F, FPC_RSV01, 4>;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index a83dd9b..821044f 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -29,6 +29,26 @@ X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL)
X86SelectionDAGInfo::~X86SelectionDAGInfo() {}
+bool X86SelectionDAGInfo::isBaseRegConflictPossible(
+ SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const {
+ // We cannot use TRI->hasBasePointer() until *after* we select all basic
+ // blocks. Legalization may introduce new stack temporaries with large
+ // alignment requirements. Fall back to generic code if there are any
+ // dynamic stack adjustments (hopefully rare) and the base pointer would
+ // conflict if we had to use it.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (!MFI->hasVarSizedObjects() && !MFI->hasInlineAsmWithSPAdjust())
+ return false;
+
+ const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
+ DAG.getSubtarget().getRegisterInfo());
+ unsigned BaseReg = TRI->getBaseRegister();
+ for (unsigned R : ClobberSet)
+ if (BaseReg == R)
+ return true;
+ return false;
+}
+
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
@@ -39,6 +59,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>();
+#ifndef NDEBUG
+ // If the base register might conflict with our physical registers, bail out.
+ unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
+ X86::ECX, X86::EAX, X86::EDI};
+ assert(!isBaseRegConflictPossible(DAG, ClobberSet));
+#endif
+
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
return SDValue();
@@ -201,12 +228,10 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
- // ESI might be used as a base pointer, in that case we can't simply overwrite
- // the register. Fall back to generic code.
- const X86RegisterInfo *TRI =
- static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
- if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
- TRI->getBaseRegister() == X86::ESI)
+ // If the base register might conflict with our physical registers, bail out.
+ unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
+ X86::ECX, X86::ESI, X86::EDI};
+ if (isBaseRegConflictPossible(DAG, ClobberSet))
return SDValue();
MVT AVT;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index c12555a..eb7e0ed 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86SELECTIONDAGINFO_H
-#define X86SELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_X86_X86SELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_X86_X86SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -23,6 +23,11 @@ class X86TargetMachine;
class X86Subtarget;
class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Returns true if it is possible for the base register to conflict with the
+ /// given set of clobbers for a memory intrinsic.
+ bool isBaseRegConflictPossible(SelectionDAG &DAG,
+ ArrayRef<unsigned> ClobberSet) const;
+
public:
explicit X86SelectionDAGInfo(const DataLayout &DL);
~X86SelectionDAGInfo();
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 79b7e68..9d877c9 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -13,6 +13,7 @@
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
+#include "X86TargetMachine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -67,12 +68,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDLLImportStorageClass())
return X86II::MO_DLLIMPORT;
- // Determine whether this is a reference to a definition or a declaration.
- // Materializable GVs (in JIT lazy compilation mode) do not require an extra
- // load from stub.
- bool isDecl = GV->hasAvailableExternallyLinkage();
- if (GV->isDeclaration() && !GV->isMaterializable())
- isDecl = true;
+ bool isDecl = GV->isDeclarationForLinker();
// X86-64 in PIC mode.
if (isPICStyleRIPRel()) {
@@ -182,23 +178,7 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
-void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
- AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
- Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
- std::string CPU =
- !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
- std::string FS =
- !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty()) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
- }
-}
-
-void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
@@ -219,9 +199,6 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Make sure the right MCSchedModel is used.
InitCPUSchedModel(CPUName);
- if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
- PostRAScheduler = true;
-
InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
@@ -275,10 +252,15 @@ void X86Subtarget::initializeEnvironment() {
HasERI = false;
HasCDI = false;
HasPFI = false;
+ HasDQI = false;
+ HasBWI = false;
+ HasVLX = false;
HasADX = false;
HasSHA = false;
+ HasSGX = false;
HasPRFCHW = false;
HasRDSEED = false;
+ HasSMAP = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
IsUAMemFast = false;
@@ -286,48 +268,51 @@ void X86Subtarget::initializeEnvironment() {
HasCmpxchg16b = false;
UseLeaForSP = false;
HasSlowDivide = false;
- PostRAScheduler = false;
PadShortFunctions = false;
CallRegIndirect = false;
LEAUsesAG = false;
SlowLEA = false;
SlowIncDec = false;
+ UseSqrtEst = false;
+ UseReciprocalEst = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
}
-static std::string computeDataLayout(const X86Subtarget &ST) {
+static std::string computeDataLayout(const Triple &TT) {
// X86 is little endian
std::string Ret = "e";
- Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
+ Ret += DataLayout::getManglingComponent(TT);
// X86 and x32 have 32 bit pointers.
- if (ST.isTarget64BitILP32() || !ST.is64Bit())
+ if ((TT.isArch64Bit() &&
+ (TT.getEnvironment() == Triple::GNUX32 || TT.isOSNaCl())) ||
+ !TT.isArch64Bit())
Ret += "-p:32:32";
// Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
- if (ST.is64Bit() || ST.isOSWindows() || ST.isTargetNaCl())
+ if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
Ret += "-i64:64";
else
Ret += "-f64:32:64";
// Some ABIs align long double to 128 bits, others to 32.
- if (ST.isTargetNaCl())
+ if (TT.isOSNaCl())
; // No f80
- else if (ST.is64Bit() || ST.isTargetDarwin())
+ else if (TT.isArch64Bit() || TT.isOSDarwin())
Ret += "-f80:128";
else
Ret += "-f80:32";
// The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
- if (ST.is64Bit())
+ if (TT.isArch64Bit())
Ret += "-n8:16:32:64";
else
Ret += "-n8:16:32";
// The stack is aligned to 32 bits on some ABIs and 128 bits on others.
- if (!ST.is64Bit() && ST.isOSWindows())
+ if (!TT.isArch64Bit() && TT.isOSWindows())
Ret += "-S32";
else
Ret += "-S128";
@@ -338,37 +323,47 @@ static std::string computeDataLayout(const X86Subtarget &ST) {
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
+ initSubtargetFeatures(CPU, FS);
return *this;
}
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, X86TargetMachine &TM,
+ const std::string &FS, const X86TargetMachine &TM,
unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
PICStyle(PICStyles::None), TargetTriple(TT),
+ DL(computeDataLayout(TargetTriple)),
StackAlignOverride(StackAlignOverride),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- DL(computeDataLayout(*this)), TSInfo(DL),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
- FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(),
- is64Bit() ? -8 : -4),
- JITInfo(hasSSE1()) {}
-
-bool
-X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode &Mode,
- RegClassVector &CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
- CriticalPathRCs.clear();
- return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+ TSInfo(DL), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM), FrameLowering(TargetFrameLowering::StackGrowsDown,
+ getStackAlignment(), is64Bit() ? -8 : -4) {
+ // Determine the PICStyle based on the target selected.
+ if (TM.getRelocationModel() == Reloc::Static) {
+ // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
+ setPICStyle(PICStyles::None);
+ } else if (is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ setPICStyle(PICStyles::RIPRel);
+ } else if (isTargetCOFF()) {
+ setPICStyle(PICStyles::None);
+ } else if (isTargetDarwin()) {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ setPICStyle(PICStyles::StubPIC);
+ else {
+ assert(TM.getRelocationModel() == Reloc::DynamicNoPIC);
+ setPICStyle(PICStyles::StubDynamicNoPIC);
+ }
+ } else if (isTargetELF()) {
+ setPICStyle(PICStyles::GOT);
+ }
}
-bool
-X86Subtarget::enableEarlyIfConversion() const {
+bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
}
+
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 09db0eb..091b6c4 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86SUBTARGET_H
-#define X86SUBTARGET_H
+#ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
+#define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
-#include "X86JITInfo.h"
#include "X86SelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
@@ -139,12 +138,18 @@ protected:
/// HasSHA - Processor has SHA instructions.
bool HasSHA;
+ /// HasSGX - Processor has SGX instructions.
+ bool HasSGX;
+
/// HasPRFCHW - Processor has PRFCHW instructions.
bool HasPRFCHW;
/// HasRDSEED - Processor has RDSEED instructions.
bool HasRDSEED;
+ /// HasSMAP - Processor has SMAP instructions.
+ bool HasSMAP;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -170,9 +175,6 @@ protected:
/// full divides and should be used when possible.
bool HasSlowDivide;
- /// PostRAScheduler - True if using post-register-allocation scheduler.
- bool PostRAScheduler;
-
/// PadShortFunctions - True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
@@ -190,15 +192,34 @@ protected:
/// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
+ /// Use the RSQRT* instructions to optimize square root calculations.
+ /// For this to be profitable, the cost of FSQRT and FDIV must be
+ /// substantially higher than normal FP ops like FADD and FMUL.
+ bool UseSqrtEst;
+
+ /// Use the RCP* instructions to optimize FP division calculations.
+ /// For this to be profitable, the cost of FDIV must be
+ /// substantially higher than normal FP ops like FADD and FMUL.
+ bool UseReciprocalEst;
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
-
+
/// Processor has AVX-512 Exponential and Reciprocal Instructions
bool HasERI;
-
+
/// Processor has AVX-512 Conflict Detection Instructions
bool HasCDI;
-
+
+ /// Processor has AVX-512 Doubleword and Quadword instructions
+ bool HasDQI;
+
+ /// Processor has AVX-512 Byte and Word instructions
+ bool HasBWI;
+
+ /// Processor has AVX-512 Vector Length eXtensions
+ bool HasVLX;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -214,6 +235,9 @@ protected:
InstrItineraryData InstrItins;
private:
+ // Calculates type size & alignment
+ const DataLayout DL;
+
/// StackAlignOverride - Override the stack alignment.
unsigned StackAlignOverride;
@@ -226,30 +250,35 @@ private:
/// In16BitMode - True if compiling for 16-bit, false for 32-bit or 64-bit.
bool In16BitMode;
- // Calculates type size & alignment
- const DataLayout DL;
X86SelectionDAGInfo TSInfo;
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
// X86TargetLowering needs.
X86InstrInfo InstrInfo;
X86TargetLowering TLInfo;
X86FrameLowering FrameLowering;
- X86JITInfo JITInfo;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, X86TargetMachine &TM,
+ const std::string &FS, const X86TargetMachine &TM,
unsigned StackAlignOverride);
- const X86TargetLowering *getTargetLowering() const { return &TLInfo; }
- const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const X86FrameLowering *getFrameLowering() const { return &FrameLowering; }
- const X86SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- X86JITInfo *getJITInfo() { return &JITInfo; }
+ const X86TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const X86FrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const X86RegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
@@ -264,14 +293,12 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// \brief Reset the features for the X86 target.
- void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
/// \brief Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
- void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+ void initSubtargetFeatures(StringRef CPU, StringRef FS);
public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
@@ -294,7 +321,8 @@ public:
/// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
bool isTarget64BitLP64() const {
- return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32);
+ return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 &&
+ TargetTriple.getOS() != Triple::NaCl);
}
PICStyles::Style getPICStyle() const { return PICStyle; }
@@ -335,8 +363,10 @@ public:
bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
+ bool hasSGX() const { return HasSGX; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
+ bool hasSMAP() const { return HasSMAP; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
@@ -349,9 +379,14 @@ public:
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
bool slowIncDec() const { return SlowIncDec; }
+ bool useSqrtEst() const { return UseSqrtEst; }
+ bool useReciprocalEst() const { return UseReciprocalEst; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
+ bool hasDQI() const { return HasDQI; }
+ bool hasBWI() const { return HasBWI; }
+ bool hasVLX() const { return HasVLX; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
@@ -391,6 +426,10 @@ public:
return TargetTriple.isWindowsGNUEnvironment();
}
+ bool isTargetWindowsItanium() const {
+ return TargetTriple.isWindowsItaniumEnvironment();
+ }
+
bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
bool isOSWindows() const { return TargetTriple.isOSWindows(); }
@@ -453,18 +492,17 @@ public:
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
- /// enablePostRAScheduler - run for Atom optimization.
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const override;
-
- bool postRAScheduler() const { return PostRAScheduler; }
-
bool enableEarlyIfConversion() const override;
/// getInstrItins = Return the instruction itineraries based on the
/// subtarget selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ AntiDepBreakMode getAntiDepBreakMode() const override {
+ return TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ }
};
} // End llvm namespace
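
The UseSqrtEst and UseReciprocalEst flags introduced in this header only pay off because the RSQRT*/RCP* estimate instructions are cheap but imprecise, so codegen refines their results with Newton-Raphson steps instead of issuing a full FSQRT or FDIV. As a rough illustration of the math involved, and not of code in this patch, one refinement step for a reciprocal square root looks like this in plain C++ (the estimate is supplied by hand here, standing in for what an RSQRT-style instruction would produce):

    #include <cmath>
    #include <cstdio>

    // One Newton-Raphson step: given an estimate e of 1/sqrt(a), a better
    // estimate is e * (1.5 - 0.5 * a * e * e).
    static float refineRsqrt(float a, float e) {
      return e * (1.5f - 0.5f * a * e * e);
    }

    int main() {
      float a = 2.0f;
      float e = 0.7f;             // stand-in for a hardware RSQRT estimate
      for (int i = 0; i < 2; ++i) // two steps roughly recover float accuracy
        e = refineRsqrt(a, e);
      std::printf("refined %.7f vs libm %.7f\n", e, 1.0f / std::sqrt(a));
      return 0;
    }

This is only profitable when FSQRT/FDIV cost much more than the FMUL/FADD the refinement uses, which is exactly the condition the comments on the two flags spell out.
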
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f12140f..8802feb 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -13,7 +13,9 @@
#include "X86TargetMachine.h"
#include "X86.h"
+#include "X86TargetObjectFile.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
@@ -27,7 +29,23 @@ extern "C" void LLVMInitializeX86Target() {
RegisterTargetMachine<X86TargetMachine> Y(TheX86_64Target);
}
-void X86TargetMachine::anchor() { }
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getArch() == Triple::x86_64)
+ return make_unique<X86_64MachoTargetObjectFile>();
+ return make_unique<TargetLoweringObjectFileMachO>();
+ }
+
+ if (TT.isOSLinux())
+ return make_unique<X86LinuxTargetObjectFile>();
+ if (TT.isOSBinFormatELF())
+ return make_unique<TargetLoweringObjectFileELF>();
+ if (TT.isKnownWindowsMSVCEnvironment())
+ return make_unique<X86WindowsTargetObjectFile>();
+ if (TT.isOSBinFormatCOFF())
+ return make_unique<TargetLoweringObjectFileCOFF>();
+ llvm_unreachable("unknown subtarget type");
+}
/// X86TargetMachine ctor - Create an X86 target.
///
@@ -36,27 +54,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
- // Determine the PICStyle based on the target selected.
- if (getRelocationModel() == Reloc::Static) {
- // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
- Subtarget.setPICStyle(PICStyles::None);
- } else if (Subtarget.is64Bit()) {
- // PIC in 64 bit mode is always rip-rel.
- Subtarget.setPICStyle(PICStyles::RIPRel);
- } else if (Subtarget.isTargetCOFF()) {
- Subtarget.setPICStyle(PICStyles::None);
- } else if (Subtarget.isTargetDarwin()) {
- if (getRelocationModel() == Reloc::PIC_)
- Subtarget.setPICStyle(PICStyles::StubPIC);
- else {
- assert(getRelocationModel() == Reloc::DynamicNoPIC);
- Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
- }
- } else if (Subtarget.isTargetELF()) {
- Subtarget.setPICStyle(PICStyles::GOT);
- }
-
// default to hard float ABI
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType = FloatABI::Hard;
@@ -71,6 +70,47 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
initAsmInfo();
}
+X86TargetMachine::~X86TargetMachine() {}
+
+const X86Subtarget *
+X86TargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ // FIXME: This is related to the code below to reset the target options,
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ Attribute SFAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
+ bool SoftFloat = !SFAttr.hasAttribute(Attribute::None)
+ ? SFAttr.getValueAsString() == "true"
+ : Options.UseSoftFloat;
+
+ auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true"
+ : "use-soft-float=false")];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
+ Options.StackAlignmentOverride);
+ }
+ return I.get();
+}
+
//===----------------------------------------------------------------------===//
// Command line options for x86
//===----------------------------------------------------------------------===//
@@ -125,7 +165,7 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
}
void X86PassConfig::addIRPasses() {
- addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
+ addPass(createAtomicExpandPass(&getX86TargetMachine()));
TargetPassConfig::addIRPasses();
}
@@ -177,10 +217,3 @@ bool X86PassConfig::addPreEmitPass() {
return ShouldPrint;
}
-
-bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE) {
- PM.add(createX86JITCodeEmitterPass(*this, JCE));
-
- return false;
-}
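
The getSubtargetImpl(const Function &) overload added above memoizes one X86Subtarget per distinct combination of the function's "target-cpu" and "target-features" attributes plus the soft-float setting, so functions with identical attributes share a subtarget. A stripped-down sketch of that caching pattern, using a placeholder Subtarget type rather than the real X86Subtarget, could look like:

    #include <map>
    #include <memory>
    #include <string>

    struct Subtarget { // placeholder for a real subtarget class
      std::string CPU, FS;
      bool SoftFloat;
      Subtarget(std::string C, std::string F, bool S)
          : CPU(std::move(C)), FS(std::move(F)), SoftFloat(S) {}
    };

    class TargetMachineLike {
      // One subtarget per distinct (CPU, features, soft-float) key.
      mutable std::map<std::string, std::unique_ptr<Subtarget>> SubtargetMap;

    public:
      const Subtarget *getSubtarget(const std::string &CPU,
                                    const std::string &FS,
                                    bool SoftFloat) const {
        std::string Key = CPU + FS +
            (SoftFloat ? "use-soft-float=true" : "use-soft-float=false");
        auto &Slot = SubtargetMap[Key];
        if (!Slot) // construct lazily the first time this key is seen
          Slot = std::make_unique<Subtarget>(CPU, FS, SoftFloat);
        return Slot.get();
      }
    };

The real code also calls resetTargetOptions(F) before constructing a subtarget, because (per the FIXME above) options derived from the function have to be in place before the subtarget is built.
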
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 41d5157..916278c 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86TARGETMACHINE_H
-#define X86TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
+#define LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/IR/DataLayout.h"
@@ -23,46 +23,29 @@ namespace llvm {
class StringRef;
class X86TargetMachine final : public LLVMTargetMachine {
- virtual void anchor();
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
X86Subtarget Subtarget;
+ mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap;
+
public:
X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~X86TargetMachine() override;
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const X86InstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- X86JITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; }
- const X86TargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const X86RegisterInfo *getRegisterInfo() const override {
- return &getInstrInfo()->getRegisterInfo();
- }
- const InstrItineraryData *getInstrItineraryData() const override {
- return &getSubtargetImpl()->getInstrItineraryData();
- }
+ const X86Subtarget *getSubtargetImpl(const Function &F) const override;
/// \brief Register X86 analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
// Set up the pass pipeline.
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-
- bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 8157085..f8bcd61 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -8,10 +8,12 @@
//===----------------------------------------------------------------------===//
#include "X86TargetObjectFile.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetLowering.h"
@@ -106,3 +108,64 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
MCSymbolRefExpr::VK_COFF_IMGREL32,
getContext());
}
+
+static std::string APIntToHexString(const APInt &AI) {
+ unsigned Width = (AI.getBitWidth() / 8) * 2;
+ std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true);
+ unsigned Size = HexString.size();
+ assert(Width >= Size && "hex string is too large!");
+ HexString.insert(HexString.begin(), Width - Size, '0');
+
+ return HexString;
+}
+
+
+static std::string scalarConstantToHexString(const Constant *C) {
+ Type *Ty = C->getType();
+ APInt AI;
+ if (isa<UndefValue>(C)) {
+ AI = APInt(Ty->getPrimitiveSizeInBits(), /*val=*/0);
+ } else if (Ty->isFloatTy() || Ty->isDoubleTy()) {
+ const auto *CFP = cast<ConstantFP>(C);
+ AI = CFP->getValueAPF().bitcastToAPInt();
+ } else if (Ty->isIntegerTy()) {
+ const auto *CI = cast<ConstantInt>(C);
+ AI = CI->getValue();
+ } else {
+ llvm_unreachable("unexpected constant pool element type!");
+ }
+ return APIntToHexString(AI);
+}
+
+const MCSection *
+X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
+ const Constant *C) const {
+ if (Kind.isReadOnly()) {
+ if (C) {
+ Type *Ty = C->getType();
+ SmallString<32> COMDATSymName;
+ if (Ty->isFloatTy() || Ty->isDoubleTy()) {
+ COMDATSymName = "__real@";
+ COMDATSymName += scalarConstantToHexString(C);
+ } else if (const auto *VTy = dyn_cast<VectorType>(Ty)) {
+ uint64_t NumBits = VTy->getBitWidth();
+ if (NumBits == 128 || NumBits == 256) {
+ COMDATSymName = NumBits == 128 ? "__xmm@" : "__ymm@";
+ for (int I = VTy->getNumElements() - 1, E = -1; I != E; --I)
+ COMDATSymName +=
+ scalarConstantToHexString(C->getAggregateElement(I));
+ }
+ }
+ if (!COMDATSymName.empty()) {
+ unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_LNK_COMDAT;
+ return getContext().getCOFFSection(".rdata", Characteristics, Kind,
+ COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ANY);
+ }
+ }
+ }
+
+ return TargetLoweringObjectFile::getSectionForConstant(Kind, C);
+}
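
The getSectionForConstant change above places mergeable read-only constants in COMDAT sections named after the constant's bit pattern (__real@ for scalars, __xmm@/__ymm@ for 128- and 256-bit vectors), so the linker can fold identical constants across object files. A minimal sketch of producing such a name for a double with only the standard library, rather than the APInt helpers used in the patch, might be:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <string>

    // Hex-encode the raw bits of a double, zero-padded to 16 lowercase digits,
    // in the "__real@..." style used for scalar constant-pool COMDATs.
    static std::string realComdatName(double Value) {
      std::uint64_t Bits;
      std::memcpy(&Bits, &Value, sizeof(Bits)); // bit-cast without UB
      char Buf[17];
      std::snprintf(Buf, sizeof(Buf), "%016llx",
                    static_cast<unsigned long long>(Bits));
      return std::string("__real@") + Buf;
    }

    int main() {
      // 1.0 has the bit pattern 0x3ff0000000000000.
      std::printf("%s\n", realComdatName(1.0).c_str());
      return 0;
    }

Vector constants get the same treatment element by element, highest element first, which is why the loop in the patch walks the aggregate from getNumElements() - 1 down to 0.
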
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index a08ed09..6a6988a 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H
-#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_X86_X86TARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_X86_X86TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -46,6 +46,11 @@ namespace llvm {
const MCExpr *
getExecutableRelativeSymbol(const ConstantExpr *CE, Mangler &Mang,
const TargetMachine &TM) const override;
+
+ /// \brief Given a mergeable constant with the specified size and relocation
+ /// information, return a section that it should be placed in.
+ const MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override;
};
} // end namespace llvm
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index c961e2f..2b70fd0 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -48,8 +48,8 @@ public:
}
X86TTI(const X86TargetMachine *TM)
- : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
+ : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
+ TLI(TM->getSubtargetImpl()->getTargetLowering()) {
initializeX86TTIPass(*PassRegistry::getPassRegistry());
}
@@ -82,9 +82,10 @@ public:
unsigned getNumberOfRegisters(bool Vector) const override;
unsigned getRegisterBitWidth(bool Vector) const override;
- unsigned getMaximumUnrollFactor() const override;
+ unsigned getMaxInterleaveFactor() const override;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind) const override;
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override;
unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const override;
unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -166,7 +167,7 @@ unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
}
-unsigned X86TTI::getMaximumUnrollFactor() const {
+unsigned X86TTI::getMaxInterleaveFactor() const {
if (ST->isAtom())
return 1;
@@ -178,15 +179,37 @@ unsigned X86TTI::getMaximumUnrollFactor() const {
return 2;
}
-unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
+unsigned X86TTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ISD == ISD::SDIV &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+ // On X86, a vector signed division by a constant power of two is
+ // normally expanded to the sequence SRA + SRL + ADD + SRA.
+ // The OperandValue properties may not be the same as those of the
+ // previous operation; conservatively assume OP_None.
+ unsigned Cost =
+ 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+
+ return Cost;
+ }
+
static const CostTblEntry<MVT::SimpleValueType>
AVX2UniformConstCostTable[] = {
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
@@ -202,6 +225,15 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return LT.first * AVX2UniformConstCostTable[Idx].Cost;
}
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
+ { ISD::SHL, MVT::v16i32, 1 },
+ { ISD::SRL, MVT::v16i32, 1 },
+ { ISD::SRA, MVT::v16i32, 1 },
+ { ISD::SHL, MVT::v8i64, 1 },
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+ };
+
static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
@@ -237,6 +269,11 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
+ if (ST->hasAVX512()) {
+ int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX512CostTable[Idx].Cost;
+ }
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
@@ -541,7 +578,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
// There are faster sequences for float conversions.
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
@@ -557,6 +594,45 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
}
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ AVX512ConversionTbl[] = {
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
+ { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
+
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
+ { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
+
+ // v16i1 -> v16i32 - load + broadcast
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+ };
+
+ if (ST->hasAVX512()) {
+ int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
+ LTSrc.second);
+ if (Idx != -1)
+ return AVX512ConversionTbl[Idx].Cost;
+ }
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
@@ -589,6 +665,11 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },
+
+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
+ { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
+
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
};
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
@@ -715,6 +796,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v32i8, 1 },
};
+ static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
+ { ISD::SETCC, MVT::v8i64, 1 },
+ { ISD::SETCC, MVT::v16i32, 1 },
+ { ISD::SETCC, MVT::v8f64, 1 },
+ { ISD::SETCC, MVT::v16f32, 1 },
+ };
+
+ if (ST->hasAVX512()) {
+ int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX512CostTbl[Idx].Cost;
+ }
+
if (ST->hasAVX2()) {
int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
if (Idx != -1)
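
All of the AVX-512 entries added in this file go through the same lookup pattern: a static table of (opcode, legalized MVT, cost) rows searched linearly, with the hit scaled by LT.first, the number of registers the original type legalizes into. A self-contained approximation of that CostTableLookup pattern, with hypothetical enum values standing in for the real ISD opcodes and MVTs, is:

    #include <cstdio>

    enum Opcode { SHL, SRL, SRA };
    enum SimpleVT { v16i32, v8i64 };

    struct CostTblEntry {
      Opcode ISD;
      SimpleVT Type;
      unsigned Cost;
    };

    // Mirrors the shape of the AVX512CostTable added by the patch.
    static const CostTblEntry AVX512CostTable[] = {
        {SHL, v16i32, 1}, {SRL, v16i32, 1}, {SRA, v16i32, 1},
        {SHL, v8i64, 1},  {SRL, v8i64, 1},  {SRA, v8i64, 1},
    };

    // Linear search; returns -1 when no row matches, like CostTableLookup.
    static int lookup(const CostTblEntry *Tbl, unsigned Len, Opcode ISD,
                      SimpleVT VT) {
      for (unsigned I = 0; I != Len; ++I)
        if (Tbl[I].ISD == ISD && Tbl[I].Type == VT)
          return static_cast<int>(I);
      return -1;
    }

    int main() {
      unsigned LTFirst = 2; // e.g. a v32i32 shift split into two v16i32 ops
      int Idx = lookup(AVX512CostTable, 6, SRA, v16i32);
      if (Idx != -1)
        std::printf("cost = %u\n", LTFirst * AVX512CostTable[Idx].Cost);
      return 0;
    }

The SDIV-by-power-of-two special case added earlier in the function composes its answer the same way, as 2 * cost(AShr) + cost(LShr) + cost(Add), rather than reading a table entry.
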
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 0bb5f99..d93baeb 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -250,7 +250,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
if (!ST.hasAVX() || ST.hasAVX512())
return false;
- TII = MF.getTarget().getInstrInfo();
+ TII = MF.getSubtarget().getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
EverMadeChange = false;
diff --git a/lib/Target/XCore/Disassembler/LLVMBuild.txt b/lib/Target/XCore/Disassembler/LLVMBuild.txt
index 028de2c..cc30d04 100644
--- a/lib/Target/XCore/Disassembler/LLVMBuild.txt
+++ b/lib/Target/XCore/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = XCoreDisassembler
parent = XCore
-required_libraries = MC Support XCoreInfo
+required_libraries = MCDisassembler Support XCoreInfo
add_to_library_groups = XCore
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 7fef796..640e6b0 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -19,7 +19,6 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -36,47 +35,35 @@ public:
XCoreDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {}
- /// \brief See MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const override;
-
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
};
}
-static bool readInstruction16(const MemoryObject &region,
- uint64_t address,
- uint64_t &size,
- uint16_t &insn) {
- uint8_t Bytes[4];
-
+static bool readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint16_t &Insn) {
// We want to read exactly 2 Bytes of data.
- if (region.readBytes(address, 2, Bytes) == -1) {
- size = 0;
+ if (Bytes.size() < 2) {
+ Size = 0;
return false;
}
// Encoded as a little-endian 16-bit word in the stream.
- insn = (Bytes[0] << 0) | (Bytes[1] << 8);
+ Insn = (Bytes[0] << 0) | (Bytes[1] << 8);
return true;
}
-static bool readInstruction32(const MemoryObject &region,
- uint64_t address,
- uint64_t &size,
- uint32_t &insn) {
- uint8_t Bytes[4];
-
+static bool readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn) {
// We want to read exactly 4 Bytes of data.
- if (region.readBytes(address, 4, Bytes) == -1) {
- size = 0;
+ if (Bytes.size() < 4) {
+ Size = 0;
return false;
}
// Encoded as a little-endian 32-bit word in the stream.
- insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
- (Bytes[3] << 24);
+ Insn =
+ (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | (Bytes[3] << 24);
return true;
}
@@ -748,16 +735,12 @@ DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
return S;
}
-MCDisassembler::DecodeStatus
-XCoreDisassembler::getInstruction(MCInst &instr,
- uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &vStream,
- raw_ostream &cStream) const {
+MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(
+ MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &vStream, raw_ostream &cStream) const {
uint16_t insn16;
- if (!readInstruction16(Region, Address, Size, insn16)) {
+ if (!readInstruction16(Bytes, Address, Size, insn16)) {
return Fail;
}
@@ -771,7 +754,7 @@ XCoreDisassembler::getInstruction(MCInst &instr,
uint32_t insn32;
- if (!readInstruction32(Region, Address, Size, insn32)) {
+ if (!readInstruction32(Bytes, Address, Size, insn32)) {
return Fail;
}
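
The rewritten readInstruction16/readInstruction32 helpers above take an ArrayRef<uint8_t>, fail cleanly (Size = 0, return false) when fewer bytes remain than the instruction needs, and assemble the little-endian word by shifting. The same decoding, expressed against a plain byte pointer and length so it can stand alone, works like this:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Decode a little-endian 16-bit word; false if the buffer is too short.
    static bool read16(const std::uint8_t *Bytes, std::size_t Len,
                       std::uint16_t &Insn) {
      if (Len < 2)
        return false;
      Insn = static_cast<std::uint16_t>(Bytes[0] | (Bytes[1] << 8));
      return true;
    }

    // Decode a little-endian 32-bit word the same way.
    static bool read32(const std::uint8_t *Bytes, std::size_t Len,
                       std::uint32_t &Insn) {
      if (Len < 4)
        return false;
      Insn = static_cast<std::uint32_t>(Bytes[0]) | (Bytes[1] << 8) |
             (Bytes[2] << 16) |
             (static_cast<std::uint32_t>(Bytes[3]) << 24);
      return true;
    }

    int main() {
      const std::uint8_t Buf[] = {0x34, 0x12, 0x78, 0x56};
      std::uint16_t Half;
      std::uint32_t Word;
      if (read16(Buf, sizeof(Buf), Half) && read32(Buf, sizeof(Buf), Word))
        std::printf("0x%04x 0x%08x\n", unsigned(Half), Word); // 0x1234 0x56781234
      return 0;
    }

Switching from MemoryObject::readBytes to a bounds check on Bytes.size() is what lets the new signature drop the extra copy into a local buffer.
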
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index 98e7c98..78521fd 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -13,8 +13,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef XCOREINSTPRINTER_H
-#define XCOREINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
+#define LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 5665911..f2d2b37 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -28,7 +28,6 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) {
ProtectedVisibilityAttr = MCSA_Invalid;
// Debug
- HasLEB128 = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
DwarfRegNumForCFI = true;
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index da2689a..26df211 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORETARGETASMINFO_H
-#define XCORETARGETASMINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCASMINFO_H
+#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index d54e94f..4073549 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -99,10 +99,10 @@ class XCoreTargetAsmStreamer : public XCoreTargetStreamer {
formatted_raw_ostream &OS;
public:
XCoreTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- virtual void emitCCTopData(StringRef Name) override;
- virtual void emitCCTopFunction(StringRef Name) override;
- virtual void emitCCBottomData(StringRef Name) override;
- virtual void emitCCBottomFunction(StringRef Name) override;
+ void emitCCTopData(StringRef Name) override;
+ void emitCCTopFunction(StringRef Name) override;
+ void emitCCBottomData(StringRef Name) override;
+ void emitCCBottomFunction(StringRef Name) override;
};
XCoreTargetAsmStreamer::XCoreTargetAsmStreamer(MCStreamer &S,
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index a255adb..0ff5961 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREMCTARGETDESC_H
-#define XCOREMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
+#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
namespace llvm {
class Target;
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index d707edc..140ba2a 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TARGET_XCORE_H
-#define TARGET_XCORE_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORE_H
+#define LLVM_LIB_TARGET_XCORE_XCORE_H
#include "MCTargetDesc/XCoreMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index e98d4f9..82e4e36 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -117,7 +117,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitSpecialLLVMGlobal(GV))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
OutStreamer.SwitchSection(
getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
@@ -210,7 +210,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index e694736..7c74340 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -16,6 +16,7 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
+#include "XCoreSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -226,7 +227,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineModuleInfo *MMI = &MF.getMMI();
const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -262,7 +263,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
MBB.addLiveIn(XCore::LR);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opcode));
MIB.addImm(Adjusted);
- MIB->addRegisterKilled(XCore::LR, MF.getTarget().getRegisterInfo(), true);
+ MIB->addRegisterKilled(XCore::LR, MF.getSubtarget().getRegisterInfo(),
+ true);
if (emitFrameMoves) {
EmitDefCfaOffset(MBB, MBBI, dl, TII, MMI, Adjusted*4);
unsigned DRegNum = MRI->getDwarfRegNum(XCore::LR, true);
@@ -310,11 +312,10 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
// Frame moves for callee saved.
- auto SpillLabels = XFI->getSpillLabels();
- for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
- MachineBasicBlock::iterator Pos = SpillLabels[I].first;
+ for (const auto &SpillLabel : XFI->getSpillLabels()) {
+ MachineBasicBlock::iterator Pos = SpillLabel.first;
++Pos;
- CalleeSavedInfo &CSI = SpillLabels[I].second;
+ const CalleeSavedInfo &CSI = SpillLabel.second;
int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
unsigned DRegNum = MRI->getDwarfRegNum(CSI.getReg(), true);
EmitCfiOffset(MBB, Pos, dl, TII, MMI, DRegNum, Offset);
@@ -323,7 +324,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
// The unwinder requires stack slot & CFI offsets for the exception info.
// We do not save/spill these registers.
SmallVector<StackSlotInfo,2> SpillList;
- GetEHSpillList(SpillList, MFI, XFI, MF.getTarget().getTargetLowering());
+ GetEHSpillList(SpillList, MFI, XFI,
+ MF.getSubtarget().getTargetLowering());
assert(SpillList.size()==2 && "Unexpected SpillList size");
EmitCfiOffset(MBB, MBBI, dl, TII, MMI,
MRI->getDwarfRegNum(SpillList[0].Reg, true),
@@ -340,7 +342,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
DebugLoc dl = MBBI->getDebugLoc();
unsigned RetOpcode = MBBI->getOpcode();
@@ -355,7 +357,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
// 'Restore' the exception info the unwinder has placed into the stack
// slots.
SmallVector<StackSlotInfo,2> SpillList;
- GetEHSpillList(SpillList, MFI, XFI, MF.getTarget().getTargetLowering());
+ GetEHSpillList(SpillList, MFI, XFI, MF.getSubtarget().getTargetLowering());
RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList);
// Return to the landing pad.
@@ -413,7 +415,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
@@ -446,7 +448,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const{
MachineFunction *MF = MBB.getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
bool AtStart = MI == MBB.begin();
MachineBasicBlock::iterator BeforeI = MI;
if (!AtStart)
@@ -479,7 +481,7 @@ void XCoreFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
if (!hasReservedCallFrame(MF)) {
// Turn the adjcallstackdown instruction into 'extsp <amt>' and the
// adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index e4f806a..7b169c2 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREFRAMEINFO_H
-#define XCOREFRAMEINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREFRAMELOWERING_H
+#define LLVM_LIB_TARGET_XCORE_XCOREFRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -59,4 +59,4 @@ namespace llvm {
};
}
-#endif // XCOREFRAMEINFO_H
+#endif
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index 30c7b59..77292c4 100644
--- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -13,6 +13,7 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -43,7 +44,7 @@ FunctionPass *llvm::createXCoreFrameToArgsOffsetEliminationPass() {
bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) {
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
unsigned StackSize = MF.getFrameInfo()->getStackSize();
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
++MFI) {
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index be7ef64..96c43ae 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -69,7 +69,7 @@ getTargetNodeName(unsigned Opcode) const
}
XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM)
- : TargetLowering(TM, new XCoreTargetObjectFile()), TM(TM),
+ : TargetLowering(TM), TM(TM),
Subtarget(TM.getSubtarget<XCoreSubtarget>()) {
// Set up the register classes.
@@ -426,7 +426,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
"Unexpected extension type");
assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
- if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
+ if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(),
+ LD->getAddressSpace(),
+ LD->getAlignment()))
return SDValue();
unsigned ABIAlignment = getDataLayout()->
@@ -461,14 +463,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (LD->getAlignment() == 2) {
SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
BasePtr, LD->getPointerInfo(), MVT::i16,
- LD->isVolatile(), LD->isNonTemporal(), 2);
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
HighAddr,
LD->getPointerInfo().getWithOffset(2),
MVT::i16, LD->isVolatile(),
- LD->isNonTemporal(), 2);
+ LD->isNonTemporal(), LD->isInvariant(), 2);
SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
DAG.getConstant(16, MVT::i32));
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
@@ -504,7 +507,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
StoreSDNode *ST = cast<StoreSDNode>(Op);
assert(!ST->isTruncatingStore() && "Unexpected store type");
assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
- if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
+ ST->getAddressSpace(),
+ ST->getAlignment())) {
return SDValue();
}
unsigned ABIAlignment = getDataLayout()->
@@ -800,7 +805,8 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
return SDValue();
MachineFunction &MF = DAG.getMachineFunction();
- const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op),
RegInfo->getFrameRegister(MF), MVT::i32);
}
@@ -846,7 +852,8 @@ LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
// Absolute SP = (FP + FrameToArgs) + Offset
- const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *RegInfo =
+ getTargetMachine().getSubtargetImpl()->getRegisterInfo();
SDValue Stack = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
RegInfo->getFrameRegister(MF), MVT::i32);
SDValue FrameToArgs = DAG.getNode(XCoreISD::FRAME_TO_ARGS_OFFSET, dl,
@@ -969,7 +976,7 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const {
N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
N->isInvariant(), N->getAlignment(),
- N->getTBAAInfo(), N->getRanges());
+ N->getAAInfo(), N->getRanges());
}
if (N->getMemoryVT() == MVT::i16) {
if (N->getAlignment() < 2)
@@ -977,13 +984,13 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
N->getBasePtr(), N->getPointerInfo(), MVT::i16,
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->isInvariant(), N->getAlignment(), N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i8)
return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(),
N->getBasePtr(), N->getPointerInfo(), MVT::i8,
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->isInvariant(), N->getAlignment(), N->getAAInfo());
return SDValue();
}
@@ -999,7 +1006,7 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(),
N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->getAlignment(), N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i16) {
if (N->getAlignment() < 2)
@@ -1007,13 +1014,13 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(),
N->getBasePtr(), N->getPointerInfo(), MVT::i16,
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->getAlignment(), N->getAAInfo());
}
if (N->getMemoryVT() == MVT::i8)
return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(),
N->getBasePtr(), N->getPointerInfo(), MVT::i8,
N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getTBAAInfo());
+ N->getAlignment(), N->getAAInfo());
return SDValue();
}
@@ -1118,8 +1125,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
// The ABI dictates there should be one stack slot available to the callee
// on function entry (for saving lr).
@@ -1129,8 +1136,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> RVLocs;
// Analyze return values to determine the number of bytes of stack required.
- CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4);
RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
@@ -1284,8 +1291,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
@@ -1443,7 +1450,7 @@ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
if (!CCInfo.CheckReturn(Outs, RetCC_XCore))
return false;
if (CCInfo.getNextStackOffset() != 0 && isVarArg)
@@ -1467,8 +1474,8 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze return values.
if (!isVarArg)
@@ -1541,7 +1548,8 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
MachineBasicBlock *
XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ const TargetInstrInfo &TII =
+ *getTargetMachine().getSubtargetImpl()->getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
assert((MI->getOpcode() == XCore::SELECT_CC) &&
"Unexpected instr type to insert");
@@ -1803,7 +1811,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// Replace unaligned store of unaligned load with memmove.
StoreSDNode *ST = cast<StoreSDNode>(N);
if (!DCI.isBeforeLegalize() ||
- allowsUnalignedMemoryAccesses(ST->getMemoryVT()) ||
+ allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
+ ST->getAddressSpace(),
+ ST->getAlignment()) ||
ST->isVolatile() || ST->isIndexed()) {
break;
}
@@ -1912,7 +1922,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
unsigned Size = TD->getTypeAllocSize(Ty);
if (AM.BaseGV) {
return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
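
The LowerLOAD path touched in this file still handles a 2-byte-aligned i32 load by issuing a zero-extending 16-bit load of the low half and a 16-bit load of the high half, then recombining them with a shift and an or (the DAG nodes built around the getExtLoad calls above). The recombination step, done by hand on ordinary integers, is simply:

    #include <cstdint>
    #include <cstdio>

    // Combine two halfwords read from a 2-byte-aligned address into the
    // 32-bit value a single aligned little-endian load would have produced.
    static std::uint32_t combineHalves(std::uint16_t Low, std::uint16_t High) {
      return static_cast<std::uint32_t>(Low) |
             (static_cast<std::uint32_t>(High) << 16);
    }

    int main() {
      std::printf("0x%08x\n", combineHalves(0xbeef, 0xdead)); // 0xdeadbeef
      return 0;
    }

The patch itself only changes the guard around this code, replacing allowsUnalignedMemoryAccesses with allowsMisalignedMemoryAccesses so the address space and the actual alignment of the access are taken into account.
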
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 62b89c3..13154c6 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREISELLOWERING_H
-#define XCOREISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREISELLOWERING_H
+#define LLVM_LIB_TARGET_XCORE_XCOREISELLOWERING_H
#include "XCore.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -215,4 +215,4 @@ namespace llvm {
};
}
-#endif // XCOREISELLOWERING_H
+#endif
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 36ea9a0..c310aa3 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -446,16 +446,19 @@ MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate(
dl = MI->getDebugLoc();
if (isImmMskBitp(Value)) {
int N = Log2_32(Value) + 1;
- return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg).addImm(N);
+ return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg)
+ .addImm(N)
+ .getInstr();
}
if (isImmU16(Value)) {
int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
- return BuildMI(MBB, MI, dl, get(Opcode), Reg).addImm(Value);
+ return BuildMI(MBB, MI, dl, get(Opcode), Reg).addImm(Value).getInstr();
}
MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool();
const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Value);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
return BuildMI(MBB, MI, dl, get(XCore::LDWCP_lru6), Reg)
- .addConstantPoolIndex(Idx);
+ .addConstantPoolIndex(Idx)
+ .getInstr();
}
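
The loadImmediate change above is mechanical (returning the MachineInstr via getInstr()), but the MKMSK path it preserves is worth a note: it fires when the constant is a contiguous mask of low bits, and the immediate it emits is the mask width, Log2_32(Value) + 1. Under the assumption that this is essentially what isImmMskBitp tests (the rus encoding may further restrict which widths are accepted), the mask test and the width computation can be checked with ordinary integer code:

    #include <cstdint>
    #include <cstdio>

    // A nonzero value is a contiguous mask of low bits iff V & (V + 1) == 0.
    static bool isLowBitMask(std::uint32_t V) {
      return V != 0 && (V & (V + 1)) == 0;
    }

    // Width of the mask: index of the highest set bit, plus one.
    static unsigned maskWidth(std::uint32_t V) {
      unsigned N = 0;
      while (V) {
        ++N;
        V >>= 1;
      }
      return N;
    }

    int main() {
      const std::uint32_t Vals[] = {0x1, 0xF, 0xFF, 0x7FFFFFFF, 0x6};
      for (std::uint32_t V : Vals) {
        if (isLowBitMask(V))
          std::printf("0x%x -> mkmsk %u\n", V, maskWidth(V));
        else
          std::printf("0x%x is not a low-bit mask\n", V);
      }
      return 0;
    }
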
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index e0be96b..60bb3f8 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREINSTRUCTIONINFO_H
-#define XCOREINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREINSTRINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCOREINSTRINFO_H
#include "XCoreRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 00cb705..d34ed7a 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -412,7 +412,7 @@ def STW_l3r : _FL3R<0b000001100, (outs),
(ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
"stw $val, $addr[$offset]", []>;
-def STW_2rus : _F2RUS<0b0000, (outs),
+def STW_2rus : _F2RUS<0b00000, (outs),
(ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
"stw $val, $addr[$offset]", []>;
}
@@ -902,7 +902,7 @@ def BYTEREV_l2r : _FL2R<0b0000011001, (outs GRRegs:$dst), (ins GRRegs:$src),
"byterev $dst, $src",
[(set GRRegs:$dst, (bswap GRRegs:$src))]>;
-def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
+def CLZ_l2r : _FL2R<0b0000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
"clz $dst, $src",
[(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
index 28e702b..5691478 100644
--- a/lib/Target/XCore/XCoreMCInstLower.h
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREMCINSTLOWER_H
-#define XCOREMCINSTLOWER_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREMCINSTLOWER_H
+#define LLVM_LIB_TARGET_XCORE_XCOREMCINSTLOWER_H
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 212a5cf..078ffde 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREMACHINEFUNCTIONINFO_H
-#define XCOREMACHINEFUNCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -103,4 +103,4 @@ public:
};
} // End llvm namespace
-#endif // XCOREMACHINEFUNCTIONINFO_H
+#endif
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 316c82c..5c666ae 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
+#include "XCoreSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -98,7 +99,7 @@ static void InsertFPConstInst(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
- RS->setUsed(ScratchOffset);
+ RS->setRegUsed(ScratchOffset);
TII.loadImmediate(MBB, II, ScratchOffset, Offset);
switch (MI.getOpcode()) {
@@ -170,12 +171,12 @@ static void InsertSPConstInst(MachineBasicBlock::iterator II,
unsigned ScratchBase;
if (OpCode==XCore::STWFI) {
ScratchBase = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
- RS->setUsed(ScratchBase);
+ RS->setRegUsed(ScratchBase);
} else
ScratchBase = Reg;
BuildMI(MBB, II, dl, TII.get(XCore::LDAWSP_ru6), ScratchBase).addImm(0);
unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
- RS->setUsed(ScratchOffset);
+ RS->setRegUsed(ScratchOffset);
TII.loadImmediate(MBB, II, ScratchOffset, Offset);
switch (OpCode) {
@@ -221,7 +222,7 @@ const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF
XCore::R8, XCore::R9,
0
};
- const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
if (TFI->hasFP(*MF))
return CalleeSavedRegsFP;
return CalleeSavedRegs;
@@ -229,7 +230,7 @@ const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF
BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
Reserved.set(XCore::CP);
Reserved.set(XCore::DP);
@@ -267,9 +268,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
const XCoreInstrInfo &TII =
- *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
int StackSize = MF.getFrameInfo()->getStackSize();
@@ -323,7 +324,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
}
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index aa617a0..5d7721c 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCOREREGISTERINFO_H
-#define XCOREREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCOREREGISTERINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCOREREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 91b33fd..a348844 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -33,7 +33,7 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
// Call __memcpy_4 if the src, dst and size are all 4 byte aligned.
if (!AlwaysInline && (Align & 3) == 0 &&
DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) {
- const TargetLowering &TLI = *DAG.getTarget().getTargetLowering();
+ const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = TLI.getDataLayout()->getIntPtrType(*DAG.getContext());
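
The guard above ("src, dst and size are all 4 byte aligned") uses the usual low-bits trick: a value is a multiple of 4 exactly when its two least-significant bits are clear, which is what (Align & 3) == 0 checks directly and what MaskedValueIsZero(Size, 3) proves for the size operand. In ordinary code the same test reads:

    #include <cstdint>
    #include <cstdio>

    // A value is 4-byte aligned iff its low two bits are zero.
    static bool isAligned4(std::uint32_t V) { return (V & 3u) == 0; }

    int main() {
      const std::uint32_t Vals[] = {16, 18, 20, 7};
      for (std::uint32_t V : Vals)
        std::printf("%u -> %s\n", V, isAligned4(V) ? "aligned" : "unaligned");
      return 0;
    }

Only when that holds does this hook emit the call to __memcpy_4; otherwise it leaves the memcpy to the generic expansion.
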
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 0079de1..cfd80b3 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORESELECTIONDAGINFO_H
-#define XCORESELECTIONDAGINFO_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORESELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_XCORE_XCORESELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 1e9810b..695578d 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORESUBTARGET_H
-#define XCORESUBTARGET_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORESUBTARGET_H
+#define LLVM_LIB_TARGET_XCORE_XCORESUBTARGET_H
#include "XCoreFrameLowering.h"
#include "XCoreISelLowering.h"
@@ -48,14 +48,20 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const XCoreFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const XCoreTargetLowering *getTargetLowering() const { return &TLInfo; }
- const XCoreSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
- const TargetRegisterInfo *getRegisterInfo() const {
+ const XCoreInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const XCoreFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const XCoreTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const XCoreSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const TargetRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const DataLayout *getDataLayout() const { return &DL; }
+ const DataLayout *getDataLayout() const override { return &DL; }
};
} // End llvm namespace
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 8d8bb38..0fa8c21 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "XCoreTargetMachine.h"
+#include "XCoreTargetObjectFile.h"
#include "XCore.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Module.h"
@@ -26,10 +27,13 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
+XCoreTargetMachine::~XCoreTargetMachine() {}
+
namespace {
/// XCore Code Generator Pass Configuration Options.
class XCorePassConfig : public TargetPassConfig {
@@ -41,6 +45,7 @@ public:
return getTM<XCoreTargetMachine>();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addPreEmitPass() override;
@@ -51,6 +56,12 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
return new XCorePassConfig(this, PM);
}
+void XCorePassConfig::addIRPasses() {
+ addPass(createAtomicExpandPass(&getXCoreTargetMachine()));
+
+ TargetPassConfig::addIRPasses();
+}
+
bool XCorePassConfig::addPreISel() {
addPass(createXCoreLowerThreadLocalPass());
return false;
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 14c43bf..8ff9269 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORETARGETMACHINE_H
-#define XCORETARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
+#define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
#include "XCoreSubtarget.h"
#include "llvm/Target/TargetMachine.h"
@@ -20,37 +20,24 @@
namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
XCoreSubtarget Subtarget;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ ~XCoreTargetMachine() override;
- const XCoreInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const XCoreFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const XCoreTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
- const XCoreSelectionDAGInfo* getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
- const TargetRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index ab0f7ad..86d0de6 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -145,9 +145,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
if (Kind.isMergeableConst16()) return MergeableConst16Section;
}
Type *ObjType = GV->getType()->getPointerElementType();
- if (TM.getCodeModel() == CodeModel::Small ||
- !ObjType->isSized() ||
- TM.getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) {
+ if (TM.getCodeModel() == CodeModel::Small || !ObjType->isSized() ||
+ TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(ObjType) <
+ CodeModelLargeSize) {
if (Kind.isReadOnly()) return UseCPRel? ReadOnlySection
: DataRelROSection;
if (Kind.isBSS() || Kind.isCommon())return BSSSection;
@@ -165,8 +165,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
report_fatal_error("Target does not support TLS or Common sections");
}
-const MCSection *XCoreTargetObjectFile::
-getSectionForConstant(SectionKind Kind) const {
+const MCSection *
+XCoreTargetObjectFile::getSectionForConstant(SectionKind Kind,
+ const Constant *C) const {
if (Kind.isMergeableConst4()) return MergeableConst4Section;
if (Kind.isMergeableConst8()) return MergeableConst8Section;
if (Kind.isMergeableConst16()) return MergeableConst16Section;
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index 34d756e..7d3f49d 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
-#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORETARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_XCORE_XCORETARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -34,7 +34,8 @@ static const unsigned CodeModelLargeSize = 256;
Mangler &Mang,
const TargetMachine &TM) const override;
- const MCSection *getSectionForConstant(SectionKind Kind) const override;
+ const MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override;
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h
index 0a394da..48bf0fa 100644
--- a/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/lib/Target/XCore/XCoreTargetStreamer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef XCORETARGETSTREAMER_H
-#define XCORETARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_XCORE_XCORETARGETSTREAMER_H
+#define LLVM_LIB_TARGET_XCORE_XCORETARGETSTREAMER_H
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
index 80d193d..da232da 100644
--- a/lib/Target/XCore/XCoreTargetTransformInfo.cpp
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
@@ -43,17 +43,17 @@ public:
initializeXCoreTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override {
+ void initializePass() override {
pushTTIStack(this);
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
static char ID;
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;
diff --git a/lib/Transforms/Hello/CMakeLists.txt b/lib/Transforms/Hello/CMakeLists.txt
index e724dbc..3851b35 100644
--- a/lib/Transforms/Hello/CMakeLists.txt
+++ b/lib/Transforms/Hello/CMakeLists.txt
@@ -6,6 +6,10 @@ if( NOT LLVM_REQUIRES_RTTI )
endif()
endif()
+if(WIN32 OR CYGWIN)
+ set(LLVM_LINK_COMPONENTS Core Support)
+endif()
+
add_llvm_loadable_module( LLVMHello
Hello.cpp
)
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index f9de54a..c4706e8 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -78,11 +78,15 @@ namespace {
const DataLayout *DL;
private:
+ bool isDenselyPacked(Type *type);
+ bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
CallGraphNode *DoPromotion(Function *F,
- SmallPtrSet<Argument*, 8> &ArgsToPromote,
- SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ SmallPtrSetImpl<Argument*> &ArgsToPromote,
+ SmallPtrSetImpl<Argument*> &ByValArgsToTransform);
+
+ using llvm::Pass::doInitialization;
bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
@@ -123,6 +127,78 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
+/// \brief Checks if a type could have padding bytes.
+bool ArgPromotion::isDenselyPacked(Type *type) {
+
+ // There is no size information, so be conservative.
+ if (!type->isSized())
+ return false;
+
+ // If the alloc size is not equal to the storage size, then there are padding
+ // bytes. For x86_fp80 on x86-64, size: 80, alloc size: 128.
+ if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type))
+ return false;
+
+ if (!isa<CompositeType>(type))
+ return true;
+
+ // For homogeneous sequential types, check for padding within members.
+ if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+ return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType());
+
+ // Check for padding within and between elements of a struct.
+ StructType *StructTy = cast<StructType>(type);
+ const StructLayout *Layout = DL->getStructLayout(StructTy);
+ uint64_t StartPos = 0;
+ for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
+ Type *ElTy = StructTy->getElementType(i);
+ if (!isDenselyPacked(ElTy))
+ return false;
+ if (StartPos != Layout->getElementOffsetInBits(i))
+ return false;
+ StartPos += DL->getTypeAllocSizeInBits(ElTy);
+ }
+
+ return true;
+}
+
+/// \brief Checks if the padding bytes of an argument could be accessed.
+bool ArgPromotion::canPaddingBeAccessed(Argument *arg) {
+
+ assert(arg->hasByValAttr());
+
+ // Track all the pointers to the argument to make sure they are not captured.
+ SmallPtrSet<Value *, 16> PtrValues;
+ PtrValues.insert(arg);
+
+ // Track all of the stores.
+ SmallVector<StoreInst *, 16> Stores;
+
+ // Scan through the uses recursively to make sure the pointer is always used
+ // sanely.
+ SmallVector<Value *, 16> WorkList;
+ WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+ if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
+ if (PtrValues.insert(V).second)
+ WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+ } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+ Stores.push_back(Store);
+ } else if (!isa<LoadInst>(V)) {
+ return true;
+ }
+ }
+
+ // Check to make sure the pointers aren't captured.
+ for (StoreInst *Store : Stores)
+ if (PtrValues.count(Store->getValueOperand()))
+ return true;
+
+ return false;
+}
+
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
@@ -154,6 +230,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
isSelfRecursive = true;
}
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg()) return nullptr;
+
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
@@ -163,9 +246,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
// If this is a byval argument, and if the aggregate type is small, just
- // pass the elements, which is always safe. This does not apply to
- // inalloca.
- if (PtrArg->hasByValAttr()) {
+ // pass the elements, which is always safe, if the passed value is densely
+ // packed or if we can prove the padding bytes are never accessed. This does
+ // not apply to inalloca.
+ bool isSafeToPromote =
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
+ if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
DEBUG(dbgs() << "argpromotion disable promoting argument '"
@@ -443,7 +530,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// of elements of the aggregate.
return false;
}
- ToPromote.insert(Operands);
+ ToPromote.insert(std::move(Operands));
}
}
@@ -475,10 +562,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
// loading block.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
- for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
- I = idf_ext_begin(P, TranspBlocks),
- E = idf_ext_end(P, TranspBlocks); I != E; ++I)
- if (AA.canBasicBlockModify(**I, Loc))
+ for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
+ if (AA.canBasicBlockModify(*TranspBB, Loc))
return false;
}
}
@@ -493,8 +578,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
CallGraphNode *ArgPromotion::DoPromotion(Function *F,
- SmallPtrSet<Argument*, 8> &ArgsToPromote,
- SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {
+ SmallPtrSetImpl<Argument*> &ArgsToPromote,
+ SmallPtrSetImpl<Argument*> &ByValArgsToTransform) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
@@ -615,9 +700,15 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(F);
- if (DI != FunctionDIs.end())
- DI->second.replaceFunction(NF);
-
+ if (DI != FunctionDIs.end()) {
+ DISubprogram SP = DI->second;
+ SP.replaceFunction(NF);
+ // Ensure the map is updated so it can be reused on subsequent argument
+ // promotions of the same function.
+ FunctionDIs.erase(DI);
+ FunctionDIs[NF] = SP;
+ }
+
DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
@@ -716,9 +807,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// of the previous load.
LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
newLoad->setAlignment(OrigLoad->getAlignment());
- // Transfer the TBAA info too.
- newLoad->setMetadata(LLVMContext::MD_tbaa,
- OrigLoad->getMetadata(LLVMContext::MD_tbaa));
+ // Transfer the AA info too.
+ AAMDNodes AAInfo;
+ OrigLoad->getAAMetadata(AAInfo);
+ newLoad->setAAMetadata(AAInfo);
+
Args.push_back(newLoad);
AA.copyValue(OrigLoad, Args.back());
}
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 23be081..0b6ade9 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -66,7 +66,7 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
/// Find values that are marked as llvm.used.
static void FindUsedValues(GlobalVariable *LLVMUsed,
- SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
if (!LLVMUsed) return;
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index ac3853d..4045c09 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -199,10 +199,15 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
return false;
// Okay, we know we can transform this function if safe. Scan its body
- // looking for calls to llvm.vastart.
+ // looking for calls marked musttail or calls to llvm.vastart.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ continue;
+ if (CI->isMustTailCall())
+ return false;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
if (II->getIntrinsicID() == Intrinsic::vastart)
return false;
}
@@ -297,8 +302,14 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Patch the pointer to LLVM function in debug info descriptor.
auto DI = FunctionDIs.find(&Fn);
- if (DI != FunctionDIs.end())
- DI->second.replaceFunction(NF);
+ if (DI != FunctionDIs.end()) {
+ DISubprogram SP = DI->second;
+ SP.replaceFunction(NF);
+ // Ensure the map is updated so it can be reused on non-varargs argument
+ // eliminations of the same function.
+ FunctionDIs.erase(DI);
+ FunctionDIs[NF] = SP;
+ }
// Fix up any BlockAddresses that refer to the function.
Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType()));
@@ -1088,8 +1099,8 @@ bool DAE::runOnModule(Module &M) {
// determine that dead arguments passed into recursive functions are dead).
//
DEBUG(dbgs() << "DAE - Determining liveness\n");
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- SurveyFunction(*I);
+ for (auto &F : M)
+ SurveyFunction(F);
// Now, remove all dead arguments and return values from each function in
// turn.
@@ -1102,11 +1113,8 @@ bool DAE::runOnModule(Module &M) {
// Finally, look for any unused parameters in functions with non-local
// linkage and replace the passed in parameters with undef.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function& F = *I;
-
+ for (auto &F : M)
Changed |= RemoveDeadArgumentsFromCallers(F);
- }
return Changed;
}
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 40ec9fa..2f8c7d9 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -91,7 +91,7 @@ namespace {
continue;
}
- makeVisible(*I, Delete);
+ makeVisible(*I, Delete);
if (Delete)
I->setInitializer(nullptr);
@@ -106,7 +106,7 @@ namespace {
continue;
}
- makeVisible(*I, Delete);
+ makeVisible(*I, Delete);
if (Delete)
I->deleteBody();
@@ -118,8 +118,8 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- bool Delete = deleteStuff == (bool)Named.count(CurI);
- makeVisible(*CurI, Delete);
+ bool Delete = deleteStuff == (bool)Named.count(CurI);
+ makeVisible(*CurI, Delete);
if (Delete) {
Type *Ty = CurI->getType()->getElementType();
@@ -148,7 +148,7 @@ namespace {
char GVExtractorPass::ID = 0;
}
-ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
+ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs,
bool deleteFn) {
return new GVExtractorPass(GVs, deleteFn);
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 8174df9..823ae53 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -161,8 +161,9 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (!F)
- // External node - may write memory. Just give up.
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
+ // External node or node we don't want to optimize - assume it may write
+ // memory and give up.
return false;
AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
@@ -204,9 +205,11 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
CI != CE; ++CI) {
Value *Arg = *CI;
if (Arg->getType()->isPointerTy()) {
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+
AliasAnalysis::Location Loc(Arg,
- AliasAnalysis::UnknownSize,
- I->getMetadata(LLVMContext::MD_tbaa));
+ AliasAnalysis::UnknownSize, AAInfo);
if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
if (MRB & AliasAnalysis::Mod)
// Writes non-local memory. Give up.
@@ -443,7 +446,7 @@ determinePointerReadAttrs(Argument *A,
case Instruction::AddrSpaceCast:
// The original value is not read/written via this if the new value isn't.
for (Use &UU : I->uses())
- if (Visited.insert(&UU))
+ if (Visited.insert(&UU).second)
Worklist.push_back(&UU);
break;
@@ -457,7 +460,7 @@ determinePointerReadAttrs(Argument *A,
auto AddUsersToWorklistIfCapturing = [&] {
if (Captures)
for (Use &UU : I->uses())
- if (Visited.insert(&UU))
+ if (Visited.insert(&UU).second)
Worklist.push_back(&UU);
};
@@ -525,7 +528,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// looking up whether a given CallGraphNode is in this SCC.
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F && !F->isDeclaration() && !F->mayBeOverridden())
+ if (F && !F->isDeclaration() && !F->mayBeOverridden() &&
+ !F->hasFnAttribute(Attribute::OptimizeNone))
SCCNodes.insert(F);
}
@@ -539,8 +543,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (!F)
- // External node - only a problem for arguments that we pass to it.
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
+ // External node or function we're trying not to optimize - only a problem
+ // for arguments that we pass to it.
continue;
// Definitions with weak linkage may be overridden at linktime with
@@ -792,8 +797,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (!F)
- // External node - skip it;
+ if (!F || F->hasFnAttribute(Attribute::OptimizeNone))
+ // External node or node we don't want to optimize - skip it;
return false;
// Already noalias.
@@ -832,6 +837,9 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
/// given function and set any applicable attributes. Returns true
/// if any attributes were set and false otherwise.
bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
FunctionType *FTy = F.getFunctionType();
LibFunc::Func TheLibFunc;
if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 7e7a4c0..705e929 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -77,9 +78,6 @@ bool GlobalDCE::runOnModule(Module &M) {
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
- typedef std::multimap<const Comdat *, GlobalValue *> ComdatGVPairsTy;
- ComdatGVPairsTy ComdatGVPairs;
-
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -87,8 +85,6 @@ bool GlobalDCE::runOnModule(Module &M) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
if (!I->isDiscardableIfUnused())
GlobalIsNeeded(I);
- else if (const Comdat *C = I->getComdat())
- ComdatGVPairs.insert(std::make_pair(C, I));
}
}
@@ -100,8 +96,6 @@ bool GlobalDCE::runOnModule(Module &M) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
if (!I->isDiscardableIfUnused())
GlobalIsNeeded(I);
- else if (const Comdat *C = I->getComdat())
- ComdatGVPairs.insert(std::make_pair(C, I));
}
}
@@ -111,24 +105,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Externally visible aliases are needed.
if (!I->isDiscardableIfUnused()) {
GlobalIsNeeded(I);
- } else if (const Comdat *C = I->getComdat()) {
- ComdatGVPairs.insert(std::make_pair(C, I));
- }
- }
-
- for (ComdatGVPairsTy::iterator I = ComdatGVPairs.begin(),
- E = ComdatGVPairs.end();
- I != E;) {
- ComdatGVPairsTy::iterator UB = ComdatGVPairs.upper_bound(I->first);
- bool CanDiscard = std::all_of(I, UB, [](ComdatGVPairsTy::value_type Pair) {
- return Pair.second->isDiscardableIfUnused();
- });
- if (!CanDiscard) {
- std::for_each(I, UB, [this](ComdatGVPairsTy::value_type Pair) {
- GlobalIsNeeded(Pair.second);
- });
}
- I = UB;
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
@@ -141,7 +118,12 @@ bool GlobalDCE::runOnModule(Module &M) {
I != E; ++I)
if (!AliveGlobals.count(I)) {
DeadGlobalVars.push_back(I); // Keep track of dead globals
- I->setInitializer(nullptr);
+ if (I->hasInitializer()) {
+ Constant *Init = I->getInitializer();
+ I->setInitializer(nullptr);
+ if (isSafeToDestroyConstant(Init))
+ Init->destroyConstant();
+ }
}
// The second pass drops the bodies of functions which are dead...
@@ -203,9 +185,22 @@ bool GlobalDCE::runOnModule(Module &M) {
/// recursively mark anything that it uses as also needed.
void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// If the global is already in the set, no need to reprocess it.
- if (!AliveGlobals.insert(G))
+ if (!AliveGlobals.insert(G).second)
return;
-
+
+ Module *M = G->getParent();
+ if (Comdat *C = G->getComdat()) {
+ for (Function &F : *M)
+ if (F.getComdat() == C)
+ GlobalIsNeeded(&F);
+ for (GlobalVariable &GV : M->globals())
+ if (GV.getComdat() == C)
+ GlobalIsNeeded(&GV);
+ for (GlobalAlias &GA : M->aliases())
+ if (GA.getComdat() == C)
+ GlobalIsNeeded(&GA);
+ }
+
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
// If this is a global variable, we must make sure to add any global values
// referenced by the initializer to the alive set.
@@ -243,7 +238,7 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
// If we've already processed this constant there's no need to do it again.
Constant *Op = dyn_cast<Constant>(*I);
- if (Op && SeenConstants.insert(Op))
+ if (Op && SeenConstants.insert(Op).second)
MarkUsedGlobalsAsNeeded(Op);
}
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index c1d0d3b..6e0ae83 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -88,6 +88,7 @@ namespace {
const DataLayout *DL;
TargetLibraryInfo *TLI;
+ SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
}
@@ -612,7 +613,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
/// value will trap if the value is dynamically null. PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
- SmallPtrSet<const PHINode*, 8> &PHIs) {
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users())
if (isa<LoadInst>(U)) {
// Will trap.
@@ -638,7 +639,7 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
} else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
// If we've already seen this phi node, ignore it, it has already been
// checked.
- if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
+ if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
return false;
} else if (isa<ICmpInst>(U) &&
isa<ConstantPointerNull>(U->getOperand(1))) {
@@ -957,7 +958,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
/// it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
const GlobalVariable *GV,
- SmallPtrSet<const PHINode*, 8> &PHIs) {
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users()) {
const Instruction *Inst = cast<Instruction>(U);
@@ -981,7 +982,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
// PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
// cycles.
- if (PHIs.insert(PN))
+ if (PHIs.insert(PN).second)
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
return false;
continue;
@@ -1047,8 +1048,8 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
/// of a load) are simple enough to perform heap SRA on. This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
- SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
- SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
+ SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
+ SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
// We permit two users of the load: setcc comparing against the null
// pointer, and a getelementptr of a specific form.
for (const User *U : V->users()) {
@@ -1072,11 +1073,11 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
}
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
- if (!LoadUsingPHIsPerLoad.insert(PN))
+ if (!LoadUsingPHIsPerLoad.insert(PN).second)
// This means some phi nodes are dependent on each other.
// Avoid infinite looping!
return false;
- if (!LoadUsingPHIs.insert(PN))
+ if (!LoadUsingPHIs.insert(PN).second)
// If we have already analyzed this PHI, then it is safe.
continue;
@@ -1115,9 +1116,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
// that all inputs to the PHI nodes are in the same equivalence sets.
// Check to verify that all operands of the PHIs are either PHIS that can be
// transformed, loads from GV, or MI itself.
- for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
- , E = LoadUsingPHIs.end(); I != E; ++I) {
- const PHINode *PN = *I;
+ for (const PHINode *PN : LoadUsingPHIs) {
for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
Value *InVal = PN->getIncomingValue(op);
@@ -1910,8 +1909,11 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
// Functions without names cannot be referenced outside this module.
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
+
+ const Comdat *C = F->getComdat();
+ bool inComdat = C && NotDiscardableComdats.count(C);
F->removeDeadConstantUsers();
- if (F->isDefTriviallyDead()) {
+ if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) {
F->eraseFromParent();
Changed = true;
++NumFnDeleted;
@@ -1943,12 +1945,6 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
bool GlobalOpt::OptimizeGlobalVars(Module &M) {
bool Changed = false;
- SmallSet<const Comdat *, 8> NotDiscardableComdats;
- for (const GlobalVariable &GV : M.globals())
- if (const Comdat *C = GV.getComdat())
- if (!GV.isDiscardableIfUnused())
- NotDiscardableComdats.insert(C);
-
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
GlobalVariable *GV = GVI++;
@@ -1965,7 +1961,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
if (GV->isDiscardableIfUnused()) {
if (const Comdat *C = GV->getComdat())
- if (NotDiscardableComdats.count(C))
+ if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage())
continue;
Changed |= ProcessGlobal(GV, GVI);
}
@@ -1975,7 +1971,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants,
+ SmallPtrSetImpl<Constant*> &SimpleConstants,
const DataLayout *DL);
@@ -1988,7 +1984,7 @@ isSimpleEnoughValueToCommit(Constant *C,
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
static bool isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants,
+ SmallPtrSetImpl<Constant*> &SimpleConstants,
const DataLayout *DL) {
// Simple global addresses are supported, do not allow dllimport or
// thread-local globals.
@@ -2046,10 +2042,11 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants,
+ SmallPtrSetImpl<Constant*> &SimpleConstants,
const DataLayout *DL) {
// If we already checked this constant, we win.
- if (!SimpleConstants.insert(C)) return true;
+ if (!SimpleConstants.insert(C).second)
+ return true;
// Check the constant.
return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
}
@@ -2217,7 +2214,7 @@ public:
return MutatedMemory;
}
- const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
+ const SmallPtrSetImpl<GlobalVariable*> &getInvariants() const {
return Invariants;
}
@@ -2394,6 +2391,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
getVal(SI->getOperand(2)));
DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
<< "\n");
+ } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getExtractValue(
+ getVal(EVI->getAggregateOperand()), EVI->getIndices());
+ DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult
+ << "\n");
+ } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {
+ InstResult = ConstantExpr::getInsertValue(
+ getVal(IVI->getAggregateOperand()),
+ getVal(IVI->getInsertedValueOperand()), IVI->getIndices());
+ DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult
+ << "\n");
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
Constant *P = getVal(GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
@@ -2663,7 +2671,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
// Okay, we succeeded in evaluating this control flow. See if we have
// executed the new block before. If so, we have a looping function,
// which we cannot evaluate in reasonable time.
- if (!ExecutedBlocks.insert(NextBB))
+ if (!ExecutedBlocks.insert(NextBB).second)
return false; // looped!
// Okay, we have never been in this block before. Check to see if there
@@ -2700,10 +2708,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
I != E; ++I)
CommitValueTo(I->second, I->first);
- for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
- Eval.getInvariants().begin(), E = Eval.getInvariants().end();
- I != E; ++I)
- (*I)->setConstant(true);
+ for (GlobalVariable *GV : Eval.getInvariants())
+ GV->setConstant(true);
}
return EvalSuccess;
@@ -2714,7 +2720,7 @@ static int compareNames(Constant *const *A, Constant *const *B) {
}
static void setUsedInitializer(GlobalVariable &V,
- SmallPtrSet<GlobalValue *, 8> Init) {
+ const SmallPtrSet<GlobalValue *, 8> &Init) {
if (Init.empty()) {
V.eraseFromParent();
return;
@@ -2724,10 +2730,9 @@ static void setUsedInitializer(GlobalVariable &V,
PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
SmallVector<llvm::Constant *, 8> UsedArray;
- for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
- I != E; ++I) {
+ for (GlobalValue *GV : Init) {
Constant *Cast
- = ConstantExpr::getPointerBitCastOrAddrSpaceCast(*I, Int8PtrTy);
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
UsedArray.push_back(Cast);
}
// Sort to get deterministic order.
@@ -2758,18 +2763,27 @@ public:
CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
}
typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
+ typedef iterator_range<iterator> used_iterator_range;
iterator usedBegin() { return Used.begin(); }
iterator usedEnd() { return Used.end(); }
+ used_iterator_range used() {
+ return used_iterator_range(usedBegin(), usedEnd());
+ }
iterator compilerUsedBegin() { return CompilerUsed.begin(); }
iterator compilerUsedEnd() { return CompilerUsed.end(); }
+ used_iterator_range compilerUsed() {
+ return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
+ }
bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
bool compilerUsedCount(GlobalValue *GV) const {
return CompilerUsed.count(GV);
}
bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
- bool usedInsert(GlobalValue *GV) { return Used.insert(GV); }
- bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); }
+ bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
+ bool compilerUsedInsert(GlobalValue *GV) {
+ return CompilerUsed.insert(GV).second;
+ }
void syncVariablesAndSets() {
if (UsedV)
@@ -2814,7 +2828,8 @@ static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}
-static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) {
+static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
+ bool &RenameTarget) {
RenameTarget = false;
bool Ret = false;
if (hasUseOtherThanLLVMUsed(GA, U))
@@ -2849,10 +2864,8 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool Changed = false;
LLVMUsed Used(M);
- for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(),
- E = Used.usedEnd();
- I != E; ++I)
- Used.compilerUsedErase(*I);
+ for (GlobalValue *GV : Used.used())
+ Used.compilerUsedErase(GV);
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E;) {
@@ -2963,7 +2976,7 @@ static bool cxxDtorIsEmpty(const Function &Fn,
SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
// Don't treat recursive functions as empty.
- if (!NewCalledFunctions.insert(CalledFn))
+ if (!NewCalledFunctions.insert(CalledFn).second)
return false;
if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
@@ -3035,6 +3048,20 @@ bool GlobalOpt::runOnModule(Module &M) {
while (LocalChange) {
LocalChange = false;
+ NotDiscardableComdats.clear();
+ for (const GlobalVariable &GV : M.globals())
+ if (const Comdat *C = GV.getComdat())
+ if (!GV.isDiscardableIfUnused() || !GV.use_empty())
+ NotDiscardableComdats.insert(C);
+ for (Function &F : M)
+ if (const Comdat *C = F.getComdat())
+ if (!F.isDefTriviallyDead())
+ NotDiscardableComdats.insert(C);
+ for (GlobalAlias &GA : M.aliases())
+ if (const Comdat *C = GA.getComdat())
+ if (!GA.isDiscardableIfUnused() || !GA.use_empty())
+ NotDiscardableComdats.insert(C);
+
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M);
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 624cb90..819b2e0 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -14,6 +14,8 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -65,6 +67,8 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index d189756..d9a2b9e 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -73,6 +75,8 @@ static int computeThresholdFromOptLevels(unsigned OptLevel,
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 9087ab2..3abe7a8 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -16,6 +16,8 @@
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -74,6 +76,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
/// the call graph. If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionTracker>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -215,7 +219,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// If the inlined function already uses this alloca then we can't reuse
// it.
- if (!UsedAllocas.insert(AvailableAlloca))
+ if (!UsedAllocas.insert(AvailableAlloca).second)
continue;
// Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
@@ -357,8 +361,7 @@ bool Inliner::shouldInline(CallSite CS) {
// FIXME: All of this logic should be sunk into getInlineCost. It relies on
// the internal implementation of the inline cost metrics rather than
// treating them as truly abstract units etc.
- if (Caller->hasLocalLinkage() ||
- Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
+ if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) {
int TotalSecondaryCost = 0;
// The candidate cost to be imposed upon the current function.
int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
@@ -440,9 +443,11 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
@@ -501,7 +506,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, DL);
+ InlineFunctionInfo InlineInfo(&CG, DL, AA, AT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -664,6 +669,13 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
if (!F->isDefTriviallyDead())
continue;
+
+ // It is unsafe to drop a function with discardable linkage from a COMDAT
+ // without also dropping the other members of the COMDAT.
+ // The inliner doesn't visit non-function entities which are in COMDAT
+ // groups, so it is unsafe to do so *unless* the linkage is local.
+ if (!F->hasLocalLinkage() && F->hasComdat())
+ continue;
// Remove any call graph edges from the function to its callees.
CGN->removeAllCalledFunctions();
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index c970a1a..7950163 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -148,9 +148,7 @@ bool InternalizePass::runOnModule(Module &M) {
// we don't see references from function local inline assembly. To be
// conservative, we internalize symbols in llvm.compiler.used, but we
// keep llvm.compiler.used so that the symbol is not deleted by llvm.
- for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.begin(), E = Used.end();
- I != E; ++I) {
- GlobalValue *V = *I;
+ for (GlobalValue *V : Used) {
ExternalNames.insert(V->getName());
}
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 559ef0b..b91ebf2 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -286,7 +286,7 @@ private:
/// 6.4.Load: range metadata (as integer numbers)
/// At this stage it's better to see the code, since it's not more than 10-15
/// lines for a particular instruction, and could change sometimes.
- int cmpOperation(const Instruction *L, const Instruction *R) const;
+ int cmpOperations(const Instruction *L, const Instruction *R) const;
/// Compare two GEPs for equivalent pointer arithmetic.
/// Parts to be compared for each comparison stage,
@@ -297,9 +297,9 @@ private:
/// 3. Pointer operand type (using cmpType method).
/// 4. Number of operands.
/// 5. Compare operands, using cmpValues method.
- int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
- int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
- return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+ int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR);
+ int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+ return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
}
/// cmpType - compares two types,
@@ -342,12 +342,12 @@ private:
/// be checked in the same way. If we get Res != 0 at some stage, return it.
/// Otherwise return 0.
/// 6. For all other cases put llvm_unreachable.
- int cmpType(Type *TyL, Type *TyR) const;
+ int cmpTypes(Type *TyL, Type *TyR) const;
int cmpNumbers(uint64_t L, uint64_t R) const;
- int cmpAPInt(const APInt &L, const APInt &R) const;
- int cmpAPFloat(const APFloat &L, const APFloat &R) const;
+ int cmpAPInts(const APInt &L, const APInt &R) const;
+ int cmpAPFloats(const APFloat &L, const APFloat &R) const;
int cmpStrings(StringRef L, StringRef R) const;
int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
@@ -392,15 +392,15 @@ private:
DenseMap<const Value*, int> sn_mapL, sn_mapR;
};
-class FunctionPtr {
+class FunctionNode {
AssertingVH<Function> F;
const DataLayout *DL;
public:
- FunctionPtr(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
+ FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
Function *getFunc() const { return F; }
void release() { F = 0; }
- bool operator<(const FunctionPtr &RHS) const {
+ bool operator<(const FunctionNode &RHS) const {
return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1;
}
};
@@ -412,7 +412,7 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
return 0;
}
-int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
+int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
return Res;
if (L.ugt(R)) return 1;
@@ -420,11 +420,11 @@ int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
return 0;
}
-int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const {
+int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
(uint64_t)&R.getSemantics()))
return Res;
- return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt());
+ return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
@@ -474,7 +474,7 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
// Check whether types are bitcastable. This part is just re-factored
// Type::canLosslesslyBitCastTo method, but instead of returning true/false,
// we also pack into result which type is "less" for us.
- int TypesRes = cmpType(TyL, TyR);
+ int TypesRes = cmpTypes(TyL, TyR);
if (TypesRes != 0) {
// Types are different, but check whether we can bitcast them.
if (!TyL->isFirstClassType()) {
@@ -541,12 +541,12 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
case Value::ConstantIntVal: {
const APInt &LInt = cast<ConstantInt>(L)->getValue();
const APInt &RInt = cast<ConstantInt>(R)->getValue();
- return cmpAPInt(LInt, RInt);
+ return cmpAPInts(LInt, RInt);
}
case Value::ConstantFPVal: {
const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
- return cmpAPFloat(LAPF, RAPF);
+ return cmpAPFloats(LAPF, RAPF);
}
case Value::ConstantArrayVal: {
const ConstantArray *LA = cast<ConstantArray>(L);
@@ -615,7 +615,7 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
-int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
@@ -665,8 +665,7 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
return cmpNumbers(STyL->isPacked(), STyR->isPacked());
for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
- if (int Res = cmpType(STyL->getElementType(i),
- STyR->getElementType(i)))
+ if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
return Res;
}
return 0;
@@ -681,11 +680,11 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
if (FTyL->isVarArg() != FTyR->isVarArg())
return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
- if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType()))
+ if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
return Res;
for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
- if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i)))
+ if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
return Res;
}
return 0;
@@ -696,7 +695,7 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
ArrayType *ATyR = cast<ArrayType>(TyR);
if (ATyL->getNumElements() != ATyR->getNumElements())
return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
- return cmpType(ATyL->getElementType(), ATyR->getElementType());
+ return cmpTypes(ATyL->getElementType(), ATyR->getElementType());
}
}
}
@@ -705,8 +704,8 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
-int FunctionComparator::cmpOperation(const Instruction *L,
- const Instruction *R) const {
+int FunctionComparator::cmpOperations(const Instruction *L,
+ const Instruction *R) const {
// Differences from Instruction::isSameOperationAs:
// * replace type comparison with calls to isEquivalentType.
// * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
@@ -717,7 +716,7 @@ int FunctionComparator::cmpOperation(const Instruction *L,
if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
return Res;
- if (int Res = cmpType(L->getType(), R->getType()))
+ if (int Res = cmpTypes(L->getType(), R->getType()))
return Res;
if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
@@ -728,7 +727,7 @@ int FunctionComparator::cmpOperation(const Instruction *L,
// if all operands are the same type
for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
if (int Res =
- cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
return Res;
}
@@ -766,13 +765,23 @@ int FunctionComparator::cmpOperation(const Instruction *L,
if (int Res = cmpNumbers(CI->getCallingConv(),
cast<CallInst>(R)->getCallingConv()))
return Res;
- return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes());
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+ return Res;
+ return cmpNumbers(
+ (uint64_t)CI->getMetadata(LLVMContext::MD_range),
+ (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
if (int Res = cmpNumbers(CI->getCallingConv(),
cast<InvokeInst>(R)->getCallingConv()))
return Res;
- return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes());
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ return cmpNumbers(
+ (uint64_t)CI->getMetadata(LLVMContext::MD_range),
+ (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
@@ -835,7 +844,7 @@ int FunctionComparator::cmpOperation(const Instruction *L,
// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
-int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
+int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
const GEPOperator *GEPR) {
unsigned int ASL = GEPL->getPointerAddressSpace();
@@ -851,7 +860,7 @@ int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
GEPR->accumulateConstantOffset(*DL, OffsetR))
- return cmpAPInt(OffsetL, OffsetR);
+ return cmpAPInts(OffsetL, OffsetR);
}
if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
@@ -935,10 +944,10 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
if (int Res =
cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
return Res;
- if (int Res = cmpGEP(GEPL, GEPR))
+ if (int Res = cmpGEPs(GEPL, GEPR))
return Res;
} else {
- if (int Res = cmpOperation(InstL, InstR))
+ if (int Res = cmpOperations(InstL, InstR))
return Res;
assert(InstL->getNumOperands() == InstR->getNumOperands());
@@ -950,7 +959,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID()))
return Res;
// TODO: Already checked in cmpOperation
- if (int Res = cmpType(OpL->getType(), OpR->getType()))
+ if (int Res = cmpTypes(OpL->getType(), OpR->getType()))
return Res;
}
}
@@ -998,7 +1007,7 @@ int FunctionComparator::compare() {
if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
return Res;
- if (int Res = cmpType(FnL->getFunctionType(), FnR->getFunctionType()))
+ if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
return Res;
assert(FnL->arg_size() == FnR->arg_size() &&
@@ -1040,7 +1049,7 @@ int FunctionComparator::compare() {
assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(TermL->getSuccessor(i)))
+ if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
continue;
FnLBBs.push_back(TermL->getSuccessor(i));
@@ -1068,7 +1077,7 @@ public:
bool runOnModule(Module &M) override;
private:
- typedef std::set<FunctionPtr> FnTreeType;
+ typedef std::set<FunctionNode> FnTreeType;
/// A work queue of functions that may have been modified and should be
/// analyzed again.
@@ -1291,11 +1300,11 @@ static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
Value *Result = UndefValue::get(DestTy);
for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
Value *Element = createCast(
- Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)),
+ Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
DestTy->getStructElementType(I));
Result =
- Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I));
+ Builder.CreateInsertValue(Result, Element, makeArrayRef(I));
}
return Result;
}
@@ -1411,14 +1420,14 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
std::pair<FnTreeType::iterator, bool> Result =
- FnTree.insert(FunctionPtr(NewFunction, DL));
+ FnTree.insert(FunctionNode(NewFunction, DL));
if (Result.second) {
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
return false;
}
- const FunctionPtr &OldF = *Result.first;
+ const FunctionNode &OldF = *Result.first;
// Don't merge tiny functions, since it can just end up making the function
// larger.
@@ -1448,7 +1457,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
void MergeFunctions::remove(Function *F) {
// We need to make sure we remove F, not a function "equal" to F per the
// function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionPtr(F, DL));
+ FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL));
size_t Erased = 0;
if (found != FnTree.end() && found->getFunc() == F) {
Erased = 1;
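The hunk above extends FunctionComparator's total ordering so that calls and invokes which differ only in their !range metadata no longer compare equal and therefore will not be merged. A minimal standalone sketch of the three-way comparison idiom used here (illustrative field names, not LLVM's actual FunctionComparator):

#include <cstdint>

// Stand-in for the per-call state that cmpOperations inspects.
struct CallInfo {
  unsigned CallingConv;  // calling convention
  uint64_t AttrsKey;     // stand-in for the attribute-set identity
  const void *RangeMD;   // stand-in for the !range metadata node (may be null)
};

// Three-way comparison returning -1, 0 or +1, like cmpNumbers in the patch.
static int cmpNumbers(uint64_t L, uint64_t R) {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}

static int cmpCalls(const CallInfo &L, const CallInfo &R) {
  if (int Res = cmpNumbers(L.CallingConv, R.CallingConv))
    return Res;
  if (int Res = cmpNumbers(L.AttrsKey, R.AttrsKey))
    return Res;
  // New discriminator added by this patch: compare the metadata nodes as
  // plain numbers, so calls with different (or missing) !range nodes differ.
  return cmpNumbers((uint64_t)(uintptr_t)L.RangeMD,
                    (uint64_t)(uintptr_t)R.RangeMD);
}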
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 46a3187..da85a91 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -17,11 +17,14 @@
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -45,6 +48,10 @@ UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+static cl::opt<bool> ExtraVectorizerPasses(
+ "extra-vectorizer-passes", cl::init(false), cl::Hidden,
+ cl::desc("Run cleanup optimization passes after vectorization."));
+
static cl::opt<bool> UseNewSROA("use-new-sroa",
cl::init(true), cl::Hidden,
cl::desc("Enable the new, experimental SROA pass"));
@@ -57,6 +64,20 @@ static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,
cl::desc("Run the load combining pass"));
+static cl::opt<bool>
+RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
+ cl::init(true), cl::Hidden,
+ cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
+ "vectorizer instead of before"));
+
+static cl::opt<bool> UseCFLAA("use-cfl-aa",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental CFL alias analysis"));
+
+static cl::opt<bool>
+EnableMLSM("mlsm", cl::init(true), cl::Hidden,
+ cl::desc("Enable motion of merged load and store"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -70,6 +91,11 @@ PassManagerBuilder::PassManagerBuilder() {
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
LoadCombine = RunLoadCombine;
+ DisableGVNLoadPRE = false;
+ VerifyInput = false;
+ VerifyOutput = false;
+ StripDebug = false;
+ MergeFunctions = false;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -106,7 +132,10 @@ PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
+ if (UseCFLAA)
+ PM.add(createCFLAliasAnalysisPass());
PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createScopedNoAliasAAPass());
PM.add(createBasicAliasAnalysisPass());
}
@@ -130,18 +159,22 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
}
void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
- // If all optimizations are disabled, just run the always-inline pass.
+ // If all optimizations are disabled, just run the always-inline pass and,
+ // if enabled, the function merging pass.
if (OptLevel == 0) {
if (Inliner) {
MPM.add(Inliner);
Inliner = nullptr;
}
- // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
- // pass manager, but we don't want to add extensions into that pass manager.
- // To prevent this we must insert a no-op module pass to reset the pass
- // manager to get the same behavior as EP_OptimizerLast in non-O0 builds.
- if (!GlobalExtensions->empty() || !Extensions.empty())
+ // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
+ // creates a CGSCC pass manager, but we don't want to add extensions into
+ // that pass manager. To prevent this we insert a no-op module pass to reset
+ // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
+ // builds. The function merging pass, being a module pass, also acts as
+ // that barrier when it is enabled.
+ if (MergeFunctions)
+ MPM.add(createMergeFunctionsPass());
+ else if (!GlobalExtensions->empty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
@@ -207,8 +240,11 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
- if (OptLevel > 1)
- MPM.add(createGVNPass()); // Remove redundancies
+ if (OptLevel > 1) {
+ if (EnableMLSM)
+ MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ }
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
@@ -224,21 +260,23 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (RerollLoops)
MPM.add(createLoopRerollPass());
- if (SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- MPM.add(createInstructionCombiningPass());
- addExtensionsToPM(EP_Peephole, MPM);
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass()); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ if (!RunSLPAfterLoopVectorization) {
+ if (SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
+ }
}
if (LoadCombine)
@@ -253,6 +291,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+
+ // Re-rotate loops in all our loop nests. These may have fallen out of
+ // rotated form due to GVN or other transformations, and the vectorizer relies
+ // on the rotated form.
+ if (ExtraVectorizerPasses)
+ MPM.add(createLoopRotatePass());
+
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
// FIXME: Because of #pragma vectorize enable, the passes below are always
// inserted in the pipeline, even when the vectorizer doesn't run (ex. when
@@ -260,12 +305,56 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// as function calls, so that we can only pass them when the vectorizer
// changed the code.
MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+ // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ MPM.add(createEarlyCSEPass());
+ MPM.add(createCorrelatedValuePropagationPass());
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createLICMPass());
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
+ }
+
+ if (RunSLPAfterLoopVectorization) {
+ if (SLPVectorize) {
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (OptLevel > 1 && ExtraVectorizerPasses) {
+ MPM.add(createEarlyCSEPass());
+ }
+ }
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
+ }
+ }
+
addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createCFGSimplificationPass());
+ MPM.add(createInstructionCombiningPass());
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
+ // After vectorization and unrolling, assume intrinsics may tell us more
+ // about pointer alignments.
+ MPM.add(createAlignmentFromAssumptionsPass());
+
if (!DisableUnitAtATime) {
// FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
@@ -277,22 +366,17 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createConstantMergePass()); // Merge dup global constants
}
}
+
+ if (MergeFunctions)
+ MPM.add(createMergeFunctionsPass());
+
addExtensionsToPM(EP_OptimizerLast, MPM);
}
-void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
- bool Internalize,
- bool RunInliner,
- bool DisableGVNLoadPRE) {
+void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
- // Now that composite has been compiled, scan through the module, looking
- // for a main function. If main is defined, mark all other functions
- // internal.
- if (Internalize)
- PM.add(createInternalizePass("main"));
-
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
@@ -316,8 +400,11 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
addExtensionsToPM(EP_Peephole, PM);
// Inline small functions
- if (RunInliner)
- PM.add(createFunctionInliningPass());
+ bool RunInliner = Inliner;
+ if (RunInliner) {
+ PM.add(Inliner);
+ Inliner = nullptr;
+ }
PM.add(createPruneEHPass()); // Remove dead EH info.
@@ -346,6 +433,8 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createGlobalsModRefPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
+ if (EnableMLSM)
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@@ -355,10 +444,16 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
- PM.add(createLoopVectorizePass(true, true));
+ PM.add(createLoopVectorizePass(true, LoopVectorize));
// More scalar chains could be vectorized due to more alias information
- PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (RunSLPAfterLoopVectorization)
+ if (SLPVectorize)
+ PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ // After vectorization, assume intrinsics may tell us more about pointer
+ // alignments.
+ PM.add(createAlignmentFromAssumptionsPass());
if (LoadCombine)
PM.add(createLoadCombinePass());
@@ -374,6 +469,39 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Now that we have optimized the program, discard unreachable functions.
PM.add(createGlobalDCEPass());
+
+ // FIXME: this is profitable (for compile time) to do at -O0 too, but
+ // currently it damages debug info.
+ if (MergeFunctions)
+ PM.add(createMergeFunctionsPass());
+}
+
+void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
+ TargetMachine *TM) {
+ if (TM) {
+ PM.add(new DataLayoutPass());
+ TM->addAnalysisPasses(PM);
+ }
+
+ if (LibraryInfo)
+ PM.add(new TargetLibraryInfo(*LibraryInfo));
+
+ if (VerifyInput)
+ PM.add(createVerifierPass());
+
+ if (StripDebug)
+ PM.add(createStripSymbolsPass(true));
+
+ if (VerifyInput)
+ PM.add(createDebugInfoVerifierPass());
+
+ if (OptLevel != 0)
+ addLTOOptimizationPasses(PM);
+
+ if (VerifyOutput) {
+ PM.add(createVerifierPass());
+ PM.add(createDebugInfoVerifierPass());
+ }
}
inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
@@ -457,5 +585,11 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
PassManagerBase *LPM = unwrap(PM);
- Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0);
+
+ // A small backwards compatibility hack. populateLTOPassManager used to take
+ // a RunInliner option.
+ if (RunInliner && !Builder->Inliner)
+ Builder->Inliner = createFunctionInliningPass();
+
+ Builder->populateLTOPassManager(*LPM);
}
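A hedged usage sketch of the reworked LTO entry point (assuming only the post-patch API visible above; the header paths and createFunctionInliningPass() are the in-tree ones of this period, everything else below is illustrative):

#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

// Build an LTO pipeline with the builder-driven interface. The old
// Internalize/RunInliner parameters are gone: the inliner is supplied via the
// Inliner field, and the extra verification / stripping / merging behavior is
// selected through the new flags initialized in the constructor above.
static void buildLTOPipeline(PassManager &PM, TargetMachine *TM) {
  PassManagerBuilder PMB;
  PMB.OptLevel = 2;
  PMB.Inliner = createFunctionInliningPass(); // replaces the RunInliner flag
  PMB.VerifyInput = true;    // verify IR and debug info before optimizing
  PMB.VerifyOutput = true;   // verify again after the LTO passes
  PMB.StripDebug = false;    // keep debug info (strip-symbols pass otherwise)
  PMB.MergeFunctions = true; // run the function merging pass at the end
  PMB.populateLTOPassManager(PM, TM);
}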
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 1abbccc..3412b9e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -154,9 +154,8 @@ static void RemoveDeadConstant(Constant *C) {
C->destroyConstant();
// If the constant referenced anything, see if we can delete it as well.
- for (SmallPtrSet<Constant*, 4>::iterator OI = Operands.begin(),
- OE = Operands.end(); OI != OE; ++OI)
- RemoveDeadConstant(*OI);
+ for (Constant *O : Operands)
+ RemoveDeadConstant(O);
}
// Strip the symbol table of its names.
@@ -191,7 +190,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
/// Find values that are marked as llvm.used.
static void findUsedValues(GlobalVariable *LLVMUsed,
- SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
if (!LLVMUsed) return;
UsedValues.insert(LLVMUsed);
@@ -350,28 +349,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// subprogram list/global variable list with our new live subprogram/global
// variable list.
if (SubprogramChange) {
- // Make sure that 9 is still the index of the subprograms. This is to make
- // sure that an assert is hit if the location of the subprogram array
- // changes. This is just to make sure that this is updated if such an
- // event occurs.
- assert(DIC->getNumOperands() >= 10 &&
- SPs == DIC->getOperand(9) &&
- "DICompileUnits is expected to store Subprograms in operand "
- "9.");
- DIC->replaceOperandWith(9, MDNode::get(C, LiveSubprograms));
+ DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms)));
Changed = true;
}
if (GlobalVariableChange) {
- // Make sure that 10 is still the index of global variables. This is to
- // make sure that an assert is hit if the location of the subprogram array
- // changes. This is just to make sure that this index is updated if such
- // an event occurs.
- assert(DIC->getNumOperands() >= 11 &&
- GVs == DIC->getOperand(10) &&
- "DICompileUnits is expected to store Global Variables in operand "
- "10.");
- DIC->replaceOperandWith(10, MDNode::get(C, LiveGlobalVariables));
+ DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables)));
Changed = true;
}
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index ab4dc1c..d4b252b 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -7,16 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef INSTCOMBINE_INSTCOMBINE_H
-#define INSTCOMBINE_INSTCOMBINE_H
+#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
+#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
#include "InstCombineWorklist.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
@@ -25,6 +27,7 @@
namespace llvm {
class CallSite;
class DataLayout;
+class DominatorTree;
class TargetLibraryInfo;
class DbgDeclareInst;
class MemIntrinsic;
@@ -71,14 +74,20 @@ static inline Constant *SubOne(Constant *C) {
class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
+ AssumptionTracker *AT;
public:
- InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
+ InstCombineIRInserter(InstCombineWorklist &WL, AssumptionTracker *AT)
+ : Worklist(WL), AT(AT) {}
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
BasicBlock::iterator InsertPt) const {
IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
Worklist.Add(I);
+
+ using namespace llvm::PatternMatch;
+ if (match(I, m_Intrinsic<Intrinsic::assume>()))
+ AT->registerAssumption(cast<CallInst>(I));
}
};
@@ -86,8 +95,10 @@ public:
class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction *> {
+ AssumptionTracker *AT;
const DataLayout *DL;
TargetLibraryInfo *TLI;
+ DominatorTree *DT; // not required
bool MadeIRChange;
LibCallSimplifier *Simplifier;
bool MinimizeSize;
@@ -114,7 +125,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ AssumptionTracker *getAssumptionTracker() const { return AT; }
+
const DataLayout *getDataLayout() const { return DL; }
+
+ DominatorTree *getDominatorTree() const { return DT; }
TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
@@ -148,10 +163,12 @@ public:
Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS);
Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Instruction *visitAnd(BinaryOperator &I);
- Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI);
Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
Value *B, Value *C);
+ Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A,
+ Value *B, Value *C);
Instruction *visitOr(BinaryOperator &I);
Instruction *visitXor(BinaryOperator &I);
Instruction *visitShl(BinaryOperator &I);
@@ -172,6 +189,10 @@ public:
ConstantInt *DivRHS);
Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
+ Instruction *FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
+ ConstantInt *CI1, ConstantInt *CI2);
+ Instruction *FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A,
+ ConstantInt *CI1, ConstantInt *CI2);
Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI,
ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
@@ -213,6 +234,7 @@ public:
Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitReturnInst(ReturnInst &RI);
Instruction *visitInsertValueInst(InsertValueInst &IV);
Instruction *visitInsertElementInst(InsertElementInst &IE);
Instruction *visitExtractElementInst(ExtractElementInst &EI);
@@ -246,8 +268,10 @@ private:
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
- bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
+ bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
+ bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
+ bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
@@ -316,16 +340,19 @@ public:
}
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- unsigned Depth = 0) const {
- return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth);
+ unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+ return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth,
+ AT, CxtI, DT);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask,
- unsigned Depth = 0) const {
- return llvm::MaskedValueIsZero(V, Mask, DL, Depth);
+ unsigned Depth = 0,
+ Instruction *CxtI = nullptr) const {
+ return llvm::MaskedValueIsZero(V, Mask, DL, Depth, AT, CxtI, DT);
}
- unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
- return llvm::ComputeNumSignBits(Op, DL, Depth);
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0,
+ Instruction *CxtI = nullptr) const {
+ return llvm::ComputeNumSignBits(Op, DL, Depth, AT, CxtI, DT);
}
private:
@@ -343,7 +370,8 @@ private:
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth);
+ APInt &KnownOne, unsigned Depth,
+ Instruction *CxtI = nullptr);
bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 99f0f1f..902b640 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -32,7 +32,7 @@ namespace {
///
class FAddendCoef {
public:
- // The constructor has to initialize a APFloat, which is uncessary for
+ // The constructor has to initialize an APFloat, which is unnecessary for
// most addends which have coefficient either 1 or -1. So, the constructor
// is expensive. In order to avoid the cost of the constructor, we should
// reuse some instances whenever possible. The pre-created instances
@@ -895,7 +895,8 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
-bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
+ Instruction *CxtI) {
// There are different heuristics we can use for this. Here are some simple
// ones.
@@ -913,18 +914,19 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
//
// Since the carry into the most significant position is always equal to
// the carry out of the addition, there is no signed overflow.
- if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, CxtI) > 1)
return true;
if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {
int BitWidth = IT->getBitWidth();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
// Addition of two 2's complement numbers having opposite signs will never
// overflow.
@@ -943,19 +945,69 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
/// WillNotOverflowUnsignedAdd - Return true if we can prove that:
/// (zext (add LHS, RHS)) === (add (zext LHS), (zext RHS))
-bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS) {
+bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS,
+ Instruction *CxtI) {
// There are different heuristics we can use for this. Here is a simple one.
// If the sign bit of LHS and that of RHS are both zero, no unsigned wrap.
bool LHSKnownNonNegative, LHSKnownNegative;
bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0, AT, CxtI, DT);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0, AT, CxtI, DT);
if (LHSKnownNonNegative && RHSKnownNonNegative)
return true;
return false;
}
+/// \brief Return true if we can prove that:
+/// (sub LHS, RHS) === (sub nsw LHS, RHS)
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+/// TODO: Handle this for Vectors.
+bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
+ Instruction *CxtI) {
+ // If LHS and RHS each have at least two sign bits, the subtraction
+ // cannot overflow.
+ if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ return true;
+
+ if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {
+ unsigned BitWidth = IT->getBitWidth();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+
+ // Subtraction of two 2's complement numbers having identical signs will
+ // never overflow.
+ if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) ||
+ (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1]))
+ return true;
+
+ // TODO: implement logic similar to checkRippleForAdd
+ }
+ return false;
+}
+
+/// \brief Return true if we can prove that:
+/// (sub LHS, RHS) === (sub nuw LHS, RHS)
+bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
+ Instruction *CxtI) {
+ // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0, AT, CxtI, DT);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0, AT, CxtI, DT);
+ if (LHSKnownNegative && RHSKnownNonNegative)
+ return true;
+
+ return false;
+}
+
// Checks if any operand is negative and we can convert add to sub.
// This function checks for following negative patterns
// ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
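The two new helpers above rest on standard two's-complement facts: a subtraction of two values with identical signs cannot signed-overflow, and an unsigned subtraction cannot wrap when the LHS has its sign bit set and the RHS does not. A self-contained brute-force check of both facts over all 8-bit values (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (int l = -128; l <= 127; ++l) {
    for (int r = -128; r <= 127; ++r) {
      // Identical signs => the signed difference fits back into 8 bits.
      if ((l < 0) == (r < 0))
        assert(l - r >= -128 && l - r <= 127);
      // Sign bit set on LHS, clear on RHS => no unsigned wrap, because as
      // unsigned values LHS >= 128 > RHS.
      uint8_t ul = (uint8_t)l, ur = (uint8_t)r;
      if ((ul & 0x80) && !(ur & 0x80))
        assert(ul >= ur);
    }
  }
  return 0;
}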
@@ -1025,7 +1077,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL))
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
@@ -1064,7 +1116,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (ExtendAmt) {
APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
- if (!MaskedValueIsZero(XorLHS, Mask))
+ if (!MaskedValueIsZero(XorLHS, Mask, 0, &I))
ExtendAmt = 0;
}
@@ -1080,7 +1132,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
IntegerType *IT = cast<IntegerType>(I.getType());
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne, 0, &I);
if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
@@ -1133,11 +1185,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &I);
if (LHSKnownZero != 0) {
APInt RHSKnownOne(IT->getBitWidth(), 0);
APInt RHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &I);
// No bits in common -> bitwise or.
if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
@@ -1215,7 +1267,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
// Insert the new, smaller add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1231,7 +1283,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0))) {
+ RHSConv->getOperand(0), &I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
@@ -1240,7 +1292,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- // Check for (x & y) + (x ^ y)
+ // (add (xor A, B) (and A, B)) --> (or A, B)
{
Value *A = nullptr, *B = nullptr;
if (match(RHS, m_Xor(m_Value(A), m_Value(B))) &&
@@ -1254,14 +1306,36 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateOr(A, B);
}
+ // (add (or A, B) (and A, B)) --> (add A, B)
+ {
+ Value *A = nullptr, *B = nullptr;
+ if (match(RHS, m_Or(m_Value(A), m_Value(B))) &&
+ (match(LHS, m_And(m_Specific(A), m_Specific(B))) ||
+ match(LHS, m_And(m_Specific(B), m_Specific(A))))) {
+ auto *New = BinaryOperator::CreateAdd(A, B);
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
+ }
+
+ if (match(LHS, m_Or(m_Value(A), m_Value(B))) &&
+ (match(RHS, m_And(m_Specific(A), m_Specific(B))) ||
+ match(RHS, m_And(m_Specific(B), m_Specific(A))))) {
+ auto *New = BinaryOperator::CreateAdd(A, B);
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
+ }
+ }
+
// TODO(jingyue): Consider WillNotOverflowSignedAdd and
// WillNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedAdd(LHS, RHS)) {
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedAdd(LHS, RHS, &I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
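The folds in this hunk (the relabelled xor/and one and the two new or/and ones) rely on the identities (A ^ B) + (A & B) == (A | B) and (A | B) + (A & B) == A + B: the xor carries the non-overlapping bits and the and the overlapping ones. A standalone brute-force check over all 8-bit pairs (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a) {
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = (uint8_t)a, B = (uint8_t)b;
      // (add (xor A, B), (and A, B)) --> (or A, B)
      assert((uint8_t)((A ^ B) + (A & B)) == (uint8_t)(A | B));
      // (add (or A, B), (and A, B)) --> (add A, B)
      assert((uint8_t)((A | B) + (A & B)) == (uint8_t)(A + B));
    }
  }
  return 0;
}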
@@ -1276,7 +1350,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL))
+ if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL,
+ TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(RHS)) {
@@ -1318,7 +1393,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1334,7 +1409,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0))) {
+ RHSConv->getOperand(0), &I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
@@ -1356,11 +1431,11 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
Z2 = dyn_cast<Constant>(B2); B = B1;
} else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
Z1 = dyn_cast<Constant>(B1); B = B2;
- Z2 = dyn_cast<Constant>(A2); A = A1;
+ Z2 = dyn_cast<Constant>(A2); A = A1;
}
-
- if (Z1 && Z2 &&
- (I.hasNoSignedZeros() ||
+
+ if (Z1 && Z2 &&
+ (I.hasNoSignedZeros() ||
(Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
return SelectInst::Create(C, A, B);
}
@@ -1447,7 +1522,6 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
return Builder->CreateIntCast(Result, Ty, true);
}
-
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -1455,18 +1529,27 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL))
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// (A*B)-(A*C) -> A*(B-C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return ReplaceInstUsesWith(I, V);
- // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
+ // If this is a 'B = x-(-A)', change to B = x+A.
if (Value *V = dyn_castNegVal(Op1)) {
BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
- Res->setHasNoSignedWrap(I.hasNoSignedWrap());
- Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+
+ if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) {
+ assert(BO->getOpcode() == Instruction::Sub &&
+ "Expected a subtraction operator!");
+ if (BO->hasNoSignedWrap() && I.hasNoSignedWrap())
+ Res->setHasNoSignedWrap(true);
+ } else {
+ if (cast<Constant>(Op1)->isNotMinSignedValue() && I.hasNoSignedWrap())
+ Res->setHasNoSignedWrap(true);
+ }
+
return Res;
}
@@ -1511,21 +1594,23 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
if (C->isZero()) {
- Value *X; ConstantInt *CI;
+ Value *X;
+ ConstantInt *CI;
if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
// Verify we are shifting out everything but the sign bit.
- CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
return BinaryOperator::CreateAShr(X, CI);
if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
// Verify we are shifting out everything but the sign bit.
- CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
return BinaryOperator::CreateLShr(X, CI);
}
}
- { Value *Y;
+ {
+ Value *Y;
// X-(X+Y) == -Y X-(Y+X) == -Y
if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
@@ -1536,6 +1621,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateNeg(Y);
}
+ // (sub (or A, B) (xor A, B)) --> (and A, B)
+ {
+ Value *A = nullptr, *B = nullptr;
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(Op0, m_Or(m_Specific(A), m_Specific(B))) ||
+ match(Op0, m_Or(m_Specific(B), m_Specific(A)))))
+ return BinaryOperator::CreateAnd(A, B);
+ }
+
+ if (Op0->hasOneUse()) {
+ Value *Y = nullptr;
+ // ((X | Y) - X) --> (~X & Y)
+ if (match(Op0, m_Or(m_Value(Y), m_Specific(Op1))) ||
+ match(Op0, m_Or(m_Specific(Op1), m_Value(Y))))
+ return BinaryOperator::CreateAnd(
+ Y, Builder->CreateNot(Op1, Op1->getName() + ".not"));
+ }
+
if (Op1->hasOneUse()) {
Value *X = nullptr, *Y = nullptr, *Z = nullptr;
Constant *C = nullptr;
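Similarly, the new visitSub folds in the previous hunk lean on (A | B) - (A ^ B) == (A & B) and (X | Y) - X == (~X & Y). The same kind of exhaustive 8-bit check (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t X = (uint8_t)x, Y = (uint8_t)y;
      // (sub (or A, B), (xor A, B)) --> (and A, B)
      assert((uint8_t)((X | Y) - (X ^ Y)) == (uint8_t)(X & Y));
      // ((X | Y) - X) --> (~X & Y)
      assert((uint8_t)((X | Y) - X) == (uint8_t)(~X & Y));
    }
  }
  return 0;
}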
@@ -1555,7 +1658,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow.
if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
- !C->isMinSignedValue())
+ C->isNotMinSignedValue() && !C->isOneValue())
return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
// 0 - (X << Y) -> (-X << Y) when X is freely negatable.
@@ -1595,7 +1698,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Res);
}
- return nullptr;
+ bool Changed = false;
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) {
+ Changed = true;
+ I.setHasNoSignedWrap(true);
+ }
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) {
+ Changed = true;
+ I.setHasNoUnsignedWrap(true);
+ }
+
+ return Changed ? &I : nullptr;
}
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
@@ -1604,7 +1717,8 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL))
+ if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL,
+ TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b23a606..55ebced 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -355,7 +355,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
- if (MaskedValueIsZero(RHS, Mask))
+ if (MaskedValueIsZero(RHS, Mask, 0, &I))
break;
}
}
@@ -614,7 +614,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
} else if (R1->getType()->isIntegerTy()) {
if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
// As before, model no mask as a trivial mask if it'll let us do an
- // optimisation.
+ // optimization.
R11 = R1;
R12 = Constant::getAllOnesValue(R1->getType());
}
@@ -665,8 +665,8 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
/// foldLogOpOfMaskedICmps:
/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y)
-static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
- llvm::InstCombiner::BuilderTy* Builder) {
+static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+ llvm::InstCombiner::BuilderTy *Builder) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS,
@@ -697,26 +697,26 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
if (mask & FoldMskICmp_Mask_AllZeroes) {
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
// -> (icmp eq (A & (B|D)), 0)
- Value* newOr = Builder->CreateOr(B, D);
- Value* newAnd = Builder->CreateAnd(A, newOr);
+ Value *newOr = Builder->CreateOr(B, D);
+ Value *newAnd = Builder->CreateAnd(A, newOr);
// we can't use C as zero, because we might actually handle
// (icmp ne (A & B), B) & (icmp ne (A & D), D)
// with B and D, having a single bit set
- Value* zero = Constant::getNullValue(A->getType());
+ Value *zero = Constant::getNullValue(A->getType());
return Builder->CreateICmp(NEWCC, newAnd, zero);
}
if (mask & FoldMskICmp_BMask_AllOnes) {
// (icmp eq (A & B), B) & (icmp eq (A & D), D)
// -> (icmp eq (A & (B|D)), (B|D))
- Value* newOr = Builder->CreateOr(B, D);
- Value* newAnd = Builder->CreateAnd(A, newOr);
+ Value *newOr = Builder->CreateOr(B, D);
+ Value *newAnd = Builder->CreateAnd(A, newOr);
return Builder->CreateICmp(NEWCC, newAnd, newOr);
}
if (mask & FoldMskICmp_AMask_AllOnes) {
// (icmp eq (A & B), A) & (icmp eq (A & D), A)
// -> (icmp eq (A & (B&D)), A)
- Value* newAnd1 = Builder->CreateAnd(B, D);
- Value* newAnd = Builder->CreateAnd(A, newAnd1);
+ Value *newAnd1 = Builder->CreateAnd(B, D);
+ Value *newAnd = Builder->CreateAnd(A, newAnd1);
return Builder->CreateICmp(NEWCC, newAnd, A);
}
@@ -766,19 +766,17 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// with B and D, having a single bit set
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
if (!CCst) return nullptr;
- if (LHSCC != NEWCC)
- CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
ConstantInt *ECst = dyn_cast<ConstantInt>(E);
if (!ECst) return nullptr;
+ if (LHSCC != NEWCC)
+ CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
if (RHSCC != NEWCC)
- ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
- ConstantInt* MCst = dyn_cast<ConstantInt>(
- ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
- ConstantExpr::getXor(CCst, ECst)) );
+ ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
// if there is a conflict we should actually return a false for the
// whole construct
- if (!MCst->isZero())
- return nullptr;
+ if (((BCst->getValue() & DCst->getValue()) &
+ (CCst->getValue() ^ ECst->getValue())) != 0)
+ return ConstantInt::get(LHS->getType(), !IsAnd);
Value *newOr1 = Builder->CreateOr(B, D);
Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
Value *newAnd = Builder->CreateAnd(A, newOr1);
@@ -930,6 +928,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_ULT:
if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
return Builder->CreateICmpULT(Val, LHSCst);
+ if (LHSCst->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13
+ return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
break; // (X != 13 & X u< 15) -> no change
case ICmpInst::ICMP_SLT:
if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
@@ -1101,7 +1101,6 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return nullptr;
}
-
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -1109,7 +1108,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyAndInst(Op0, Op1, DL))
+ if (Value *V = SimplifyAndInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// (A|B)&(A|C) -> A|(B&C) etc
@@ -1136,14 +1135,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (!Op0I->hasOneUse()) break;
APInt NotAndRHS(~AndRHSMask);
- if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
+ if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) {
// Not masking anything out for the LHS, move to RHS.
Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
Op0RHS->getName()+".masked");
return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS);
}
if (!isa<Constant>(Op0RHS) &&
- MaskedValueIsZero(Op0RHS, NotAndRHS)) {
+ MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) {
// Not masking anything out for the RHS, move to LHS.
Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS,
Op0LHS->getName()+".masked");
@@ -1176,7 +1175,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
uint32_t Zeros = AndRHSMask.countLeadingZeros();
APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
- if (MaskedValueIsZero(Op0LHS, Mask)) {
+ if (MaskedValueIsZero(Op0LHS, Mask, 0, &I)) {
Value *NewNeg = Builder->CreateNeg(Op0RHS);
return BinaryOperator::CreateAnd(NewNeg, AndRHS);
}
@@ -1283,13 +1282,58 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
return BinaryOperator::CreateAnd(A, Op0);
+
+ // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
+ if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
+ if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
+ return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C));
+
+ // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
+ if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
+ if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
+ if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
+ return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C));
+
+ // (A | B) & ((~A) ^ B) -> (A & B)
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B))))
+ return BinaryOperator::CreateAnd(A, B);
+
+ // ((~A) ^ B) & (A | B) -> (A & B)
+ if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_Or(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateAnd(A, B);
}
- if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
- if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
+ {
+ ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
+ ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
+ if (LHS && RHS)
if (Value *Res = FoldAndOfICmps(LHS, RHS))
return ReplaceInstUsesWith(I, Res);
+ // TODO: Make this recursive; it's a little tricky because an arbitrary
+ // number of 'and' instructions might have to be created.
+ Value *X, *Y;
+ if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
+ if (auto *Cmp = dyn_cast<ICmpInst>(X))
+ if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+ return ReplaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
+ if (auto *Cmp = dyn_cast<ICmpInst>(Y))
+ if (Value *Res = FoldAndOfICmps(LHS, Cmp))
+ return ReplaceInstUsesWith(I, Builder->CreateAnd(Res, X));
+ }
+ if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
+ if (auto *Cmp = dyn_cast<ICmpInst>(X))
+ if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+ return ReplaceInstUsesWith(I, Builder->CreateAnd(Res, Y));
+ if (auto *Cmp = dyn_cast<ICmpInst>(Y))
+ if (Value *Res = FoldAndOfICmps(Cmp, RHS))
+ return ReplaceInstUsesWith(I, Builder->CreateAnd(Res, X));
+ }
+ }
+
// If and'ing two fcmp, try combine them into one.
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
@@ -1329,20 +1373,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
- // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
- if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
- if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
- if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
- SI0->getOperand(1) == SI1->getOperand(1) &&
- (SI0->hasOneUse() || SI1->hasOneUse())) {
- Value *NewOp =
- Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
- SI0->getName());
- return BinaryOperator::Create(SI1->getOpcode(), NewOp,
- SI1->getOperand(1));
- }
- }
-
{
Value *X = nullptr;
bool OpsSwapped = false;
@@ -1554,7 +1584,8 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
}
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
-Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ Instruction *CxtI) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
@@ -1574,13 +1605,15 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *Mask = nullptr;
Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1)) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1))) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AT, CxtI, DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AT, CxtI, DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0)) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0))) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0),
+ false, 0, AT, CxtI, DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0),
+ false, 0, AT, CxtI, DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
@@ -1590,6 +1623,61 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
+ // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
+ // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
+ // The original condition actually refers to the following two ranges:
+ // [MAX_UINT-C1+1, MAX_UINT-C1+1+C3] and [MAX_UINT-C2+1, MAX_UINT-C2+1+C3]
+ // We can fold these two ranges if:
+ // 1) C1 and C2 are unsigned-greater than C3.
+ // 2) The two ranges are separated.
+ // 3) C1 ^ C2 is a one-bit mask.
+ // 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit masks.
+ // This implies all values in the two ranges differ by exactly one bit.
+
+ if ((LHSCC == ICmpInst::ICMP_ULT || LHSCC == ICmpInst::ICMP_ULE) &&
+ LHSCC == RHSCC && LHSCst && RHSCst && LHS->hasOneUse() &&
+ RHS->hasOneUse() && LHSCst->getType() == RHSCst->getType() &&
+ LHSCst->getValue() == (RHSCst->getValue())) {
+
+ Value *LAdd = LHS->getOperand(0);
+ Value *RAdd = RHS->getOperand(0);
+
+ Value *LAddOpnd, *RAddOpnd;
+ ConstantInt *LAddCst, *RAddCst;
+ if (match(LAdd, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddCst))) &&
+ match(RAdd, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddCst))) &&
+ LAddCst->getValue().ugt(LHSCst->getValue()) &&
+ RAddCst->getValue().ugt(LHSCst->getValue())) {
+
+ APInt DiffCst = LAddCst->getValue() ^ RAddCst->getValue();
+ if (LAddOpnd == RAddOpnd && DiffCst.isPowerOf2()) {
+ ConstantInt *MaxAddCst = nullptr;
+ if (LAddCst->getValue().ult(RAddCst->getValue()))
+ MaxAddCst = RAddCst;
+ else
+ MaxAddCst = LAddCst;
+
+ APInt RRangeLow = -RAddCst->getValue();
+ APInt RRangeHigh = RRangeLow + LHSCst->getValue();
+ APInt LRangeLow = -LAddCst->getValue();
+ APInt LRangeHigh = LRangeLow + LHSCst->getValue();
+ APInt LowRangeDiff = RRangeLow ^ LRangeLow;
+ APInt HighRangeDiff = RRangeHigh ^ LRangeHigh;
+ APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? LRangeLow - RRangeLow
+ : RRangeLow - LRangeLow;
+
+ if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff &&
+ RangeDiff.ugt(LHSCst->getValue())) {
+ Value *MaskCst = ConstantInt::get(LAddCst->getType(), ~DiffCst);
+
+ Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskCst);
+ Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddCst);
+ return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSCst));
+ }
+ }
+ }
+ }
+
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (PredicatesFoldable(LHSCC, RHSCC)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
@@ -1906,6 +1994,38 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
return nullptr;
}
+/// \brief This helper function folds:
+///
+/// ((A | B) & C1) ^ (B & C2)
+///
+/// into:
+///
+/// (A & C1) ^ B
+///
+/// when the XOR of the two constants is "all ones" (-1).
+Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+ if (!CI1)
+ return nullptr;
+
+ Value *V1 = nullptr;
+ ConstantInt *CI2 = nullptr;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2))))
+ return nullptr;
+
+ APInt Xor = CI1->getValue() ^ CI2->getValue();
+ if (!Xor.isAllOnesValue())
+ return nullptr;
+
+ if (V1 == A || V1 == B) {
+ Value *NewOp = Builder->CreateAnd(V1 == A ? B : A, CI1);
+ return BinaryOperator::CreateXor(NewOp, V1);
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -1913,7 +2033,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyOrInst(Op0, Op1, DL))
+ if (Value *V = SimplifyOrInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// (A&B)|(A&C) -> A&(B|C) etc
@@ -1973,7 +2093,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
if (Op0->hasOneUse() &&
match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
- MaskedValueIsZero(Op1, C1->getValue())) {
+ MaskedValueIsZero(Op1, C1->getValue(), 0, &I)) {
Value *NOr = Builder->CreateOr(A, Op1);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr, C1);
@@ -1982,12 +2102,32 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// Y|(X^C) -> (X|Y)^C iff Y&C == 0
if (Op1->hasOneUse() &&
match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
- MaskedValueIsZero(Op0, C1->getValue())) {
+ MaskedValueIsZero(Op0, C1->getValue(), 0, &I)) {
Value *NOr = Builder->CreateOr(A, Op0);
NOr->takeName(Op0);
return BinaryOperator::CreateXor(NOr, C1);
}
+ // ((~A & B) | A) -> (A | B)
+ if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_Specific(A)))
+ return BinaryOperator::CreateOr(A, B);
+
+ // ((A & B) | ~A) -> (~A | B)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_Specific(A))))
+ return BinaryOperator::CreateOr(Builder->CreateNot(A), B);
+
+ // (A & (~B)) | (A ^ B) -> (A ^ B)
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Xor(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateXor(A, B);
+
+ // (A ^ B) | ( A & (~B)) -> (A ^ B)
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))
+ return BinaryOperator::CreateXor(A, B);
+
// (A & C)|(B & D)
Value *C = nullptr, *D = nullptr;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
@@ -2000,14 +2140,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
// iff (C1&C2) == 0 and (N&~C1) == 0
if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
- ((V1 == B && MaskedValueIsZero(V2, ~C1->getValue())) || // (V|N)
- (V2 == B && MaskedValueIsZero(V1, ~C1->getValue())))) // (N|V)
+ ((V1 == B &&
+ MaskedValueIsZero(V2, ~C1->getValue(), 0, &I)) || // (V|N)
+ (V2 == B &&
+ MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V)
return BinaryOperator::CreateAnd(A,
Builder->getInt(C1->getValue()|C2->getValue()));
// Or commutes, try both ways.
if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
- ((V1 == A && MaskedValueIsZero(V2, ~C2->getValue())) || // (V|N)
- (V2 == A && MaskedValueIsZero(V1, ~C2->getValue())))) // (N|V)
+ ((V1 == A &&
+ MaskedValueIsZero(V2, ~C2->getValue(), 0, &I)) || // (V|N)
+ (V2 == A &&
+ MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V)
return BinaryOperator::CreateAnd(B,
Builder->getInt(C1->getValue()|C2->getValue()));
@@ -2068,20 +2212,35 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
if (Ret) return Ret;
}
+ // ((A^B)&1)|(B&-2) -> (A&1) ^ B
+ if (match(A, m_Xor(m_Value(V1), m_Specific(B))) ||
+ match(A, m_Xor(m_Specific(B), m_Value(V1)))) {
+ Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A^B)&1) -> (A&1) ^ B
+ if (match(B, m_Xor(m_Specific(A), m_Value(V1))) ||
+ match(B, m_Xor(m_Value(V1), m_Specific(A)))) {
+ Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D);
+ if (Ret) return Ret;
+ }
}
- // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
- if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
- if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
- if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
- SI0->getOperand(1) == SI1->getOperand(1) &&
- (SI0->hasOneUse() || SI1->hasOneUse())) {
- Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
- SI0->getName());
- return BinaryOperator::Create(SI1->getOpcode(), NewOp,
- SI1->getOperand(1));
- }
- }
+ // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
+ if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
+ if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
+ return BinaryOperator::CreateOr(Op0, C);
+
+ // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
+ if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
+ if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
+ if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
+ return BinaryOperator::CreateOr(Op1, C);
+
+ // ((B | C) & A) | B -> B | (A & C)
+ if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
+ return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C));
// (~A | ~B) == (~(A & B)) - De Morgan's Law
if (Value *Op0NotVal = dyn_castNotVal(Op0))
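Likewise, the two rewrites added in the hunk above, (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C and ((B | C) & A) | B -> B | (A & C), can be checked by brute force; a small sketch assuming nothing beyond standard C++:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 16; ++a)
    for (unsigned b = 0; b < 16; ++b)
      for (unsigned c = 0; c < 16; ++c) {
        uint8_t A = a, B = b, C = c;
        // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
        assert(uint8_t((A ^ B) | ((B ^ C) ^ A)) == uint8_t((A ^ B) | C));
        // ((B | C) & A) | B -> B | (A & C)
        assert(uint8_t(((B | C) & A) | B) == uint8_t(B | (A & C)));
      }
  return 0;
}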
@@ -2133,12 +2292,22 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Not, Op0);
}
+ // (A & B) | ((~A) ^ B) -> (~A ^ B)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B))))
+ return BinaryOperator::CreateXor(Builder->CreateNot(A), B);
+
+ // ((~A) ^ B) | (A & B) -> (~A ^ B)
+ if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_And(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateXor(Builder->CreateNot(A), B);
+
if (SwappedForXor)
std::swap(Op0, Op1);
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
- if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS, &I))
return ReplaceInstUsesWith(I, Res);
// (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
@@ -2169,7 +2338,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// cast is otherwise not optimizable. This happens for vector sexts.
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
- if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS, &I))
return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
// If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
@@ -2225,7 +2394,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyXorInst(Op0, Op1, DL))
+ if (Value *V = SimplifyXorInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// (A&B)^(A&C) -> A&(B^C) etc
@@ -2327,7 +2496,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
} else if (Op0I->getOpcode() == Instruction::Or) {
// (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
- if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) {
+ if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue(),
+ 0, &I)) {
Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
@@ -2418,18 +2588,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
- if (Op0I && Op1I && Op0I->isShift() &&
- Op0I->getOpcode() == Op1I->getOpcode() &&
- Op0I->getOperand(1) == Op1I->getOperand(1) &&
- (Op0I->hasOneUse() || Op1I->hasOneUse())) {
- Value *NewOp =
- Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
- Op0I->getName());
- return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
- Op1I->getOperand(1));
- }
-
if (Op0I && Op1I) {
Value *A, *B, *C, *D;
// (A & B)^(A | B) -> A ^ B
@@ -2444,8 +2602,62 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
+ // (A | ~B) ^ (~A | B) -> A ^ B
+ if (match(Op0I, m_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1I, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (~A | B) ^ (A | ~B) -> A ^ B
+ if (match(Op0I, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1I, m_Or(m_Specific(A), m_Not(m_Specific(B))))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A & ~B) ^ (~A & B) -> A ^ B
+ if (match(Op0I, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1I, m_And(m_Not(m_Specific(A)), m_Specific(B)))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (~A & B) ^ (A & ~B) -> A ^ B
+ if (match(Op0I, m_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1I, m_And(m_Specific(A), m_Not(m_Specific(B))))) {
+ return BinaryOperator::CreateXor(A, B);
+ }
+ // (A ^ C)^(A | B) -> ((~A) & B) ^ C
+ if (match(Op0I, m_Xor(m_Value(D), m_Value(C))) &&
+ match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (D == A)
+ return BinaryOperator::CreateXor(
+ Builder->CreateAnd(Builder->CreateNot(A), B), C);
+ if (D == B)
+ return BinaryOperator::CreateXor(
+ Builder->CreateAnd(Builder->CreateNot(B), A), C);
+ }
+ // (A | B)^(A ^ C) -> ((~A) & B) ^ C
+ if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Xor(m_Value(D), m_Value(C)))) {
+ if (D == A)
+ return BinaryOperator::CreateXor(
+ Builder->CreateAnd(Builder->CreateNot(A), B), C);
+ if (D == B)
+ return BinaryOperator::CreateXor(
+ Builder->CreateAnd(Builder->CreateNot(B), A), C);
+ }
+ // (A & B) ^ (A ^ B) -> (A | B)
+ if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_Xor(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
+ // (A ^ B) ^ (A & B) -> (A | B)
+ if (match(Op0I, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1I, m_And(m_Specific(A), m_Specific(B))))
+ return BinaryOperator::CreateOr(A, B);
}
+ Value *A = nullptr, *B = nullptr;
+ // (A & ~B) ^ (~A) -> ~(A & B)
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Not(m_Specific(A))))
+ return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
+
// (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
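Before moving on to InstCombineCalls.cpp, the xor identities introduced above can also be verified exhaustively; this throwaway check is illustrative only and is not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 16; ++a)
    for (unsigned b = 0; b < 16; ++b)
      for (unsigned c = 0; c < 16; ++c) {
        uint8_t A = a, B = b, C = c;
        // (A | ~B) ^ (~A | B) -> A ^ B
        assert(uint8_t((A | ~B) ^ (~A | B)) == uint8_t(A ^ B));
        // (A & ~B) ^ (~A & B) -> A ^ B
        assert(uint8_t((A & ~B) ^ (~A & B)) == uint8_t(A ^ B));
        // (A & B) ^ (A ^ B) -> (A | B)
        assert(uint8_t((A & B) ^ (A ^ B)) == uint8_t(A | B));
        // (A & ~B) ^ (~A) -> ~(A & B)
        assert(uint8_t((A & ~B) ^ ~A) == uint8_t(~(A & B)));
        // (A ^ C) ^ (A | B) -> ((~A) & B) ^ C
        assert(uint8_t((A ^ C) ^ (A | B)) == uint8_t((~A & B) ^ C));
      }
  return 0;
}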
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 658178d..87e49a1 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -58,8 +59,8 @@ static Type *reduceToSingleValueType(Type *T) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AT, MI, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AT, MI, DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -154,7 +155,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AT, MI, DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -322,7 +323,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
+ computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
unsigned TrailingZeros = KnownOne.countTrailingZeros();
APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
if ((Mask & KnownZero) == Mask)
@@ -340,7 +341,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
+ computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
unsigned LeadingZeros = KnownOne.countLeadingZeros();
APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
if ((Mask & KnownZero) == Mask)
@@ -355,14 +356,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II);
bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
if (LHSKnownNegative || LHSKnownPositive) {
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II);
bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
if (LHSKnownNegative && RHSKnownNegative) {
@@ -426,7 +427,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// can prove that it will never overflow.
if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS)) {
+ if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
Value *Add = Builder->CreateNSWAdd(LHS, RHS);
Add->takeName(&CI);
Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()};
@@ -464,10 +465,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II);
// Get the largest possible values for each operand.
APInt LHSMax = ~LHSKnownZero;
@@ -518,30 +519,131 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
break;
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum: {
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+
+ // fmin(x, x) -> x
+ if (Arg0 == Arg1)
+ return ReplaceInstUsesWith(CI, Arg0);
+
+ const ConstantFP *C0 = dyn_cast<ConstantFP>(Arg0);
+ const ConstantFP *C1 = dyn_cast<ConstantFP>(Arg1);
+
+ // Canonicalize constants into the RHS.
+ if (C0 && !C1) {
+ II->setArgOperand(0, Arg1);
+ II->setArgOperand(1, Arg0);
+ return II;
+ }
+
+ // fmin(x, nan) -> x
+ if (C1 && C1->isNaN())
+ return ReplaceInstUsesWith(CI, Arg0);
+
+ // If one operand is undef, fold to the other: had the undef been NaN we
+ // would return the other value anyway, and the result is NaN only when
+ // both operands are NaN.
+ //
+ // fmin(undef, x) -> x
+ if (isa<UndefValue>(Arg0))
+ return ReplaceInstUsesWith(CI, Arg1);
+
+ // fmin(x, undef) -> x
+ if (isa<UndefValue>(Arg1))
+ return ReplaceInstUsesWith(CI, Arg0);
+
+ Value *X = nullptr;
+ Value *Y = nullptr;
+ if (II->getIntrinsicID() == Intrinsic::minnum) {
+ // fmin(x, fmin(x, y)) -> fmin(x, y)
+ // fmin(y, fmin(x, y)) -> fmin(x, y)
+ if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
+ if (Arg0 == X || Arg0 == Y)
+ return ReplaceInstUsesWith(CI, Arg1);
+ }
+
+ // fmin(fmin(x, y), x) -> fmin(x, y)
+ // fmin(fmin(x, y), y) -> fmin(x, y)
+ if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
+ if (Arg1 == X || Arg1 == Y)
+ return ReplaceInstUsesWith(CI, Arg0);
+ }
+
+ // TODO: fmin(nnan x, inf) -> x
+ // TODO: fmin(nnan ninf x, flt_max) -> x
+ if (C1 && C1->isInfinity()) {
+ // fmin(x, -inf) -> -inf
+ if (C1->isNegative())
+ return ReplaceInstUsesWith(CI, Arg1);
+ }
+ } else {
+ assert(II->getIntrinsicID() == Intrinsic::maxnum);
+ // fmax(x, fmax(x, y)) -> fmax(x, y)
+ // fmax(y, fmax(x, y)) -> fmax(x, y)
+ if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
+ if (Arg0 == X || Arg0 == Y)
+ return ReplaceInstUsesWith(CI, Arg1);
+ }
+
+ // fmax(fmax(x, y), x) -> fmax(x, y)
+ // fmax(fmax(x, y), y) -> fmax(x, y)
+ if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
+ if (Arg1 == X || Arg1 == Y)
+ return ReplaceInstUsesWith(CI, Arg0);
+ }
+
+ // TODO: fmax(nnan x, -inf) -> x
+ // TODO: fmax(nnan ninf x, -flt_max) -> x
+ if (C1 && C1->isInfinity()) {
+ // fmax(x, inf) -> inf
+ if (!C1->isNegative())
+ return ReplaceInstUsesWith(CI, Arg1);
+ }
+ }
+ break;
+ }
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16,
+ DL, AT, II, DT) >= 16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
}
break;
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x: {
+ // Turn PPC VSX loads into normal loads.
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr, Twine(""), false, 1);
+ }
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16,
+ DL, AT, II, DT) >= 16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr);
}
break;
+ case Intrinsic::ppc_vsx_stxvw4x:
+ case Intrinsic::ppc_vsx_stxvd2x: {
+ // Turn PPC VSX stores into normal stores.
+ Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
+ }
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16,
+ DL, AT, II, DT) >= 16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
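The new minnum/maxnum folds earlier in this hunk rely on the IEEE-754 minNum/maxNum rule that a quiet NaN operand is ignored; std::fmin/std::fmax implement the same rule, so a short host-side sketch (illustrative, not part of the patch) can demonstrate the cases being folded:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double x = 2.5;
  double nan = std::numeric_limits<double>::quiet_NaN();
  double inf = std::numeric_limits<double>::infinity();
  assert(std::fmin(x, nan) == x);   // fmin(x, nan) -> x
  assert(std::fmax(x, nan) == x);   // fmax(x, nan) -> x
  assert(std::fmin(x, x) == x);     // fmin(x, x) -> x
  // fmin(x, fmin(x, y)) -> fmin(x, y)
  assert(std::fmin(x, std::fmin(x, 7.0)) == std::fmin(x, 7.0));
  assert(std::fmin(x, -inf) == -inf); // fmin(x, -inf) -> -inf
  assert(std::fmax(x, inf) == inf);   // fmax(x, +inf) -> +inf
  return 0;
}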
@@ -680,7 +782,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
CI,
Builder->CreateShuffleVector(
Vec, Undef, ConstantDataVector::get(
- II->getContext(), ArrayRef<uint32_t>(Mask))));
+ II->getContext(), makeArrayRef(Mask))));
} else if (auto Source =
dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
@@ -886,7 +988,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AT, II, DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -994,6 +1096,55 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return EraseInstFromFunction(CI);
break;
}
+ case Intrinsic::assume: {
+ // Canonicalize assume(a && b) -> assume(a); assume(b);
+ // Note: New assumption intrinsics created here are registered by
+ // the InstCombineIRInserter object.
+ Value *IIOperand = II->getArgOperand(0), *A, *B,
+ *AssumeIntrinsic = II->getCalledValue();
+ if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
+ Builder->CreateCall(AssumeIntrinsic, A, II->getName());
+ Builder->CreateCall(AssumeIntrinsic, B, II->getName());
+ return EraseInstFromFunction(*II);
+ }
+ // assume(!(a || b)) -> assume(!a); assume(!b);
+ if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
+ Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
+ II->getName());
+ Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
+ II->getName());
+ return EraseInstFromFunction(*II);
+ }
+
+ // assume( (load addr) != null ) -> add 'nonnull' metadata to load
+ // (if assume is valid at the load)
+ if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) {
+ Value *LHS = ICmp->getOperand(0);
+ Value *RHS = ICmp->getOperand(1);
+ if (ICmpInst::ICMP_NE == ICmp->getPredicate() &&
+ isa<LoadInst>(LHS) &&
+ isa<Constant>(RHS) &&
+ RHS->getType()->isPointerTy() &&
+ cast<Constant>(RHS)->isNullValue()) {
+ LoadInst* LI = cast<LoadInst>(LHS);
+ if (isValidAssumeForContext(II, LI, DL, DT)) {
+ MDNode* MD = MDNode::get(II->getContext(), ArrayRef<Value*>());
+ LI->setMetadata(LLVMContext::MD_nonnull, MD);
+ return EraseInstFromFunction(*II);
+ }
+ }
+ // TODO: apply nonnull return attributes to calls and invokes
+ // TODO: apply range metadata for range check patterns?
+ }
+ // If there is a dominating assume with the same condition as this one,
+ // then this one is redundant, and should be removed.
+ APInt KnownZero(1, 0), KnownOne(1, 0);
+ computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II);
+ if (KnownOne.isAllOnesValue())
+ return EraseInstFromFunction(*II);
+
+ break;
+ }
}
return visitCallSite(II);
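The assume canonicalization above splits a conjoined condition into two calls that carry the same facts. With clang, __builtin_assume lowers to the llvm.assume intrinsic, so a source-level sketch looks like the following (the function and its argument names are hypothetical, used only to show the shape of the condition):

#include <cassert>

int sum_of_positives(int a, int b) {
#if defined(__clang__)
  // assume(a > 0 && b > 0) is canonicalized to assume(a > 0); assume(b > 0);
  // both forms give the optimizer the same per-operand facts.
  __builtin_assume(a > 0 && b > 0);
#endif
  return a + b;
}

int main() {
  assert(sum_of_positives(1, 2) == 3);
  return 0;
}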
@@ -1253,7 +1404,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!Caller->use_empty() &&
// void -> non-void is handled specially
!NewRetTy->isVoidTy())
- return false; // Cannot transform this return value.
+ return false; // Cannot transform this return value.
}
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
@@ -1472,8 +1623,14 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!Caller->use_empty())
ReplaceInstUsesWith(*Caller, NV);
- else if (Caller->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(Caller, NV);
+ else if (Caller->hasValueHandle()) {
+ if (OldRetTy == NV->getType())
+ ValueHandleBase::ValueIsRAUWd(Caller, NV);
+ else
+ // We cannot call ValueIsRAUWd with a different type, and the
+ // actual tracked value will disappear.
+ ValueHandleBase::ValueIsDeleted(Caller);
+ }
EraseInstFromFunction(*Caller);
return true;
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index ff083d7..aba77bb 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -335,7 +335,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
///
/// This function works on both vectors and scalars.
///
-static bool CanEvaluateTruncated(Value *V, Type *Ty) {
+static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
+ Instruction *CxtI) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
return true;
@@ -364,8 +365,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
case Instruction::Or:
case Instruction::Xor:
// These operators can all arbitrarily be extended or truncated.
- return CanEvaluateTruncated(I->getOperand(0), Ty) &&
- CanEvaluateTruncated(I->getOperand(1), Ty);
+ return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
case Instruction::UDiv:
case Instruction::URem: {
@@ -374,10 +375,10 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
uint32_t BitWidth = Ty->getScalarSizeInBits();
if (BitWidth < OrigBitWidth) {
APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
- if (MaskedValueIsZero(I->getOperand(0), Mask) &&
- MaskedValueIsZero(I->getOperand(1), Mask)) {
- return CanEvaluateTruncated(I->getOperand(0), Ty) &&
- CanEvaluateTruncated(I->getOperand(1), Ty);
+ if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
+ IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+ CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
}
}
break;
@@ -388,7 +389,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint32_t BitWidth = Ty->getScalarSizeInBits();
if (CI->getLimitedValue(BitWidth) < BitWidth)
- return CanEvaluateTruncated(I->getOperand(0), Ty);
+ return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
}
break;
case Instruction::LShr:
@@ -398,10 +399,10 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
uint32_t BitWidth = Ty->getScalarSizeInBits();
- if (MaskedValueIsZero(I->getOperand(0),
- APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ if (IC.MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) &&
CI->getLimitedValue(BitWidth) < BitWidth) {
- return CanEvaluateTruncated(I->getOperand(0), Ty);
+ return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
}
}
break;
@@ -415,8 +416,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
- return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
- CanEvaluateTruncated(SI->getFalseValue(), Ty);
+ return CanEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
+ CanEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
}
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -424,7 +425,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty))
+ if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty, IC, CxtI))
return false;
return true;
}
@@ -453,7 +454,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateTruncated(Src, DestTy)) {
+ CanEvaluateTruncated(Src, DestTy, *this, &CI)) {
// If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win.
@@ -553,7 +554,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// If Op1C some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
@@ -601,8 +602,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS);
- computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS);
+ computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI);
+ computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI);
if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
APInt KnownBits = KnownZeroLHS | KnownOneLHS;
@@ -651,7 +652,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
-static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
+static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
+ InstCombiner &IC, Instruction *CxtI) {
BitsToClear = 0;
if (isa<Constant>(V))
return true;
@@ -680,8 +682,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear) ||
- !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp))
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
+ !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
return false;
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
@@ -695,8 +697,9 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// We use MaskedValueIsZero here for generality, but the case we care
// about the most is constant RHS.
unsigned VSize = V->getType()->getScalarSizeInBits();
- if (MaskedValueIsZero(I->getOperand(1),
- APInt::getHighBitsSet(VSize, BitsToClear)))
+ if (IC.MaskedValueIsZero(I->getOperand(1),
+ APInt::getHighBitsSet(VSize, BitsToClear),
+ 0, CxtI))
return true;
}
@@ -707,7 +710,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// We can promote shl(x, cst) if we can promote x. Since shl overwrites the
// upper bits we can reduce BitsToClear by the shift amount.
if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
uint64_t ShiftAmt = Amt->getZExtValue();
BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
@@ -718,7 +721,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
if (ConstantInt *Amt = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear))
+ if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
return false;
BitsToClear += Amt->getZExtValue();
if (BitsToClear > V->getType()->getScalarSizeInBits())
@@ -728,8 +731,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// Cannot promote variable LSHR.
return false;
case Instruction::Select:
- if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp) ||
- !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear) ||
+ if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
+ !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear are
// known zero in the disagreeing side.
Tmp != BitsToClear)
@@ -741,10 +744,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear))
+ if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
return false;
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp) ||
+ if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
// TODO: If important, we could handle the case when the BitsToClear
// are known zero in the disagreeing input.
Tmp != BitsToClear)
@@ -781,7 +784,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// strange.
unsigned BitsToClear;
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ CanEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
@@ -796,8 +799,10 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// If the high bits are already filled with zeros, just replace this
// cast with the result.
- if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
- DestBitSize-SrcBitsKept)))
+ if (MaskedValueIsZero(Res,
+ APInt::getHighBitsSet(DestBitSize,
+ DestBitSize-SrcBitsKept),
+ 0, &CI))
return ReplaceInstUsesWith(CI, Res);
// We need to emit an AND to clear the high bits.
@@ -895,6 +900,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
ICmpInst::Predicate Pred = ICI->getPredicate();
+ // Don't bother if Op1 isn't of vector or integer type.
+ if (!Op1->getType()->isIntOrIntVectorTy())
+ return nullptr;
+
if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
// (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
// (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
@@ -921,7 +930,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
unsigned BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Op0, KnownZero, KnownOne);
+ computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) {
@@ -1072,7 +1081,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// If the high bits are already filled with sign bit, just replace this
// cast with the result.
- if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
+ if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
return ReplaceInstUsesWith(CI, Res);
// We need to emit a shl + ashr to do the sign extend.
@@ -1264,10 +1273,12 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType());
else if (RHSWidth <= SrcWidth)
RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType());
- Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig);
- if (Instruction *RI = dyn_cast<Instruction>(ExactResult))
- RI->copyFastMathFlags(OpI);
- return CastInst::CreateFPCast(ExactResult, CI.getType());
+ if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) {
+ Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig);
+ if (Instruction *RI = dyn_cast<Instruction>(ExactResult))
+ RI->copyFastMathFlags(OpI);
+ return CastInst::CreateFPCast(ExactResult, CI.getType());
+ }
}
// (fptrunc (fneg x)) -> (fneg (fptrunc x))
@@ -1312,42 +1323,6 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
}
- // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
- // Note that we restrict this transformation based on
- // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because
- // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the
- // single-precision intrinsic can be expanded in the backend.
- CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
- if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
- (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) ||
- Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) &&
- Call->getNumArgOperands() == 1 &&
- Call->hasOneUse()) {
- CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
- if (Arg && Arg->getOpcode() == Instruction::FPExt &&
- CI.getType()->isFloatTy() &&
- Call->getType()->isDoubleTy() &&
- Arg->getType()->isDoubleTy() &&
- Arg->getOperand(0)->getType()->isFloatTy()) {
- Function *Callee = Call->getCalledFunction();
- Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ?
- Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) :
- M->getOrInsertFunction("sqrtf", Callee->getAttributes(),
- Builder->getFloatTy(), Builder->getFloatTy(),
- NULL);
- CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
- "sqrtfcall");
- ret->setAttributes(Callee->getAttributes());
-
-
- // Remove the old Call. With -fmath-errno, it won't get marked readnone.
- ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
- EraseInstFromFunction(*Call);
- return ret;
- }
- }
-
return nullptr;
}
@@ -1909,9 +1884,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
- // If the destination pointer element type is not the the same as the source's
- // do the addrspacecast to the same type, and then the bitcast in the new
- // address space. This allows the cast to be exposed to other transforms.
+ // If the destination pointer element type is not the same as the source's
+ // first do a bitcast to the destination type, and then the addrspacecast.
+ // This allows the cast to be exposed to other transforms.
Value *Src = CI.getOperand(0);
PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 5e71c5c..399f1c3 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -740,21 +740,6 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
Value *X, ConstantInt *CI,
ICmpInst::Predicate Pred) {
- // If we have X+0, exit early (simplifying logic below) and let it get folded
- // elsewhere. icmp X+0, X -> icmp X, X
- if (CI->isZero()) {
- bool isTrue = ICmpInst::isTrueWhenEqual(Pred);
- return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
- }
-
- // (X+4) == X -> false.
- if (Pred == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, Builder->getFalse());
-
- // (X+4) != X -> true.
- if (Pred == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, Builder->getTrue());
-
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
@@ -1044,6 +1029,111 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
return nullptr;
}
+/// FoldICmpCstShrCst - Handle "(icmp eq/ne (ashr/lshr const2, A), const1)" ->
+/// (icmp eq/ne A, Log2(const2/const1)) ->
+/// (icmp eq/ne A, Log2(const2) - Log2(const1)).
+Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
+ ConstantInt *CI1,
+ ConstantInt *CI2) {
+ assert(I.isEquality() && "Cannot fold icmp gt/lt");
+
+ auto getConstant = [&I, this](bool IsTrue) {
+ if (I.getPredicate() == I.ICMP_NE)
+ IsTrue = !IsTrue;
+ return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue));
+ };
+
+ auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
+ if (I.getPredicate() == I.ICMP_NE)
+ Pred = CmpInst::getInversePredicate(Pred);
+ return new ICmpInst(Pred, LHS, RHS);
+ };
+
+ APInt AP1 = CI1->getValue();
+ APInt AP2 = CI2->getValue();
+
+ // Don't bother doing any work for cases which InstSimplify handles.
+ if (AP2 == 0)
+ return nullptr;
+ bool IsAShr = isa<AShrOperator>(Op);
+ if (IsAShr) {
+ if (AP2.isAllOnesValue())
+ return nullptr;
+ if (AP2.isNegative() != AP1.isNegative())
+ return nullptr;
+ if (AP2.sgt(AP1))
+ return nullptr;
+ }
+
+ if (!AP1)
+ // 'A' must be large enough to shift out the highest set bit.
+ return getICmp(I.ICMP_UGT, A,
+ ConstantInt::get(A->getType(), AP2.logBase2()));
+
+ if (AP1 == AP2)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
+
+ // Get the distance between the highest bits that are set.
+ int Shift;
+ // If both constants are negative, compute the log on their ones' complements.
+ if (IsAShr && AP1.isNegative())
+ // Get the ones' complement of AP2 and AP1 when computing the distance.
+ Shift = (~AP2).logBase2() - (~AP1).logBase2();
+ else
+ Shift = AP2.logBase2() - AP1.logBase2();
+
+ if (Shift > 0) {
+ if (IsAShr ? AP1 == AP2.ashr(Shift) : AP1 == AP2.lshr(Shift))
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ }
+ // Shifting const2 will never be equal to const1.
+ return getConstant(false);
+}
+
+/// FoldICmpCstShlCst - Handle "(icmp eq/ne (shl const2, A), const1)" ->
+/// (icmp eq/ne A, TrailingZeros(const1) - TrailingZeros(const2)).
+Instruction *InstCombiner::FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A,
+ ConstantInt *CI1,
+ ConstantInt *CI2) {
+ assert(I.isEquality() && "Cannot fold icmp gt/lt");
+
+ auto getConstant = [&I, this](bool IsTrue) {
+ if (I.getPredicate() == I.ICMP_NE)
+ IsTrue = !IsTrue;
+ return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue));
+ };
+
+ auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
+ if (I.getPredicate() == I.ICMP_NE)
+ Pred = CmpInst::getInversePredicate(Pred);
+ return new ICmpInst(Pred, LHS, RHS);
+ };
+
+ APInt AP1 = CI1->getValue();
+ APInt AP2 = CI2->getValue();
+
+ // Don't bother doing any work for cases which InstSimplify handles.
+ if (AP2 == 0)
+ return nullptr;
+
+ unsigned AP2TrailingZeros = AP2.countTrailingZeros();
+
+ if (!AP1 && AP2TrailingZeros != 0)
+ return getICmp(I.ICMP_UGE, A,
+ ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
+
+ if (AP1 == AP2)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
+
+ // Get the distance between the lowest bits that are set.
+ int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
+
+ if (Shift > 0 && AP2.shl(Shift) == AP1)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+
+ // Shifting const2 will never be equal to const1.
+ return getConstant(false);
+}
/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
///
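FoldICmpCstShrCst and FoldICmpCstShlCst reduce an equality against a shifted constant to an equality on the shift amount; a quick numeric check of two representative cases (64 >> A == 8 and 4 << A == 16), illustrative only and not part of the patch:

#include <cassert>

int main() {
  // (icmp eq (lshr 64, A), 8): true exactly when A == log2(64) - log2(8) == 3.
  for (unsigned A = 0; A < 32; ++A)
    assert(((64u >> A) == 8u) == (A == 3));
  // (icmp eq (shl 4, A), 16): true exactly when A == ctz(16) - ctz(4) == 2.
  for (unsigned A = 0; A < 32; ++A)
    assert(((4u << A) == 16u) == (A == 2));
  return 0;
}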
@@ -1060,7 +1150,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne, 0, &ICI);
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
@@ -1282,6 +1372,48 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return &ICI;
}
+ // (icmp pred (and (or (lshr X, Y), X), 1), 0) -->
+ // (icmp pred (and X, (or (shl 1, Y), 1), 0))
+ //
+ // iff pred isn't signed
+ {
+ Value *X, *Y, *LShr;
+ if (!ICI.isSigned() && RHSV == 0) {
+ if (match(LHSI->getOperand(1), m_One())) {
+ Constant *One = cast<Constant>(LHSI->getOperand(1));
+ Value *Or = LHSI->getOperand(0);
+ if (match(Or, m_Or(m_Value(LShr), m_Value(X))) &&
+ match(LShr, m_LShr(m_Specific(X), m_Value(Y)))) {
+ unsigned UsesRemoved = 0;
+ if (LHSI->hasOneUse())
+ ++UsesRemoved;
+ if (Or->hasOneUse())
+ ++UsesRemoved;
+ if (LShr->hasOneUse())
+ ++UsesRemoved;
+ Value *NewOr = nullptr;
+ // Compute X & ((1 << Y) | 1)
+ if (auto *C = dyn_cast<Constant>(Y)) {
+ if (UsesRemoved >= 1)
+ NewOr =
+ ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
+ } else {
+ if (UsesRemoved >= 3)
+ NewOr = Builder->CreateOr(Builder->CreateShl(One, Y,
+ LShr->getName(),
+ /*HasNUW=*/true),
+ One, Or->getName());
+ }
+ if (NewOr) {
+ Value *NewAnd = Builder->CreateAnd(X, NewOr, LHSI->getName());
+ ICI.setOperand(0, NewAnd);
+ return &ICI;
+ }
+ }
+ }
+ }
+ }
+
// Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any
// bit set in (X & AndCst) will produce a result greater than RHSV.
if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
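The fold added above replaces (and (or (lshr X, Y), X), 1) with (and X, (or (shl 1, Y), 1)); for the compare-against-zero form the equivalence is easy to confirm by brute force (standalone sketch, not part of the patch):

#include <cassert>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 8; ++Y) {
      // ((X >> Y) | X) & 1 tests bit Y or bit 0 of X ...
      bool lhs = ((((X >> Y) | X) & 1u) != 0);
      // ... which is the same as masking X with ((1 << Y) | 1).
      bool rhs = ((X & ((1u << Y) | 1u)) != 0);
      assert(lhs == rhs);
    }
  return 0;
}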
@@ -1377,16 +1509,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
unsigned RHSLog2 = RHSV.logBase2();
// (1 << X) >= 2147483648 -> X >= 31 -> X == 31
- // (1 << X) > 2147483648 -> X > 31 -> false
- // (1 << X) <= 2147483648 -> X <= 31 -> true
// (1 << X) < 2147483648 -> X < 31 -> X != 31
if (RHSLog2 == TypeBits-1) {
if (Pred == ICmpInst::ICMP_UGE)
Pred = ICmpInst::ICMP_EQ;
- else if (Pred == ICmpInst::ICMP_UGT)
- return ReplaceInstUsesWith(ICI, Builder->getFalse());
- else if (Pred == ICmpInst::ICMP_ULE)
- return ReplaceInstUsesWith(ICI, Builder->getTrue());
else if (Pred == ICmpInst::ICMP_ULT)
Pred = ICmpInst::ICMP_NE;
}
@@ -1421,10 +1547,6 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (RHSVIsPowerOf2)
return new ICmpInst(
Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2()));
-
- return ReplaceInstUsesWith(
- ICI, Pred == ICmpInst::ICMP_EQ ? Builder->getFalse()
- : Builder->getTrue());
}
}
break;
@@ -1932,8 +2054,8 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// sign-extended; check for that condition. For example, if CI2 is 2^31 and
// the operands of the add are 64 bits wide, we need at least 33 sign bits.
unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
- if (IC.ComputeNumSignBits(A) < NeededSignBits ||
- IC.ComputeNumSignBits(B) < NeededSignBits)
+ if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
+ IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
return nullptr;
// In order to replace the original add with a narrower
@@ -2038,8 +2160,8 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
Instruction *MulInstr = cast<Instruction>(MulVal);
assert(MulInstr->getOpcode() == Instruction::Mul);
- Instruction *LHS = cast<Instruction>(MulInstr->getOperand(0)),
- *RHS = cast<Instruction>(MulInstr->getOperand(1));
+ auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
+ *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
assert(LHS->getOpcode() == Instruction::ZExt);
assert(RHS->getOpcode() == Instruction::ZExt);
Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
@@ -2341,7 +2463,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Changed = true;
}
- if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL))
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// comparing -val or val with non-zero is the same as just comparing val
@@ -2469,6 +2591,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Builder->getInt(CI->getValue()-1));
}
+ if (I.isEquality()) {
+ ConstantInt *CI2;
+ if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) ||
+ match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) {
+ // (icmp eq/ne (ashr/lshr const2, A), const1)
+ if (Instruction *Inst = FoldICmpCstShrCst(I, Op0, A, CI, CI2))
+ return Inst;
+ }
+ if (match(Op0, m_Shl(m_ConstantInt(CI2), m_Value(A)))) {
+ // (icmp eq/ne (shl const2, A), const1)
+ if (Instruction *Inst = FoldICmpCstShlCst(I, Op0, A, CI, CI2))
+ return Inst;
+ }
+ }
+
// If this comparison is a normal comparison, it demands all
// bits, if it is a sign bit comparison, it only demands the sign bit.
bool UnusedBit;
@@ -2878,6 +3015,12 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (BO1 && BO1->getOpcode() == Instruction::Add)
C = BO1->getOperand(0), D = BO1->getOperand(1);
+ // icmp (X+cst) < 0 --> X < -cst
+ if (NoOp0WrapProblem && ICmpInst::isSigned(Pred) && match(Op1, m_Zero()))
+ if (ConstantInt *RHSC = dyn_cast_or_null<ConstantInt>(B))
+ if (!RHSC->isMinValue(/*isSigned=*/true))
+ return new ICmpInst(Pred, A, ConstantExpr::getNeg(RHSC));
+
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
return new ICmpInst(Pred, A == Op1 ? B : A,
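The new (X + cst) < 0 --> X < -cst fold is only valid when the add cannot wrap and cst is not the minimum signed value; a brute-force check over int8_t (illustrative sketch, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x)
    for (int c = -127; c <= 127; ++c) {   // skip c == -128: negating it would wrap
      int sum = x + c;                    // computed in int, so no overflow here
      if (sum < -128 || sum > 127)
        continue;                         // the fold requires no signed wrap in i8
      assert((static_cast<int8_t>(sum) < 0) == (x < -c));
    }
  return 0;
}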
@@ -3112,7 +3255,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// and (A & ~B) != 0 --> (A & B) == 0
// if A is a power of 2.
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality())
+ match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A, false,
+ 0, AT, &I, DT) &&
+ I.isEquality())
return new ICmpInst(I.getInversePredicate(),
Builder->CreateAnd(A, B),
Op1);
@@ -3273,6 +3418,22 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
+ // The 'cmpxchg' instruction returns an aggregate containing the old value and
+ // an i1 which indicates whether or not we successfully did the swap.
+ //
+ // Replace comparisons between the old value and the expected value with the
+ // indicator that 'cmpxchg' returns.
+ //
+ // N.B. This transform is only valid when the 'cmpxchg' is not permitted to
+ // spuriously fail. In those cases, the old value may equal the expected
+ // value but it is possible for the swap to not occur.
+ if (I.getPredicate() == ICmpInst::ICMP_EQ)
+ if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
+ if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
+ if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
+ !ACXI->isWeak())
+ return ExtractValueInst::Create(ACXI, 1);
+
{
Value *X; ConstantInt *Cst;
// icmp X+Cst, X
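The cmpxchg fold above reuses the success flag instead of re-comparing against the old value; std::atomic::compare_exchange_strong exposes the same contract for non-weak exchanges, as this small sketch (illustrative only) shows:

#include <atomic>
#include <cassert>

int main() {
  std::atomic<int> v{42};
  // A strong (non-weak) compare-exchange cannot fail spuriously, so its success
  // flag is exactly "old value == expected value".
  int expected = 42;
  bool ok = v.compare_exchange_strong(expected, 7);
  assert(ok && v.load() == 7);
  // On failure, the flag is false and the expected slot reports the old value.
  int wrong = 100;
  bool ok2 = v.compare_exchange_strong(wrong, 8);
  assert(!ok2 && wrong == 7);
  return 0;
}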
@@ -3502,7 +3663,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL))
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// Simplify 'fcmp pred X, X'
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c10e92a..f3ac44c 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -268,7 +269,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
- AI.getAlignment(), DL);
+ AI.getAlignment(),
+ DL, AT, &AI, DT);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -290,80 +292,112 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
return visitAllocSite(AI);
}
+/// \brief Helper to combine a load to a new type.
+///
+/// This just does the work of combining a load to a new type. It handles
+/// metadata, etc., and returns the new instruction. The \c NewTy should be the
+/// loaded *value* type. This will convert it to a pointer, cast the operand to
+/// that pointer type, load it, etc.
+///
+/// Note that this will create all of the instructions with whatever insert
+/// point the \c InstCombiner currently is using.
+static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy) {
+ Value *Ptr = LI.getPointerOperand();
+ unsigned AS = LI.getPointerAddressSpace();
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ LI.getAllMetadata(MD);
+
+ LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
+ IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
+ LI.getAlignment(), LI.getName());
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a load instruction changing *only its type*.
+ // The only metadata it makes sense to drop is metadata which is invalidated
+ // when the pointer type changes. This should essentially never be the case
+ // in LLVM, but we explicitly switch over only known metadata to be
+ // conservatively correct. If you are adding metadata to LLVM which pertains
+ // to loads, you almost certainly want to add it here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ case LLVMContext::MD_nonnull:
+ // All of these directly apply.
+ NewLoad->setMetadata(ID, N);
+ break;
-/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
-static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
- const DataLayout *DL) {
- User *CI = cast<User>(LI.getOperand(0));
- Value *CastOp = CI->getOperand(0);
+ case LLVMContext::MD_range:
+ // FIXME: It would be nice to propagate this in some way, but the type
+ // conversions make it hard.
+ break;
+ }
+ }
+ return NewLoad;
+}
- PointerType *DestTy = cast<PointerType>(CI->getType());
- Type *DestPTy = DestTy->getElementType();
- if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
-
- // If the address spaces don't match, don't eliminate the cast.
- if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
- return nullptr;
-
- Type *SrcPTy = SrcTy->getElementType();
-
- if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
- DestPTy->isVectorTy()) {
- // If the source is an array, the code below will not succeed. Check to
- // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
- // constants.
- if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
- if (Constant *CSrc = dyn_cast<Constant>(CastOp))
- if (ASrcTy->getNumElements() != 0) {
- Type *IdxTy = DL
- ? DL->getIntPtrType(SrcTy)
- : Type::getInt64Ty(SrcTy->getContext());
- Value *Idx = Constant::getNullValue(IdxTy);
- Value *Idxs[2] = { Idx, Idx };
- CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
- SrcTy = cast<PointerType>(CastOp->getType());
- SrcPTy = SrcTy->getElementType();
- }
-
- if (IC.getDataLayout() &&
- (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
- SrcPTy->isVectorTy()) &&
- // Do not allow turning this into a load of an integer, which is then
- // casted to a pointer, this pessimizes pointer analysis a lot.
- (SrcPTy->isPtrOrPtrVectorTy() ==
- LI.getType()->isPtrOrPtrVectorTy()) &&
- IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
- IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
-
- // Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before the load, cast
- // the result of the loaded value.
- LoadInst *NewLoad =
- IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
- NewLoad->setAlignment(LI.getAlignment());
- NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
- // Now cast the result of the load.
- PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
- PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
- if (OldTy && NewTy &&
- OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
- return new AddrSpaceCastInst(NewLoad, LI.getType());
- }
+/// \brief Combine loads to match the type of value their uses expect after looking
+/// through intervening bitcasts.
+///
+/// The core idea here is that if the result of a load is used in an operation,
+/// we should load the type most conducive to that operation. For example, when
+/// loading an integer and converting that immediately to a pointer, we should
+/// instead directly load a pointer.
+///
+/// However, this routine must never change the width of a load or the number of
+/// loads as that would introduce a semantic change. This combine is expected to
+/// be a semantic no-op which just allows loads to more closely model the types
+/// of their consuming operations.
+///
+/// Currently, we also refuse to change the precise type used for an atomic load
+/// or a volatile load. This is debatable, and might be reasonable to change
+/// later. However, it is risky in case some backend or other part of LLVM is
+/// relying on the exact type loaded to select appropriate atomic operations.
+static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
+ // FIXME: We could probably with some care handle both volatile and atomic
+ // loads here but it isn't clear that this is important.
+ if (!LI.isSimple())
+ return nullptr;
- return new BitCastInst(NewLoad, LI.getType());
- }
+ if (LI.use_empty())
+ return nullptr;
+
+
+ // Fold away bit casts of the loaded value by loading the desired type.
+ if (LI.hasOneUse())
+ if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy());
+ BC->replaceAllUsesWith(NewLoad);
+ IC.EraseInstFromFunction(*BC);
+ return &LI;
}
- }
+
+ // FIXME: We should also canonicalize loads of vectors when their elements are
+ // cast to other types.
return nullptr;
}
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
+ // Try to canonicalize the loaded type.
+ if (Instruction *Res = combineLoadToOperationType(*this, LI))
+ return Res;
+
// Attempt to improve the alignment.
if (DL) {
unsigned KnownAlign =
- getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),DL);
+ getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),
+ DL, AT, &LI, DT);
unsigned LoadAlign = LI.getAlignment();
unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
DL->getABITypeAlignment(LI.getType());
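A source-level pattern that produces the load-plus-bitcast IR targeted by the new combineLoadToOperationType is a same-width type pun of a loaded value; the example below is illustrative only and is not taken from the patch:

#include <cstdint>
#include <cstring>

float load_as_float(const std::uint32_t *p) {
  std::uint32_t bits = *p;            // load i32
  float f;
  std::memcpy(&f, &bits, sizeof f);   // bitcast of the loaded value
  return f;                           // the combine can load a float from p directly
}

int main() {
  std::uint32_t bits = 0x40490fdbu;   // IEEE-754 encoding of roughly 3.14159
  return load_as_float(&bits) > 3.0f ? 0 : 1;
}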
@@ -374,11 +408,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
LI.setAlignment(EffectiveLoadAlign);
}
- // load (cast X) --> cast (load X) iff safe.
- if (isa<CastInst>(Op))
- if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
- return Res;
-
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return nullptr;
@@ -388,7 +417,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// separated by a few arithmetic operations.
BasicBlock::iterator BBI = &LI;
if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
- return ReplaceInstUsesWith(LI, AvailableVal);
+ return ReplaceInstUsesWith(
+ LI, Builder->CreateBitCast(AvailableVal, LI.getType()));
// load(gep null, ...) -> unreachable
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
@@ -417,12 +447,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
- // Instcombine load (constantexpr_cast global) -> cast (load global)
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
- if (CE->isCast())
- if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
- return Res;
-
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
@@ -473,7 +497,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
User *CI = cast<User>(SI.getOperand(1));
Value *CastOp = CI->getOperand(0);
- Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
+ Type *DestPTy = CI->getType()->getPointerElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (!SrcTy) return nullptr;
@@ -518,8 +542,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// If the pointers point into different address spaces don't do the
// transformation.
- if (SrcTy->getAddressSpace() !=
- cast<PointerType>(CI->getType())->getAddressSpace())
+ if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace())
return nullptr;
// If the pointers point to values of different sizes don't do the
@@ -602,7 +625,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (DL) {
unsigned KnownAlign =
getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
- DL);
+ DL, AT, &SI, DT);
unsigned StoreAlign = SI.getAlignment();
unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
DL->getABITypeAlignment(Val->getType());
@@ -837,12 +860,13 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
InsertNewInstBefore(NewSI, *BBI);
NewSI->setDebugLoc(OtherStore->getDebugLoc());
- // If the two stores had the same TBAA tag, preserve it.
- if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
- if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
- OtherStore->getMetadata(LLVMContext::MD_tbaa))))
- NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+ // If the two stores had AA tags, merge them.
+ AAMDNodes AATags;
+ SI.getAAMetadata(AATags);
+ if (AATags) {
+ OtherStore->getAAMetadata(AATags, /* Merge = */ true);
+ NewSI->setAAMetadata(AATags);
+ }
// Nuke the old stores.
EraseInstFromFunction(SI);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 6c6e7d8..8c48dce 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -25,7 +25,8 @@ using namespace PatternMatch;
/// simplifyValueKnownNonZero - The specific integer value is used in a context
/// where it is known to be non-zero. If this allows us to simplify the
/// computation, do so and return the new operand, otherwise return null.
-static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
+static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
+ Instruction *CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
// code.
@@ -35,22 +36,23 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// ((1 << A) >>u B) --> (1 << (A-B))
// Because V cannot be zero, we know that B is less than A.
- Value *A = nullptr, *B = nullptr, *PowerOf2 = nullptr;
- if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
- m_Value(B))) &&
- // The "1" can be any value known to be a power of 2.
- isKnownToBeAPowerOfTwo(PowerOf2)) {
+ Value *A = nullptr, *B = nullptr, *One = nullptr;
+ if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) &&
+ match(One, m_One())) {
A = IC.Builder->CreateSub(A, B);
- return IC.Builder->CreateShl(PowerOf2, A);
+ return IC.Builder->CreateShl(One, A);
}
// (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
- if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0))) {
+ if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0), false,
+ 0, IC.getAssumptionTracker(),
+ CxtI,
+ IC.getDominatorTree())) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
- if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
+ if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
I->setOperand(0, V2);
MadeChange = true;
}
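The rewrite above relies on a simple shift identity: if `(1 << A) >> B` is known to be non-zero, the single set bit survived the right shift, which forces B <= A, and the result is exactly `1 << (A - B)`. A standalone brute-force check of that identity (plain C++, exhaustive for a 32-bit width, not part of the pass):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A = 0; A < 32; ++A)
        for (uint32_t B = 0; B <= A; ++B) {        // a non-zero result implies B <= A
          uint32_t V = (uint32_t(1) << A) >> B;    // (1 << A) >>u B
          assert(V != 0 && V == (uint32_t(1) << (A - B)));
        }
      return 0;
    }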
@@ -76,25 +78,30 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
/// MultiplyOverflows - True if the multiply can not be expressed in an int
/// this size.
-static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
- uint32_t W = C1->getBitWidth();
- APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
- if (sign) {
- LHSExt = LHSExt.sext(W * 2);
- RHSExt = RHSExt.sext(W * 2);
- } else {
- LHSExt = LHSExt.zext(W * 2);
- RHSExt = RHSExt.zext(W * 2);
- }
+static bool MultiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product,
+ bool IsSigned) {
+ bool Overflow;
+ if (IsSigned)
+ Product = C1.smul_ov(C2, Overflow);
+ else
+ Product = C1.umul_ov(C2, Overflow);
+
+ return Overflow;
+}
- APInt MulExt = LHSExt * RHSExt;
+/// \brief True if C1 is a multiple of C2. Quotient contains C1/C2.
+static bool IsMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
+ bool IsSigned) {
+ assert(C1.getBitWidth() == C2.getBitWidth() &&
+ "Inconsistent width of constants!");
- if (!sign)
- return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
+ APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
+ if (IsSigned)
+ APInt::sdivrem(C1, C2, Quotient, Remainder);
+ else
+ APInt::udivrem(C1, C2, Quotient, Remainder);
- APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
- APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
- return MulExt.slt(Min) || MulExt.sgt(Max);
+ return Remainder.isMinValue();
}
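MultiplyOverflows and IsMultiple now delegate to APInt's overflow-reporting multiplies and divide-with-remainder instead of the old widen-and-compare trick. The intended semantics can be sketched in plain C++ on 32-bit values; the helper names below are made up for illustration and this is not the APInt API:

    #include <cassert>
    #include <cstdint>

    // True if a * b overflows int32_t; Product receives the wrapped result.
    static bool multiplyOverflowsS32(int32_t a, int32_t b, int32_t &Product) {
      return __builtin_mul_overflow(a, b, &Product);  // GCC/Clang builtin
    }

    // True if a is a multiple of b; Quotient receives a / b.
    static bool isMultipleS32(int32_t a, int32_t b, int32_t &Quotient) {
      Quotient = a / b;
      return a % b == 0;
    }

    int main() {
      int32_t P, Q;
      assert(!multiplyOverflowsS32(1 << 15, 1 << 15, P) && P == (1 << 30));
      assert(multiplyOverflowsS32(1 << 16, 1 << 16, P));   // 2^32 does not fit
      assert(isMultipleS32(12, 4, Q) && Q == 3);
      assert(!isMultipleS32(12, 5, Q));
      return 0;
    }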
/// \brief A helper routine of InstCombiner::visitMul().
@@ -123,7 +130,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyMulInst(Op0, Op1, DL))
+ if (Value *V = SimplifyMulInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -155,8 +162,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (NewCst) {
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
- if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
- if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+
+ if (I.hasNoUnsignedWrap())
+ Shl->setHasNoUnsignedWrap();
+
return Shl;
}
}
@@ -277,9 +286,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
Value *BoolCast = nullptr, *OtherOp = nullptr;
- if (MaskedValueIsZero(Op0, Negative2))
+ if (MaskedValueIsZero(Op0, Negative2, 0, &I))
BoolCast = Op0, OtherOp = Op1;
- else if (MaskedValueIsZero(Op1, Negative2))
+ else if (MaskedValueIsZero(Op1, Negative2, 0, &I))
BoolCast = Op1, OtherOp = Op0;
if (BoolCast) {
@@ -292,40 +301,32 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
-//
-// Detect pattern:
-//
-// log2(Y*0.5)
-//
-// And check for corresponding fast math flags
-//
-
+/// Detect pattern log2(Y * 0.5) with corresponding fast math flags.
static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
-
- if (!Op->hasOneUse())
- return;
-
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
- if (!II)
- return;
- if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
- return;
- Log2 = II;
-
- Value *OpLog2Of = II->getArgOperand(0);
- if (!OpLog2Of->hasOneUse())
- return;
-
- Instruction *I = dyn_cast<Instruction>(OpLog2Of);
- if (!I)
- return;
- if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
- return;
-
- if (match(I->getOperand(0), m_SpecificFP(0.5)))
- Y = I->getOperand(1);
- else if (match(I->getOperand(1), m_SpecificFP(0.5)))
- Y = I->getOperand(0);
+ if (!Op->hasOneUse())
+ return;
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
+ if (!II)
+ return;
+ if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
+ return;
+ Log2 = II;
+
+ Value *OpLog2Of = II->getArgOperand(0);
+ if (!OpLog2Of->hasOneUse())
+ return;
+
+ Instruction *I = dyn_cast<Instruction>(OpLog2Of);
+ if (!I)
+ return;
+ if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ return;
+
+ if (match(I->getOperand(0), m_SpecificFP(0.5)))
+ Y = I->getOperand(1);
+ else if (match(I->getOperand(1), m_SpecificFP(0.5)))
+ Y = I->getOperand(0);
}
static bool isFiniteNonZeroFp(Constant *C) {
@@ -440,7 +441,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (isa<Constant>(Op0))
std::swap(Op0, Op1);
- if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL))
+ if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, TLI,
+ DT, AT))
return ReplaceInstUsesWith(I, V);
bool AllowReassociate = I.hasUnsafeAlgebra();
@@ -510,10 +512,15 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ // sqrt(X) * sqrt(X) -> X
+ if (AllowReassociate && (Op0 == Op1))
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0))
+ if (II->getIntrinsicID() == Intrinsic::sqrt)
+ return ReplaceInstUsesWith(I, II->getOperand(0));
// Under unsafe algebra do:
// X * log2(0.5*Y) = X*log2(Y) - X
- if (I.hasUnsafeAlgebra()) {
+ if (AllowReassociate) {
Value *OpX = nullptr;
Value *OpY = nullptr;
IntrinsicInst *Log2;
@@ -596,36 +603,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
- // B * (uitofp i1 C) -> select C, B, 0
- if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
- Value *LHS = Op0, *RHS = Op1;
- Value *B, *C;
- if (!match(RHS, m_UIToFP(m_Value(C))))
- std::swap(LHS, RHS);
-
- if (match(RHS, m_UIToFP(m_Value(C))) &&
- C->getType()->getScalarType()->isIntegerTy(1)) {
- B = LHS;
- Value *Zero = ConstantFP::getNegativeZero(B->getType());
- return SelectInst::Create(C, B, Zero);
- }
- }
-
- // A * (1 - uitofp i1 C) -> select C, 0, A
- if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
- Value *LHS = Op0, *RHS = Op1;
- Value *A, *C;
- if (!match(RHS, m_FSub(m_FPOne(), m_UIToFP(m_Value(C)))))
- std::swap(LHS, RHS);
-
- if (match(RHS, m_FSub(m_FPOne(), m_UIToFP(m_Value(C)))) &&
- C->getType()->getScalarType()->isIntegerTy(1)) {
- A = LHS;
- Value *Zero = ConstantFP::getNegativeZero(A->getType());
- return SelectInst::Create(C, Zero, A);
- }
- }
-
if (!isa<Constant>(Op1))
std::swap(Opnd0, Opnd1);
else
@@ -714,7 +691,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
I.setOperand(1, V);
return &I;
}
@@ -724,25 +701,83 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
return &I;
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // (X / C1) / C2 -> X / (C1*C2)
- if (Instruction *LHS = dyn_cast<Instruction>(Op0))
- if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
- if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
- if (MultiplyOverflows(RHS, LHSRHS,
- I.getOpcode() == Instruction::SDiv))
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- ConstantExpr::getMul(RHS, LHSRHS));
+ if (Instruction *LHS = dyn_cast<Instruction>(Op0)) {
+ const APInt *C2;
+ if (match(Op1, m_APInt(C2))) {
+ Value *X;
+ const APInt *C1;
+ bool IsSigned = I.getOpcode() == Instruction::SDiv;
+
+ // (X / C1) / C2 -> X / (C1*C2)
+ if ((IsSigned && match(LHS, m_SDiv(m_Value(X), m_APInt(C1)))) ||
+ (!IsSigned && match(LHS, m_UDiv(m_Value(X), m_APInt(C1))))) {
+ APInt Product(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ if (!MultiplyOverflows(*C1, *C2, Product, IsSigned))
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(I.getType(), Product));
+ }
+
+ if ((IsSigned && match(LHS, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
+ (!IsSigned && match(LHS, m_NUWMul(m_Value(X), m_APInt(C1))))) {
+ APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+
+ // (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
+ if (IsMultiple(*C2, *C1, Quotient, IsSigned)) {
+ BinaryOperator *BO = BinaryOperator::Create(
+ I.getOpcode(), X, ConstantInt::get(X->getType(), Quotient));
+ BO->setIsExact(I.isExact());
+ return BO;
}
- if (!RHS->isZero()) { // avoid X udiv 0
- if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = FoldOpIntoSelect(I, SI))
- return R;
- if (isa<PHINode>(Op0))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
+ // (X * C1) / C2 -> X * (C1 / C2) if C1 is a multiple of C2.
+ if (IsMultiple(*C1, *C2, Quotient, IsSigned)) {
+ BinaryOperator *BO = BinaryOperator::Create(
+ Instruction::Mul, X, ConstantInt::get(X->getType(), Quotient));
+ BO->setHasNoUnsignedWrap(
+ !IsSigned &&
+ cast<OverflowingBinaryOperator>(LHS)->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(
+ cast<OverflowingBinaryOperator>(LHS)->hasNoSignedWrap());
+ return BO;
+ }
+ }
+
+ if ((IsSigned && match(LHS, m_NSWShl(m_Value(X), m_APInt(C1))) &&
+ *C1 != C1->getBitWidth() - 1) ||
+ (!IsSigned && match(LHS, m_NUWShl(m_Value(X), m_APInt(C1))))) {
+ APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt C1Shifted = APInt::getOneBitSet(
+ C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
+
+ // (X << C1) / C2 -> X / (C2 >> C1) if C2 is a multiple of C1.
+ if (IsMultiple(*C2, C1Shifted, Quotient, IsSigned)) {
+ BinaryOperator *BO = BinaryOperator::Create(
+ I.getOpcode(), X, ConstantInt::get(X->getType(), Quotient));
+ BO->setIsExact(I.isExact());
+ return BO;
+ }
+
+ // (X << C1) / C2 -> X * (C2 >> C1) if C1 is a multiple of C2.
+ if (IsMultiple(C1Shifted, *C2, Quotient, IsSigned)) {
+ BinaryOperator *BO = BinaryOperator::Create(
+ Instruction::Mul, X, ConstantInt::get(X->getType(), Quotient));
+ BO->setHasNoUnsignedWrap(
+ !IsSigned &&
+ cast<OverflowingBinaryOperator>(LHS)->hasNoUnsignedWrap());
+ BO->setHasNoSignedWrap(
+ cast<OverflowingBinaryOperator>(LHS)->hasNoSignedWrap());
+ return BO;
+ }
+ }
+
+ if (*C2 != 0) { // avoid X udiv 0
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = FoldOpIntoSelect(I, SI))
+ return R;
+ if (isa<PHINode>(Op0))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ }
}
}
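The new folds only fire when the constant arithmetic is exact: `(X / C1) / C2 -> X / (C1*C2)` requires the product not to overflow, and `(X * C1) / C2` becomes either a division or a multiplication by the exact quotient depending on which constant divides the other, with the nsw/nuw flag on the inner multiply supplying the no-wrap precondition. A worked check with illustrative constants (unsigned case; keeping X small models the nuw requirement):

    #include <cassert>
    #include <cstdint>

    int main() {
      // (X /u 3) /u 5  ->  X /u 15        (3 * 5 does not overflow)
      const uint32_t Xs[] = {0, 14, 15, 44, 4000000000u};
      for (uint32_t X : Xs)
        assert((X / 3) / 5 == X / 15);

      // (X *nuw 12) /u 4 -> X * 3    (12 is a multiple of 4)
      // (X *nuw 4) /u 12 -> X /u 3   (12 is a multiple of 4, other direction)
      const uint32_t Small[] = {0, 1, 7, 1000};   // small enough: the mul has no wrap
      for (uint32_t X : Small) {
        assert((X * 12) / 4 == X * 3);
        assert((X * 4) / 12 == X / 3);
      }
      return 0;
    }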
@@ -828,7 +863,8 @@ static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
const APInt &C = cast<Constant>(Op1)->getUniqueInteger();
BinaryOperator *LShr = BinaryOperator::CreateLShr(
Op0, ConstantInt::get(Op0->getType(), C.logBase2()));
- if (I.isExact()) LShr->setIsExact();
+ if (I.isExact())
+ LShr->setIsExact();
return LShr;
}
@@ -856,7 +892,8 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
N = IC.Builder->CreateZExt(N, Z->getDestTy());
BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
- if (I.isExact()) LShr->setIsExact();
+ if (I.isExact())
+ LShr->setIsExact();
return LShr;
}
@@ -893,10 +930,10 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
return 0;
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
- if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions))
- if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions)) {
- Actions.push_back(UDivFoldAction((FoldUDivOperandCb)nullptr, Op1,
- LHSIdx-1));
+ if (size_t LHSIdx =
+ visitUDivOperand(Op0, SI->getOperand(1), I, Actions, Depth))
+ if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions, Depth)) {
+ Actions.push_back(UDivFoldAction(nullptr, Op1, LHSIdx - 1));
return Actions.size();
}
@@ -909,7 +946,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyUDivInst(Op0, Op1, DL))
+ if (Value *V = SimplifyUDivInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// Handle the integer div common cases
@@ -917,19 +954,25 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return Common;
// (x lshr C1) udiv C2 --> x udiv (C2 << C1)
- if (Constant *C2 = dyn_cast<Constant>(Op1)) {
+ {
Value *X;
- Constant *C1;
- if (match(Op0, m_LShr(m_Value(X), m_Constant(C1))))
- return BinaryOperator::CreateUDiv(X, ConstantExpr::getShl(C2, C1));
+ const APInt *C1, *C2;
+ if (match(Op0, m_LShr(m_Value(X), m_APInt(C1))) &&
+ match(Op1, m_APInt(C2))) {
+ bool Overflow;
+ APInt C2ShlC1 = C2->ushl_ov(*C1, Overflow);
+ if (!Overflow)
+ return BinaryOperator::CreateUDiv(
+ X, ConstantInt::get(X->getType(), C2ShlC1));
+ }
}
// (zext A) udiv (zext B) --> zext (A udiv B)
if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
- return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div",
- I.isExact()),
- I.getType());
+ return new ZExtInst(
+ Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()),
+ I.getType());
// (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
SmallVector<UDivFoldAction, 6> UDivActions;
@@ -971,7 +1014,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifySDivInst(Op0, Op1, DL))
+ if (Value *V = SimplifySDivInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// Handle the integer div common cases
@@ -1008,8 +1051,8 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// unsigned inputs), turn this into a udiv.
if (I.getType()->isIntegerTy()) {
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op0, Mask)) {
- if (MaskedValueIsZero(Op1, Mask)) {
+ if (MaskedValueIsZero(Op0, Mask, 0, &I)) {
+ if (MaskedValueIsZero(Op1, Mask, 0, &I)) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
@@ -1034,8 +1077,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
/// If the conversion was successful, the simplified expression "X * 1/C" is
/// returned; otherwise, NULL is returned.
///
-static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
- Constant *Divisor,
+static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Constant *Divisor,
bool AllowReciprocal) {
if (!isa<ConstantFP>(Divisor)) // TODO: handle vectors.
return nullptr;
@@ -1064,7 +1106,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFDivInst(Op0, Op1, DL))
+ if (Value *V = SimplifyFDivInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
@@ -1195,7 +1237,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
I.setOperand(1, V);
return &I;
}
@@ -1229,7 +1271,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyURemInst(Op0, Op1, DL))
+ if (Value *V = SimplifyURemInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
if (Instruction *common = commonIRemTransforms(I))
@@ -1242,7 +1284,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) {
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true, 0, AT, &I, DT)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
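The urem fold uses the fact that for a power-of-two divisor the unsigned remainder is just the low bits, so `X urem Y` is `X & (Y - 1)`; the CreateAdd with all-ones above is exactly the `Y - 1` computation. A quick standalone check of the identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Xs[] = {0, 1, 123, 0xDEADBEEF, 0xFFFFFFFF};
      for (uint32_t Shift = 0; Shift < 32; ++Shift) {
        uint32_t Y = uint32_t(1) << Shift;          // a power of two
        for (uint32_t X : Xs)
          assert(X % Y == (X & (Y - 1)));           // X urem Y == X & (Y - 1)
      }
      return 0;
    }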
@@ -1264,28 +1306,29 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifySRemInst(Op0, Op1, DL))
+ if (Value *V = SimplifySRemInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// Handle the integer rem common cases
if (Instruction *Common = commonIRemTransforms(I))
return Common;
- if (Value *RHSNeg = dyn_castNegVal(Op1))
- if (!isa<Constant>(RHSNeg) ||
- (isa<ConstantInt>(RHSNeg) &&
- cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {
- // X % -Y -> X % Y
+ {
+ const APInt *Y;
+ // X % -Y -> X % Y
+ if (match(Op1, m_APInt(Y)) && Y->isNegative() && !Y->isMinSignedValue()) {
Worklist.AddValue(I.getOperand(1));
- I.setOperand(1, RHSNeg);
+ I.setOperand(1, ConstantInt::get(I.getType(), -*Y));
return &I;
}
+ }
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a urem.
if (I.getType()->isIntegerTy()) {
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
- if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+ if (MaskedValueIsZero(Op1, Mask, 0, &I) &&
+ MaskedValueIsZero(Op0, Mask, 0, &I)) {
// X srem Y -> X urem Y, iff X and Y don't have sign bit set
return BinaryOperator::CreateURem(Op0, Op1, I.getName());
}
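Two facts drive the srem rewrites: the result of a signed remainder does not depend on the sign of the divisor, so `X % -Y == X % Y` whenever -Y is representable (hence the isMinSignedValue exclusion), and when both operands have a clear sign bit the signed and unsigned remainders agree. A small illustration with hypothetical values:

    #include <cassert>
    #include <cstdint>

    int main() {
      // X srem -7 == X srem 7 (the divisor's sign never affects the result).
      const int32_t Xs[] = {-22, -7, -1, 0, 1, 7, 22};
      for (int32_t X : Xs)
        assert(X % -7 == X % 7);

      // If the sign bits of both operands are zero, srem behaves like urem.
      int32_t A = 1000, B = 37;                     // both non-negative
      assert(A % B == int32_t(uint32_t(A) % uint32_t(B)));
      return 0;
    }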
@@ -1338,7 +1381,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFRemInst(Op0, Op1, DL))
+ if (Value *V = SimplifyFRemInst(Op0, Op1, DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
// Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 46f7b8a..794263a 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -506,12 +506,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
/// that is dead.
static bool DeadPHICycle(PHINode *PN,
- SmallPtrSet<PHINode*, 16> &PotentiallyDeadPHIs) {
+ SmallPtrSetImpl<PHINode*> &PotentiallyDeadPHIs) {
if (PN->use_empty()) return true;
if (!PN->hasOneUse()) return false;
// Remember this node, and if we find the cycle, return.
- if (!PotentiallyDeadPHIs.insert(PN))
+ if (!PotentiallyDeadPHIs.insert(PN).second)
return true;
// Don't scan crazily complex things.
@@ -528,9 +528,9 @@ static bool DeadPHICycle(PHINode *PN,
/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
/// z = some value; x = phi (y, z); y = phi (x, z)
static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
- SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+ SmallPtrSetImpl<PHINode*> &ValueEqualPHIs) {
// See if we already saw this PHI node.
- if (!ValueEqualPHIs.insert(PN))
+ if (!ValueEqualPHIs.insert(PN).second)
return true;
// Don't scan crazily complex things.
@@ -654,7 +654,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// If the user is a PHI, inspect its uses recursively.
if (PHINode *UserPN = dyn_cast<PHINode>(UserI)) {
- if (PHIsInspected.insert(UserPN))
+ if (PHIsInspected.insert(UserPN).second)
PHIsToSlice.push_back(UserPN);
continue;
}
@@ -788,7 +788,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, DL, TLI))
+ if (Value *V = SimplifyInstruction(&PN, DL, TLI, DT, AT))
return ReplaceInstUsesWith(PN, V);
// If all PHI operands are the same operation, pull them through the PHI,
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 06c9e29..079ae34 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -313,7 +313,9 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ AssumptionTracker *AT) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -334,10 +336,10 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
- TLI);
+ TLI, DT, AT);
if (C->getOperand(1) == Op)
return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
- TLI);
+ TLI, DT, AT);
}
// TODO: We could hand off more cases to instsimplify here.
@@ -390,11 +392,11 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
///
/// This also tries to turn
/// --- Single bit tests:
-/// if ((x & C) == 0) x |= C to x |= C
-/// if ((x & C) != 0) x ^= C to x &= ~C
-/// if ((x & C) == 0) x ^= C to x |= C
-/// if ((x & C) != 0) x &= ~C to x &= ~C
-/// if ((x & C) == 0) x &= ~C to nothing
+/// if ((x & C) == 0) x |= C to x |= C
+/// if ((x & C) != 0) x ^= C to x &= ~C
+/// if ((x & C) == 0) x ^= C to x |= C
+/// if ((x & C) != 0) x &= ~C to x &= ~C
+/// if ((x & C) == 0) x &= ~C to nothing
static Value *foldSelectICmpAndOr(SelectInst &SI, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy *Builder) {
@@ -605,18 +607,26 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI,
+ DT, AT) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI,
+ DT, AT) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI,
+ DT, AT) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI,
+ DT, AT) == FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI,
+ DT, AT) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI,
+ DT, AT) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI,
+ DT, AT) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI,
+ DT, AT) == TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -825,7 +835,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *TrueVal = SI.getTrueValue();
Value *FalseVal = SI.getFalseValue();
- if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, DL))
+ if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, TLI,
+ DT, AT))
return ReplaceInstUsesWith(SI, V);
if (SI.getType()->isIntegerTy(1)) {
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 2495747..afa907a 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -68,7 +68,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
/// this succeeds, the GetShiftedValue function will be called to produce the
/// value.
static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
- InstCombiner &IC) {
+ InstCombiner &IC, Instruction *CxtI) {
// We can always evaluate constants shifted.
if (isa<Constant>(V))
return true;
@@ -111,8 +111,8 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Or:
case Instruction::Xor:
// Bitwise operators can all be arbitrarily evaluated shifted.
- return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
- CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+ return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC, I) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC, I);
case Instruction::Shl: {
// We can often fold the shift into shifts-by-a-constant.
@@ -131,8 +131,9 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// profitable unless we know the and'd out bits are already zero.
if (CI->getZExtValue() > NumBits) {
unsigned LowBits = TypeWidth - CI->getZExtValue();
- if (MaskedValueIsZero(I->getOperand(0),
- APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+ if (IC.MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits,
+ 0, CxtI))
return true;
}
@@ -155,8 +156,9 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// profitable unless we know the and'd out bits are already zero.
if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) {
unsigned LowBits = CI->getZExtValue() - NumBits;
- if (MaskedValueIsZero(I->getOperand(0),
- APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
+ if (IC.MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits,
+ 0, CxtI))
return true;
}
@@ -164,8 +166,9 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
- return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
- CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+ return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift,
+ IC, SI) &&
+ CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC, SI);
}
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -173,7 +176,8 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+ if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,
+ IC, PN))
return false;
return true;
}
@@ -329,7 +333,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
// See if we can propagate this shift into the input, this covers the trivial
// cast of lshr(shl(x,c1),c2) as well as other more complex cases.
if (I.getOpcode() != Instruction::AShr &&
- CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) {
+ CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this, &I)) {
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
@@ -488,7 +492,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
}
- // If the operand is an bitwise operator with a constant RHS, and the
+ // If the operand is a bitwise operator with a constant RHS, and the
// shift is the only use, we can pull it out of the shift.
if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
bool isValid = true; // Valid only for And, Or, Xor
@@ -691,7 +695,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
- DL))
+ DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
if (Instruction *V = commonShiftTransforms(I))
@@ -703,14 +707,15 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
// If the shifted-out value is known-zero, then this is a NUW shift.
if (!I.hasNoUnsignedWrap() &&
MaskedValueIsZero(I.getOperand(0),
- APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
+ APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt),
+ 0, &I)) {
I.setHasNoUnsignedWrap();
return &I;
}
// If the shifted out value is all signbits, this is a NSW shift.
if (!I.hasNoSignedWrap() &&
- ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
+ ComputeNumSignBits(I.getOperand(0), 0, &I) > ShAmt) {
I.setHasNoSignedWrap();
return &I;
}
@@ -731,7 +736,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
- I.isExact(), DL))
+ I.isExact(), DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
@@ -760,7 +765,8 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
- MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ MaskedValueIsZero(Op0, APInt::getLowBitsSet(Op1C->getBitWidth(), ShAmt),
+ 0, &I)){
I.setIsExact();
return &I;
}
@@ -774,7 +780,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
- I.isExact(), DL))
+ I.isExact(), DL, TLI, DT, AT))
return ReplaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
@@ -804,7 +810,8 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
- MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt),
+ 0, &I)){
I.setIsExact();
return &I;
}
@@ -812,13 +819,9 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
// See if we can turn a signed shr into an unsigned shr.
if (MaskedValueIsZero(Op0,
- APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ APInt::getSignBit(I.getType()->getScalarSizeInBits()),
+ 0, &I))
return BinaryOperator::CreateLShr(Op0, Op1);
- // Arithmetic shifting an all-sign-bit value is a no-op.
- unsigned NumSignBits = ComputeNumSignBits(Op0);
- if (NumSignBits == Op0->getType()->getScalarSizeInBits())
- return ReplaceInstUsesWith(I, Op0);
-
return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 1b42d3d..ad6983a 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -43,6 +43,20 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
// This instruction is producing bits that are not demanded. Shrink the RHS.
Demanded &= OpC->getValue();
I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+
+ // If either 'nsw' or 'nuw' is set and the constant is negative,
+ // removing *any* bits from the constant could make overflow occur.
+ // Remove 'nsw' and 'nuw' from the instruction in this case.
+ if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
+ assert(OBO->getOpcode() == Instruction::Add);
+ if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
+ if (OpC->getValue().isNegative()) {
+ cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
+ cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
+ }
+ }
+ }
+
return true;
}
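The flag clearing is necessary because the shrunk constant is a different number, so a no-wrap guarantee proved for the original constant says nothing about the new one. A concrete i8-sized illustration (values chosen for the example): `add nsw x, -4` with x = 120 gives 116 and does not overflow, but if only the low nibble is demanded the constant shrinks to 12, and 120 + 12 = 132 leaves the i8 range, so keeping nsw on the rewritten add would be wrong. The arithmetic, checked standalone:

    #include <cassert>

    int main() {
      int x = 120;                          // fits in i8
      int oldC = -4;                        // 0b11111100, a negative constant
      int newC = oldC & 0x0F;               // only the low 4 bits demanded
      assert(newC == 12);
      assert(x + oldC == 116);              // inside [-128, 127]: original nsw holds
      assert(x + newC == 132 && 132 > 127); // outside i8 range: nsw no longer valid
      return 0;
    }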
@@ -57,7 +71,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
- KnownZero, KnownOne, 0);
+ KnownZero, KnownOne, 0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
ReplaceInstUsesWith(Inst, V);
@@ -71,7 +85,8 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
- KnownZero, KnownOne, Depth);
+ KnownZero, KnownOne, Depth,
+ dyn_cast<Instruction>(U.getUser()));
if (!NewVal) return false;
U = NewVal;
return true;
@@ -101,7 +116,8 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
/// in the context where the specified bits are demanded, but not for all users.
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
- unsigned Depth) {
+ unsigned Depth,
+ Instruction *CxtI) {
assert(V != nullptr && "Null pointer of Value???");
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
@@ -144,7 +160,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- computeKnownBits(V, KnownZero, KnownOne, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
return nullptr; // Only analyze instructions.
}
@@ -158,8 +174,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ CxtI);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
@@ -180,8 +198,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ CxtI);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
@@ -205,8 +225,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ CxtI);
// If all of the demanded bits are known zero on one side, return the
// other.
@@ -217,7 +239,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
// Compute the KnownZero/KnownOne bits to simplify things downstream.
- computeKnownBits(I, KnownZero, KnownOne, Depth);
+ computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
return nullptr;
}
@@ -230,7 +252,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
switch (I->getOpcode()) {
default:
- computeKnownBits(I, KnownZero, KnownOne, Depth);
+ computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
@@ -242,6 +264,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
+ (RHSKnownOne & LHSKnownOne))) == DemandedMask)
+ return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);
+
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
@@ -274,6 +302,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
+ (RHSKnownOne | LHSKnownOne))) == DemandedMask)
+ return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
@@ -310,6 +344,18 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
+ (RHSKnownOne & LHSKnownOne);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
+ (RHSKnownOne & LHSKnownZero);
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, IKnownOne);
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if ((DemandedMask & RHSKnownZero) == DemandedMask)
@@ -581,7 +627,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Otherwise just hand the sub off to computeKnownBits to fill in
// the known zeros and ones.
- computeKnownBits(V, KnownZero, KnownOne, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
@@ -752,7 +798,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ CxtI);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(KnownZero.getBitWidth() - 1);
@@ -814,7 +861,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return nullptr;
}
}
- computeKnownBits(V, KnownZero, KnownOne, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
break;
}
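The early returns added for and, or and xor all use the same idea: once the known-zero and known-one masks cover every demanded bit, the demanded part of the result is already a constant and the expression folds to `Constant::getIntegerValue(VTy, KnownOne)`. For xor the masks combine as KnownZero = (KZ0 & KZ1) | (KO0 & KO1) and KnownOne = (KZ0 & KO1) | (KO0 & KZ1), as in the patch. A standalone bit-mask check with made-up known bits:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Operand 0: low byte fully known, value 0x5A. Operand 1: low byte known 0x0F.
      uint32_t KZ0 = 0xA5, KO0 = 0x5A;     // known-zero / known-one of operand 0
      uint32_t KZ1 = 0xF0, KO1 = 0x0F;     // known-zero / known-one of operand 1

      uint32_t IKnownZero = (KZ0 & KZ1) | (KO0 & KO1);
      uint32_t IKnownOne  = (KZ0 & KO1) | (KO0 & KZ1);

      uint32_t DemandedMask = 0xFF;        // the client only cares about the low byte
      assert((DemandedMask & (IKnownZero | IKnownOne)) == DemandedMask);
      assert(IKnownOne == (0x5Au ^ 0x0Fu));  // so the xor folds to this constant
      return 0;
    }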
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 1ab7db3..8d857d0 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef INSTCOMBINE_WORKLIST_H
-#define INSTCOMBINE_WORKLIST_H
+#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
+#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 08e2446..e4a4fef 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -39,12 +39,16 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -68,11 +72,6 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
-static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
- cl::init(false),
- cl::desc("Enable unsafe double to float "
- "shrinking for math lib calls"));
-
// Initialization Routines
void llvm::initializeInstCombine(PassRegistry &Registry) {
initializeInstCombinerPass(Registry);
@@ -85,12 +84,14 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
char InstCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
"Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(InstCombiner, "instcombine",
"Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -390,6 +391,25 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
Instruction::BinaryOps ROp) {
if (Instruction::isCommutative(ROp))
return LeftDistributesOverRight(ROp, LOp);
+
+ switch (LOp) {
+ default:
+ return false;
+ // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
+ // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
+ // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return true;
+ }
+ }
// TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
// but this requires knowing that the addition does not overflow and other
// such subtleties.
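The new cases record that a shift by a common amount distributes over the bitwise operators, e.g. `(X >> Z) & (Y >> Z) == (X & Y) >> Z`, and likewise for or, xor and left shifts, because each result bit depends only on the corresponding bits of X and Y. A brute-force sanity check over all 8-bit values:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned Y = 0; Y < 256; ++Y)
          for (unsigned Z = 0; Z < 8; ++Z) {
            uint8_t x = X, y = Y;
            assert(uint8_t((x >> Z) & (y >> Z)) == uint8_t((x & y) >> Z));
            assert(uint8_t((x >> Z) | (y >> Z)) == uint8_t((x | y) >> Z));
            assert(uint8_t((x >> Z) ^ (y >> Z)) == uint8_t((x ^ y) >> Z));
            assert(uint8_t((x << Z) & (y << Z)) == uint8_t((x & y) << Z));
          }
      return 0;
    }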
@@ -411,26 +431,37 @@ static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) {
}
/// This function factors binary ops which can be combined using distributive
-/// laws. This also factor SHL as MUL e.g. SHL(X, 2) ==> MUL(X, 4).
+/// laws. This function tries to transform 'Op' based on TopLevelOpcode to
+/// enable factorization, e.g. for ADD(SHL(X, 2), MUL(X, 5)): when called with
+/// TopLevelOpcode == Instruction::Add and Op = SHL(X, 2), it transforms
+/// SHL(X, 2) into MUL(X, 4), i.e. returns Instruction::Mul with LHS set to 'X'
+/// and RHS to 4.
static Instruction::BinaryOps
-getBinOpsForFactorization(BinaryOperator *Op, Value *&LHS, Value *&RHS) {
+getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
+ BinaryOperator *Op, Value *&LHS, Value *&RHS) {
if (!Op)
return Instruction::BinaryOpsEnd;
- if (Op->getOpcode() == Instruction::Shl) {
- if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
- // The multiplier is really 1 << CST.
- RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
- LHS = Op->getOperand(0);
- return Instruction::Mul;
+ LHS = Op->getOperand(0);
+ RHS = Op->getOperand(1);
+
+ switch (TopLevelOpcode) {
+ default:
+ return Op->getOpcode();
+
+ case Instruction::Add:
+ case Instruction::Sub:
+ if (Op->getOpcode() == Instruction::Shl) {
+ if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
+ // The multiplier is really 1 << CST.
+ RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
+ return Instruction::Mul;
+ }
}
+ return Op->getOpcode();
}
// TODO: We can add other conversions e.g. shr => div etc.
-
- LHS = Op->getOperand(0);
- RHS = Op->getOperand(1);
- return Op->getOpcode();
}
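Treating `shl X, C` as `mul X, (1 << C)` is now done only when the top-level opcode is add or sub, which is exactly where it enables factoring out the common operand: for the example in the comment, `(X << 2) + (X * 5)` is seen as `X*4 + X*5` and factors to `X*9`. A concrete check of that example (constants are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Xs[] = {0, 1, 17, 123456};
      for (uint32_t X : Xs) {
        uint32_t Unfactored = (X << 2) + X * 5;   // add(shl(X, 2), mul(X, 5))
        uint32_t Factored   = X * 9;              // mul(X, 4 + 5)
        assert(Unfactored == Factored);
      }
      return 0;
    }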
/// This tries to simplify binary operations by factorizing out common terms
@@ -529,8 +560,9 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
// Factorization.
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B);
- Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D);
+ auto TopLevelOpcode = I.getOpcode();
+ auto LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
+ auto RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
// The instruction has the form "(A op' B) op (C op' D)". Try to factorize
// a common term.
@@ -552,7 +584,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
return V;
// Expansion.
- Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
// The instruction has the form "(A op' B) op C". See if expanding it out
// to "(A op C) op' (B op C)" results in simplifications.
@@ -765,13 +796,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// If the incoming non-constant value is in I's block, we will remove one
// instruction, but insert another equivalent one, leading to infinite
// instcombine.
- if (NonConstBB == I.getParent())
+ if (isPotentiallyReachable(I.getParent(), NonConstBB, DT,
+ getAnalysisIfAvailable<LoopInfo>()))
return nullptr;
}
// If there is exactly one non-constant value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would be
- // inserting the computation one some other paths (e.g. inside a loop). Only
+ // inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
if (NonConstBB != nullptr) {
BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
@@ -1284,7 +1316,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
- if (Value *V = SimplifyGEPInst(Ops, DL))
+ if (Value *V = SimplifyGEPInst(Ops, DL, TLI, DT, AT))
return ReplaceInstUsesWith(GEP, V);
Value *PtrOp = GEP.getOperand(0);
@@ -1478,19 +1510,50 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
}
- // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y))
- // The GEP pattern is emitted by the SCEV expander for certain kinds of
- // pointer arithmetic.
- if (DL && GEP.getNumIndices() == 1 &&
- match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) {
+ if (DL && GEP.getNumIndices() == 1) {
unsigned AS = GEP.getPointerAddressSpace();
- if (GEP.getType() == Builder->getInt8PtrTy(AS) &&
- GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
+ if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
DL->getPointerSizeInBits(AS)) {
- Operator *Index = cast<Operator>(GEP.getOperand(1));
- Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
- Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
- return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+ Type *PtrTy = GEP.getPointerOperandType();
+ Type *Ty = PtrTy->getPointerElementType();
+ uint64_t TyAllocSize = DL->getTypeAllocSize(Ty);
+
+ bool Matched = false;
+ uint64_t C;
+ Value *V = nullptr;
+ if (TyAllocSize == 1) {
+ V = GEP.getOperand(1);
+ Matched = true;
+ } else if (match(GEP.getOperand(1),
+ m_AShr(m_Value(V), m_ConstantInt(C)))) {
+ if (TyAllocSize == 1ULL << C)
+ Matched = true;
+ } else if (match(GEP.getOperand(1),
+ m_SDiv(m_Value(V), m_ConstantInt(C)))) {
+ if (TyAllocSize == C)
+ Matched = true;
+ }
+
+ if (Matched) {
+ // Canonicalize (gep i8* X, -(ptrtoint Y))
+ // to (inttoptr (sub (ptrtoint X), (ptrtoint Y)))
+ // The GEP pattern is emitted by the SCEV expander for certain kinds of
+ // pointer arithmetic.
+ if (match(V, m_Neg(m_PtrToInt(m_Value())))) {
+ Operator *Index = cast<Operator>(V);
+ Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
+ Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+ return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+ }
+ // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X))
+ // to (bitcast Y)
+ Value *Y;
+ if (match(V, m_Sub(m_PtrToInt(m_Value(Y)),
+ m_PtrToInt(m_Specific(GEP.getOperand(0)))))) {
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y,
+ GEP.getType());
+ }
+ }
}
}
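Both canonicalizations are statements about byte addresses: `gep i8* X, -(ptrtoint Y)` produces the address `X - Y` (the inttoptr-of-sub form), and `gep i8* X, (ptrtoint Y) - (ptrtoint X)` produces `X + (Y - X) = Y`, so it is just Y cast to the destination pointer type. The TyAllocSize matching extends this beyond i8: an index of the form `V ashr log2(size)` or `V sdiv size` is recognized as a byte offset that was pre-divided by the element size. The second identity, checked on ordinary C++ pointers under the usual flat-address-space assumption (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      char Buf[64];
      char *X = Buf + 5;
      char *Y = Buf + 37;
      // gep i8* X, (ptrtoint Y) - (ptrtoint X)  ==>  Y
      uintptr_t Diff =
          reinterpret_cast<uintptr_t>(Y) - reinterpret_cast<uintptr_t>(X);
      assert(X + Diff == Y);
      return 0;
    }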
@@ -1582,9 +1645,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
- if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
- return new BitCastInst(NewGEP, GEP.getType());
- return new AddrSpaceCastInst(NewGEP, GEP.getType());
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
+ GEP.getType());
}
// Transform things like:
@@ -1616,9 +1678,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
- if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
- return new BitCastInst(NewGEP, GEP.getType());
- return new AddrSpaceCastInst(NewGEP, GEP.getType());
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
+ GEP.getType());
}
}
}
@@ -1658,9 +1719,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
- if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
- return new BitCastInst(NewGEP, GEP.getType());
- return new AddrSpaceCastInst(NewGEP, GEP.getType());
+ return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
+ GEP.getType());
}
}
}
@@ -1670,6 +1730,18 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!DL)
return nullptr;
+ // addrspacecast between types is canonicalized as a bitcast, then an
+ // addrspacecast. To take advantage of the below bitcast + struct GEP, look
+ // through the addrspacecast.
+ if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
+ // X = bitcast A addrspace(1)* to B addrspace(1)*
+ // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
+ // Z = gep Y, <...constant indices...>
+ // Into an addrspacecasted GEP of the struct.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
+ PtrOp = BC;
+ }
+
/// See if we can simplify:
/// X = bitcast A* to B*
/// Y = gep X, <...constant indices...>
@@ -1678,11 +1750,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
Value *Operand = BCI->getOperand(0);
PointerType *OpType = cast<PointerType>(Operand->getType());
- unsigned OffsetBits = DL->getPointerTypeSizeInBits(OpType);
+ unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType());
APInt Offset(OffsetBits, 0);
if (!isa<BitCastInst>(Operand) &&
- GEP.accumulateConstantOffset(*DL, Offset) &&
- StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
+ GEP.accumulateConstantOffset(*DL, Offset)) {
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
@@ -1700,6 +1771,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return &GEP;
}
}
+
+ if (Operand->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+ return new AddrSpaceCastInst(Operand, GEP.getType());
return new BitCastInst(Operand, GEP.getType());
}
@@ -1715,6 +1789,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
NGEP->takeName(&GEP);
+
+ if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+ return new AddrSpaceCastInst(NGEP, GEP.getType());
return new BitCastInst(NGEP, GEP.getType());
}
}
@@ -1925,7 +2002,25 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
return nullptr;
}
+Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
+ if (RI.getNumOperands() == 0) // ret void
+ return nullptr;
+
+ Value *ResultOp = RI.getOperand(0);
+ Type *VTy = ResultOp->getType();
+ if (!VTy->isIntegerTy())
+ return nullptr;
+
+ // There might be assume intrinsics dominating this return that completely
+ // determine the value. If so, constant fold it.
+ unsigned BitWidth = VTy->getPrimitiveSizeInBits();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(ResultOp, KnownZero, KnownOne, 0, &RI);
+ if ((KnownZero|KnownOne).isAllOnesValue())
+ RI.setOperand(0, Constant::getIntegerValue(VTy, KnownOne));
+ return nullptr;
+}
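The return value is fully determined when every bit is either known zero or known one, i.e. `KnownZero | KnownOne` is all ones; the value then equals `KnownOne`, which is what the new visitReturnInst folds to (useful mainly when dominating `llvm.assume` calls pin the bits down). The mask condition in standalone form, with made-up known bits:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Suppose assumes established: the low nibble is 0b0110 and the rest is 0.
      uint32_t KnownZero = 0xFFFFFFF9;     // bits known to be 0
      uint32_t KnownOne  = 0x00000006;     // bits known to be 1
      assert((KnownZero | KnownOne) == 0xFFFFFFFF); // every bit is known...
      uint32_t Folded = KnownOne;                   // ...so the value is KnownOne
      assert(Folded == 6);
      return 0;
    }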
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Change br (not X), label True, label False to: br X, label False, True
@@ -1977,6 +2072,37 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
+ unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ computeKnownBits(Cond, KnownZero, KnownOne);
+ unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
+ unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
+
+ // Compute the number of leading bits we can ignore.
+ for (auto &C : SI.cases()) {
+ LeadingKnownZeros = std::min(
+ LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
+ LeadingKnownOnes = std::min(
+ LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
+ }
+
+ unsigned NewWidth = BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes);
+
+ // Truncate the condition operand if the new type is equal to or larger than
+ // the largest legal integer type. We need to be conservative here since
+ // x86 generates redundant zero-extension instructions if the operand is
+ // truncated to i8 or i16.
+ if (BitWidth > NewWidth && NewWidth >= DL->getLargestLegalIntTypeSize()) {
+ IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
+ Builder->SetInsertPoint(&SI);
+ Value *NewCond = Builder->CreateTrunc(SI.getCondition(), Ty, "trunc");
+ SI.setCondition(NewCond);
+
+ for (auto &C : SI.cases())
+ static_cast<SwitchInst::CaseIt *>(&C)->setValue(ConstantInt::get(
+ SI.getContext(), C.getCaseValue()->getValue().trunc(NewWidth)));
+ }
+
if (Instruction *I = dyn_cast<Instruction>(Cond)) {
if (I->getOpcode() == Instruction::Add)
if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
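The narrowing counts how many leading bits are guaranteed identical across the condition's known bits and every case value; those bits can never distinguish cases, so the condition may be truncated to `BitWidth - max(LeadingKnownZeros, LeadingKnownOnes)` bits, and the patch only does so when the new width is at least the largest legal integer width to avoid the i8/i16 zero-extension issue noted in the comment. A standalone sketch of the width computation with hypothetical case values (not the pass itself):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    static unsigned countLeadingZeros32(uint32_t V) {
      return V == 0 ? 32 : __builtin_clz(V);     // GCC/Clang builtin
    }

    int main() {
      unsigned BitWidth = 32;
      uint32_t KnownZero = 0xFFFFFF00;           // condition known to be < 256
      unsigned LeadingKnownZeros = countLeadingZeros32(~KnownZero);  // 24
      unsigned LeadingKnownOnes = 0;             // no bits known to be 1 here

      const uint32_t Cases[] = {1, 7, 42};       // hypothetical case values
      for (uint32_t CaseVal : Cases)
        LeadingKnownZeros =
            std::min(LeadingKnownZeros, countLeadingZeros32(CaseVal));

      unsigned NewWidth =
          BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes);
      assert(NewWidth == 8);  // truncation happens only if this is >= the
                              // target's largest legal integer width
      return 0;
    }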
@@ -2215,7 +2341,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// If we already saw this clause, there is no point in having a second
// copy of it.
- if (AlreadyCaught.insert(TypeInfo)) {
+ if (AlreadyCaught.insert(TypeInfo).second) {
// This catch clause was not already seen.
NewClauses.push_back(CatchClause);
} else {
@@ -2297,7 +2423,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
continue;
// There is no point in having multiple copies of the same typeinfo in
// a filter, so only add it if we didn't already.
- if (SeenInFilter.insert(TypeInfo))
+ if (SeenInFilter.insert(TypeInfo).second)
NewFilterElts.push_back(cast<Constant>(Elt));
}
// A filter containing a catch-all cannot match anything by definition.
@@ -2534,7 +2660,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
/// whose condition is a known constant, we only visit the reachable successors.
///
static bool AddReachableCodeToWorklist(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 64> &Visited,
+ SmallPtrSetImpl<BasicBlock*> &Visited,
InstCombiner &IC,
const DataLayout *DL,
const TargetLibraryInfo *TLI) {
@@ -2549,7 +2675,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
BB = Worklist.pop_back_val();
// We have now visited this block! If we've already been here, ignore it.
- if (!Visited.insert(BB)) continue;
+ if (!Visited.insert(BB).second)
+ continue;
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *Inst = BBI++;
@@ -2730,9 +2857,18 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// If the user is one of our immediate successors, and if that successor
// only has us as a predecessor (we'd have to split the critical edge
// otherwise), we can keep going.
- if (UserIsSuccessor && UserParent->getSinglePredecessor())
+ if (UserIsSuccessor && UserParent->getSinglePredecessor()) {
// Okay, the CFG is simple enough, try to sink this instruction.
- MadeIRChange |= TryToSinkInstruction(I, UserParent);
+ if (TryToSinkInstruction(I, UserParent)) {
+ MadeIRChange = true;
+          // We'll add uses of the sunk instruction below, but since sinking
+          // can expose opportunities for its *operands*, add them to the
+          // worklist.
+ for (Use &U : I->operands())
+ if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+ Worklist.Add(OpI);
+ }
+ }
}
}
@@ -2801,13 +2937,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
}
namespace {
-class InstCombinerLibCallSimplifier : public LibCallSimplifier {
+class InstCombinerLibCallSimplifier final : public LibCallSimplifier {
InstCombiner *IC;
public:
InstCombinerLibCallSimplifier(const DataLayout *DL,
const TargetLibraryInfo *TLI,
InstCombiner *IC)
- : LibCallSimplifier(DL, TLI, UnsafeFPShrink) {
+ : LibCallSimplifier(DL, TLI) {
this->IC = IC;
}
@@ -2823,9 +2959,15 @@ bool InstCombiner::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
+ AT = &getAnalysis<AssumptionTracker>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
+
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
// Minimizing size?
MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::MinSize);
@@ -2834,7 +2976,7 @@ bool InstCombiner::runOnFunction(Function &F) {
/// instructions into the worklist when they are created.
IRBuilder<true, TargetFolder, InstCombineIRInserter>
TheBuilder(F.getContext(), TargetFolder(DL),
- InstCombineIRInserter(Worklist));
+ InstCombineIRInserter(Worklist, AT));
Builder = &TheBuilder;
InstCombinerLibCallSimplifier TheSimplifier(DL, TLI, this);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 5e5ddc1..38f587f 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -59,7 +60,8 @@ static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
-static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa8000;
+static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
+static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 36;
static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
@@ -70,7 +72,7 @@ static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *const kAsanModuleCtorName = "asan.module_ctor";
static const char *const kAsanModuleDtorName = "asan.module_dtor";
-static const int kAsanCtorAndDtorPriority = 1;
+static const uint64_t kAsanCtorAndDtorPriority = 1;
static const char *const kAsanReportErrorTemplate = "__asan_report_";
static const char *const kAsanReportLoadN = "__asan_report_load_n";
static const char *const kAsanReportStoreN = "__asan_report_store_n";
@@ -80,8 +82,6 @@ static const char *const kAsanUnregisterGlobalsName =
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *const kAsanInitName = "__asan_init_v4";
-static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init";
-static const char *const kAsanCovName = "__sanitizer_cov";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
@@ -89,6 +89,7 @@ static const int kMaxAsanStackMallocSizeClass = 10;
static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
static const char *const kAsanGenPrefix = "__asan_gen_";
+static const char *const kSanCovGenPrefix = "__sancov_gen_";
static const char *const kAsanPoisonStackMemoryName =
"__asan_poison_stack_memory";
static const char *const kAsanUnpoisonStackMemoryName =
@@ -133,13 +134,6 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
-static cl::opt<int> ClCoverage("asan-coverage",
- cl::desc("ASan coverage. 0: none, 1: entry block, 2: all blocks"),
- cl::Hidden, cl::init(false));
-static cl::opt<int> ClCoverageBlockThreshold("asan-coverage-block-threshold",
- cl::desc("Add coverage instrumentation only to the entry block if there "
- "are more than this number of blocks."),
- cl::Hidden, cl::init(1500));
static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true));
static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
@@ -212,10 +206,40 @@ STATISTIC(NumOptimizedAccessesToGlobalVar,
"Number of optimized accesses to global vars");
namespace {
+/// Frontend-provided metadata for source location.
+struct LocationMetadata {
+ StringRef Filename;
+ int LineNo;
+ int ColumnNo;
+
+ LocationMetadata() : Filename(), LineNo(0), ColumnNo(0) {}
+
+ bool empty() const { return Filename.empty(); }
+
+ void parse(MDNode *MDN) {
+ assert(MDN->getNumOperands() == 3);
+ MDString *MDFilename = cast<MDString>(MDN->getOperand(0));
+ Filename = MDFilename->getString();
+ LineNo = cast<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
+ ColumnNo = cast<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
+ }
+};
+
/// Frontend-provided metadata for global variables.
class GlobalsMetadata {
public:
+ struct Entry {
+ Entry()
+ : SourceLoc(), Name(), IsDynInit(false),
+ IsBlacklisted(false) {}
+ LocationMetadata SourceLoc;
+ StringRef Name;
+ bool IsDynInit;
+ bool IsBlacklisted;
+ };
+
GlobalsMetadata() : inited_(false) {}
+
void init(Module& M) {
assert(!inited_);
inited_ = true;
@@ -223,76 +247,38 @@ class GlobalsMetadata {
if (!Globals)
return;
for (auto MDN : Globals->operands()) {
- // Format of the metadata node for the global:
- // {
- // global,
- // source_location,
- // i1 is_dynamically_initialized,
- // i1 is_blacklisted
- // }
- assert(MDN->getNumOperands() == 4);
+ // Metadata node contains the global and the fields of "Entry".
+ assert(MDN->getNumOperands() == 5);
Value *V = MDN->getOperand(0);
// The optimizer may optimize away a global entirely.
if (!V)
continue;
GlobalVariable *GV = cast<GlobalVariable>(V);
- if (Value *Loc = MDN->getOperand(1)) {
- GlobalVariable *GVLoc = cast<GlobalVariable>(Loc);
- // We may already know the source location for GV, if it was merged
- // with another global.
- if (SourceLocation.insert(std::make_pair(GV, GVLoc)).second)
- addSourceLocationGlobal(GVLoc);
+      // We may already have an entry for GV if it was merged with another
+      // global.
+ Entry &E = Entries[GV];
+ if (Value *Loc = MDN->getOperand(1))
+ E.SourceLoc.parse(cast<MDNode>(Loc));
+ if (Value *Name = MDN->getOperand(2)) {
+ MDString *MDName = cast<MDString>(Name);
+ E.Name = MDName->getString();
}
- ConstantInt *IsDynInit = cast<ConstantInt>(MDN->getOperand(2));
- if (IsDynInit->isOne())
- DynInitGlobals.insert(GV);
- ConstantInt *IsBlacklisted = cast<ConstantInt>(MDN->getOperand(3));
- if (IsBlacklisted->isOne())
- BlacklistedGlobals.insert(GV);
+ ConstantInt *IsDynInit = cast<ConstantInt>(MDN->getOperand(3));
+ E.IsDynInit |= IsDynInit->isOne();
+ ConstantInt *IsBlacklisted = cast<ConstantInt>(MDN->getOperand(4));
+ E.IsBlacklisted |= IsBlacklisted->isOne();
}
}
- GlobalVariable *getSourceLocation(GlobalVariable *G) const {
- auto Pos = SourceLocation.find(G);
- return (Pos != SourceLocation.end()) ? Pos->second : nullptr;
- }
-
- /// Check if the global is dynamically initialized.
- bool isDynInit(GlobalVariable *G) const {
- return DynInitGlobals.count(G);
- }
-
- /// Check if the global was blacklisted.
- bool isBlacklisted(GlobalVariable *G) const {
- return BlacklistedGlobals.count(G);
- }
-
- /// Check if the global was generated to describe source location of another
- /// global (we don't want to instrument them).
- bool isSourceLocationGlobal(GlobalVariable *G) const {
- return LocationGlobals.count(G);
+ /// Returns metadata entry for a given global.
+ Entry get(GlobalVariable *G) const {
+ auto Pos = Entries.find(G);
+ return (Pos != Entries.end()) ? Pos->second : Entry();
}
private:
bool inited_;
- DenseMap<GlobalVariable*, GlobalVariable*> SourceLocation;
- DenseSet<GlobalVariable*> DynInitGlobals;
- DenseSet<GlobalVariable*> BlacklistedGlobals;
- DenseSet<GlobalVariable*> LocationGlobals;
-
- void addSourceLocationGlobal(GlobalVariable *SourceLocGV) {
- // Source location global is a struct with layout:
- // {
- // filename,
- // i32 line_number,
- // i32 column_number,
- // }
- LocationGlobals.insert(SourceLocGV);
- ConstantStruct *Contents =
- cast<ConstantStruct>(SourceLocGV->getInitializer());
- GlobalVariable *FilenameGV = cast<GlobalVariable>(Contents->getOperand(0));
- LocationGlobals.insert(FilenameGV);
- }
+ DenseMap<GlobalVariable*, Entry> Entries;
};
/// This struct defines the shadow mapping using the rule:
@@ -306,7 +292,7 @@ struct ShadowMapping {
static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
llvm::Triple TargetTriple(M.getTargetTriple());
bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
- bool IsIOS = TargetTriple.getOS() == llvm::Triple::IOS;
+ bool IsIOS = TargetTriple.isiOS();
bool IsFreeBSD = TargetTriple.getOS() == llvm::Triple::FreeBSD;
bool IsLinux = TargetTriple.getOS() == llvm::Triple::Linux;
bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 ||
@@ -314,6 +300,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
bool IsMIPS32 = TargetTriple.getArch() == llvm::Triple::mips ||
TargetTriple.getArch() == llvm::Triple::mipsel;
+ bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 ||
+ TargetTriple.getArch() == llvm::Triple::mips64el;
ShadowMapping Mapping;
@@ -335,6 +323,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
Mapping.Offset = kFreeBSD_ShadowOffset64;
else if (IsLinux && IsX86_64)
Mapping.Offset = kSmallX86_64ShadowOffset;
+ else if (IsMIPS64)
+ Mapping.Offset = kMIPS64_ShadowOffset64;
else
Mapping.Offset = kDefaultShadowOffset64;
}
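A minimal sketch of how these per-target constants are typically consumed: the usual shadow formula shadow = (addr >> scale) + offset. The scale of 3 and the sample address are assumptions for the sketch, since this hunk only selects the per-target offset.

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t kMIPS64Offset = 1ULL << 36; // the value added above
  const unsigned Scale = 3;                  // 8 app bytes per shadow byte (assumed default)
  uint64_t Addr = 0x120000000ULL;            // arbitrary example address
  uint64_t Shadow = (Addr >> Scale) + kMIPS64Offset;
  std::printf("shadow(0x%llx) = 0x%llx\n",
              (unsigned long long)Addr, (unsigned long long)Shadow);
  return 0;
}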
@@ -386,8 +376,6 @@ struct AddressSanitizer : public FunctionPass {
bool LooksLikeCodeInBug11395(Instruction *I);
bool GlobalIsLinkerInitialized(GlobalVariable *G);
- bool InjectCoverage(Function &F, const ArrayRef<BasicBlock*> AllBlocks);
- void InjectCoverageAtBlock(Function &F, BasicBlock &BB);
LLVMContext *C;
const DataLayout *DL;
@@ -397,7 +385,6 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanCtorFunction;
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
- Function *AsanCovFunction;
Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
@@ -441,7 +428,6 @@ class AddressSanitizerModule : public ModulePass {
Function *AsanUnpoisonGlobals;
Function *AsanRegisterGlobals;
Function *AsanUnregisterGlobals;
- Function *AsanCovModuleInit;
};
// Stack poisoning does not play well with exception handling.
@@ -570,7 +556,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
}
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
- void poisonRedZones(const ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
+ void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
Value *ShadowBase, bool DoPoison);
void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison);
@@ -617,8 +603,25 @@ static GlobalVariable *createPrivateGlobalForString(
return GV;
}
+/// \brief Create a global describing a source location.
+static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
+ LocationMetadata MD) {
+ Constant *LocData[] = {
+ createPrivateGlobalForString(M, MD.Filename, true),
+ ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo),
+ ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo),
+ };
+ auto LocStruct = ConstantStruct::getAnon(LocData);
+ auto GV = new GlobalVariable(M, LocStruct->getType(), true,
+ GlobalValue::PrivateLinkage, LocStruct,
+ kAsanGenPrefix);
+ GV->setUnnamedAddr(true);
+ return GV;
+}
+
static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
- return G->getName().find(kAsanGenPrefix) == 0;
+ return G->getName().find(kAsanGenPrefix) == 0 ||
+ G->getName().find(kSanCovGenPrefix) == 0;
}
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
@@ -653,9 +656,12 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
}
// If I is an interesting memory access, return the PointerOperand
-// and set IsWrite/Alignment. Otherwise return NULL.
+// and set IsWrite/Alignment. Otherwise return nullptr.
static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
unsigned *Alignment) {
+  // Skip memory accesses inserted by other instrumentation.
+ if (I->getMetadata("nosanitize"))
+ return nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!ClInstrumentReads) return nullptr;
*IsWrite = false;
@@ -710,7 +716,7 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
// If a global variable does not have dynamic initialization we don't
// have to instrument it. However, if a global does not have an initializer
// at all, we assume it has a dynamic initializer (in another TU).
- return G->hasInitializer() && !GlobalsMD.isDynInit(G);
+ return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
}
void
@@ -859,8 +865,11 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
TerminatorInst *CrashTerm = nullptr;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+ // We use branch weights for the slow path check, to indicate that the slow
+ // path is rarely taken. This seems to be the case for SPEC benchmarks.
TerminatorInst *CheckTerm =
- SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
+ SplitBlockAndInsertIfThen(Cmp, InsertBefore, false,
+ MDBuilder(*C).createBranchWeights(1, 100000));
assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
@@ -905,10 +914,12 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
ConstantStruct *CS = cast<ConstantStruct>(OP);
// Must have a function or null ptr.
- // (CS->getOperand(0) is the init priority.)
if (Function* F = dyn_cast<Function>(CS->getOperand(1))) {
- if (F->getName() != kAsanModuleCtorName)
- poisonOneInitializer(*F, ModuleName);
+ if (F->getName() == kAsanModuleCtorName) continue;
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ // Don't instrument CTORs that will run before asan.module_ctor.
+ if (Priority->getLimitedValue() <= kAsanCtorAndDtorPriority) continue;
+ poisonOneInitializer(*F, ModuleName);
}
}
}
@@ -917,8 +928,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
- if (GlobalsMD.isBlacklisted(G)) return false;
- if (GlobalsMD.isSourceLocationGlobal(G)) return false;
+ if (GlobalsMD.get(G).IsBlacklisted) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
if (GlobalWasGeneratedByAsan(G)) return false; // Our own global.
@@ -939,16 +949,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// For now, just ignore this Global if the alignment is large.
if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false;
- // Ignore all the globals with the names starting with "\01L_OBJC_".
- // Many of those are put into the .cstring section. The linker compresses
- // that section by removing the spare \0s after the string terminator, so
- // our redzones get broken.
- if ((G->getName().find("\01L_OBJC_") == 0) ||
- (G->getName().find("\01l_OBJC_") == 0)) {
- DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G << "\n");
- return false;
- }
-
if (G->hasSection()) {
StringRef Section(G->getSection());
// Ignore the globals from the __OBJC section. The ObjC runtime assumes
@@ -977,6 +977,11 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n");
return false;
}
+ if (Section.startswith("__TEXT,__objc_methname,cstring_literals")) {
+ DEBUG(dbgs() << "Ignoring objc_methname cstring global: " << *G << "\n");
+ return false;
+ }
+
// Callbacks put into the CRT initializer/terminator sections
// should not be instrumented.
@@ -998,24 +1003,20 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Declare our poisoning and unpoisoning functions.
AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL));
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
+ kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr));
AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
// Declare functions that register/unregister globals.
AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
kAsanRegisterGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, NULL));
+ IntptrTy, IntptrTy, nullptr));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
AsanUnregisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
kAsanUnregisterGlobalsName,
- IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
- AsanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanCovModuleInitName,
- IRB.getVoidTy(), IntptrTy, NULL));
- AsanCovModuleInit->setLinkage(Function::ExternalLinkage);
}
// This function replaces all global variables with new variables that have
@@ -1045,7 +1046,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy =
StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
- IntptrTy, IntptrTy, NULL);
+ IntptrTy, IntptrTy, nullptr);
SmallVector<Constant *, 16> Initializers(n);
bool HasDynamicallyInitializedGlobals = false;
@@ -1058,6 +1059,14 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
+
+ auto MD = GlobalsMD.get(G);
+ // Create string holding the global name (use global name from metadata
+    // if it's available, otherwise just write the name of the global variable).
+ GlobalVariable *Name = createPrivateGlobalForString(
+ M, MD.Name.empty() ? G->getName() : MD.Name,
+ /*AllowMerging*/ true);
+
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
uint64_t SizeInBytes = DL->getTypeAllocSize(Ty);
@@ -1074,13 +1083,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
- StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
+ StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr);
Constant *NewInitializer = ConstantStruct::get(
NewTy, G->getInitializer(),
- Constant::getNullValue(RightRedZoneTy), NULL);
-
- GlobalVariable *Name =
- createPrivateGlobalForString(M, G->getName(), /*AllowMerging*/true);
+ Constant::getNullValue(RightRedZoneTy), nullptr);
// Create a new global variable with enough space for a redzone.
GlobalValue::LinkageTypes Linkage = G->getLinkage();
@@ -1101,8 +1107,13 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
NewGlobal->takeName(G);
G->eraseFromParent();
- bool GlobalHasDynamicInitializer = GlobalsMD.isDynInit(G);
- GlobalVariable *SourceLoc = GlobalsMD.getSourceLocation(G);
+ Constant *SourceLoc;
+ if (!MD.SourceLoc.empty()) {
+ auto SourceLocGlobal = createPrivateGlobalForSourceLoc(M, MD.SourceLoc);
+ SourceLoc = ConstantExpr::getPointerCast(SourceLocGlobal, IntptrTy);
+ } else {
+ SourceLoc = ConstantInt::get(IntptrTy, 0);
+ }
Initializers[i] = ConstantStruct::get(
GlobalStructTy, ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
@@ -1110,12 +1121,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
ConstantExpr::getPointerCast(ModuleName, IntptrTy),
- ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
- SourceLoc ? ConstantExpr::getPointerCast(SourceLoc, IntptrTy)
- : ConstantInt::get(IntptrTy, 0),
- NULL);
+ ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, nullptr);
- if (ClInitializers && GlobalHasDynamicInitializer)
+ if (ClInitializers && MD.IsDynInit)
HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
@@ -1166,13 +1174,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
assert(CtorFunc);
IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
- if (ClCoverage > 0) {
- Function *CovFunc = M.getFunction(kAsanCovName);
- int nCov = CovFunc ? CovFunc->getNumUses() : 0;
- IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov));
- Changed = true;
- }
-
if (ClGlobals)
Changed |= InstrumentGlobals(IRB, M);
@@ -1191,43 +1192,42 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
checkInterfaceFunction(
M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix,
- IRB.getVoidTy(), IntptrTy, NULL));
+ IRB.getVoidTy(), IntptrTy, nullptr));
AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
checkInterfaceFunction(
M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix,
- IRB.getVoidTy(), IntptrTy, NULL));
+ IRB.getVoidTy(), IntptrTy, nullptr));
}
}
AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction(
M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN",
- IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction(
M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN",
- IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL));
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL));
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
AsanMemset = checkInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, NULL));
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr));
AsanHandleNoReturnFunc = checkInterfaceFunction(
- M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
- AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanCovName, IRB.getVoidTy(), NULL));
+ M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr));
+
AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -1255,7 +1255,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
// call __asan_init in the module ctor.
IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
AsanInitFunction = checkInterfaceFunction(
- M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
+ M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), nullptr));
AsanInitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(AsanInitFunction);
@@ -1281,74 +1281,6 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
-void AddressSanitizer::InjectCoverageAtBlock(Function &F, BasicBlock &BB) {
- BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
- // Skip static allocas at the top of the entry block so they don't become
- // dynamic when we split the block. If we used our optimized stack layout,
- // then there will only be one alloca and it will come first.
- for (; IP != BE; ++IP) {
- AllocaInst *AI = dyn_cast<AllocaInst>(IP);
- if (!AI || !AI->isStaticAlloca())
- break;
- }
-
- DebugLoc EntryLoc = IP->getDebugLoc().getFnDebugLoc(*C);
- IRBuilder<> IRB(IP);
- IRB.SetCurrentDebugLocation(EntryLoc);
- Type *Int8Ty = IRB.getInt8Ty();
- GlobalVariable *Guard = new GlobalVariable(
- *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
- Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName());
- LoadInst *Load = IRB.CreateLoad(Guard);
- Load->setAtomic(Monotonic);
- Load->setAlignment(1);
- Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
- Instruction *Ins = SplitBlockAndInsertIfThen(
- Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
- IRB.SetInsertPoint(Ins);
- IRB.SetCurrentDebugLocation(EntryLoc);
- // We pass &F to __sanitizer_cov. We could avoid this and rely on
- // GET_CALLER_PC, but having the PC of the first instruction is just nice.
- IRB.CreateCall(AsanCovFunction);
- StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
- Store->setAtomic(Monotonic);
- Store->setAlignment(1);
-}
-
-// Poor man's coverage that works with ASan.
-// We create a Guard boolean variable with the same linkage
-// as the function and inject this code into the entry block (-asan-coverage=1)
-// or all blocks (-asan-coverage=2):
-// if (*Guard) {
-// __sanitizer_cov(&F);
-// *Guard = 1;
-// }
-// The accesses to Guard are atomic. The rest of the logic is
-// in __sanitizer_cov (it's fine to call it more than once).
-//
-// This coverage implementation provides very limited data:
-// it only tells if a given function (block) was ever executed.
-// No counters, no per-edge data.
-// But for many use cases this is what we need and the added slowdown
-// is negligible. This simple implementation will probably be obsoleted
-// by the upcoming Clang-based coverage implementation.
-// By having it here and now we hope to
-// a) get the functionality to users earlier and
-// b) collect usage statistics to help improve Clang coverage design.
-bool AddressSanitizer::InjectCoverage(Function &F,
- const ArrayRef<BasicBlock *> AllBlocks) {
- if (!ClCoverage) return false;
-
- if (ClCoverage == 1 ||
- (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) {
- InjectCoverageAtBlock(F, F.getEntryBlock());
- } else {
- for (auto BB : AllBlocks)
- InjectCoverageAtBlock(F, *BB);
- }
- return true;
-}
-
bool AddressSanitizer::runOnFunction(Function &F) {
if (&F == AsanCtorFunction) return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
@@ -1385,7 +1317,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
if (Value *Addr =
isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) {
if (ClOpt && ClOptSameTemp) {
- if (!TempsToInstrument.insert(Addr))
+ if (!TempsToInstrument.insert(Addr).second)
continue; // We've seen this temp in the current BB.
}
} else if (ClInvalidPointerPairs &&
@@ -1459,9 +1391,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
- if (InjectCoverage(F, AllBlocks))
- res = true;
-
DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n");
if (ClKeepUninstrumented) {
@@ -1499,19 +1428,21 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
std::string Suffix = itostr(i);
AsanStackMallocFunc[i] = checkInterfaceFunction(
M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
- IntptrTy, IntptrTy, NULL));
+ IntptrTy, IntptrTy, nullptr));
AsanStackFreeFunc[i] = checkInterfaceFunction(M.getOrInsertFunction(
kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy,
- IntptrTy, IntptrTy, NULL));
+ IntptrTy, IntptrTy, nullptr));
}
- AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
- AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanPoisonStackMemoryFunc = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, nullptr));
+ AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, nullptr));
}
void
-FunctionStackPoisoner::poisonRedZones(const ArrayRef<uint8_t> ShadowBytes,
+FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
IRBuilder<> &IRB, Value *ShadowBase,
bool DoPoison) {
size_t n = ShadowBytes.size();
diff --git a/lib/Transforms/Instrumentation/Android.mk b/lib/Transforms/Instrumentation/Android.mk
index f9a55c7..1f21028 100644
--- a/lib/Transforms/Instrumentation/Android.mk
+++ b/lib/Transforms/Instrumentation/Android.mk
@@ -8,6 +8,7 @@ instrumentation_SRC_FILES := \
GCOVProfiling.cpp \
Instrumentation.cpp \
MemorySanitizer.cpp \
+ SanitizerCoverage.cpp \
ThreadSanitizer.cpp
# For the host
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 3563593..139e514 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMInstrumentation
GCOVProfiling.cpp
MemorySanitizer.cpp
Instrumentation.cpp
+ SanitizerCoverage.cpp
ThreadSanitizer.cpp
)
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 799e14b..c5a4860 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -50,6 +50,8 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstVisitor.h"
@@ -62,7 +64,10 @@
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
#include <iterator>
+#include <set>
+#include <utility>
using namespace llvm;
@@ -135,11 +140,11 @@ class DFSanABIList {
std::unique_ptr<SpecialCaseList> SCL;
public:
- DFSanABIList(SpecialCaseList *SCL) : SCL(SCL) {}
+ DFSanABIList(std::unique_ptr<SpecialCaseList> SCL) : SCL(std::move(SCL)) {}
/// Returns whether either this function or its source file are listed in the
/// given category.
- bool isIn(const Function &F, const StringRef Category) const {
+ bool isIn(const Function &F, StringRef Category) const {
return isIn(*F.getParent(), Category) ||
SCL->inSection("fun", F.getName(), Category);
}
@@ -148,7 +153,7 @@ class DFSanABIList {
///
/// If GA aliases a function, the alias's name is matched as a function name
/// would be. Similarly, aliases of globals are matched like globals.
- bool isIn(const GlobalAlias &GA, const StringRef Category) const {
+ bool isIn(const GlobalAlias &GA, StringRef Category) const {
if (isIn(*GA.getParent(), Category))
return true;
@@ -160,7 +165,7 @@ class DFSanABIList {
}
/// Returns whether this module is listed in the given category.
- bool isIn(const Module &M, const StringRef Category) const {
+ bool isIn(const Module &M, StringRef Category) const {
return SCL->inSection("src", M.getModuleIdentifier(), Category);
}
};
@@ -229,18 +234,21 @@ class DataFlowSanitizer : public ModulePass {
FunctionType *DFSanUnimplementedFnTy;
FunctionType *DFSanSetLabelFnTy;
FunctionType *DFSanNonzeroLabelFnTy;
+ FunctionType *DFSanVarargWrapperFnTy;
Constant *DFSanUnionFn;
+ Constant *DFSanCheckedUnionFn;
Constant *DFSanUnionLoadFn;
Constant *DFSanUnimplementedFn;
Constant *DFSanSetLabelFn;
Constant *DFSanNonzeroLabelFn;
+ Constant *DFSanVarargWrapperFn;
MDNode *ColdCallWeights;
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeSet ReadOnlyNoneAttrs;
+ DenseMap<const Function *, DISubprogram> FunctionDIs;
Value *getShadowAddress(Value *Addr, Instruction *Pos);
- Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
bool isInstrumented(const Function *F);
bool isInstrumented(const GlobalAlias *GA);
FunctionType *getArgsFunctionType(FunctionType *T);
@@ -266,6 +274,7 @@ class DataFlowSanitizer : public ModulePass {
struct DFSanFunction {
DataFlowSanitizer &DFS;
Function *F;
+ DominatorTree DT;
DataFlowSanitizer::InstrumentedABI IA;
bool IsNativeABI;
Value *ArgTLSPtr;
@@ -275,17 +284,32 @@ struct DFSanFunction {
DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
std::vector<std::pair<PHINode *, PHINode *> > PHIFixups;
DenseSet<Instruction *> SkipInsts;
- DenseSet<Value *> NonZeroChecks;
+ std::vector<Value *> NonZeroChecks;
+ bool AvoidNewBlocks;
+
+ struct CachedCombinedShadow {
+ BasicBlock *Block;
+ Value *Shadow;
+ };
+ DenseMap<std::pair<Value *, Value *>, CachedCombinedShadow>
+ CachedCombinedShadows;
+ DenseMap<Value *, std::set<Value *>> ShadowElements;
DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
: DFS(DFS), F(F), IA(DFS.getInstrumentedABI()),
IsNativeABI(IsNativeABI), ArgTLSPtr(nullptr), RetvalTLSPtr(nullptr),
- LabelReturnAlloca(nullptr) {}
+ LabelReturnAlloca(nullptr) {
+ DT.recalculate(*F);
+ // FIXME: Need to track down the register allocator issue which causes poor
+ // performance in pathological cases with large numbers of basic blocks.
+ AvoidNewBlocks = F->size() > 1000;
+ }
Value *getArgTLSPtr();
Value *getArgTLS(unsigned Index, Instruction *Pos);
Value *getRetvalTLS();
Value *getShadow(Value *V);
void setShadow(Instruction *I, Value *Shadow);
+ Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
Value *combineOperandShadows(Instruction *Inst);
Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
Instruction *Pos);
@@ -367,7 +391,6 @@ FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
}
FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
- assert(!T->isVarArg());
llvm::SmallVector<Type *, 4> ArgTypes;
for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end();
i != e; ++i) {
@@ -382,10 +405,12 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
}
for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
ArgTypes.push_back(ShadowTy);
+ if (T->isVarArg())
+ ArgTypes.push_back(ShadowPtrTy);
Type *RetType = T->getReturnType();
if (!RetType->isVoidTy())
ArgTypes.push_back(ShadowPtrTy);
- return FunctionType::get(T->getReturnType(), ArgTypes, false);
+ return FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg());
}
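For a vararg function such as int f(int, ...), the custom function type built above ends up with arguments in this order: the fixed parameters, one shadow label per fixed parameter, a pointer to an array of the variadic arguments' labels, and (for non-void functions) a pointer through which the return value's label is reported, with the type still marked vararg. A toy sketch of that ordering, with strings standing in for LLVM types; the 16-bit label width and the wrapper name are assumptions, not something shown in this hunk.

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Custom wrapper signature for `int f(int, ...)`, sketched as strings.
  std::vector<std::string> ArgTypes;
  ArgTypes.push_back("i32");   // original fixed parameter
  ArgTypes.push_back("i16");   // shadow label of that parameter (assumed width)
  ArgTypes.push_back("i16*");  // labels of the variadic arguments
  ArgTypes.push_back("i16*");  // out-parameter for the return value's label
  std::cout << "i32 custom_f(";
  for (const std::string &T : ArgTypes)
    std::cout << T << ", ";
  std::cout << "...)\n";       // the wrapper type stays vararg
}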
bool DataFlowSanitizer::doInitialization(Module &M) {
@@ -415,7 +440,9 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
DFSanSetLabelArgs, /*isVarArg=*/false);
DFSanNonzeroLabelFnTy = FunctionType::get(
- Type::getVoidTy(*Ctx), ArrayRef<Type *>(), /*isVarArg=*/false);
+ Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
+ DFSanVarargWrapperFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
if (GetArgTLSPtr) {
Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
@@ -495,15 +522,26 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
AttributeSet::ReturnIndex));
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
- std::vector<Value *> Args;
- unsigned n = FT->getNumParams();
- for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
- Args.push_back(&*ai);
- CallInst *CI = CallInst::Create(F, Args, "", BB);
- if (FT->getReturnType()->isVoidTy())
- ReturnInst::Create(*Ctx, BB);
- else
- ReturnInst::Create(*Ctx, CI, BB);
+ if (F->isVarArg()) {
+ NewF->removeAttributes(
+ AttributeSet::FunctionIndex,
+ AttributeSet().addAttribute(*Ctx, AttributeSet::FunctionIndex,
+ "split-stack"));
+ CallInst::Create(DFSanVarargWrapperFn,
+ IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
+ BB);
+ new UnreachableInst(*Ctx, BB);
+ } else {
+ std::vector<Value *> Args;
+ unsigned n = FT->getNumParams();
+ for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
+ Args.push_back(&*ai);
+ CallInst *CI = CallInst::Create(F, Args, "", BB);
+ if (FT->getReturnType()->isVoidTy())
+ ReturnInst::Create(*Ctx, BB);
+ else
+ ReturnInst::Create(*Ctx, CI, BB);
+ }
return NewF;
}
@@ -548,6 +586,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
if (ABIList.isIn(M, "skip"))
return false;
+ FunctionDIs = makeSubprogramMap(M);
+
if (!GetArgTLSPtr) {
Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
@@ -562,6 +602,15 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
+ F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
+ F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ F->addAttribute(1, Attribute::ZExt);
+ F->addAttribute(2, Attribute::ZExt);
+ }
+ DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) {
+ F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
F->addAttribute(1, Attribute::ZExt);
@@ -570,6 +619,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
DFSanUnionLoadFn =
Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
+ F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);
F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
}
@@ -582,16 +632,20 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
}
DFSanNonzeroLabelFn =
Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
+ DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
+ DFSanVarargWrapperFnTy);
std::vector<Function *> FnsToInstrument;
llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI;
for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) {
if (!i->isIntrinsic() &&
i != DFSanUnionFn &&
+ i != DFSanCheckedUnionFn &&
i != DFSanUnionLoadFn &&
i != DFSanUnimplementedFn &&
i != DFSanSetLabelFn &&
- i != DFSanNonzeroLabelFn)
+ i != DFSanNonzeroLabelFn &&
+ i != DFSanVarargWrapperFn)
FnsToInstrument.push_back(&*i);
}
@@ -673,11 +727,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
} else {
addGlobalNamePrefix(&F);
}
- // Hopefully, nobody will try to indirectly call a vararg
- // function... yet.
- } else if (FT->isVarArg()) {
- UnwrappedFnMap[&F] = &F;
- *i = nullptr;
} else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
// Build a wrapper function for F. The wrapper simply calls F, and is
// added to FnsToInstrument so that any instrumentation according to its
@@ -694,6 +743,12 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
Value *WrappedFnCst =
ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
F.replaceAllUsesWith(WrappedFnCst);
+
+ // Patch the pointer to LLVM function in debug info descriptor.
+ auto DI = FunctionDIs.find(&F);
+ if (DI != FunctionDIs.end())
+ DI->second.replaceFunction(&F);
+
UnwrappedFnMap[WrappedFnCst] = &F;
*i = NewF;
@@ -713,6 +768,11 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
i = FnsToInstrument.begin() + N;
e = FnsToInstrument.begin() + Count;
}
+ // Hopefully, nobody will try to indirectly call a vararg
+ // function... yet.
+ } else if (FT->isVarArg()) {
+ UnwrappedFnMap[&F] = &F;
+ *i = nullptr;
}
}
@@ -771,18 +831,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// yet). To make our life easier, do this work in a pass after the main
// instrumentation.
if (ClDebugNonzeroLabels) {
- for (DenseSet<Value *>::iterator i = DFSF.NonZeroChecks.begin(),
- e = DFSF.NonZeroChecks.end();
- i != e; ++i) {
+ for (Value *V : DFSF.NonZeroChecks) {
Instruction *Pos;
- if (Instruction *I = dyn_cast<Instruction>(*i))
+ if (Instruction *I = dyn_cast<Instruction>(V))
Pos = I->getNextNode();
else
Pos = DFSF.F->getEntryBlock().begin();
while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
Pos = Pos->getNextNode();
IRBuilder<> IRB(Pos);
- Value *Ne = IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow);
+ Value *Ne = IRB.CreateICmpNE(V, DFSF.DFS.ZeroShadow);
BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
IRBuilder<> ThenIRB(BI);
@@ -847,7 +905,7 @@ Value *DFSanFunction::getShadow(Value *V) {
break;
}
}
- NonZeroChecks.insert(Shadow);
+ NonZeroChecks.push_back(Shadow);
} else {
Shadow = DFS.ZeroShadow;
}
@@ -873,30 +931,82 @@ Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. Returns the computed union Value.
-Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2,
- Instruction *Pos) {
- if (V1 == ZeroShadow)
+Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
+ if (V1 == DFS.ZeroShadow)
return V2;
- if (V2 == ZeroShadow)
+ if (V2 == DFS.ZeroShadow)
return V1;
if (V1 == V2)
return V1;
+
+ auto V1Elems = ShadowElements.find(V1);
+ auto V2Elems = ShadowElements.find(V2);
+ if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
+ if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
+ V2Elems->second.begin(), V2Elems->second.end())) {
+ return V1;
+ } else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
+ V1Elems->second.begin(), V1Elems->second.end())) {
+ return V2;
+ }
+ } else if (V1Elems != ShadowElements.end()) {
+ if (V1Elems->second.count(V2))
+ return V1;
+ } else if (V2Elems != ShadowElements.end()) {
+ if (V2Elems->second.count(V1))
+ return V2;
+ }
+
+ auto Key = std::make_pair(V1, V2);
+ if (V1 > V2)
+ std::swap(Key.first, Key.second);
+ CachedCombinedShadow &CCS = CachedCombinedShadows[Key];
+ if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
+ return CCS.Shadow;
+
IRBuilder<> IRB(Pos);
- BasicBlock *Head = Pos->getParent();
- Value *Ne = IRB.CreateICmpNE(V1, V2);
- BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
- Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
- IRBuilder<> ThenIRB(BI);
- CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2);
- Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
- Call->addAttribute(1, Attribute::ZExt);
- Call->addAttribute(2, Attribute::ZExt);
-
- BasicBlock *Tail = BI->getSuccessor(0);
- PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
- Phi->addIncoming(Call, Call->getParent());
- Phi->addIncoming(V1, Head);
- return Phi;
+ if (AvoidNewBlocks) {
+ CallInst *Call = IRB.CreateCall2(DFS.DFSanCheckedUnionFn, V1, V2);
+ Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ Call->addAttribute(1, Attribute::ZExt);
+ Call->addAttribute(2, Attribute::ZExt);
+
+ CCS.Block = Pos->getParent();
+ CCS.Shadow = Call;
+ } else {
+ BasicBlock *Head = Pos->getParent();
+ Value *Ne = IRB.CreateICmpNE(V1, V2);
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
+ IRBuilder<> ThenIRB(BI);
+ CallInst *Call = ThenIRB.CreateCall2(DFS.DFSanUnionFn, V1, V2);
+ Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ Call->addAttribute(1, Attribute::ZExt);
+ Call->addAttribute(2, Attribute::ZExt);
+
+ BasicBlock *Tail = BI->getSuccessor(0);
+ PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", Tail->begin());
+ Phi->addIncoming(Call, Call->getParent());
+ Phi->addIncoming(V1, Head);
+
+ CCS.Block = Tail;
+ CCS.Shadow = Phi;
+ }
+
+ std::set<Value *> UnionElems;
+ if (V1Elems != ShadowElements.end()) {
+ UnionElems = V1Elems->second;
+ } else {
+ UnionElems.insert(V1);
+ }
+ if (V2Elems != ShadowElements.end()) {
+ UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
+ } else {
+ UnionElems.insert(V2);
+ }
+ ShadowElements[CCS.Shadow] = std::move(UnionElems);
+
+ return CCS.Shadow;
}
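A rough model of the subset fast path above, using std::string shadows and std::set label sets; nothing here is LLVM-specific. The point is that when one operand's accumulated labels already include the other's, the existing value can be reused and no new union needs to be emitted.

#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <utility>

using Shadow = std::string;
static std::map<Shadow, std::set<Shadow>> ShadowElements;

Shadow combine(const Shadow &V1, const Shadow &V2) {
  if (V1 == V2)
    return V1;
  auto I1 = ShadowElements.find(V1), I2 = ShadowElements.find(V2);
  if (I1 != ShadowElements.end() && I2 != ShadowElements.end()) {
    // If one label set contains the other, the union is the larger value.
    if (std::includes(I1->second.begin(), I1->second.end(),
                      I2->second.begin(), I2->second.end()))
      return V1;
    if (std::includes(I2->second.begin(), I2->second.end(),
                      I1->second.begin(), I1->second.end()))
      return V2;
  }
  // Otherwise form a new combined shadow (the pass would emit a union call
  // or a compare-and-branch here) and remember which labels it covers.
  Shadow Union = "(" + V1 + "|" + V2 + ")";
  std::set<Shadow> Elems = I1 != ShadowElements.end() ? I1->second
                                                      : std::set<Shadow>{V1};
  if (I2 != ShadowElements.end())
    Elems.insert(I2->second.begin(), I2->second.end());
  else
    Elems.insert(V2);
  ShadowElements[Union] = std::move(Elems);
  return Union;
}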
// A convenience function which folds the shadows of each of the operands
@@ -908,7 +1018,7 @@ Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
Value *Shadow = getShadow(Inst->getOperand(0));
for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
- Shadow = DFS.combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
+ Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
}
return Shadow;
}
@@ -961,12 +1071,11 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRBuilder<> IRB(Pos);
Value *ShadowAddr1 =
IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1));
- return DFS.combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
- IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign),
- Pos);
+ return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
+ IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos);
}
}
- if (Size % (64 / DFS.ShadowWidth) == 0) {
+ if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) {
// Fast path for the common case where each byte has identical shadow: load
// shadow 64 bits at a time, fall out to a __dfsan_union_load call if any
// shadow is non-equal.
@@ -990,16 +1099,27 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
BasicBlock *Head = Pos->getParent();
BasicBlock *Tail = Head->splitBasicBlock(Pos);
+
+ if (DomTreeNode *OldNode = DT.getNode(Head)) {
+ std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+ DomTreeNode *NewNode = DT.addNewBlock(Tail, Head);
+ for (auto Child : Children)
+ DT.changeImmediateDominator(Child, NewNode);
+ }
+
// In the following code LastBr will refer to the previous basic block's
// conditional branch instruction, whose true successor is fixed up to point
// to the next block during the loop below or to the tail after the final
// iteration.
BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
ReplaceInstWithInst(Head->getTerminator(), LastBr);
+ DT.addNewBlock(FallbackBB, Head);
for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size;
Ofs += 64 / DFS.ShadowWidth) {
BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ DT.addNewBlock(NextBB, LastBr->getParent());
IRBuilder<> NextIRB(NextBB);
WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1));
Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
@@ -1025,6 +1145,11 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType());
+ if (Size == 0) {
+ DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
+ return;
+ }
+
uint64_t Align;
if (ClPreserveAlignment) {
Align = LI.getAlignment();
@@ -1037,10 +1162,10 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
if (ClCombinePointerLabelsOnLoad) {
Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
- Shadow = DFSF.DFS.combineShadows(Shadow, PtrShadow, &LI);
+ Shadow = DFSF.combineShadows(Shadow, PtrShadow, &LI);
}
if (Shadow != DFSF.DFS.ZeroShadow)
- DFSF.NonZeroChecks.insert(Shadow);
+ DFSF.NonZeroChecks.push_back(Shadow);
DFSF.setShadow(&LI, Shadow);
}
@@ -1099,6 +1224,9 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
void DFSanVisitor::visitStoreInst(StoreInst &SI) {
uint64_t Size =
DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType());
+ if (Size == 0)
+ return;
+
uint64_t Align;
if (ClPreserveAlignment) {
Align = SI.getAlignment();
@@ -1111,7 +1239,7 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
Value* Shadow = DFSF.getShadow(SI.getValueOperand());
if (ClCombinePointerLabelsOnStore) {
Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
- Shadow = DFSF.DFS.combineShadows(Shadow, PtrShadow, &SI);
+ Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
}
DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
}
@@ -1176,9 +1304,9 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) {
if (isa<VectorType>(I.getCondition()->getType())) {
DFSF.setShadow(
- &I, DFSF.DFS.combineShadows(
- CondShadow,
- DFSF.DFS.combineShadows(TrueShadow, FalseShadow, &I), &I));
+ &I,
+ DFSF.combineShadows(
+ CondShadow, DFSF.combineShadows(TrueShadow, FalseShadow, &I), &I));
} else {
Value *ShadowSel;
if (TrueShadow == FalseShadow) {
@@ -1187,7 +1315,7 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) {
ShadowSel =
SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
}
- DFSF.setShadow(&I, DFSF.DFS.combineShadows(CondShadow, ShadowSel, &I));
+ DFSF.setShadow(&I, DFSF.combineShadows(CondShadow, ShadowSel, &I));
}
}
@@ -1253,6 +1381,15 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
return;
}
+ // Calls to this function are synthesized in wrappers, and we shouldn't
+ // instrument them.
+ if (F == DFSF.DFS.DFSanVarargWrapperFn)
+ return;
+
+ assert(!(cast<FunctionType>(
+ CS.getCalledValue()->getType()->getPointerElementType())->isVarArg() &&
+ dyn_cast<InvokeInst>(CS.getInstruction())));
+
IRBuilder<> IRB(CS.getInstruction());
DenseMap<Value *, Function *>::iterator i =
@@ -1324,6 +1461,20 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
Args.push_back(DFSF.getShadow(*i));
+ if (FT->isVarArg()) {
+ auto LabelVAAlloca =
+ new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy,
+ CS.arg_size() - FT->getNumParams()),
+ "labelva", DFSF.F->getEntryBlock().begin());
+
+ for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
+ auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n);
+ IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
+ }
+
+ Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0));
+ }
+
if (!FT->getReturnType()->isVoidTy()) {
if (!DFSF.LabelReturnAlloca) {
DFSF.LabelReturnAlloca =
@@ -1333,6 +1484,9 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
Args.push_back(DFSF.LabelReturnAlloca);
}
+ for (i = CS.arg_begin() + FT->getNumParams(); i != CS.arg_end(); ++i)
+ Args.push_back(*i);
+
CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
CustomCI->setCallingConv(CI->getCallingConv());
CustomCI->setAttributes(CI->getAttributes());
@@ -1379,7 +1533,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS());
DFSF.SkipInsts.insert(LI);
DFSF.setShadow(CS.getInstruction(), LI);
- DFSF.NonZeroChecks.insert(LI);
+ DFSF.NonZeroChecks.push_back(LI);
}
}
@@ -1433,7 +1587,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next);
DFSF.SkipInsts.insert(ExShadow);
DFSF.setShadow(ExVal, ExShadow);
- DFSF.NonZeroChecks.insert(ExShadow);
+ DFSF.NonZeroChecks.push_back(ExShadow);
CS.getInstruction()->replaceAllUsesWith(ExVal);
}
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
index f2f1738..5234341 100644
--- a/lib/Transforms/Instrumentation/DebugIR.cpp
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -396,7 +396,7 @@ private:
Elements.push_back(getOrCreateType(T->getStructElementType(i)));
// set struct elements
- StructDescriptor.setTypeArray(Builder.getOrCreateArray(Elements));
+ StructDescriptor.setArrays(Builder.getOrCreateArray(Elements));
} else if (T->isPointerTy()) {
Type *PointeeTy = T->getPointerElementType();
if (!(N = getType(PointeeTy)))
@@ -440,7 +440,7 @@ private:
Params.push_back(getOrCreateType(T));
}
- DIArray ParamArray = Builder.getOrCreateArray(Params);
+ DITypeArray ParamArray = Builder.getOrCreateTypeArray(Params);
return Builder.createSubroutineType(DIFile(FileNode), ParamArray);
}
@@ -525,11 +525,11 @@ std::string DebugIR::getPath() {
void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
std::unique_ptr<raw_fd_ostream> Out;
- std::string error;
+ std::error_code EC;
if (!fd) {
std::string Path = getPath();
- Out.reset(new raw_fd_ostream(Path.c_str(), error, sys::fs::F_Text));
+ Out.reset(new raw_fd_ostream(Path, EC, sys::fs::F_Text));
DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to file "
<< Path << "\n");
} else {
diff --git a/lib/Transforms/Instrumentation/DebugIR.h b/lib/Transforms/Instrumentation/DebugIR.h
index 02831ed..8d74a4d 100644
--- a/lib/Transforms/Instrumentation/DebugIR.h
+++ b/lib/Transforms/Instrumentation/DebugIR.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
-#define LLVM_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
+#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
+#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
#include "llvm/Pass.h"
@@ -95,4 +95,4 @@ private:
} // llvm namespace
-#endif // LLVM_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
+#endif
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index cfeb62e..220d7f8 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -480,12 +480,12 @@ void GCOVProfiler::emitProfileNotes() {
// LTO, we'll generate the same .gcno files.
DICompileUnit CU(CU_Nodes->getOperand(i));
- std::string ErrorInfo;
- raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
- sys::fs::F_None);
+ std::error_code EC;
+ raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None);
std::string EdgeDestinations;
DIArray SPs = CU.getSubprograms();
+ unsigned FunctionIdent = 0;
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
DISubprogram SP(SPs.getElement(i));
assert((!SP || SP.isSubprogram()) &&
@@ -505,8 +505,8 @@ void GCOVProfiler::emitProfileNotes() {
++It;
EntryBlock.splitBasicBlock(It);
- Funcs.push_back(
- make_unique<GCOVFunction>(SP, &out, i, Options.UseCfgChecksum));
+ Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
+ Options.UseCfgChecksum));
GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
@@ -738,11 +738,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
Edge += Successors;
}
- ArrayRef<Constant*> V(&EdgeTable[0], TableSize);
GlobalVariable *EdgeTableGV =
new GlobalVariable(
*M, EdgeTableTy, true, GlobalValue::InternalLinkage,
- ConstantArray::get(EdgeTableTy, V),
+ ConstantArray::get(EdgeTableTy,
+ makeArrayRef(&EdgeTable[0],TableSize)),
"__llvm_gcda_edge_table");
EdgeTableGV->setUnnamedAddr(true);
return EdgeTableGV;
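Both the DebugIR hunk above and this GCOVProfiling hunk migrate raw_fd_ostream construction from a std::string error out-parameter to std::error_code, which is the constructor interface at this revision. A minimal sketch of the new pattern with an explicit error check (the file name is invented for illustration):

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>

    void writeNotesFile() {
      std::error_code EC;
      llvm::raw_fd_ostream Out("example.gcno", EC, llvm::sys::fs::F_None);
      if (EC) {  // the constructor reports open failures through EC
        llvm::errs() << "cannot open file: " << EC.message() << "\n";
        return;
      }
      Out << "...";  // emit the notes
    }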
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index ac1dd43..8e95367 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -27,6 +27,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeGCOVProfilerPass(Registry);
initializeMemorySanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
+ initializeSanitizerCoverageModulePass(Registry);
initializeDataFlowSanitizerPass(Registry);
}
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 496ab48..1261259 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -127,6 +127,10 @@ static const uint64_t kOriginOffset64 = 1ULL << 45;
static const unsigned kMinOriginAlignment = 4;
static const unsigned kShadowTLSAlignment = 8;
+// These constants must be kept in sync with the ones in msan.h.
+static const unsigned kParamTLSSize = 800;
+static const unsigned kRetvalTLSSize = 800;
+
// Accesses sizes are powers of two: 1, 2, 4, 8.
static const size_t kNumberOfAccessSizes = 4;
@@ -195,6 +199,13 @@ static cl::opt<bool> ClWrapIndirectCallsFast("msan-wrap-indirect-calls-fast",
cl::desc("Do not wrap indirect calls with target in the same module"),
cl::Hidden, cl::init(true));
+// This is an experiment to enable handling of cases where shadow is a non-zero
+// compile-time constant. For some inexplicable reason such cases were
+// silently ignored by the instrumentation.
+static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
+ cl::desc("Insert checks for constant shadow values"),
+ cl::Hidden, cl::init(false));
+
namespace {
/// \brief An instrumentation pass implementing detection of uninitialized
@@ -321,7 +332,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
// which is not yet implemented.
StringRef WarningFnName = ClKeepGoing ? "__msan_warning"
: "__msan_warning_noreturn";
- WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);
+ WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), nullptr);
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
@@ -329,34 +340,35 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
- IRB.getInt32Ty(), NULL);
+ IRB.getInt32Ty(), nullptr);
FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
- IRB.getInt8PtrTy(), IRB.getInt32Ty(), NULL);
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), nullptr);
}
MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
"__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
- IRB.getInt8PtrTy(), IntptrTy, NULL);
- MsanPoisonStackFn = M.getOrInsertFunction(
- "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
+ IRB.getInt8PtrTy(), IntptrTy, nullptr);
+ MsanPoisonStackFn =
+ M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IntptrTy, nullptr);
MsanChainOriginFn = M.getOrInsertFunction(
- "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty(), NULL);
+ "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty(), nullptr);
MemmoveFn = M.getOrInsertFunction(
"__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, NULL);
+ IRB.getInt8PtrTy(), IntptrTy, nullptr);
MemcpyFn = M.getOrInsertFunction(
"__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IntptrTy, NULL);
+ IntptrTy, nullptr);
MemsetFn = M.getOrInsertFunction(
"__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
- IntptrTy, NULL);
+ IntptrTy, nullptr);
// Create globals.
RetvalTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), 8), false,
+ M, ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8), false,
GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
RetvalOriginTLS = new GlobalVariable(
@@ -364,16 +376,16 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
"__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
ParamTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
+ M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
ParamOriginTLS = new GlobalVariable(
- M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
- nullptr, "__msan_param_origin_tls", nullptr,
- GlobalVariable::InitialExecTLSModel);
+ M, ArrayType::get(OriginTy, kParamTLSSize / 4), false,
+ GlobalVariable::ExternalLinkage, nullptr, "__msan_param_origin_tls",
+ nullptr, GlobalVariable::InitialExecTLSModel);
VAArgTLS = new GlobalVariable(
- M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
+ M, ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), false,
GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
VAArgOverflowSizeTLS = new GlobalVariable(
@@ -393,7 +405,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
AnyFunctionPtrTy =
PointerType::getUnqual(FunctionType::get(IRB.getVoidTy(), false));
IndirectCallWrapperFn = M.getOrInsertFunction(
- ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL);
+ ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, nullptr);
}
if (WrapIndirectCalls && ClWrapIndirectCallsFast) {
@@ -442,7 +454,7 @@ bool MemorySanitizer::doInitialization(Module &M) {
// Insert a call to __msan_init/__msan_track_origins into the module's CTORs.
appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
- "__msan_init", IRB.getVoidTy(), NULL)), 0);
+ "__msan_init", IRB.getVoidTy(), nullptr)), 0);
if (TrackOrigins)
new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
@@ -559,7 +571,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// TODO(eugenis): handle non-zero constant shadow by inserting an
// unconditional check (can not simply fail compilation as this could
// be in the dead code).
- if (isa<Constant>(ConvertedShadow)) return;
+ if (!ClCheckConstantShadow)
+ if (isa<Constant>(ConvertedShadow)) return;
unsigned TypeSizeInBits =
MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
@@ -615,8 +628,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
- // See the comment in materializeStores().
- if (isa<Constant>(ConvertedShadow)) return;
+ // See the comment in storeOrigin().
+ if (!ClCheckConstantShadow)
+ if (isa<Constant>(ConvertedShadow)) return;
unsigned TypeSizeInBits =
MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
@@ -763,6 +777,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return VectorType::get(IntegerType::get(*MS.C, EltSize),
VT->getNumElements());
}
+ if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
+ return ArrayType::get(getShadowTy(AT->getElementType()),
+ AT->getNumElements());
+ }
if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
SmallVector<Type*, 4> Elements;
for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
@@ -882,11 +900,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
assert(ShadowTy);
if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
return Constant::getAllOnesValue(ShadowTy);
- StructType *ST = cast<StructType>(ShadowTy);
- SmallVector<Constant *, 4> Vals;
- for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
- Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
- return ConstantStruct::get(ST, Vals);
+ if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
+ SmallVector<Constant *, 4> Vals(AT->getNumElements(),
+ getPoisonedShadow(AT->getElementType()));
+ return ConstantArray::get(AT, Vals);
+ }
+ if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
+ SmallVector<Constant *, 4> Vals;
+ for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+ Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
+ return ConstantStruct::get(ST, Vals);
+ }
+ llvm_unreachable("Unexpected shadow type");
}
/// \brief Create a dirty shadow for a given value.
@@ -941,6 +966,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType())
: MS.DL->getTypeAllocSize(FArg.getType());
if (A == &FArg) {
+ bool Overflow = ArgOffset + Size > kParamTLSSize;
Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
if (FArg.hasByValAttr()) {
// ByVal pointer itself has clean shadow. We copy the actual
@@ -951,25 +977,38 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Type *EltType = A->getType()->getPointerElementType();
ArgAlign = MS.DL->getABITypeAlignment(EltType);
}
- unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
- Value *Cpy = EntryIRB.CreateMemCpy(
- getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), Base, Size,
- CopyAlign);
- DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
- (void)Cpy;
+ if (Overflow) {
+ // ParamTLS overflow.
+ EntryIRB.CreateMemSet(
+ getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB),
+ Constant::getNullValue(EntryIRB.getInt8Ty()), Size, ArgAlign);
+ } else {
+ unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
+ Value *Cpy = EntryIRB.CreateMemCpy(
+ getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), Base, Size,
+ CopyAlign);
+ DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
+ (void)Cpy;
+ }
*ShadowPtr = getCleanShadow(V);
} else {
- *ShadowPtr = EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
+ if (Overflow) {
+ // ParamTLS overflow.
+ *ShadowPtr = getCleanShadow(V);
+ } else {
+ *ShadowPtr =
+ EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
+ }
}
DEBUG(dbgs() << " ARG: " << FArg << " ==> " <<
**ShadowPtr << "\n");
- if (MS.TrackOrigins) {
+ if (MS.TrackOrigins && !Overflow) {
Value *OriginPtr =
getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
}
}
- ArgOffset += DataLayout::RoundUpAlignment(Size, kShadowTLSAlignment);
+ ArgOffset += RoundUpToAlignment(Size, kShadowTLSAlignment);
}
assert(*ShadowPtr && "Could not find shadow for an argument");
return *ShadowPtr;
@@ -1024,9 +1063,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// UMR warning in runtime if the value is not fully defined.
void insertShadowCheck(Value *Val, Instruction *OrigIns) {
assert(Val);
- Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
- if (!Shadow) return;
- Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+ Value *Shadow, *Origin;
+ if (ClCheckConstantShadow) {
+ Shadow = getShadow(Val);
+ if (!Shadow) return;
+ Origin = getOrigin(Val);
+ } else {
+ Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
+ if (!Shadow) return;
+ Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+ }
insertShadowCheck(Shadow, Origin, OrigIns);
}
@@ -1859,7 +1905,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Op = I.getArgOperand(0);
Type *OpType = Op->getType();
Function *BswapFunc = Intrinsic::getDeclaration(
- F.getParent(), Intrinsic::bswap, ArrayRef<Type*>(&OpType, 1));
+ F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
setOrigin(&I, getOrigin(Op));
}
@@ -2313,26 +2359,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
DEBUG(dbgs() << " Arg#" << i << ": " << *A <<
" Shadow: " << *ArgShadow << "\n");
+ bool ArgIsInitialized = false;
if (CS.paramHasAttr(i + 1, Attribute::ByVal)) {
assert(A->getType()->isPointerTy() &&
"ByVal argument is not a pointer!");
Size = MS.DL->getTypeAllocSize(A->getType()->getPointerElementType());
- unsigned Alignment = CS.getParamAlignment(i + 1);
+ if (ArgOffset + Size > kParamTLSSize) break;
+ unsigned ParamAlignment = CS.getParamAlignment(i + 1);
+ unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
Store = IRB.CreateMemCpy(ArgShadowBase,
getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
Size, Alignment);
} else {
Size = MS.DL->getTypeAllocSize(A->getType());
+ if (ArgOffset + Size > kParamTLSSize) break;
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
kShadowTLSAlignment);
+ Constant *Cst = dyn_cast<Constant>(ArgShadow);
+ if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
}
- if (MS.TrackOrigins)
+ if (MS.TrackOrigins && !ArgIsInitialized)
IRB.CreateStore(getOrigin(A),
getOriginPtrForArgument(A, IRB, ArgOffset));
(void)Store;
assert(Size != 0 && Store != nullptr);
DEBUG(dbgs() << " Param:" << *Store << "\n");
- ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+ ArgOffset += RoundUpToAlignment(Size, 8);
}
DEBUG(dbgs() << " done with call args\n");
@@ -2613,7 +2665,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
Type *RealTy = A->getType()->getPointerElementType();
uint64_t ArgSize = MS.DL->getTypeAllocSize(RealTy);
Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
- OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
+ OverflowOffset += RoundUpToAlignment(ArgSize, 8);
IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB),
ArgSize, kShadowTLSAlignment);
} else {
@@ -2635,7 +2687,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
case AK_Memory:
uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType());
Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
- OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
+ OverflowOffset += RoundUpToAlignment(ArgSize, 8);
}
IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
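A recurring theme in the MemorySanitizer hunks above is the new bound on shadow passed through thread-local parameter storage: argument shadow is only written while it still fits in the kParamTLSSize-byte buffer, and offsets now advance with RoundUpToAlignment rather than DataLayout::RoundUpAlignment. A stripped-down sketch of that bookkeeping (the storeShadowAt helper is invented for illustration):

    #include "llvm/Support/MathExtras.h"  // llvm::RoundUpToAlignment
    #include <cstdint>
    #include <vector>

    static const unsigned kParamTLSSize = 800;  // kept in sync with msan.h

    void writeArgShadows(const std::vector<uint64_t> &ArgSizes) {
      unsigned ArgOffset = 0;
      for (uint64_t Size : ArgSizes) {
        if (ArgOffset + Size > kParamTLSSize)
          break;  // ParamTLS overflow: stop emitting shadow stores
        // storeShadowAt(ArgOffset, Size);  // hypothetical helper
        ArgOffset += llvm::RoundUpToAlignment(Size, 8);
      }
    }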
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
new file mode 100644
index 0000000..f882072
--- /dev/null
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -0,0 +1,294 @@
+//===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Coverage instrumentation that works with AddressSanitizer
+// and potentially with other Sanitizers.
+//
+// We create a Guard boolean variable with the same linkage
+// as the function and inject this code into the entry block (CoverageLevel=1)
+// or all blocks (CoverageLevel>=2):
+// if (!*Guard) {
+// __sanitizer_cov();
+// *Guard = 1;
+// }
+// The accesses to Guard are atomic. The rest of the logic is
+// in __sanitizer_cov (it's fine to call it more than once).
+//
+// With CoverageLevel>=3 we also split critical edges, effectively
+// instrumenting all edges.
+//
+// CoverageLevel>=4 adds indirect call profiling, implemented as a function call.
+//
+// This coverage implementation provides very limited data:
+// it only tells if a given function (block) was ever executed. No counters.
+// But for many use cases this is what we need, and the added slowdown is small.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sancov"
+
+static const char *const kSanCovModuleInitName = "__sanitizer_cov_module_init";
+static const char *const kSanCovName = "__sanitizer_cov";
+static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16";
+static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter";
+static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block";
+static const char *const kSanCovModuleCtorName = "sancov.module_ctor";
+static const uint64_t kSanCtorAndDtorPriority = 1;
+
+static cl::opt<int> ClCoverageLevel("sanitizer-coverage-level",
+ cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
+ "3: all blocks and critical edges, "
+ "4: above plus indirect calls"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<int> ClCoverageBlockThreshold(
+ "sanitizer-coverage-block-threshold",
+ cl::desc("Add coverage instrumentation only to the entry block if there "
+ "are more than this number of blocks."),
+ cl::Hidden, cl::init(1500));
+
+static cl::opt<bool>
+ ClExperimentalTracing("sanitizer-coverage-experimental-tracing",
+ cl::desc("Experimental basic-block tracing: insert "
+ "callbacks at every basic block"),
+ cl::Hidden, cl::init(false));
+
+namespace {
+
+class SanitizerCoverageModule : public ModulePass {
+ public:
+ SanitizerCoverageModule(int CoverageLevel = 0)
+ : ModulePass(ID),
+ CoverageLevel(std::max(CoverageLevel, (int)ClCoverageLevel)) {}
+ bool runOnModule(Module &M) override;
+ bool runOnFunction(Function &F);
+ static char ID; // Pass identification, replacement for typeid
+ const char *getPassName() const override {
+ return "SanitizerCoverageModule";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DataLayoutPass>();
+ }
+
+ private:
+ void InjectCoverageForIndirectCalls(Function &F,
+ ArrayRef<Instruction *> IndirCalls);
+ bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
+ ArrayRef<Instruction *> IndirCalls);
+ bool InjectTracing(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void InjectCoverageAtBlock(Function &F, BasicBlock &BB);
+ Function *SanCovFunction;
+ Function *SanCovIndirCallFunction;
+ Function *SanCovModuleInit;
+ Function *SanCovTraceEnter, *SanCovTraceBB;
+ Type *IntptrTy;
+ LLVMContext *C;
+
+ int CoverageLevel;
+};
+
+} // namespace
+
+static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
+ if (Function *F = dyn_cast<Function>(FuncOrBitcast))
+ return F;
+ std::string Err;
+ raw_string_ostream Stream(Err);
+ Stream << "SanitizerCoverage interface function redefined: "
+ << *FuncOrBitcast;
+ report_fatal_error(Err);
+}
+
+bool SanitizerCoverageModule::runOnModule(Module &M) {
+ if (!CoverageLevel) return false;
+ C = &(M.getContext());
+ DataLayoutPass *DLP = &getAnalysis<DataLayoutPass>();
+ IntptrTy = Type::getIntNTy(*C, DLP->getDataLayout().getPointerSizeInBits());
+ Type *VoidTy = Type::getVoidTy(*C);
+
+ Function *CtorFunc =
+ Function::Create(FunctionType::get(VoidTy, false),
+ GlobalValue::InternalLinkage, kSanCovModuleCtorName, &M);
+ ReturnInst::Create(*C, BasicBlock::Create(*C, "", CtorFunc));
+ appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority);
+
+ SanCovFunction =
+ checkInterfaceFunction(M.getOrInsertFunction(kSanCovName, VoidTy, nullptr));
+ SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
+ SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction(
+ kSanCovModuleInitName, Type::getVoidTy(*C), IntptrTy, nullptr));
+ SanCovModuleInit->setLinkage(Function::ExternalLinkage);
+
+ if (ClExperimentalTracing) {
+ SanCovTraceEnter = checkInterfaceFunction(
+ M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, IntptrTy, nullptr));
+ SanCovTraceBB = checkInterfaceFunction(
+ M.getOrInsertFunction(kSanCovTraceBB, VoidTy, IntptrTy, nullptr));
+ }
+
+ for (auto &F : M)
+ runOnFunction(F);
+
+ IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+ IRB.CreateCall(SanCovModuleInit,
+ ConstantInt::get(IntptrTy, SanCovFunction->getNumUses()));
+ return true;
+}
+
+bool SanitizerCoverageModule::runOnFunction(Function &F) {
+ if (F.empty()) return false;
+ // For now instrument only functions that will also be asan-instrumented.
+ if (!F.hasFnAttribute(Attribute::SanitizeAddress))
+ return false;
+ if (CoverageLevel >= 3)
+ SplitAllCriticalEdges(F, this);
+ SmallVector<Instruction*, 8> IndirCalls;
+ SmallVector<BasicBlock*, 16> AllBlocks;
+ for (auto &BB : F) {
+ AllBlocks.push_back(&BB);
+ if (CoverageLevel >= 4)
+ for (auto &Inst : BB) {
+ CallSite CS(&Inst);
+ if (CS && !CS.getCalledFunction())
+ IndirCalls.push_back(&Inst);
+ }
+ }
+ InjectCoverage(F, AllBlocks, IndirCalls);
+ InjectTracing(F, AllBlocks);
+ return true;
+}
+
+// Experimental support for tracing.
+// Basically, insert a callback at the beginning of every basic block.
+// Every callback gets a pointer to a unique global for internal storage.
+bool SanitizerCoverageModule::InjectTracing(Function &F,
+ ArrayRef<BasicBlock *> AllBlocks) {
+ if (!ClExperimentalTracing) return false;
+ Type *Ty = ArrayType::get(IntptrTy, 1); // May need to use more words later.
+ for (auto BB : AllBlocks) {
+ IRBuilder<> IRB(BB->getFirstInsertionPt());
+ GlobalVariable *TraceCache = new GlobalVariable(
+ *F.getParent(), Ty, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Ty), "__sancov_gen_trace_cache");
+ IRB.CreateCall(&F.getEntryBlock() == BB ? SanCovTraceEnter : SanCovTraceBB,
+ IRB.CreatePointerCast(TraceCache, IntptrTy));
+ }
+ return true;
+}
+
+bool
+SanitizerCoverageModule::InjectCoverage(Function &F,
+ ArrayRef<BasicBlock *> AllBlocks,
+ ArrayRef<Instruction *> IndirCalls) {
+ if (!CoverageLevel) return false;
+
+ if (CoverageLevel == 1 ||
+ (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) {
+ InjectCoverageAtBlock(F, F.getEntryBlock());
+ } else {
+ for (auto BB : AllBlocks)
+ InjectCoverageAtBlock(F, *BB);
+ }
+ InjectCoverageForIndirectCalls(F, IndirCalls);
+ return true;
+}
+
+// On every indirect call we call a run-time function
+// __sanitizer_cov_indir_call* with two parameters:
+// - callee address,
+// - global cache array that contains kCacheSize pointers (zero-initialized).
+// The cache is used to speed up recording the caller-callee pairs.
+// The address of the caller is passed implicitly via caller PC.
+// kCacheSize is encoded in the name of the run-time function.
+void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
+ Function &F, ArrayRef<Instruction *> IndirCalls) {
+ if (IndirCalls.empty()) return;
+ const int kCacheSize = 16;
+ const int kCacheAlignment = 64; // Align for better performance.
+ Type *Ty = ArrayType::get(IntptrTy, kCacheSize);
+ for (auto I : IndirCalls) {
+ IRBuilder<> IRB(I);
+ CallSite CS(I);
+ Value *Callee = CS.getCalledValue();
+ if (dyn_cast<InlineAsm>(Callee)) continue;
+ GlobalVariable *CalleeCache = new GlobalVariable(
+ *F.getParent(), Ty, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Ty), "__sancov_gen_callee_cache");
+ CalleeCache->setAlignment(kCacheAlignment);
+ IRB.CreateCall2(SanCovIndirCallFunction,
+ IRB.CreatePointerCast(Callee, IntptrTy),
+ IRB.CreatePointerCast(CalleeCache, IntptrTy));
+ }
+}
+
+void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F,
+ BasicBlock &BB) {
+ BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
+ // Skip static allocas at the top of the entry block so they don't become
+ // dynamic when we split the block. If we used our optimized stack layout,
+ // then there will only be one alloca and it will come first.
+ for (; IP != BE; ++IP) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(IP);
+ if (!AI || !AI->isStaticAlloca())
+ break;
+ }
+
+ DebugLoc EntryLoc = &BB == &F.getEntryBlock()
+ ? IP->getDebugLoc().getFnDebugLoc(*C)
+ : IP->getDebugLoc();
+ IRBuilder<> IRB(IP);
+ IRB.SetCurrentDebugLocation(EntryLoc);
+ Type *Int8Ty = IRB.getInt8Ty();
+ GlobalVariable *Guard = new GlobalVariable(
+ *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int8Ty), "__sancov_gen_cov_" + F.getName());
+ LoadInst *Load = IRB.CreateLoad(Guard);
+ Load->setAtomic(Monotonic);
+ Load->setAlignment(1);
+ Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
+ Instruction *Ins = SplitBlockAndInsertIfThen(
+ Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ IRB.SetInsertPoint(Ins);
+ IRB.SetCurrentDebugLocation(EntryLoc);
+ // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
+ IRB.CreateCall(SanCovFunction);
+ StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
+ Store->setAtomic(Monotonic);
+ Store->setAlignment(1);
+}
+
+char SanitizerCoverageModule::ID = 0;
+INITIALIZE_PASS(SanitizerCoverageModule, "sancov",
+ "SanitizerCoverage: TODO."
+ "ModulePass", false, false)
+ModulePass *llvm::createSanitizerCoverageModulePass(int CoverageLevel) {
+ return new SanitizerCoverageModule(CoverageLevel);
+}
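The guard logic this new pass injects is easiest to read as ordinary source code. A rough C++ equivalent of what InjectCoverageAtBlock emits for one block (a sketch rather than generated output; in the pass the guard is a private i8 global accessed with monotonic atomic loads and stores):

    #include <atomic>

    extern "C" void __sanitizer_cov();  // provided by the sanitizer runtime

    static std::atomic<char> Guard{0};  // conceptually one guard per instrumented block

    void block_prologue() {
      // Relaxed ("monotonic") load; the slow path is expected to run only once.
      if (Guard.load(std::memory_order_relaxed) == 0) {
        __sanitizer_cov();  // records the caller's PC
        Guard.store(1, std::memory_order_relaxed);
      }
    }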
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 89386a6..8a56a1f 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -135,33 +135,33 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
// Initialize the callbacks.
TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+ "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_exit", IRB.getVoidTy(), NULL));
+ "__tsan_func_exit", IRB.getVoidTy(), nullptr));
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const size_t ByteSize = 1 << i;
const size_t BitSize = ByteSize * 8;
SmallString<32> ReadName("__tsan_read" + itostr(ByteSize));
TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction(
- ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+ ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<32> WriteName("__tsan_write" + itostr(ByteSize));
TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction(
- WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+ WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +
"_load");
TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicLoadName, Ty, PtrTy, OrdTy, NULL));
+ AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +
"_store");
TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction(
AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy,
- NULL));
+ nullptr));
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -185,33 +185,33 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction(
- RMWName, Ty, PtrTy, Ty, OrdTy, NULL));
+ RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
}
SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
"_compare_exchange_val");
TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, NULL));
+ AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
}
TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), NULL));
+ IRB.getInt8PtrTy(), nullptr));
TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+ "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL));
+ "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr));
TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL));
+ "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr));
MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
"memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, NULL));
+ IRB.getInt8PtrTy(), IntptrTy, nullptr));
MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
"memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IntptrTy, NULL));
+ IntptrTy, nullptr));
MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
"memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
- IntptrTy, NULL));
+ IntptrTy, nullptr));
}
bool ThreadSanitizer::doInitialization(Module &M) {
@@ -224,7 +224,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
IRBuilder<> IRB(M.getContext());
IntptrTy = IRB.getIntPtrTy(DL);
Value *TsanInit = M.getOrInsertFunction("__tsan_init",
- IRB.getVoidTy(), NULL);
+ IRB.getVoidTy(), nullptr);
appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
return true;
@@ -481,8 +481,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
createOrdering(&IRB, LI->getOrdering())};
- CallInst *C = CallInst::Create(TsanAtomicLoad[Idx],
- ArrayRef<Value*>(Args));
+ CallInst *C = CallInst::Create(TsanAtomicLoad[Idx], Args);
ReplaceInstWithInst(I, C);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
@@ -497,8 +496,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
IRB.CreateIntCast(SI->getValueOperand(), Ty, false),
createOrdering(&IRB, SI->getOrdering())};
- CallInst *C = CallInst::Create(TsanAtomicStore[Idx],
- ArrayRef<Value*>(Args));
+ CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args);
ReplaceInstWithInst(I, C);
} else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
Value *Addr = RMWI->getPointerOperand();
@@ -515,7 +513,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
createOrdering(&IRB, RMWI->getOrdering())};
- CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args));
+ CallInst *C = CallInst::Create(F, Args);
ReplaceInstWithInst(I, C);
} else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
Value *Addr = CASI->getPointerOperand();
@@ -543,7 +541,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
Function *F = FI->getSynchScope() == SingleThread ?
TsanAtomicSignalFence : TsanAtomicThreadFence;
- CallInst *C = CallInst::Create(F, ArrayRef<Value*>(Args));
+ CallInst *C = CallInst::Create(F, Args);
ReplaceInstWithInst(I, C);
}
return true;
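These ThreadSanitizer hunks, like the MemorySanitizer and ObjCARC changes elsewhere in the patch, drop explicit ArrayRef<Value*>(Args, N) wrappers: ArrayRef converts implicitly from a C array, and makeArrayRef deduces the element type from a pointer/length pair. A self-contained sketch (the values are arbitrary):

    #include "llvm/ADT/ArrayRef.h"

    void arrayRefExamples() {
      int Vals[3] = {1, 2, 3};
      llvm::ArrayRef<int> FromArray = Vals;                         // implicit from a C array
      llvm::ArrayRef<int> FromPair  = llvm::makeArrayRef(Vals, 3);  // deduces <int>
      (void)FromArray;
      (void)FromPair;
    }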
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 4098428..e286dbc 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -19,8 +19,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_SCALAR_ARCRUNTIMEENTRYPOINTS_H
-#define LLVM_TRANSFORMS_SCALAR_ARCRUNTIMEENTRYPOINTS_H
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_ARCRUNTIMEENTRYPOINTS_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_ARCRUNTIMEENTRYPOINTS_H
#include "ObjCARC.h"
@@ -183,4 +183,4 @@ private:
} // namespace objcarc
} // namespace llvm
-#endif // LLVM_TRANSFORMS_SCALAR_ARCRUNTIMEENTRYPOINTS_H
+#endif
diff --git a/lib/Transforms/ObjCARC/Android.mk b/lib/Transforms/ObjCARC/Android.mk
index 226e9e1..cf45a95 100644
--- a/lib/Transforms/ObjCARC/Android.mk
+++ b/lib/Transforms/ObjCARC/Android.mk
@@ -9,7 +9,8 @@ transforms_objcarc_SRC_FILES := \
ObjCARCExpand.cpp \
ObjCARCOpts.cpp \
ObjCARCUtil.cpp \
- ProvenanceAnalysis.cpp
+ ProvenanceAnalysis.cpp \
+ ProvenanceAnalysisEvaluator.cpp
# For the host
# =====================================================
diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt
index 233deb3..b449fac 100644
--- a/lib/Transforms/ObjCARC/CMakeLists.txt
+++ b/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMObjCARCOpts
ObjCARCContract.cpp
DependencyAnalysis.cpp
ProvenanceAnalysis.cpp
+ ProvenanceAnalysisEvaluator.cpp
)
add_dependencies(LLVMObjCARCOpts intrinsics_gen)
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 08c8842..f6c236c 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -206,8 +206,8 @@ void
llvm::objcarc::FindDependencies(DependenceKind Flavor,
const Value *Arg,
BasicBlock *StartBB, Instruction *StartInst,
- SmallPtrSet<Instruction *, 4> &DependingInsts,
- SmallPtrSet<const BasicBlock *, 4> &Visited,
+ SmallPtrSetImpl<Instruction *> &DependingInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited,
ProvenanceAnalysis &PA) {
BasicBlock::iterator StartPos = StartInst;
@@ -229,7 +229,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
// Add the predecessors to the worklist.
do {
BasicBlock *PredBB = *PI;
- if (Visited.insert(PredBB))
+ if (Visited.insert(PredBB).second)
Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
} while (++PI != PE);
break;
@@ -246,9 +246,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
// Determine whether the original StartBB post-dominates all of the blocks we
// visited. If not, insert a sentinel indicating that most optimizations are
// not safe.
- for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
- E = Visited.end(); I != E; ++I) {
- const BasicBlock *BB = *I;
+ for (const BasicBlock *BB : Visited) {
if (BB == StartBB)
continue;
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
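The Visited.insert(...).second rewrites in this and the following ObjCARC files track an interface change at this revision: SmallPtrSet::insert now returns std::pair<iterator, bool> instead of bool, and function signatures take SmallPtrSetImpl so callers can pass sets of any small size. A minimal sketch of the updated idiom:

    #include "llvm/ADT/SmallPtrSet.h"

    // Returns true only the first time P is seen; insert().second reports
    // whether the element was actually added.
    bool markVisited(llvm::SmallPtrSetImpl<const int *> &Visited, const int *P) {
      return Visited.insert(P).second;
    }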
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.h b/lib/Transforms/ObjCARC/DependencyAnalysis.h
index 617cdf3..7b5601a 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -20,8 +20,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
-#define LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
#include "llvm/ADT/SmallPtrSet.h"
@@ -53,8 +53,8 @@ enum DependenceKind {
void FindDependencies(DependenceKind Flavor,
const Value *Arg,
BasicBlock *StartBB, Instruction *StartInst,
- SmallPtrSet<Instruction *, 4> &DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4> &Visited,
+ SmallPtrSetImpl<Instruction *> &DependingInstructions,
+ SmallPtrSetImpl<const BasicBlock *> &Visited,
ProvenanceAnalysis &PA);
bool
@@ -76,4 +76,4 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
} // namespace objcarc
} // namespace llvm
-#endif // LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
+#endif
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index 373168e..6ea038b 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -42,6 +42,7 @@ void llvm::initializeObjCARCOpts(PassRegistry &Registry) {
initializeObjCARCExpandPass(Registry);
initializeObjCARCContractPass(Registry);
initializeObjCARCOptPass(Registry);
+ initializePAEvalPass(Registry);
}
void LLVMInitializeObjCARCOpts(LLVMPassRegistryRef R) {
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index f71cf2b..7a7eae8 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -20,8 +20,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_SCALAR_OBJCARC_H
-#define LLVM_TRANSFORMS_SCALAR_OBJCARC_H
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -380,11 +380,15 @@ static inline bool IsObjCIdentifiedObject(const Value *V) {
StringRef Name = GV->getName();
// These special variables are known to hold values which are not
// reference-counted pointers.
- if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
- Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
- Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
- Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
- Name.startswith("\01l_objc_msgSend_fixup_"))
+ if (Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+
+ StringRef Section = GV->getSection();
+ if (Section.find("__message_refs") != StringRef::npos ||
+ Section.find("__objc_classrefs") != StringRef::npos ||
+ Section.find("__objc_superrefs") != StringRef::npos ||
+ Section.find("__objc_methname") != StringRef::npos ||
+ Section.find("__cstring") != StringRef::npos)
return true;
}
}
@@ -395,4 +399,4 @@ static inline bool IsObjCIdentifiedObject(const Value *V) {
} // end namespace objcarc
} // end namespace llvm
-#endif // LLVM_TRANSFORMS_SCALAR_OBJCARC_H
+#endif
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index 2c09e70..c61b6b0 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -62,8 +62,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
AliasResult Result =
- AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
- Location(SB, LocB.Size, LocB.TBAATag));
+ AliasAnalysis::alias(Location(SA, LocA.Size, LocA.AATags),
+ Location(SB, LocB.Size, LocB.AATags));
if (Result != MayAlias)
return Result;
@@ -93,7 +93,7 @@ ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// First, strip off no-ops, including ObjC-specific no-ops, and try making
// a precise alias query.
const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
- if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
+ if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.AATags),
OrLocal))
return true;
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
index 97b565b..3fcea4e 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -20,8 +20,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-#define LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Pass.h"
@@ -71,4 +71,4 @@ namespace objcarc {
} // namespace objcarc
} // namespace llvm
-#endif // LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+#endif
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index f48d53d..eb325eb 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -72,9 +72,9 @@ namespace {
bool ContractAutorelease(Function &F, Instruction *Autorelease,
InstructionClass Class,
- SmallPtrSet<Instruction *, 4>
+ SmallPtrSetImpl<Instruction *>
&DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4>
+ SmallPtrSetImpl<const BasicBlock *>
&Visited);
void ContractRelease(Instruction *Release,
@@ -150,9 +150,9 @@ ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
bool
ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
InstructionClass Class,
- SmallPtrSet<Instruction *, 4>
+ SmallPtrSetImpl<Instruction *>
&DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4>
+ SmallPtrSetImpl<const BasicBlock *>
&Visited) {
const Value *Arg = GetObjCArg(Autorelease);
@@ -508,9 +508,8 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// If this function has no escaping allocas or suspicious vararg usage,
// objc_storeStrong calls can be marked with the "tail" keyword.
if (TailOkForStoreStrongs)
- for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
- E = StoreStrongCalls.end(); I != E; ++I)
- (*I)->setTailCall();
+ for (CallInst *CI : StoreStrongCalls)
+ CI->setTailCall();
StoreStrongCalls.clear();
return Changed;
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index dd4dd50..95c6674 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -188,7 +188,7 @@ static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
if (isa<AllocaInst>(P))
return true;
- if (!Visited.insert(P))
+ if (!Visited.insert(P).second)
continue;
if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) {
@@ -411,10 +411,8 @@ bool RRInfo::Merge(const RRInfo &Other) {
// Merge the insert point sets. If there are any differences,
// that makes this a partial merge.
bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size();
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- I = Other.ReverseInsertPts.begin(),
- E = Other.ReverseInsertPts.end(); I != E; ++I)
- Partial |= ReverseInsertPts.insert(*I);
+ for (Instruction *Inst : Other.ReverseInsertPts)
+ Partial |= ReverseInsertPts.insert(Inst).second;
return Partial;
}
@@ -887,8 +885,7 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId,
OldSeq),
SequenceToMDString(Inst->getContext(),
NewSeq)};
- Node = MDNode::get(Inst->getContext(),
- ArrayRef<Value*>(tmp, 3));
+ Node = MDNode::get(Inst->getContext(), tmp);
Inst->setMetadata(NodeId, Node);
}
@@ -908,8 +905,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *I8XX = PointerType::getUnqual(I8X);
Type *Params[] = {I8XX, I8XX};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
- ArrayRef<Type*>(Params, 2),
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params,
/*isVarArg=*/false);
Constant *Callee = M->getOrInsertFunction(Name, FTy);
@@ -951,8 +947,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *I8XX = PointerType::getUnqual(I8X);
Type *Params[] = {I8XX, I8XX};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
- ArrayRef<Type*>(Params, 2),
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params,
/*isVarArg=*/false);
Constant *Callee = M->getOrInsertFunction(Name, FTy);
@@ -2199,7 +2194,7 @@ ComputePostOrders(Function &F,
while (SuccStack.back().second != SE) {
BasicBlock *SuccBB = *SuccStack.back().second++;
- if (Visited.insert(SuccBB)) {
+ if (Visited.insert(SuccBB).second) {
TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
BBStates[CurrBB].addSucc(SuccBB);
@@ -2240,7 +2235,7 @@ ComputePostOrders(Function &F,
BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
while (PredStack.back().second != PE) {
BasicBlock *BB = *PredStack.back().second++;
- if (Visited.insert(BB)) {
+ if (Visited.insert(BB).second) {
PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
goto reverse_dfs_next_succ;
}
@@ -2299,10 +2294,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
// Insert the new retain and release calls.
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- PI = ReleasesToMove.ReverseInsertPts.begin(),
- PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *InsertPt = *PI;
+ for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
@@ -2313,10 +2305,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n"
"At insertion point: " << *InsertPt << "\n");
}
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- PI = RetainsToMove.ReverseInsertPts.begin(),
- PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *InsertPt = *PI;
+ for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
@@ -2333,18 +2322,12 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
}
// Delete the original retain and release calls.
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- AI = RetainsToMove.Calls.begin(),
- AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
- Instruction *OrigRetain = *AI;
+ for (Instruction *OrigRetain : RetainsToMove.Calls) {
Retains.blot(OrigRetain);
DeadInsts.push_back(OrigRetain);
DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
}
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- AI = ReleasesToMove.Calls.begin(),
- AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
- Instruction *OrigRelease = *AI;
+ for (Instruction *OrigRelease : ReleasesToMove.Calls) {
Releases.erase(OrigRelease);
DeadInsts.push_back(OrigRelease);
DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
@@ -2392,10 +2375,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
KnownSafeTD &= NewRetainRRI.KnownSafe;
MultipleOwners =
MultipleOwners || MultiOwnersSet.count(GetObjCArg(NewRetain));
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- LI = NewRetainRRI.Calls.begin(),
- LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
- Instruction *NewRetainRelease = *LI;
+ for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
DenseMap<Value *, RRInfo>::const_iterator Jt =
Releases.find(NewRetainRelease);
if (Jt == Releases.end())
@@ -2410,7 +2390,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (!NewRetainReleaseRRI.Calls.count(NewRetain))
return false;
- if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+ if (ReleasesToMove.Calls.insert(NewRetainRelease).second) {
// If we overflow when we compute the path count, don't remove/move
// anything.
@@ -2441,12 +2421,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
// Collect the optimal insertion points.
if (!KnownSafe)
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
- RE = NewRetainReleaseRRI.ReverseInsertPts.end();
- RI != RE; ++RI) {
- Instruction *RIP = *RI;
- if (ReleasesToMove.ReverseInsertPts.insert(RIP)) {
+ for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) {
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) {
// If we overflow when we compute the path count, don't
// remove/move anything.
const BBState &RIPBBState = BBStates[RIP->getParent()];
@@ -2476,10 +2452,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
const RRInfo &NewReleaseRRI = It->second;
KnownSafeBU &= NewReleaseRRI.KnownSafe;
CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- LI = NewReleaseRRI.Calls.begin(),
- LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
- Instruction *NewReleaseRetain = *LI;
+ for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
MapVector<Value *, RRInfo>::const_iterator Jt =
Retains.find(NewReleaseRetain);
if (Jt == Retains.end())
@@ -2494,7 +2467,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (!NewReleaseRetainRRI.Calls.count(NewRelease))
return false;
- if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ if (RetainsToMove.Calls.insert(NewReleaseRetain).second) {
// If we overflow when we compute the path count, don't remove/move
// anything.
const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
@@ -2509,12 +2482,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
// Collect the optimal insertion points.
if (!KnownSafe)
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
- RE = NewReleaseRetainRRI.ReverseInsertPts.end();
- RI != RE; ++RI) {
- Instruction *RIP = *RI;
- if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+ for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) {
+ if (RetainsToMove.ReverseInsertPts.insert(RIP).second) {
// If we overflow when we compute the path count, don't
// remove/move anything.
const BBState &RIPBBState = BBStates[RIP->getParent()];
@@ -2850,8 +2819,8 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
/// shared pointer argument. Note that Retain need not be in BB.
static bool
HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
- SmallPtrSet<Instruction *, 4> &DepInsts,
- SmallPtrSet<const BasicBlock *, 4> &Visited,
+ SmallPtrSetImpl<Instruction *> &DepInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited,
ProvenanceAnalysis &PA) {
FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
DepInsts, Visited, PA);
@@ -2879,8 +2848,8 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
static CallInst *
FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
Instruction *Autorelease,
- SmallPtrSet<Instruction *, 4> &DepInsts,
- SmallPtrSet<const BasicBlock *, 4> &Visited,
+ SmallPtrSetImpl<Instruction *> &DepInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited,
ProvenanceAnalysis &PA) {
FindDependencies(CanChangeRetainCount, Arg,
BB, Autorelease, DepInsts, Visited, PA);
@@ -2906,8 +2875,8 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
static CallInst *
FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
ReturnInst *Ret,
- SmallPtrSet<Instruction *, 4> &DepInsts,
- SmallPtrSet<const BasicBlock *, 4> &V,
+ SmallPtrSetImpl<Instruction *> &DepInsts,
+ SmallPtrSetImpl<const BasicBlock *> &V,
ProvenanceAnalysis &PA) {
FindDependencies(NeedsPositiveRetainCount, Arg,
BB, Ret, DepInsts, V, PA);
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 22be6fd..410abfc 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -62,7 +62,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
SmallPtrSet<const Value *, 4> UniqueSrc;
for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
const Value *PV1 = A->getIncomingValue(i);
- if (UniqueSrc.insert(PV1) && related(PV1, B))
+ if (UniqueSrc.insert(PV1).second && related(PV1, B))
return true;
}
@@ -94,7 +94,7 @@ static bool IsStoredObjCPointer(const Value *P) {
if (isa<PtrToIntInst>(P))
// Assume the worst.
return true;
- if (Visited.insert(Ur))
+ if (Visited.insert(Ur).second)
Worklist.push_back(Ur);
}
} while (!Worklist.empty());
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index a13fb9e..7820468 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -22,8 +22,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
-#define LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
#include "llvm/ADT/DenseMap.h"
@@ -77,4 +77,4 @@ public:
} // end namespace objcarc
} // end namespace llvm
-#endif // LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+#endif
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
new file mode 100644
index 0000000..d836632
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -0,0 +1,92 @@
+//===- ProvenanceAnalysisEvaluator.cpp - ObjC ARC Optimization ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ProvenanceAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+namespace {
+class PAEval : public FunctionPass {
+
+public:
+ static char ID;
+ PAEval();
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+}
+
+char PAEval::ID = 0;
+PAEval::PAEval() : FunctionPass(ID) {}
+
+void PAEval::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+}
+
+static StringRef getName(Value *V) {
+ StringRef Name = V->getName();
+ if (Name.startswith("\1"))
+ return Name.substr(1);
+ return Name;
+}
+
+static void insertIfNamed(SetVector<Value *> &Values, Value *V) {
+ if (!V->hasName())
+ return;
+ Values.insert(V);
+}
+
+bool PAEval::runOnFunction(Function &F) {
+ SetVector<Value *> Values;
+
+ for (auto &Arg : F.args())
+ insertIfNamed(Values, &Arg);
+
+ for (auto I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ insertIfNamed(Values, &*I);
+
+ for (auto &Op : I->operands())
+ insertIfNamed(Values, Op);
+ }
+
+ ProvenanceAnalysis PA;
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ for (Value *V1 : Values) {
+ StringRef NameV1 = getName(V1);
+ for (Value *V2 : Values) {
+ StringRef NameV2 = getName(V2);
+ if (NameV1 >= NameV2)
+ continue;
+ errs() << NameV1 << " and " << NameV2;
+ if (PA.related(V1, V2))
+ errs() << " are related.\n";
+ else
+ errs() << " are not related.\n";
+ }
+ }
+
+ return false;
+}
+
+FunctionPass *llvm::createPAEvalPass() { return new PAEval(); }
+
+INITIALIZE_PASS_BEGIN(PAEval, "pa-eval",
+ "Evaluate ProvenanceAnalysis on all pairs", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(PAEval, "pa-eval",
+ "Evaluate ProvenanceAnalysis on all pairs", false, true)
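The new evaluator collects every named argument, instruction, and operand in the function, then queries ProvenanceAnalysis on each pair of names and prints whether the two values are related. A minimal sketch of driving it from C++ with the legacy pass manager; only createPAEvalPass() comes from this patch, the other calls are standard LLVM API of this vintage, and the module is assumed to be already loaded:

  #include "llvm/Analysis/Passes.h"
  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  static void evaluateProvenance(Module &M) {
    legacy::PassManager PM;
    PM.add(createBasicAliasAnalysisPass()); // satisfy the AliasAnalysis requirement
    PM.add(createPAEvalPass());             // prints "<a> and <b> are (not) related."
    PM.run(M);
  }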
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 1a3a4aa..3d91984 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -73,7 +73,7 @@ bool ADCE::runOnFunction(Function& F) {
for (Instruction::op_iterator OI = curr->op_begin(), OE = curr->op_end();
OI != OE; ++OI)
if (Instruction* Inst = dyn_cast<Instruction>(OI))
- if (alive.insert(Inst))
+ if (alive.insert(Inst).second)
worklist.push_back(Inst);
}
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
new file mode 100644
index 0000000..06c3dfd
--- /dev/null
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -0,0 +1,428 @@
+//===----------------------- AlignmentFromAssumptions.cpp -----------------===//
+// Set Load/Store Alignments From Assumptions
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a ScalarEvolution-based transformation to set
+// the alignments of loads, stores, and memory intrinsics based on the truth
+// expressions of assume intrinsics. The primary motivation is to handle
+// complex alignment assumptions that apply to vector loads and stores that
+// appear after vectorization and unrolling.
+//
+//===----------------------------------------------------------------------===//
+
+#define AA_NAME "alignment-from-assumptions"
+#define DEBUG_TYPE AA_NAME
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumLoadAlignChanged,
+ "Number of loads changed by alignment assumptions");
+STATISTIC(NumStoreAlignChanged,
+ "Number of stores changed by alignment assumptions");
+STATISTIC(NumMemIntAlignChanged,
+ "Number of memory intrinsics changed by alignment assumptions");
+
+namespace {
+struct AlignmentFromAssumptions : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ AlignmentFromAssumptions() : FunctionPass(ID) {
+ initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AssumptionTracker>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+
+ AU.setPreservesCFG();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+ // For memory transfers, we need a common alignment for both the source and
+ // destination. If we have a new alignment for only one operand of a transfer
+ // instruction, save it in these maps. If we reach the other operand through
+ // another assumption later, then we may change the alignment at that point.
+ DenseMap<MemTransferInst *, unsigned> NewDestAlignments, NewSrcAlignments;
+
+ AssumptionTracker *AT;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ const DataLayout *DL;
+
+ bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
+ const SCEV *&OffSCEV);
+ bool processAssumption(CallInst *I);
+};
+}
+
+char AlignmentFromAssumptions::ID = 0;
+static const char aip_name[] = "Alignment from assumptions";
+INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
+ aip_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
+ aip_name, false, false)
+
+FunctionPass *llvm::createAlignmentFromAssumptionsPass() {
+ return new AlignmentFromAssumptions();
+}
+
+// Given an expression for the (constant) alignment, AlignSCEV, and an
+// expression for the displacement between a pointer and the aligned address,
+// DiffSCEV, compute the alignment of the displaced pointer if it can be reduced
+// to a constant. Using SCEV to compute alignment handles the case where
+// DiffSCEV is a recurrence with constant start such that the aligned offset
+// is constant. e.g. {16,+,32} % 32 -> 16.
+static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
+ const SCEV *AlignSCEV,
+ ScalarEvolution *SE) {
+ // DiffUnits = Diff % int64_t(Alignment)
+ const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV);
+ const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV);
+ const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV);
+
+ DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " <<
+ *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");
+
+ if (const SCEVConstant *ConstDUSCEV =
+ dyn_cast<SCEVConstant>(DiffUnitsSCEV)) {
+ int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue();
+
+ // If the displacement is an exact multiple of the alignment, then the
+ // displaced pointer has the same alignment as the aligned pointer, so
+ // return the alignment value.
+ if (!DiffUnits)
+ return (unsigned)
+ cast<SCEVConstant>(AlignSCEV)->getValue()->getSExtValue();
+
+ // If the displacement is not an exact multiple, but the remainder is a
+ // constant, then return this remainder (but only if it is a power of 2).
+ uint64_t DiffUnitsAbs = abs64(DiffUnits);
+ if (isPowerOf2_64(DiffUnitsAbs))
+ return (unsigned) DiffUnitsAbs;
+ }
+
+ return 0;
+}
+
+// There is an address given by an offset OffSCEV from AASCEV which has an
+// alignment AlignSCEV. Use that information, if possible, to compute a new
+// alignment for Ptr.
+static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
+ const SCEV *OffSCEV, Value *Ptr,
+ ScalarEvolution *SE) {
+ const SCEV *PtrSCEV = SE->getSCEV(Ptr);
+ const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
+
+ // On 32-bit platforms, DiffSCEV might now have type i32 -- we've always
+ // sign-extended OffSCEV to i64, so make sure they agree again.
+ DiffSCEV = SE->getNoopOrSignExtend(DiffSCEV, OffSCEV->getType());
+
+ // What we really want to know is the overall offset to the aligned
+ // address. This address is displaced by the provided offset.
+ DiffSCEV = SE->getMinusSCEV(DiffSCEV, OffSCEV);
+
+ DEBUG(dbgs() << "AFI: alignment of " << *Ptr << " relative to " <<
+ *AlignSCEV << " and offset " << *OffSCEV <<
+ " using diff " << *DiffSCEV << "\n");
+
+ unsigned NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE);
+ DEBUG(dbgs() << "\tnew alignment: " << NewAlignment << "\n");
+
+ if (NewAlignment) {
+ return NewAlignment;
+ } else if (const SCEVAddRecExpr *DiffARSCEV =
+ dyn_cast<SCEVAddRecExpr>(DiffSCEV)) {
+ // The relative offset to the alignment assumption did not yield a constant,
+ // but we should try harder: if we assume that a is 32-byte aligned, then in
+ // for (i = 0; i < 1024; i += 4) r += a[i]; not all of the loads from a are
+ // 32-byte aligned; instead they alternate between 32-byte and 16-byte alignment.
+ // As a result, the new alignment will not be a constant, but can still
+ // be improved over the default (of 4) to 16.
+
+ const SCEV *DiffStartSCEV = DiffARSCEV->getStart();
+ const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE);
+
+ DEBUG(dbgs() << "\ttrying start/inc alignment using start " <<
+ *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n");
+
+ // Now compute the new alignment using the displacement to the value in the
+ // first iteration, and also the alignment using the per-iteration delta.
+ // If these are the same, then use that answer. Otherwise, use the smaller
+ // one, but only if it divides the larger one.
+ NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE);
+ unsigned NewIncAlignment = getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE);
+
+ DEBUG(dbgs() << "\tnew start alignment: " << NewAlignment << "\n");
+ DEBUG(dbgs() << "\tnew inc alignment: " << NewIncAlignment << "\n");
+
+ if (!NewAlignment || !NewIncAlignment) {
+ return 0;
+ } else if (NewAlignment > NewIncAlignment) {
+ if (NewAlignment % NewIncAlignment == 0) {
+ DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+ NewIncAlignment << "\n");
+ return NewIncAlignment;
+ }
+ } else if (NewIncAlignment > NewAlignment) {
+ if (NewIncAlignment % NewAlignment == 0) {
+ DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+ NewAlignment << "\n");
+ return NewAlignment;
+ }
+ } else if (NewIncAlignment == NewAlignment) {
+ DEBUG(dbgs() << "\tnew start/inc alignment: " <<
+ NewAlignment << "\n");
+ return NewAlignment;
+ }
+ }
+
+ return 0;
+}
+
+bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
+ Value *&AAPtr, const SCEV *&AlignSCEV,
+ const SCEV *&OffSCEV) {
+ // An alignment assume must be a statement about the least-significant
+ // bits of the pointer being zero, possibly with some offset.
+ ICmpInst *ICI = dyn_cast<ICmpInst>(I->getArgOperand(0));
+ if (!ICI)
+ return false;
+
+ // This must be an expression of the form: x & m == 0.
+ if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
+ return false;
+
+ // Swap things around so that the RHS is 0.
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS);
+ const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS);
+ if (CmpLHSSCEV->isZero())
+ std::swap(CmpLHS, CmpRHS);
+ else if (!CmpRHSSCEV->isZero())
+ return false;
+
+ BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS);
+ if (!CmpBO || CmpBO->getOpcode() != Instruction::And)
+ return false;
+
+ // Swap things around so that the right operand of the and is a constant
+ // (the mask); we cannot deal with variable masks.
+ Value *AndLHS = CmpBO->getOperand(0);
+ Value *AndRHS = CmpBO->getOperand(1);
+ const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS);
+ const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS);
+ if (isa<SCEVConstant>(AndLHSSCEV)) {
+ std::swap(AndLHS, AndRHS);
+ std::swap(AndLHSSCEV, AndRHSSCEV);
+ }
+
+ const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV);
+ if (!MaskSCEV)
+ return false;
+
+ // The mask must have some trailing ones (otherwise the condition is
+ // trivial and tells us nothing about the alignment of the left operand).
+ unsigned TrailingOnes =
+ MaskSCEV->getValue()->getValue().countTrailingOnes();
+ if (!TrailingOnes)
+ return false;
+
+ // Cap the alignment at the maximum with which LLVM can deal (and make sure
+ // we don't overflow the shift).
+ uint64_t Alignment;
+ TrailingOnes = std::min(TrailingOnes,
+ unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+ Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment);
+
+ Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext());
+ AlignSCEV = SE->getConstant(Int64Ty, Alignment);
+
+ // The LHS might be a ptrtoint instruction, or it might be the pointer
+ // with an offset.
+ AAPtr = nullptr;
+ OffSCEV = nullptr;
+ if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
+ AAPtr = PToI->getPointerOperand();
+ OffSCEV = SE->getConstant(Int64Ty, 0);
+ } else if (const SCEVAddExpr* AndLHSAddSCEV =
+ dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
+ // Try to find the ptrtoint; subtract it and the rest is the offset.
+ for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(),
+ JE = AndLHSAddSCEV->op_end(); J != JE; ++J)
+ if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J))
+ if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) {
+ AAPtr = PToI->getPointerOperand();
+ OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J);
+ break;
+ }
+ }
+
+ if (!AAPtr)
+ return false;
+
+ // Sign extend the offset to 64 bits (so that it is like all of the other
+ // expressions).
+ unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
+ if (OffSCEVBits < 64)
+ OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
+ else if (OffSCEVBits > 64)
+ return false;
+
+ AAPtr = AAPtr->stripPointerCasts();
+ return true;
+}
+
+bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
+ Value *AAPtr;
+ const SCEV *AlignSCEV, *OffSCEV;
+ if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
+ return false;
+
+ const SCEV *AASCEV = SE->getSCEV(AAPtr);
+
+ // Apply the assumption to all other users of the specified pointer.
+ SmallPtrSet<Instruction *, 32> Visited;
+ SmallVector<Instruction*, 16> WorkList;
+ for (User *J : AAPtr->users()) {
+ if (J == ACall)
+ continue;
+
+ if (Instruction *K = dyn_cast<Instruction>(J))
+ if (isValidAssumeForContext(ACall, K, DL, DT))
+ WorkList.push_back(K);
+ }
+
+ while (!WorkList.empty()) {
+ Instruction *J = WorkList.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
+ unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+ LI->getPointerOperand(), SE);
+
+ if (NewAlignment > LI->getAlignment()) {
+ LI->setAlignment(NewAlignment);
+ ++NumLoadAlignChanged;
+ }
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
+ unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+ SI->getPointerOperand(), SE);
+
+ if (NewAlignment > SI->getAlignment()) {
+ SI->setAlignment(NewAlignment);
+ ++NumStoreAlignChanged;
+ }
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
+ unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+ MI->getDest(), SE);
+
+ // For memory transfers, we need a common alignment for both the
+ // source and destination. If we have a new alignment for this
+ // instruction, but only for one operand, save it. If we reach the
+ // other operand through another assumption later, then we may
+ // change the alignment at that point.
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
+ MTI->getSource(), SE);
+
+ DenseMap<MemTransferInst *, unsigned>::iterator DI =
+ NewDestAlignments.find(MTI);
+ unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ?
+ 0 : DI->second;
+
+ DenseMap<MemTransferInst *, unsigned>::iterator SI =
+ NewSrcAlignments.find(MTI);
+ unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ?
+ 0 : SI->second;
+
+ DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " <<
+ AltDestAlignment << " " << NewSrcAlignment <<
+ " " << AltSrcAlignment << "\n");
+
+ // Of these four alignments, pick the largest possible...
+ unsigned NewAlignment = 0;
+ if (NewDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
+ NewAlignment = std::max(NewAlignment, NewDestAlignment);
+ if (AltDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
+ NewAlignment = std::max(NewAlignment, AltDestAlignment);
+ if (NewSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
+ NewAlignment = std::max(NewAlignment, NewSrcAlignment);
+ if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
+ NewAlignment = std::max(NewAlignment, AltSrcAlignment);
+
+ if (NewAlignment > MI->getAlignment()) {
+ MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
+ MI->getParent()->getContext()), NewAlignment));
+ ++NumMemIntAlignChanged;
+ }
+
+ NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment));
+ NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment));
+ } else if (NewDestAlignment > MI->getAlignment()) {
+ assert((!isa<MemIntrinsic>(MI) || isa<MemSetInst>(MI)) &&
+ "Unknown memory intrinsic");
+
+ MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
+ MI->getParent()->getContext()), NewDestAlignment));
+ ++NumMemIntAlignChanged;
+ }
+ }
+
+ // Now that we've updated that use of the pointer, look for other uses of
+ // the pointer to update.
+ Visited.insert(J);
+ for (User *UJ : J->users()) {
+ Instruction *K = cast<Instruction>(UJ);
+ if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT))
+ WorkList.push_back(K);
+ }
+ }
+
+ return true;
+}
+
+bool AlignmentFromAssumptions::runOnFunction(Function &F) {
+ bool Changed = false;
+ AT = &getAnalysis<AssumptionTracker>();
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+
+ NewDestAlignments.clear();
+ NewSrcAlignments.clear();
+
+ for (auto &I : AT->assumptions(&F))
+ Changed |= processAssumption(I);
+
+ return Changed;
+}
+
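A standalone arithmetic sketch, not part of the patch, of what getNewAlignmentDiff() and the start/step refinement above compute. It mirrors the effect of the SCEV expressions on plain integers, using the {16,+,32} recurrence against a 32-byte alignment assumption from the comments: the start offset proves 16, the per-iteration step proves 32, and the alignment that holds on every iteration is the smaller of the two.

  #include <cassert>
  #include <cstdint>

  // Provable alignment of an access whose byte offset from the assumed-aligned
  // pointer is Diff, given assumed alignment Align; 0 means nothing was proved.
  // This mirrors the "DiffUnits = Diff % int64_t(Alignment)" comment above.
  static unsigned alignFromDiff(int64_t Diff, int64_t Align) {
    int64_t Rem = Diff % Align;
    if (Rem < 0)
      Rem += Align;                                    // non-negative remainder
    if (Rem == 0)
      return unsigned(Align);                          // exact multiple
    return (Rem & (Rem - 1)) == 0 ? unsigned(Rem) : 0; // power of two only
  }

  int main() {
    unsigned StartAlign = alignFromDiff(16, 32); // start of the recurrence -> 16
    unsigned StepAlign  = alignFromDiff(32, 32); // per-iteration step      -> 32
    assert(StartAlign == 16 && StepAlign == 32);
    // As in getNewAlignment(): both are known and the smaller (16) divides the
    // larger (32), so 16 is the alignment that holds on every iteration.
    assert(StepAlign % StartAlign == 0);
    return 0;
  }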
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index 5e22de6..9028b42 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -2,12 +2,14 @@ LOCAL_PATH:= $(call my-dir)
transforms_scalar_SRC_FILES := \
ADCE.cpp \
+ AlignmentFromAssumptions.cpp \
ConstantProp.cpp \
ConstantHoisting.cpp \
CorrelatedValuePropagation.cpp \
DCE.cpp \
DeadStoreElimination.cpp \
EarlyCSE.cpp \
+ FlattenCFGPass.cpp \
GVN.cpp \
IndVarSimplify.cpp \
JumpThreading.cpp \
@@ -23,6 +25,7 @@ transforms_scalar_SRC_FILES := \
LoopUnswitch.cpp \
LowerAtomic.cpp \
MemCpyOptimizer.cpp \
+ MergedLoadStoreMotion.cpp \
PartiallyInlineLibCalls.cpp \
Reassociate.cpp \
Reg2Mem.cpp \
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 2dcfa23..b3ee11e 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_library(LLVMScalarOpts
ADCE.cpp
+ AlignmentFromAssumptions.cpp
ConstantHoisting.cpp
ConstantProp.cpp
CorrelatedValuePropagation.cpp
@@ -22,6 +23,7 @@ add_llvm_library(LLVMScalarOpts
LoopUnswitch.cpp
LowerAtomic.cpp
MemCpyOptimizer.cpp
+ MergedLoadStoreMotion.cpp
PartiallyInlineLibCalls.cpp
Reassociate.cpp
Reg2Mem.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 763d02b..27c177a 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -91,7 +91,7 @@ struct RebasedConstantInfo {
Constant *Offset;
RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset)
- : Uses(Uses), Offset(Offset) { }
+ : Uses(std::move(Uses)), Offset(Offset) { }
};
/// \brief A base constant and all its rebased constants.
@@ -395,7 +395,7 @@ void ConstantHoisting::findAndMakeBaseConstant(ConstCandVecType::iterator S,
ConstInfo.RebasedConstants.push_back(
RebasedConstantInfo(std::move(ConstCand->Uses), Offset));
}
- ConstantVec.push_back(ConstInfo);
+ ConstantVec.push_back(std::move(ConstInfo));
}
/// \brief Finds and combines constant candidates that can be easily
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 0829462..5a3b5cf 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -73,7 +73,7 @@ bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
if (S->getType()->isVectorTy()) return false;
if (isa<Constant>(S->getOperand(0))) return false;
- Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+ Constant *C = LVI->getConstant(S->getOperand(0), S->getParent(), S);
if (!C) return false;
ConstantInt *CI = dyn_cast<ConstantInt>(C);
@@ -100,7 +100,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Value *Incoming = P->getIncomingValue(i);
if (isa<Constant>(Incoming)) continue;
- Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB);
+ Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P);
// Look if the incoming value is a select with a constant but LVI tells us
// that the incoming value can never be that constant. In that case replace
@@ -114,7 +114,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
if (!C) continue;
if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
- P->getIncomingBlock(i), BB) !=
+ P->getIncomingBlock(i), BB, P) !=
LazyValueInfo::False)
continue;
@@ -126,6 +126,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Changed = true;
}
+ // FIXME: Provide DL, TLI, DT, AT to SimplifyInstruction.
if (Value *V = SimplifyInstruction(P)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
@@ -147,7 +148,7 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
if (isa<Constant>(Pointer)) return false;
- Constant *C = LVI->getConstant(Pointer, I->getParent());
+ Constant *C = LVI->getConstant(Pointer, I->getParent(), I);
if (!C) return false;
++NumMemAccess;
@@ -173,13 +174,15 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
if (PI == PE) return false;
LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
- C->getOperand(0), Op1, *PI, C->getParent());
+ C->getOperand(0), Op1, *PI,
+ C->getParent(), C);
if (Result == LazyValueInfo::Unknown) return false;
++PI;
while (PI != PE) {
LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
- C->getOperand(0), Op1, *PI, C->getParent());
+ C->getOperand(0), Op1, *PI,
+ C->getParent(), C);
if (Res != Result) return false;
++PI;
}
@@ -229,7 +232,8 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
for (pred_iterator PI = PB; PI != PE; ++PI) {
// Is the switch condition equal to the case value?
LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
- Cond, Case, *PI, BB);
+ Cond, Case, *PI,
+ BB, SI);
// Give up on this case if nothing is known.
if (Value == LazyValueInfo::Unknown) {
State = LazyValueInfo::Unknown;
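Every LazyValueInfo query in this file now passes the instruction being transformed as a context instruction. Together with the AssumptionTracker plumbing added elsewhere in this patch, this lets LVI also use facts from @llvm.assume calls that hold at that instruction, rather than only facts valid across the whole block. A minimal sketch of the new query shape; the helper and its arguments are hypothetical, but the getConstant() signature is the one used above:

  #include "llvm/Analysis/LazyValueInfo.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Ptr stands for a pointer operand and I for the instruction being
  // rewritten, as in processMemAccess() above.
  static Constant *constantAt(LazyValueInfo &LVI, Value *Ptr, Instruction *I) {
    // The trailing context instruction lets LVI use @llvm.assume facts that
    // dominate I; omitting it limits the query to block-wide information.
    return LVI.getConstant(Ptr, I->getParent(), I);
  }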
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 3af8ee7..a1ddc00 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -356,15 +356,8 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If we don't know the sizes of either access, then we can't do a
// comparison.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize) {
- // If we have no DataLayout information around, then the size of the store
- // is inferrable from the pointee type. If they are the same type, then
- // we know that the store is safe.
- if (DL == nullptr && Later.Ptr->getType() == Earlier.Ptr->getType())
- return OverwriteComplete;
-
+ Earlier.Size == AliasAnalysis::UnknownSize)
return OverwriteUnknown;
- }
// Make sure that the Later size is >= the Earlier size.
if (Later.Size >= Earlier.Size)
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 735f5c1..cd2ecad 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,17 +16,21 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <vector>
+#include <deque>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "early-cse"
@@ -266,6 +270,7 @@ public:
const DataLayout *DL;
const TargetLibraryInfo *TLI;
DominatorTree *DT;
+ AssumptionTracker *AT;
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
@@ -378,6 +383,7 @@ private:
// This transformation requires dominator postdominator info
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
@@ -393,6 +399,7 @@ FunctionPass *llvm::createEarlyCSEPass() {
}
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
@@ -431,9 +438,18 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Skip assume intrinsics; they don't really have side effects (although
+ // they're marked as such to ensure preservation of control dependencies),
+ // and this pass will not disturb any of the assumption's control
+ // dependencies.
+ if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
+ DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
+ continue;
+ }
+
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, DL, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, DL, TLI, DT, AT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -530,7 +546,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
Changed = true;
++NumDSE;
LastStore = nullptr;
- continue;
+ // fallthrough - we can exploit information about this store
}
// Okay, we just invalidated anything we knew about loaded values. Try
@@ -556,12 +572,17 @@ bool EarlyCSE::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- std::vector<StackNode *> nodesToProcess;
+ // Note: deque is used here because there are significant performance gains
+ // over vector when the container becomes very large due to the specific access
+ // patterns. For more information see the mailing list discussion on this:
+ // http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
+ std::deque<StackNode *> nodesToProcess;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AT = &getAnalysis<AssumptionTracker>();
// Tables that the pass uses when walking the domtree.
ScopedHTType AVTable;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 106eba0..7dba4e2 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -45,6 +46,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
using namespace llvm;
@@ -590,6 +592,7 @@ namespace {
DominatorTree *DT;
const DataLayout *DL;
const TargetLibraryInfo *TLI;
+ AssumptionTracker *AT;
SetVector<BasicBlock *> DeadBlocks;
ValueTable VN;
@@ -679,6 +682,7 @@ namespace {
// This transformation requires dominator postdominator info
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
@@ -727,6 +731,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
}
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
@@ -1616,7 +1621,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
- PHITransAddr Address(LI->getPointerOperand(), DL);
+ PHITransAddr Address(LI->getPointerOperand(), DL, AT);
Value *LoadPtr = nullptr;
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
@@ -1669,9 +1674,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
LI->getAlignment(),
UnavailablePred->getTerminator());
- // Transfer the old load's TBAA tag to the new load.
- if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
- NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ // Transfer the old load's AA tags to the new load.
+ AAMDNodes Tags;
+ LI->getAAMetadata(Tags);
+ if (Tags)
+ NewLoad->setAAMetadata(Tags);
// Transfer DebugLoc.
NewLoad->setDebugLoc(LI->getDebugLoc());
@@ -1774,36 +1781,24 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
ReplOp->setHasNoUnsignedWrap(false);
}
if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
- SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
- ReplInst->getAllMetadataOtherThanDebugLoc(Metadata);
- for (int i = 0, n = Metadata.size(); i < n; ++i) {
- unsigned Kind = Metadata[i].first;
- MDNode *IMD = I->getMetadata(Kind);
- MDNode *ReplMD = Metadata[i].second;
- switch(Kind) {
- default:
- ReplInst->setMetadata(Kind, nullptr); // Remove unknown metadata
- break;
- case LLVMContext::MD_dbg:
- llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
- case LLVMContext::MD_tbaa:
- ReplInst->setMetadata(Kind, MDNode::getMostGenericTBAA(IMD, ReplMD));
- break;
- case LLVMContext::MD_range:
- ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD));
- break;
- case LLVMContext::MD_prof:
- llvm_unreachable("MD_prof in a non-terminator instruction");
- break;
- case LLVMContext::MD_fpmath:
- ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD));
- break;
- case LLVMContext::MD_invariant_load:
- // Only set the !invariant.load if it is present in both instructions.
- ReplInst->setMetadata(Kind, IMD);
- break;
- }
- }
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ };
+ combineMetadata(ReplInst, I, KnownIDs);
}
}
@@ -2219,7 +2214,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2339,6 +2334,7 @@ bool GVN::runOnFunction(Function& F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
+ AT = &getAnalysis<AssumptionTracker>();
TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -2653,8 +2649,8 @@ bool GVN::iterateOnFunction(Function &F) {
//
std::vector<BasicBlock *> BBVect;
BBVect.reserve(256);
- for (DomTreeNode *x : depth_first(DT->getRootNode()))
- BBVect.push_back(x->getBlock());
+ for (DomTreeNode *X : depth_first(DT->getRootNode()))
+ BBVect.push_back(X->getBlock());
for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
I != E; I++)
@@ -2802,7 +2798,7 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
return true;
}
-// performPRE() will trigger assert if it come across an instruciton without
+// performPRE() will trigger assert if it comes across an instruction without
// associated val-num. As it normally has far more live instructions than dead
// instructions, it makes more sense just to "fabricate" a val-number for the
// dead code than checking if instruction involved is dead or not.
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index e83a5c4..c01f57f 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -69,11 +70,12 @@ static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
namespace {
class IndVarSimplify : public LoopPass {
- LoopInfo *LI;
- ScalarEvolution *SE;
- DominatorTree *DT;
- const DataLayout *DL;
- TargetLibraryInfo *TLI;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DominatorTree *DT;
+ const DataLayout *DL;
+ TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
@@ -650,7 +652,7 @@ namespace {
struct WideIVInfo {
PHINode *NarrowIV;
Type *WidestNativeType; // Widest integer type created [sz]ext
- bool IsSigned; // Was an sext user seen before a zext?
+ bool IsSigned; // Was a sext user seen before a zext?
WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
IsSigned(false) {}
@@ -661,7 +663,7 @@ namespace {
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
- const DataLayout *DL) {
+ const DataLayout *DL, const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
return;
@@ -671,6 +673,19 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
if (DL && !DL->isLegalInteger(Width))
return;
+ // Cast is either an sext or zext up to this point.
+ // We should not widen an indvar if arithmetic on the wider indvar is more
+ // expensive than on the narrower indvar. We check only the cost of ADD
+ // because at least an ADD is required to increment the induction variable. We
+ // could compute more comprehensively the cost of all instructions on the
+ // induction variable when necessary.
+ if (TTI &&
+ TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
+ TTI->getArithmeticInstrCost(Instruction::Add,
+ Cast->getOperand(0)->getType())) {
+ return;
+ }
+
if (!WI.WidestNativeType) {
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
WI.IsSigned = IsSigned;
@@ -757,8 +772,13 @@ protected:
const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
+ const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ unsigned OpCode) const;
+
Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
+ bool WidenLoopCompare(NarrowIVDefUse DU);
+
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
@@ -833,18 +853,35 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
}
}
+const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
+ unsigned OpCode) const {
+ if (OpCode == Instruction::Add)
+ return SE->getAddExpr(LHS, RHS);
+ if (OpCode == Instruction::Sub)
+ return SE->getMinusSCEV(LHS, RHS);
+ if (OpCode == Instruction::Mul)
+ return SE->getMulExpr(LHS, RHS);
+
+ llvm_unreachable("Unsupported opcode.");
+}
+
/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
+
// Handle the common case of add<nsw/nuw>
- if (DU.NarrowUse->getOpcode() != Instruction::Add)
+ const unsigned OpCode = DU.NarrowUse->getOpcode();
+ // Only Add/Sub/Mul instructions are supported so far.
+ if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
+ OpCode != Instruction::Mul)
return nullptr;
// One operand (NarrowDef) has already been extended to WideDef. Now determine
// if extending the other will lead to a recurrence.
- unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
+ const unsigned ExtendOperIdx =
+ DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
const SCEV *ExtendOperExpr = nullptr;
@@ -859,13 +896,20 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
else
return nullptr;
- // When creating this AddExpr, don't apply the current operations NSW or NUW
+ // When creating this SCEV expr, don't apply the current operation's NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap
// behavior depends on. Non-control-equivalent instructions can be mapped to
// the same SCEV expression, and it would be incorrect to transfer NSW/NUW
// semantics to those operations.
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
- SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
+ const SCEV *lhs = SE->getSCEV(DU.WideDef);
+ const SCEV *rhs = ExtendOperExpr;
+
+ // Swap the operands back into their original order for non-commutative
+ // operations, like SUB. See PR21014.
+ if (ExtendOperIdx == 0)
+ std::swap(lhs, rhs);
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(GetSCEVByOpCode(lhs, rhs, OpCode));
if (!AddRec || AddRec->getLoop() != L)
return nullptr;
@@ -908,6 +952,35 @@ static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
+/// If the narrow use is a compare instruction, then widen the compare
+/// (and possibly the other operand). The extend operation is hoisted into the
+/// loop preheader as far as possible.
+bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
+ if (!Cmp)
+ return false;
+
+ // Sign of IV user and compare must match.
+ if (IsSigned != CmpInst::isSigned(Cmp->getPredicate()))
+ return false;
+
+ Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
+ unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
+ unsigned IVWidth = SE->getTypeSizeInBits(WideType);
+ assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
+
+ // Widen the compare instruction.
+ IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
+
+ // Widen the other operand of the compare, if necessary.
+ if (CastWidth < IVWidth) {
+ Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
+ DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
+ }
+ return true;
+}
+
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
@@ -975,10 +1048,15 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Does this user itself evaluate to a recurrence after widening?
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
+ if (!WideAddRec)
+ WideAddRec = GetExtendedOperandRecurrence(DU);
+
if (!WideAddRec) {
- WideAddRec = GetExtendedOperandRecurrence(DU);
- }
- if (!WideAddRec) {
+ // If the use is a loop condition, try to promote the condition instead of
+ // truncating the IV first.
+ if (WidenLoopCompare(DU))
+ return nullptr;
+
// This user does not evaluate to a recurence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
@@ -1024,7 +1102,7 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
Instruction *NarrowUser = cast<Instruction>(U);
// Handle data flow merges and bizarre phi cycles.
- if (!Widened.insert(NarrowUser))
+ if (!Widened.insert(NarrowUser).second)
continue;
NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
@@ -1124,14 +1202,16 @@ namespace {
class IndVarSimplifyVisitor : public IVVisitor {
ScalarEvolution *SE;
const DataLayout *DL;
+ const TargetTransformInfo *TTI;
PHINode *IVPhi;
public:
WideIVInfo WI;
IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
- const DataLayout *DL, const DominatorTree *DTree):
- SE(SCEV), DL(DL), IVPhi(IV) {
+ const DataLayout *DL, const TargetTransformInfo *TTI,
+ const DominatorTree *DTree)
+ : SE(SCEV), DL(DL), TTI(TTI), IVPhi(IV) {
DT = DTree;
WI.NarrowIV = IVPhi;
if (ReduceLiveIVs)
@@ -1139,7 +1219,9 @@ namespace {
}
// Implement the interface used by simplifyUsersOfIV.
- void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, DL); }
+ void visitCast(CastInst *Cast) override {
+ visitIVCast(Cast, WI, SE, DL, TTI);
+ }
};
}
@@ -1173,7 +1255,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, DT);
+ IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, TTI, DT);
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
@@ -1200,9 +1282,9 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
- SmallPtrSet<const SCEV*, 8> &Processed,
+ SmallPtrSetImpl<const SCEV*> &Processed,
ScalarEvolution *SE) {
- if (!Processed.insert(S))
+ if (!Processed.insert(S).second)
return false;
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
@@ -1373,7 +1455,7 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
-static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited,
+static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
unsigned Depth) {
if (isa<Constant>(V))
return !isa<UndefValue>(V);
@@ -1393,7 +1475,7 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited,
// Optimistically handle other instructions.
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
- if (!Visited.insert(*OI))
+ if (!Visited.insert(*OI).second)
continue;
if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
return false;
@@ -1623,15 +1705,51 @@ LinearFunctionTestReplace(Loop *L,
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
if (L->getExitingBlock() == L->getLoopLatch()) {
- // Add one to the "backedge-taken" count to get the trip count.
- // This addition may overflow, which is valid as long as the comparison is
- // truncated to BackedgeTakenCount->getType().
- IVCount = SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ llvm::Value *IncrementedIndvar =
+ IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ const auto *IncrementedIndvarSCEV =
+ cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar));
+ // It is unsafe to use the incremented indvar if it has a wrapping flag, as we
+ // don't want to compare against a poison value. Check the SCEV that
+ // corresponds to the incremented indvar; the SCEVExpander will only insert
+ // flags in the IR if the SCEV originally had wrapping flags.
+ // FIXME: In theory, SCEV could drop flags even though they exist in IR.
+ // A more robust solution would involve getting a new expression for
+ // CmpIndVar by applying non-NSW/NUW AddExprs.
+ auto WrappingFlags =
+ ScalarEvolution::setFlags(SCEV::FlagNUW, SCEV::FlagNSW);
+ const SCEV *IVInit = IncrementedIndvarSCEV->getStart();
+ if (SE->getTypeSizeInBits(IVInit->getType()) >
+ SE->getTypeSizeInBits(IVCount->getType()))
+ IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+ unsigned BitWidth = SE->getTypeSizeInBits(IVCount->getType());
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth + 1);
+ // Check if IVInit + BECount + 1 requires sign/zero extension.
+ // If not, clear the corresponding flag from WrappingFlags because it is not
+ // needed in the IncrementedIndvarSCEV expression.
+ if (SE->getSignExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
+ WideTy) ==
+ SE->getAddExpr(SE->getSignExtendExpr(IVInit, WideTy),
+ SE->getSignExtendExpr(BackedgeTakenCount, WideTy)))
+ WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNSW);
+ if (SE->getZeroExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
+ WideTy) ==
+ SE->getAddExpr(SE->getZeroExtendExpr(IVInit, WideTy),
+ SE->getZeroExtendExpr(BackedgeTakenCount, WideTy)))
+ WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNUW);
+ if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(),
+ WrappingFlags)) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // This addition may overflow, which is valid as long as the comparison is
+ // truncated to BackedgeTakenCount->getType().
+ IVCount =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
+ CmpIndVar = IncrementedIndvar;
+ }
}
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
@@ -1817,6 +1935,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
DeadInsts.clear();
Changed = false;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 6e50d33..60a4925 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -44,7 +45,7 @@ STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
static cl::opt<unsigned>
-Threshold("jump-threading-threshold",
+BBDuplicateThreshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
@@ -87,6 +88,8 @@ namespace {
#endif
DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+ unsigned BBDupThreshold;
+
// RAII helper for updating the recursion stack.
struct RecursionSetRemover {
DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
@@ -102,7 +105,8 @@ namespace {
};
public:
static char ID; // Pass identification
- JumpThreading() : FunctionPass(ID) {
+ JumpThreading(int T = -1) : FunctionPass(ID) {
+ BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
@@ -123,9 +127,11 @@ namespace {
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
PredValueInfo &Result,
- ConstantPreference Preference);
+ ConstantPreference Preference,
+ Instruction *CxtI = nullptr);
bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
- ConstantPreference Preference);
+ ConstantPreference Preference,
+ Instruction *CxtI = nullptr);
bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
@@ -144,7 +150,7 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
// Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
+FunctionPass *llvm::createJumpThreadingPass(int Threshold) { return new JumpThreading(Threshold); }
/// runOnFunction - Top level algorithm.
///
@@ -339,7 +345,8 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
///
bool JumpThreading::
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
- ConstantPreference Preference) {
+ ConstantPreference Preference,
+ Instruction *CxtI) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
@@ -381,7 +388,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
BasicBlock *P = *PI;
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
- Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
if (Constant *KC = getKnownConstant(PredCst, Preference))
Result.push_back(std::make_pair(KC, P));
}
@@ -397,7 +404,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
} else {
Constant *CI = LVI->getConstantOnEdge(InVal,
- PN->getIncomingBlock(i), BB);
+ PN->getIncomingBlock(i),
+ BB, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference))
Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
}
@@ -416,9 +424,9 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
if (I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And) {
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
- WantInteger);
+ WantInteger, CxtI);
ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
- WantInteger);
+ WantInteger, CxtI);
if (LHSVals.empty() && RHSVals.empty())
return false;
@@ -459,7 +467,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
- WantInteger);
+ WantInteger, CxtI);
if (Result.empty())
return false;
@@ -477,7 +485,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
PredValueInfoTy LHSVals;
ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
- WantInteger);
+ WantInteger, CxtI);
// Try to use constant folding to simplify the binary operator.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
@@ -511,7 +519,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
LazyValueInfo::Tristate
ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
- cast<Constant>(RHS), PredBB, BB);
+ cast<Constant>(RHS), PredBB, BB,
+ CxtI ? CxtI : Cmp);
if (ResT == LazyValueInfo::Unknown)
continue;
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
@@ -524,7 +533,6 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
return !Result.empty();
}
-
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
@@ -538,7 +546,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate Res =
LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
- RHSCst, P, BB);
+ RHSCst, P, BB, CxtI ? CxtI : Cmp);
if (Res == LazyValueInfo::Unknown)
continue;
@@ -554,7 +562,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
PredValueInfoTy LHSVals;
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
- WantInteger);
+ WantInteger, CxtI);
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
Constant *V = LHSVals[i].first;
@@ -577,7 +585,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
PredValueInfoTy Conds;
if ((TrueVal || FalseVal) &&
ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
- WantInteger)) {
+ WantInteger, CxtI)) {
for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
Constant *Cond = Conds[i].first;
@@ -604,7 +612,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
}
// If all else fails, see if LVI can figure out a constant value for us.
- Constant *CI = LVI->getConstant(V, BB);
+ Constant *CI = LVI->getConstant(V, BB, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference)) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Result.push_back(std::make_pair(KC, *PI));
@@ -669,14 +677,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
- // Remember if SinglePred was the entry block of the function. If so, we
- // will need to move BB back to the entry position.
- bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
- if (isEntry && BB != &BB->getParent()->getEntryBlock())
- BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
}
@@ -749,7 +752,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// All the rest of our checks depend on the condition being an instruction.
if (!CondInst) {
// FIXME: Unify this with code below.
- if (ProcessThreadableEdges(Condition, BB, Preference))
+ if (ProcessThreadableEdges(Condition, BB, Preference, Terminator))
return true;
return false;
}
@@ -771,13 +774,14 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// FIXME: We could handle mixed true/false by duplicating code.
LazyValueInfo::Tristate Baseline =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
- CondConst, *PI, BB);
+ CondConst, *PI, BB, CondCmp);
if (Baseline != LazyValueInfo::Unknown) {
// Check that all remaining incoming values match the first one.
while (++PI != PE) {
LazyValueInfo::Tristate Ret =
LVI->getPredicateOnEdge(CondCmp->getPredicate(),
- CondCmp->getOperand(0), CondConst, *PI, BB);
+ CondCmp->getOperand(0), CondConst, *PI, BB,
+ CondCmp);
if (Ret != Baseline) break;
}
@@ -792,6 +796,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
+ } else if (CondBr && CondConst && CondBr->isConditional()) {
+ // There might be an invariant in the same block as the conditional
+ // that can determine the predicate.
+
+ LazyValueInfo::Tristate Ret =
+ LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, CondCmp);
+ if (Ret != LazyValueInfo::Unknown) {
+ unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
+ CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
}
if (CondBr && CondConst && TryToUnfoldSelect(CondCmp, BB))
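The new else-if handles the case where the branch condition is already decided at the branch itself, typically because an @llvm.assume earlier in the same block pins it down: the dead successor loses its incoming edge and the conditional branch is rewritten as an unconditional one. A hedged C-level illustration of the kind of source this can simplify (clang lowers __builtin_assume to @llvm.assume; the function names are illustrative):

  void use(int);

  void example(int *p) {
    __builtin_assume(p != 0);   // becomes @llvm.assume in the IR
    if (p != 0)                 // getPredicateAt() can evaluate this to True
      use(1);                   // kept; the branch becomes an unconditional jump
    else
      use(0);                   // the edge into this block is removed
  }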
@@ -819,7 +838,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
//
- if (ProcessThreadableEdges(CondInst, BB, Preference))
+ if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator))
return true;
// If this is an otherwise-unfoldable branch on a phi node in the current
@@ -882,6 +901,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+ if (AvailableVal->getType() != LI->getType())
+ AvailableVal = CastInst::Create(CastInst::BitCast, AvailableVal,
+ LI->getType(), "", LI);
LI->replaceAllUsesWith(AvailableVal);
LI->eraseFromParent();
return true;
@@ -893,9 +915,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (BBIt != LoadBB->begin())
return false;
- // If all of the loads and stores that feed the value have the same TBAA tag,
- // then we can propagate it onto any newly inserted loads.
- MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
+ // If all of the loads and stores that feed the value have the same AA tags,
+ // then we can propagate them onto any newly inserted loads.
+ AAMDNodes AATags;
+ LI->getAAMetadata(AATags);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
@@ -909,21 +932,21 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
BasicBlock *PredBB = *PI;
// If we already scanned this predecessor, skip it.
- if (!PredsScanned.insert(PredBB))
+ if (!PredsScanned.insert(PredBB).second)
continue;
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- MDNode *ThisTBAATag = nullptr;
+ AAMDNodes ThisAATags;
Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
- nullptr, &ThisTBAATag);
+ nullptr, &ThisAATags);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
- // If tbaa tags disagree or are not present, forget about them.
- if (TBAATag != ThisTBAATag) TBAATag = nullptr;
+ // If AA tags disagree or are not present, forget about them.
+ if (AATags != ThisAATags) AATags = AAMDNodes();
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -983,8 +1006,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
LI->getAlignment(),
UnavailablePred->getTerminator());
NewVal->setDebugLoc(LI->getDebugLoc());
- if (TBAATag)
- NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ if (AATags)
+ NewVal->setAAMetadata(AATags);
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
@@ -1011,7 +1034,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
- PN->addIncoming(I->second, I->first);
+ // If we have an available predecessor but it requires casting, insert the
+ // cast in the predecessor and use the cast. Note that we have to update the
+ // AvailablePreds vector as we go so that all of the PHI entries for this
+ // predecessor use the same bitcast.
+ Value *&PredV = I->second;
+ if (PredV->getType() != LI->getType())
+ PredV = CastInst::Create(CastInst::BitCast, PredV, LI->getType(), "",
+ P->getTerminator());
+
+ PN->addIncoming(PredV, I->first);
}
//cerr << "PRE: " << *LI << *PN << "\n";
@@ -1086,14 +1118,15 @@ FindMostPopularDest(BasicBlock *BB,
}
bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
- ConstantPreference Preference) {
+ ConstantPreference Preference,
+ Instruction *CxtI) {
// If threading this would thread across a loop header, don't even try to
// thread the edge.
if (LoopHeaders.count(BB))
return false;
PredValueInfoTy PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
return false;
assert(!PredValues.empty() &&
@@ -1118,7 +1151,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
BasicBlock *Pred = PredValues[i].second;
- if (!SeenPreds.insert(Pred))
+ if (!SeenPreds.insert(Pred).second)
continue; // Duplicate predecessor entry.
// If the predecessor ends with an indirect goto, we can't change its
@@ -1258,10 +1291,10 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
PredValueInfoTy XorOpValues;
bool isLHS = true;
if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
- WantInteger)) {
+ WantInteger, BO)) {
assert(XorOpValues.empty());
if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
- WantInteger))
+ WantInteger, BO))
return false;
isLHS = false;
}
@@ -1371,8 +1404,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
return false;
}
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold);
- if (JumpThreadCost > Threshold) {
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
+ if (JumpThreadCost > BBDupThreshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
return false;
@@ -1514,8 +1547,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
return false;
}
- unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold);
- if (DuplicationCost > Threshold) {
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
+ if (DuplicationCost > BBDupThreshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
return false;
@@ -1677,10 +1710,10 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
// cases will be threaded in any case.
LazyValueInfo::Tristate LHSFolds =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
- CondRHS, Pred, BB);
+ CondRHS, Pred, BB, CondCmp);
LazyValueInfo::Tristate RHSFolds =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
- CondRHS, Pred, BB);
+ CondRHS, Pred, BB, CondCmp);
if ((LHSFolds != LazyValueInfo::Unknown ||
RHSFolds != LazyValueInfo::Unknown) &&
LHSFolds != RHSFolds) {
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index abcceb2..5f00bb9 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -130,6 +130,9 @@ namespace {
/// set.
void deleteAnalysisValue(Value *V, Loop *L) override;
+ /// Simple Analysis hook. Delete loop L from alias set map.
+ void deleteAnalysisLoop(Loop *L) override;
+
/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in
/// reverse depth first order w.r.t the DominatorTree. This allows us to
@@ -180,9 +183,9 @@ namespace {
/// store into the memory location pointed to by V.
///
bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
- const MDNode *TBAAInfo) {
+ const AAMDNodes &AAInfo) {
// Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
+ return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
}
bool canSinkOrHoistInst(Instruction &I);
@@ -441,15 +444,18 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
- if (LI->getMetadata("invariant.load"))
+ if (LI->getMetadata(LLVMContext::MD_invariant_load))
return true;
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
Size = AA->getTypeStoreSize(LI->getType());
- return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
- LI->getMetadata(LLVMContext::MD_tbaa));
+
+ AAMDNodes AAInfo;
+ LI->getAAMetadata(AAInfo);
+
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo);
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
// Don't sink or hoist dbg info; it's legal, but not useful.
if (isa<DbgInfoIntrinsic>(I))
@@ -594,8 +600,13 @@ void LICM::sink(Instruction &I) {
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
while (!I.use_empty()) {
+ Instruction *User = I.user_back();
+ if (!DT->isReachableFromEntry(User->getParent())) {
+ User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
+ continue;
+ }
// The user must be a PHI node.
- PHINode *PN = cast<PHINode>(I.user_back());
+ PHINode *PN = cast<PHINode>(User);
BasicBlock *ExitBlock = PN->getParent();
assert(ExitBlockSet.count(ExitBlock) &&
@@ -682,7 +693,7 @@ bool LICM::isGuaranteedToExecute(Instruction &Inst) {
namespace {
class LoopPromoter : public LoadAndStorePromoter {
Value *SomePtr; // Designated pointer to store to.
- SmallPtrSet<Value*, 4> &PointerMustAliases;
+ SmallPtrSetImpl<Value*> &PointerMustAliases;
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
SmallVectorImpl<Instruction*> &LoopInsertPts;
PredIteratorCache &PredCache;
@@ -690,7 +701,7 @@ namespace {
LoopInfo &LI;
DebugLoc DL;
int Alignment;
- MDNode *TBAATag;
+ AAMDNodes AATags;
Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -710,14 +721,14 @@ namespace {
public:
LoopPromoter(Value *SP, const SmallVectorImpl<Instruction *> &Insts,
- SSAUpdater &S, SmallPtrSet<Value *, 4> &PMA,
+ SSAUpdater &S, SmallPtrSetImpl<Value *> &PMA,
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- MDNode *TBAATag)
+ const AAMDNodes &AATags)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
- LI(li), DL(dl), Alignment(alignment), TBAATag(TBAATag) {}
+ LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const override {
@@ -743,7 +754,7 @@ namespace {
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
- if (TBAATag) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ if (AATags) NewSI->setAAMetadata(AATags);
}
}
@@ -798,11 +809,11 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
- MDNode *TBAATag = nullptr;
+ AAMDNodes AATags;
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
- // different sizes. While we are at it, collect alignment and TBAA info.
+ // different sizes. While we are at it, collect alignment and AA info.
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
Value *ASIV = ASI->getValue();
PointerMustAliases.insert(ASIV);
@@ -855,13 +866,12 @@ void LICM::PromoteAliasSet(AliasSet &AS,
} else
return; // Not a load or store.
- // Merge the TBAA tags.
+ // Merge the AA tags.
if (LoopUses.empty()) {
- // On the first load/store, just take its TBAA tag.
- TBAATag = UI->getMetadata(LLVMContext::MD_tbaa);
- } else if (TBAATag) {
- TBAATag = MDNode::getMostGenericTBAA(TBAATag,
- UI->getMetadata(LLVMContext::MD_tbaa));
+ // On the first load/store, just take its AA tags.
+ UI->getAAMetadata(AATags);
+ } else if (AATags) {
+ UI->getAAMetadata(AATags, /* Merge = */ true);
}
LoopUses.push_back(UI);
@@ -896,7 +906,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, PIC, *CurAST, *LI, DL, Alignment, TBAATag);
+ InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
@@ -905,7 +915,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
Preheader->getTerminator());
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
- if (TBAATag) PreheaderLoad->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ if (AATags) PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
// Rewrite all the loads in the loop and remember all the definitions from
@@ -936,3 +946,13 @@ void LICM::deleteAnalysisValue(Value *V, Loop *L) {
AST->deleteValue(V);
}
+
+/// Simple Analysis hook. Delete loop L from alias set map.
+void LICM::deleteAnalysisLoop(Loop *L) {
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
+ if (!AST)
+ return;
+
+ delete AST;
+ LoopToAliasSetMap.erase(L);
+}
diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt
index 1f6df7d..2bb49a3 100644
--- a/lib/Transforms/Scalar/LLVMBuild.txt
+++ b/lib/Transforms/Scalar/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = Scalar
parent = Transforms
library_name = ScalarOpts
-required_libraries = Analysis Core IPA InstCombine Support Target TransformUtils
+required_libraries = Analysis Core InstCombine ProfileData Support Target TransformUtils
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 846aa70..11e4d76 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -15,6 +15,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Pass.h"
#include "llvm/IR/DataLayout.h"
@@ -51,13 +53,16 @@ struct LoadPOPPair {
class LoadCombine : public BasicBlockPass {
LLVMContext *C;
const DataLayout *DL;
+ AliasAnalysis *AA;
public:
LoadCombine()
: BasicBlockPass(ID),
- C(nullptr), DL(nullptr) {
+ C(nullptr), DL(nullptr), AA(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
+
+ using llvm::Pass::doInitialization;
bool doInitialization(Function &) override;
bool runOnBasicBlock(BasicBlock &BB) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -223,19 +228,23 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
if (skipOptnoneFunction(BB) || !DL)
return false;
+ AA = &getAnalysis<AliasAnalysis>();
+
IRBuilder<true, TargetFolder>
TheBuilder(BB.getContext(), TargetFolder(DL));
Builder = &TheBuilder;
DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
+ AliasSetTracker AST(*AA);
bool Combined = false;
unsigned Index = 0;
for (auto &I : BB) {
- if (I.mayWriteToMemory() || I.mayThrow()) {
+ if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
if (combineLoads(LoadMap))
Combined = true;
LoadMap.clear();
+ AST.clear();
continue;
}
LoadInst *LI = dyn_cast<LoadInst>(&I);
@@ -248,6 +257,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
if (!POP.Pointer)
continue;
LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
+ AST.add(LI);
}
if (combineLoads(LoadMap))
Combined = true;
@@ -256,6 +266,9 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
}
char LoadCombine::ID = 0;
@@ -264,5 +277,9 @@ BasicBlockPass *llvm::createLoadCombinePass() {
return new LoadCombine();
}
-INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false,
- false)
+INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads",
+ false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads",
+ false, false)
+
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 5ab686a..1d1f33a 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -239,9 +239,8 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo &loopInfo = getAnalysis<LoopInfo>();
SmallPtrSet<BasicBlock*, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
- for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
- E = blocks.end(); I != E; ++I)
- loopInfo.removeBlock(*I);
+ for (BasicBlock *BB : blocks)
+ loopInfo.removeBlock(BB);
// The last step is to inform the loop pass manager that we've
// eliminated this loop.
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index ab1a939..8fd7c8f 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -41,6 +42,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
@@ -54,6 +56,7 @@ namespace {
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -76,6 +79,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -116,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, DL, TLI, DT);
+ Value *V = SimplifyInstruction(I, DL, TLI, DT, AT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
@@ -148,7 +152,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
++SI) {
BasicBlock *SuccBB = *SI;
- if (!Visited.insert(SuccBB))
+ if (!Visited.insert(SuccBB).second)
continue;
const Loop *SuccLoop = LI->getLoopFor(SuccBB);
@@ -161,7 +165,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
BasicBlock *ExitBB = SubLoopExitBlocks[i];
- if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+ if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB).second)
VisitStack.push_back(WorklistItem(ExitBB, false));
}
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index b6fbb16..8f12204 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -215,9 +215,7 @@ protected:
typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
// Add a new possible reduction.
- void addSLR(SimpleLoopReduction &SLR) {
- PossibleReds.push_back(SLR);
- }
+ void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
// Setup to track possible reductions corresponding to the provided
// rerolling scale. Only reductions with a number of non-PHI instructions
@@ -225,7 +223,8 @@ protected:
// are filled in:
// - A set of all possible instructions in eligible reductions.
// - A set of all PHIs in eligible reductions
- // - A set of all reduced values (last instructions) in eligible reductions.
+ // - A set of all reduced values (last instructions) in eligible
+ // reductions.
void restrictToScale(uint64_t Scale,
SmallInstructionSet &PossibleRedSet,
SmallInstructionSet &PossibleRedPHISet,
@@ -238,13 +237,12 @@ protected:
if (PossibleReds[i].size() % Scale == 0) {
PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
PossibleRedPHISet.insert(PossibleReds[i].getPHI());
-
+
PossibleRedSet.insert(PossibleReds[i].getPHI());
PossibleRedIdx[PossibleReds[i].getPHI()] = i;
- for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
- JE = PossibleReds[i].end(); J != JE; ++J) {
- PossibleRedSet.insert(*J);
- PossibleRedIdx[*J] = i;
+ for (Instruction *J : PossibleReds[i]) {
+ PossibleRedSet.insert(J);
+ PossibleRedIdx[J] = i;
}
}
}
@@ -487,7 +485,7 @@ void LoopReroll::collectInLoopUserSet(Loop *L,
if (PN->getIncomingBlock(U) == L->getHeader())
continue;
}
-
+
if (L->contains(User) && !Exclude.count(User)) {
Queue.push_back(User);
}
@@ -659,16 +657,15 @@ bool LoopReroll::ReductionTracker::validateSelected() {
RI != RIE; ++RI) {
int i = *RI;
int PrevIter = 0, BaseCount = 0, Count = 0;
- for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
- JE = PossibleReds[i].end(); J != JE; ++J) {
- // Note that all instructions in the chain must have been found because
- // all instructions in the function must have been assigned to some
- // iteration.
- int Iter = PossibleRedIter[*J];
+ for (Instruction *J : PossibleReds[i]) {
+ // Note that all instructions in the chain must have been found because
+ // all instructions in the function must have been assigned to some
+ // iteration.
+ int Iter = PossibleRedIter[J];
if (Iter != PrevIter && Iter != PrevIter + 1 &&
!PossibleReds[i].getReducedValue()->isAssociative()) {
DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
- *J << "\n");
+ J << "\n");
return false;
}
@@ -881,7 +878,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
// needed because otherwise isSafeToSpeculativelyExecute returns
// false on PHI nodes.
if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
- FutureSideEffects = true;
+ FutureSideEffects = true;
}
++J2;
@@ -952,9 +949,9 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
Value *Op2 = J2->getOperand(j);
- // If this is part of a reduction (and the operation is not
- // associatve), then we match all operands, but not those that are
- // part of the reduction.
+ // If this is part of a reduction (and the operation is not
+ // associative), then we match all operands, but not those that are
+ // part of the reduction.
if (InReduction)
if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
if (Reductions.isPairInSame(J2, Op2I))
@@ -968,11 +965,11 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
Op2 = IV;
if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
- // If we've not already decided to swap the matched operands, and
- // we've not already matched our first operand (note that we could
- // have skipped matching the first operand because it is part of a
- // reduction above), and the instruction is commutative, then try
- // the swapped match.
+ // If we've not already decided to swap the matched operands, and
+ // we've not already matched our first operand (note that we could
+ // have skipped matching the first operand because it is part of a
+ // reduction above), and the instruction is commutative, then try
+ // the swapped match.
if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
J1->getOperand(!j) == Op2) {
Swapped = true;
@@ -1069,7 +1066,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
continue;
}
- ++J;
+ ++J;
}
// Insert the new induction variable.
@@ -1110,9 +1107,9 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
Preheader->getTerminator());
}
-
- Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
- "exitcond");
+
+ Value *Cond =
+ new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond");
BI->setCondition(Cond);
if (BI->getSuccessor(1) != Header)
@@ -1182,4 +1179,3 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
-
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 2ce5831..afd2eca 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
@@ -53,6 +54,7 @@ namespace {
// LCSSA form makes instruction renaming easier.
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
@@ -72,12 +74,14 @@ namespace {
unsigned MaxHeaderSize;
LoopInfo *LI;
const TargetTransformInfo *TTI;
+ AssumptionTracker *AT;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -98,6 +102,7 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
+ AT = &getAnalysis<AssumptionTracker>();
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
@@ -184,13 +189,18 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
}
}
-/// Determine whether the instructions in this range my be safely and cheaply
+/// Determine whether the instructions in this range may be safely and cheaply
/// speculated. This is not an important enough situation to develop complex
/// heuristics. We handle a single arithmetic instruction along with any type
/// conversions.
static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
- BasicBlock::iterator End) {
+ BasicBlock::iterator End, Loop *L) {
bool seenIncrement = false;
+ bool MultiExitLoop = false;
+
+ if (!L->getExitingBlock())
+ MultiExitLoop = true;
+
for (BasicBlock::iterator I = Begin; I != End; ++I) {
if (!isSafeToSpeculativelyExecute(I))
@@ -214,11 +224,33 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
case Instruction::Xor:
case Instruction::Shl:
case Instruction::LShr:
- case Instruction::AShr:
+ case Instruction::AShr: {
+ Value *IVOpnd = nullptr;
+ if (isa<ConstantInt>(I->getOperand(0)))
+ IVOpnd = I->getOperand(1);
+
+ if (isa<ConstantInt>(I->getOperand(1))) {
+ if (IVOpnd)
+ return false;
+
+ IVOpnd = I->getOperand(0);
+ }
+
+ // If increment operand is used outside of the loop, this speculation
+ // could cause extra live range interference.
+ if (MultiExitLoop && IVOpnd) {
+ for (User *UseI : IVOpnd->users()) {
+ auto *UserInst = cast<Instruction>(UseI);
+ if (!L->contains(UserInst))
+ return false;
+ }
+ }
+
if (seenIncrement)
return false;
seenIncrement = true;
break;
+ }
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -232,7 +264,7 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
/// Fold the loop tail into the loop exit by speculating the loop tail
/// instructions. Typically, this is a single post-increment. In the case of a
/// simple 2-block loop, hoisting the increment can be much better than
-/// duplicating the entire loop header. In the cast of loops with early exits,
+/// duplicating the entire loop header. In the case of loops with early exits,
/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
/// canonical form so downstream passes can handle it.
///
@@ -254,7 +286,7 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
if (!BI)
return false;
- if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp, L))
return false;
DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
@@ -323,8 +355,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Check size of original header and reject loop if it is very big or we can't
// duplicate blocks inside it.
{
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader, *TTI);
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
<< " instructions: "; L->dump());
@@ -406,6 +441,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifiable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
+ // FIXME: Provide DL, TLI, DT, AT to SimplifyInstruction.
Value *V = SimplifyInstruction(C);
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 914b56a..7b60373 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -744,7 +744,7 @@ static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
/// obvious multiple of the UDivExpr.
static bool isHighCostExpansion(const SCEV *S,
- SmallPtrSet<const SCEV*, 8> &Processed,
+ SmallPtrSetImpl<const SCEV*> &Processed,
ScalarEvolution &SE) {
// Zero/One operand expressions
switch (S->getSCEVType()) {
@@ -762,7 +762,7 @@ static bool isHighCostExpansion(const SCEV *S,
Processed, SE);
}
- if (!Processed.insert(S))
+ if (!Processed.insert(S).second)
return false;
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
@@ -892,34 +892,34 @@ public:
void RateFormula(const TargetTransformInfo &TTI,
const Formula &F,
- SmallPtrSet<const SCEV *, 16> &Regs,
+ SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
- SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr);
+ SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
void print(raw_ostream &OS) const;
void dump() const;
private:
void RateRegister(const SCEV *Reg,
- SmallPtrSet<const SCEV *, 16> &Regs,
+ SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT);
void RatePrimaryRegister(const SCEV *Reg,
- SmallPtrSet<const SCEV *, 16> &Regs,
+ SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
- SmallPtrSet<const SCEV *, 16> *LoserRegs);
+ SmallPtrSetImpl<const SCEV *> *LoserRegs);
};
}
/// RateRegister - Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
- SmallPtrSet<const SCEV *, 16> &Regs,
+ SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
@@ -967,15 +967,15 @@ void Cost::RateRegister(const SCEV *Reg,
/// before, rate it. Optional LoserRegs provides a way to declare any formula
/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
- SmallPtrSet<const SCEV *, 16> &Regs,
+ SmallPtrSetImpl<const SCEV *> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
- SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ SmallPtrSetImpl<const SCEV *> *LoserRegs) {
if (LoserRegs && LoserRegs->count(Reg)) {
Lose();
return;
}
- if (Regs.insert(Reg)) {
+ if (Regs.insert(Reg).second) {
RateRegister(Reg, Regs, L, SE, DT);
if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
@@ -984,13 +984,13 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
void Cost::RateFormula(const TargetTransformInfo &TTI,
const Formula &F,
- SmallPtrSet<const SCEV *, 16> &Regs,
+ SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
- SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ SmallPtrSetImpl<const SCEV *> *LoserRegs) {
assert(F.isCanonical() && "Cost is accurate only for canonical formula");
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
@@ -1337,10 +1337,9 @@ void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
}
// Update the RegTracker.
- for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
- E = OldRegs.end(); I != E; ++I)
- if (!Regs.count(*I))
- RegUses.DropRegister(*I, LUIdx);
+ for (const SCEV *S : OldRegs)
+ if (!Regs.count(S))
+ RegUses.DropRegister(S, LUIdx);
}
void LSRUse::print(raw_ostream &OS) const {
@@ -2226,13 +2225,12 @@ LSRInstance::OptimizeLoopTermCond() {
// must dominate all the post-inc comparisons we just set up, and it must
// dominate the loop latch edge.
IVIncInsertPos = L->getLoopLatch()->getTerminator();
- for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
- E = PostIncs.end(); I != E; ++I) {
+ for (Instruction *Inst : PostIncs) {
BasicBlock *BB =
DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
- (*I)->getParent());
- if (BB == (*I)->getParent())
- IVIncInsertPos = *I;
+ Inst->getParent());
+ if (BB == Inst->getParent())
+ IVIncInsertPos = Inst;
else if (BB != IVIncInsertPos->getParent())
IVIncInsertPos = BB->getTerminator();
}
@@ -2557,7 +2555,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
///
/// TODO: Consider IVInc free if it's already used in other chains.
static bool
-isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
+isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
ScalarEvolution &SE, const TargetTransformInfo &TTI) {
if (StressIVChain)
return true;
@@ -2567,9 +2565,8 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
if (!Users.empty()) {
DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
- for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
- E = Users.end(); I != E; ++I) {
- dbgs() << " " << **I << "\n";
+ for (Instruction *Inst : Users) {
+ dbgs() << " " << *Inst << "\n";
});
return false;
}
@@ -2805,7 +2802,7 @@ void LSRInstance::CollectChains() {
User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
- if (UniqueOperands.insert(IVOpInst))
+ if (UniqueOperands.insert(IVOpInst).second)
ChainInstruction(I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
@@ -3119,11 +3116,15 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
- SmallPtrSet<const SCEV *, 8> Inserted;
+ SmallPtrSet<const SCEV *, 32> Visited;
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
+ // Don't process the same SCEV twice
+ if (!Visited.insert(S).second)
+ continue;
+
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
@@ -3132,7 +3133,6 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
} else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
- if (!Inserted.insert(US)) continue;
const Value *V = US->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
// Look for instructions defined outside the loop.
@@ -3774,7 +3774,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
LUIdx = UsedByIndices.find_next(LUIdx))
// Make a memo of this use, offset, and register tuple.
- if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
+ if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
}
}
@@ -4302,10 +4302,9 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// reference that register in order to be considered. This prunes out
// unprofitable searching.
SmallSetVector<const SCEV *, 4> ReqRegs;
- for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
- E = CurRegs.end(); I != E; ++I)
- if (LU.Regs.count(*I))
- ReqRegs.insert(*I);
+ for (const SCEV *S : CurRegs)
+ if (LU.Regs.count(S))
+ ReqRegs.insert(S);
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
@@ -4350,9 +4349,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
} else {
DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
dbgs() << ".\n Regs:";
- for (SmallPtrSet<const SCEV *, 16>::const_iterator
- I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
- dbgs() << ' ' << **I;
+ for (const SCEV *S : NewRegs)
+ dbgs() << ' ' << *S;
dbgs() << '\n');
SolutionCost = NewCost;
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 00c0f88..f60d990 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/FunctionTargetTransformInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -52,7 +54,7 @@ UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
static cl::opt<unsigned>
PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
- cl::desc("Unrolled size limit for loops with an unroll(enable) or "
+ cl::desc("Unrolled size limit for loops with an unroll(full) or "
"unroll_count pragma."));
namespace {
@@ -102,6 +104,7 @@ namespace {
/// loop preheaders be inserted into the CFG...
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -111,6 +114,7 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<FunctionTargetTransformInfo>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
@@ -120,7 +124,7 @@ namespace {
// Fill in the UnrollingPreferences parameter with values from the
// TargetTransformationInfo.
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
+ void getUnrollingPreferences(Loop *L, const FunctionTargetTransformInfo &FTTI,
TargetTransformInfo::UnrollingPreferences &UP) {
UP.Threshold = CurrentThreshold;
UP.OptSizeThreshold = OptSizeUnrollThreshold;
@@ -130,7 +134,7 @@ namespace {
UP.MaxCount = UINT_MAX;
UP.Partial = CurrentAllowPartial;
UP.Runtime = CurrentRuntime;
- TTI.getUnrollingPreferences(L, UP);
+ FTTI.getUnrollingPreferences(L, UP);
}
// Select and return an unroll count based on parameters from
@@ -138,12 +142,11 @@ namespace {
// SetExplicitly is set to true if the unroll count is set by
// the user or a pragma rather than selected heuristically.
unsigned
- selectUnrollCount(const Loop *L, unsigned TripCount, bool HasEnablePragma,
+ selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
unsigned PragmaCount,
const TargetTransformInfo::UnrollingPreferences &UP,
bool &SetExplicitly);
-
// Select threshold values used to limit unrolling based on a
// total unrolled size. Parameters Threshold and PartialThreshold
// are set to the maximum unrolled size for fully and partially
@@ -183,6 +186,8 @@ namespace {
char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(FunctionTargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -201,11 +206,15 @@ Pass *llvm::createSimpleLoopUnrollPass() {
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ AssumptionTracker *AT) {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TTI);
+ Metrics.analyzeBasicBlock(*I, TTI, EphValues);
NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
@@ -219,13 +228,13 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
return LoopSize;
}
-// Returns the value associated with the given metadata node name (for
-// example, "llvm.loop.unroll.count"). If no such named metadata node
-// exists, then nullptr is returned.
-static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
- StringRef Name) {
+// Returns the loop hint metadata node with the given name (for example,
+// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
+// returned.
+static const MDNode *GetUnrollMetadata(const Loop *L, StringRef Name) {
MDNode *LoopID = L->getLoopID();
- if (!LoopID) return nullptr;
+ if (!LoopID)
+ return nullptr;
// First operand should refer to the loop id itself.
assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
@@ -233,49 +242,80 @@ static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (!MD) continue;
+ if (!MD)
+ continue;
const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- if (!S) continue;
+ if (!S)
+ continue;
- if (Name.equals(S->getString())) {
- assert(MD->getNumOperands() == 2 &&
- "Unroll hint metadata should have two operands.");
- return cast<ConstantInt>(MD->getOperand(1));
- }
+ if (Name.equals(S->getString()))
+ return MD;
}
return nullptr;
}
-// Returns true if the loop has an unroll(enable) pragma.
-static bool HasUnrollEnablePragma(const Loop *L) {
- const ConstantInt *EnableValue =
- GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
- return (EnableValue && EnableValue->getZExtValue());
+// Returns true if the loop has an unroll(full) pragma.
+static bool HasUnrollFullPragma(const Loop *L) {
+ return GetUnrollMetadata(L, "llvm.loop.unroll.full");
}
// Returns true if the loop has an unroll(disable) pragma.
static bool HasUnrollDisablePragma(const Loop *L) {
- const ConstantInt *EnableValue =
- GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
- return (EnableValue && !EnableValue->getZExtValue());
+ return GetUnrollMetadata(L, "llvm.loop.unroll.disable");
}
// If loop has an unroll_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0.
static unsigned UnrollCountPragmaValue(const Loop *L) {
- const ConstantInt *CountValue =
- GetUnrollMetadataValue(L, "llvm.loop.unroll.count");
- if (CountValue) {
- unsigned Count = CountValue->getZExtValue();
+ const MDNode *MD = GetUnrollMetadata(L, "llvm.loop.unroll.count");
+ if (MD) {
+ assert(MD->getNumOperands() == 2 &&
+ "Unroll count hint metadata should have two operands.");
+ unsigned Count = cast<ConstantInt>(MD->getOperand(1))->getZExtValue();
assert(Count >= 1 && "Unroll count must be positive.");
return Count;
}
return 0;
}
+// Remove existing unroll metadata and add unroll disable metadata to
+// indicate the loop has already been unrolled. This prevents a loop
+// from being unrolled more than is directed by a pragma if the loop
+// unrolling pass is run more than once (which it generally is).
+static void SetLoopAlreadyUnrolled(Loop *L) {
+ MDNode *LoopID = L->getLoopID();
+ if (!LoopID) return;
+
+ // First remove any existing loop unrolling metadata.
+ SmallVector<Value *, 4> Vals;
+ // Reserve first location for self reference to the LoopID metadata node.
+ Vals.push_back(nullptr);
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ bool IsUnrollMetadata = false;
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
+ }
+ if (!IsUnrollMetadata) Vals.push_back(LoopID->getOperand(i));
+ }
+
+ // Add unroll(disable) metadata to disable future unrolling.
+ LLVMContext &Context = L->getHeader()->getContext();
+ SmallVector<Value *, 1> DisableOperands;
+ DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ Vals.push_back(DisableNode);
+
+ MDNode *NewLoopID = MDNode::get(Context, Vals);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L->setLoopID(NewLoopID);
+}
+
unsigned LoopUnroll::selectUnrollCount(
- const Loop *L, unsigned TripCount, bool HasEnablePragma,
+ const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
bool &SetExplicitly) {
SetExplicitly = true;
@@ -289,9 +329,7 @@ unsigned LoopUnroll::selectUnrollCount(
if (Count == 0) {
if (PragmaCount) {
Count = PragmaCount;
- } else if (HasEnablePragma) {
- // unroll(enable) pragma without an unroll_count pragma
- // indicates to unroll loop fully.
+ } else if (PragmaFullUnroll) {
Count = TripCount;
}
}
@@ -323,6 +361,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ const FunctionTargetTransformInfo &FTTI =
+ getAnalysis<FunctionTargetTransformInfo>();
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -331,35 +372,37 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (HasUnrollDisablePragma(L)) {
return false;
}
- bool HasEnablePragma = HasUnrollEnablePragma(L);
+ bool PragmaFullUnroll = HasUnrollFullPragma(L);
unsigned PragmaCount = UnrollCountPragmaValue(L);
- bool HasPragma = HasEnablePragma || PragmaCount > 0;
+ bool HasPragma = PragmaFullUnroll || PragmaCount > 0;
TargetTransformInfo::UnrollingPreferences UP;
- getUnrollingPreferences(L, TTI, UP);
+ getUnrollingPreferences(L, FTTI, UP);
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
- // Find "latch trip count". UnrollLoop assumes that control cannot exit
- // via the loop latch on any iteration prior to TripCount. The loop may exit
- // early via an earlier branch.
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (LatchBlock) {
- TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
+ // If there are multiple exiting blocks but one of them is the latch, use the
+ // latch for the trip count estimation. Otherwise insist on a single exiting
+ // block for the trip count estimation.
+ BasicBlock *ExitingBlock = L->getLoopLatch();
+ if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
+ ExitingBlock = L->getExitingBlock();
+ if (ExitingBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
// Select an initial unroll count. This may be reduced later based
// on size thresholds.
bool CountSetExplicitly;
- unsigned Count = selectUnrollCount(L, TripCount, HasEnablePragma, PragmaCount,
- UP, CountSetExplicitly);
+ unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,
+ PragmaCount, UP, CountSetExplicitly);
unsigned NumInlineCandidates;
bool notDuplicatable;
unsigned LoopSize =
- ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);
+ ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, AT);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
if (notDuplicatable) {
@@ -428,21 +471,26 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
if (HasPragma) {
+ if (PragmaCount != 0)
+ // If the loop has an unroll count pragma, mark the loop as unrolled to prevent
+ // unrolling beyond that requested by the pragma.
+ SetLoopAlreadyUnrolled(L);
+
// Emit optimization remarks if we are unable to unroll the loop
// as directed by a pragma.
DebugLoc LoopLoc = L->getStartLoc();
Function *F = Header->getParent();
LLVMContext &Ctx = F->getContext();
- if (HasEnablePragma && PragmaCount == 0) {
+ if (PragmaFullUnroll && PragmaCount == 0) {
if (TripCount && Count != TripCount) {
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(enable) pragma "
+ "Unable to fully unroll loop as directed by unroll(full) pragma "
"because unrolled size is too large.");
} else if (!TripCount) {
emitOptimizationRemarkMissed(
Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(enable) pragma "
+ "Unable to fully unroll loop as directed by unroll(full) pragma "
"because loop has a runtime trip count.");
}
} else if (PragmaCount > 0 && Count != OriginalCount) {
@@ -460,7 +508,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this,
+ &LPM, AT))
return false;
return true;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 977c53a..ef43483 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -103,7 +104,8 @@ namespace {
// Analyze loop. Check its size and determine whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop *L, const TargetTransformInfo &TTI);
+ bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
+ AssumptionTracker *AT);
// Clean all data related to given loop.
void forgetLoop(const Loop *L);
@@ -126,6 +128,7 @@ namespace {
class LoopUnswitch : public LoopPass {
LoopInfo *LI; // Loop information
LPPassManager *LPM;
+ AssumptionTracker *AT;
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
@@ -164,6 +167,7 @@ namespace {
/// loop preheaders be inserted into the CFG.
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequired<LoopInfo>();
@@ -212,7 +216,8 @@ namespace {
// Analyze loop. Check its size and determine whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.
-bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
+bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
+ AssumptionTracker *AT) {
LoopPropsMapIt PropsIt;
bool Inserted;
@@ -229,13 +234,16 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
// FIXME: This is overly conservative because it does not take into
// consideration code simplification opportunities and code that can
// be shared by the resultant unswitched loops.
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TTI);
+ Metrics.analyzeBasicBlock(*I, TTI, EphValues);
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
@@ -326,6 +334,7 @@ char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -376,6 +385,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
if (skipOptnoneFunction(L))
return false;
+ AT = &getAnalysis<AssumptionTracker>();
LI = &getAnalysis<LoopInfo>();
LPM = &LPM_Ref;
DominatorTreeWrapperPass *DTWP =
@@ -421,7 +431,8 @@ bool LoopUnswitch::processCurrentLoop() {
// We have probably reached the quota of branches for this loop. If so,
// stop unswitching.
- if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))
+ if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>(),
+ AT))
return false;
// Loop over all of the basic blocks in the loop. If we find an interior
@@ -823,6 +834,10 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
NewBlocks[0], F->end());
+ // FIXME: We could register any cloned assumptions instead of clearing the
+ // whole function's cache.
+ AT->forgetCachedAssumptions(F);
+
// Now we create the new Loop object for the versioned loop.
Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index b6bc792..be524be 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
@@ -329,6 +330,7 @@ namespace {
// This transformation requires dominator and postdominator info.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -361,6 +363,7 @@ FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
@@ -631,22 +634,24 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (destSize < srcSize)
return false;
} else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
- // If the destination is an sret parameter then only accesses that are
- // outside of the returned struct type can trap.
- if (!A->hasStructRetAttr())
- return false;
+ if (A->getDereferenceableBytes() < srcSize) {
+ // If the destination is an sret parameter then only accesses that are
+ // outside of the returned struct type can trap.
+ if (!A->hasStructRetAttr())
+ return false;
- Type *StructTy = cast<PointerType>(A->getType())->getElementType();
- if (!StructTy->isSized()) {
- // The call may never return and hence the copy-instruction may never
- // be executed, and therefore it's not safe to say "the destination
- // has at least <cpyLen> bytes, as implied by the copy-instruction",
- return false;
- }
+ Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ if (!StructTy->isSized()) {
+ // The call may never return and hence the copy-instruction may never
+ // be executed, and therefore it's not safe to say "the destination
+ // has at least <cpyLen> bytes, as implied by the copy-instruction",
+ return false;
+ }
- uint64_t destSize = DL->getTypeAllocSize(StructTy);
- if (destSize < srcSize)
- return false;
+ uint64_t destSize = DL->getTypeAllocSize(StructTy);
+ if (destSize < srcSize)
+ return false;
+ }
} else {
return false;
}
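For context on the dereferenceable check introduced above: dereferenceable(N) on an argument promises that the first N bytes behind the pointer can be accessed without trapping, so a destination that carries it can absorb a copy of up to N bytes even when it is not an sret slot. A minimal sketch of that guarantee, with a made-up helper name and assuming the Argument API of this revision:

    #include "llvm/IR/Argument.h"
    #include <cstdint>

    // Sketch only: the guarantee performCallSlotOptzn now relies on.
    static bool destIsWritableForCopy(llvm::Argument *A, uint64_t SrcSize) {
      // dereferenceable(N) makes the first N bytes accessible without a trap,
      // so writing SrcSize <= N bytes into the destination cannot introduce a
      // fault that the original code could not already hit.
      return A->getDereferenceableBytes() >= SrcSize;
    }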
@@ -673,17 +678,31 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
for (User *UU : U->users())
srcUseList.push_back(UU);
- } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
- if (G->hasAllZeroIndices())
- for (User *UU : U->users())
- srcUseList.push_back(UU);
- else
+ continue;
+ }
+ if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
+ if (!G->hasAllZeroIndices())
return false;
- } else if (U != C && U != cpy) {
- return false;
+
+ for (User *UU : U->users())
+ srcUseList.push_back(UU);
+ continue;
}
+ if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
+ if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
+ IT->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+
+ if (U != C && U != cpy)
+ return false;
}
+ // Check that src isn't captured by the called function since the
+ // transformation can cause aliasing issues in that case.
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
+ return false;
+
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -963,8 +982,11 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (MDep->getAlignment() < ByValAlign &&
- getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, DL) < ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign,
+ DL, AT, CS.getInstruction(), &DT) < ByValAlign)
return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
new file mode 100644
index 0000000..8281c59
--- /dev/null
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -0,0 +1,604 @@
+//===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//! \file
+//! \brief This pass performs merges of loads and stores on both sides of a
+// diamond (hammock). It hoists the loads and sinks the stores.
+//
+// The algorithm iteratively hoists two loads to the same address out of a
+// diamond (hammock) and merges them into a single load in the header.
+// Similarly, it sinks and merges two stores to the tail block (footer). The
+// algorithm iterates over the instructions of one side of the diamond and
+// attempts to find a matching load/store on the other side. It hoists/sinks
+// when it thinks it is safe to do so. This optimization helps with, e.g.,
+// hiding load latencies, triggering if-conversion, and reducing static code
+// size.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+// Example:
+// Diamond shaped code before merge:
+//
+// header:
+// br %cond, label %if.then, label %if.else
+// + +
+// + +
+// + +
+// if.then: if.else:
+// %lt = load %addr_l %le = load %addr_l
+// <use %lt> <use %le>
+// <...> <...>
+// store %st, %addr_s store %se, %addr_s
+// br label %if.end br label %if.end
+// + +
+// + +
+// + +
+// if.end ("footer"):
+// <...>
+//
+// Diamond shaped code after merge:
+//
+// header:
+// %l = load %addr_l
+// br %cond, label %if.then, label %if.else
+// + +
+// + +
+// + +
+// if.then: if.else:
+// <use %l> <use %l>
+// <...> <...>
+// br label %if.end br label %if.end
+// + +
+// + +
+// + +
+// if.end ("footer"):
+// %s.sink = phi [%st, if.then], [%se, if.else]
+// <...>
+// store %s.sink, %addr_s
+// <...>
+//
+//
+//===----------------------- TODO -----------------------------------------===//
+//
+// 1) Generalize to regions other than diamonds
+// 2) Be more aggressive merging memory operations
+// Note that both changes require register pressure control
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "mldst-motion"
+
+//===----------------------------------------------------------------------===//
+// MergedLoadStoreMotion Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class MergedLoadStoreMotion : public FunctionPass {
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit MergedLoadStoreMotion(void)
+ : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
+ initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ // Declare the analyses this transformation requires and preserves
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ }
+
+ // Helper routines
+
+ ///
+ /// \brief Remove instruction from parent and update memory dependence
+ /// analysis.
+ ///
+ void removeInstruction(Instruction *Inst);
+ BasicBlock *getDiamondTail(BasicBlock *BB);
+ bool isDiamondHead(BasicBlock *BB);
+ // Routines for hoisting loads
+ bool isLoadHoistBarrierInRange(const Instruction& Start,
+ const Instruction& End,
+ LoadInst* LI);
+ LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
+ void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
+ Instruction *ElseInst);
+ bool isSafeToHoist(Instruction *I) const;
+ bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
+ bool mergeLoads(BasicBlock *BB);
+ // Routines for sinking stores
+ StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
+ PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
+ bool isStoreSinkBarrier(Instruction *Inst);
+ bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
+ bool mergeStores(BasicBlock *BB);
+ // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
+ // where Size0 and Size1 are the #instructions on the two sides of
+ // the diamond. The constant chosen here is arbitrary. Compile-time
+ // control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
+ const int MagicCompileTimeControl;
+};
+
+char MergedLoadStoreMotion::ID = 0;
+}
+
+///
+/// \brief createMergedLoadStoreMotionPass - The public interface to this file.
+///
+FunctionPass *llvm::createMergedLoadStoreMotionPass() {
+ return new MergedLoadStoreMotion();
+}
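As a usage note rather than part of the patch: with the factory above declared in llvm/Transforms/Scalar.h, the pass can be scheduled explicitly through the legacy pass manager, or exercised in isolation with opt -mldst-motion on a .ll file. A rough sketch, assuming the in-tree headers of this revision:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"

    // Run only MergedLoadStoreMotion over a module; the legacy pass manager
    // schedules the analyses it declares as required (alias analysis, memory
    // dependence, target library info) on its own.
    static void runMLDSTMotion(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createMergedLoadStoreMotionPass());
      PM.run(M);
    }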
+
+INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
+ "MergedLoadStoreMotion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
+ "MergedLoadStoreMotion", false, false)
+
+///
+/// \brief Remove instruction from parent and update memory dependence analysis.
+///
+void MergedLoadStoreMotion::removeInstruction(Instruction *Inst) {
+ // Notify the memory dependence analysis.
+ if (MD) {
+ MD->removeInstruction(Inst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ MD->invalidateCachedPointerInfo(LI->getPointerOperand());
+ if (Inst->getType()->getScalarType()->isPointerTy()) {
+ MD->invalidateCachedPointerInfo(Inst);
+ }
+ }
+ Inst->eraseFromParent();
+}
+
+///
+/// \brief Return tail block of a diamond.
+///
+BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
+ assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
+ BranchInst *BI = (BranchInst *)(BB->getTerminator());
+ BasicBlock *Succ0 = BI->getSuccessor(0);
+ BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
+ return Tail;
+}
+
+///
+/// \brief True when BB is the head of a diamond (hammock)
+///
+bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
+ if (!BB)
+ return false;
+ if (!isa<BranchInst>(BB->getTerminator()))
+ return false;
+ if (BB->getTerminator()->getNumSuccessors() != 2)
+ return false;
+
+ BranchInst *BI = (BranchInst *)(BB->getTerminator());
+ BasicBlock *Succ0 = BI->getSuccessor(0);
+ BasicBlock *Succ1 = BI->getSuccessor(1);
+
+ if (!Succ0->getSinglePredecessor() ||
+ Succ0->getTerminator()->getNumSuccessors() != 1)
+ return false;
+ if (!Succ1->getSinglePredecessor() ||
+ Succ1->getTerminator()->getNumSuccessors() != 1)
+ return false;
+
+ BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
+ // Ignore triangles.
+ if (Succ1->getTerminator()->getSuccessor(0) != Tail)
+ return false;
+ return true;
+}
+
+///
+/// \brief True when instruction is a hoist barrier for a load
+///
+/// Whenever an instruction could possibly modify the value
+/// being loaded or prevent the load from happening, it is
+/// considered a hoist barrier.
+///
+bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
+ const Instruction& End,
+ LoadInst* LI) {
+ AliasAnalysis::Location Loc = AA->getLocation(LI);
+ return AA->canInstructionRangeModify(Start, End, Loc);
+}
+
+///
+/// \brief Decide if a load can be hoisted
+///
+/// When there is a load in \p BB1 to the same address as \p Load0
+/// and it can be hoisted from \p BB1, return that load.
+/// Otherwise return null.
+///
+LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
+ LoadInst *Load0) {
+
+ for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
+ ++BBI) {
+ Instruction *Inst = BBI;
+
+ // Only merge and hoist loads whose result is used only inside BB1
+ if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
+ continue;
+
+ LoadInst *Load1 = dyn_cast<LoadInst>(Inst);
+ BasicBlock *BB0 = Load0->getParent();
+
+ AliasAnalysis::Location Loc0 = AA->getLocation(Load0);
+ AliasAnalysis::Location Loc1 = AA->getLocation(Load1);
+ if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
+ !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
+ !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
+ return Load1;
+ }
+ }
+ return nullptr;
+}
+
+///
+/// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into
+/// \p BB
+///
+/// BB is the head of a diamond
+///
+void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
+ Instruction *HoistCand,
+ Instruction *ElseInst) {
+ DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump();
+ dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n";
+ dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n");
+ // Hoist the instruction.
+ assert(HoistCand->getParent() != BB);
+
+ // Intersect optional metadata.
+ HoistCand->intersectOptionalDataWith(ElseInst);
+ HoistCand->dropUnknownMetadata();
+
+ // Prepend point for instruction insert
+ Instruction *HoistPt = BB->getTerminator();
+
+ // Merged instruction
+ Instruction *HoistedInst = HoistCand->clone();
+
+ // Notify AA of the new value.
+ if (isa<LoadInst>(HoistCand))
+ AA->copyValue(HoistCand, HoistedInst);
+
+ // Hoist instruction.
+ HoistedInst->insertBefore(HoistPt);
+
+ HoistCand->replaceAllUsesWith(HoistedInst);
+ removeInstruction(HoistCand);
+ // Replace the else block instruction.
+ ElseInst->replaceAllUsesWith(HoistedInst);
+ removeInstruction(ElseInst);
+}
+
+///
+/// \brief Return true if no operand of \p I is defined in I's parent block
+///
+bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
+ BasicBlock *Parent = I->getParent();
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Instruction *Instr = dyn_cast<Instruction>(I->getOperand(i));
+ if (Instr && Instr->getParent() == Parent)
+ return false;
+ }
+ return true;
+}
+
+///
+/// \brief Merge two equivalent loads and GEPs and hoist into diamond head
+///
+bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
+ LoadInst *L1) {
+ // Only one definition?
+ Instruction *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
+ Instruction *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
+ if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
+ A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
+ A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
+ isa<GetElementPtrInst>(A0)) {
+ DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump();
+ dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n";
+ dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n");
+ hoistInstruction(BB, A0, A1);
+ hoistInstruction(BB, L0, L1);
+ return true;
+ } else
+ return false;
+}
+
+///
+/// \brief Try to hoist two loads to same address into diamond header
+///
+/// Starting from a diamond head block, iterate over the instructions in one
+/// successor block and try to match a load in the second successor.
+///
+bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
+ bool MergedLoads = false;
+ assert(isDiamondHead(BB));
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ BasicBlock *Succ0 = BI->getSuccessor(0);
+ BasicBlock *Succ1 = BI->getSuccessor(1);
+ // #Instructions in Succ1 for Compile Time Control
+ int Size1 = Succ1->size();
+ int NLoads = 0;
+ for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
+ BBI != BBE;) {
+
+ Instruction *I = BBI;
+ ++BBI;
+
+ // Only move simple (non-atomic, non-volatile) loads.
+ LoadInst *L0 = dyn_cast<LoadInst>(I);
+ if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0))
+ continue;
+
+ ++NLoads;
+ if (NLoads * Size1 >= MagicCompileTimeControl)
+ break;
+ if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) {
+ bool Res = hoistLoad(BB, L0, L1);
+ MergedLoads |= Res;
+ // Don't attempt to hoist above loads that had not been hoisted.
+ if (!Res)
+ break;
+ }
+ }
+ return MergedLoads;
+}
+
+///
+/// \brief True when instruction is sink barrier for a store
+///
+bool MergedLoadStoreMotion::isStoreSinkBarrier(Instruction *Inst) {
+ // FIXME: Conservatively let a load instruction block the store.
+ // Use alias analysis instead.
+ if (isa<LoadInst>(Inst))
+ return true;
+ if (isa<CallInst>(Inst))
+ return true;
+ if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst))
+ return true;
+ // Note: mayHaveSideEffects covers all instructions that could
+ // trigger a change to state. E.g. in-flight stores have to be executed
+ // before ordered loads or fences, calls could invoke functions that store
+ // data to memory, etc.
+ if (!isa<StoreInst>(Inst) && Inst->mayHaveSideEffects()) {
+ return true;
+ }
+ DEBUG(dbgs() << "No Sink Barrier\n");
+ return false;
+}
+
+///
+/// \brief Check if \p BB contains a store to the same address as \p SI
+///
+/// \return The store in \p BB when it is safe to sink. Otherwise return null.
+///
+StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB,
+ StoreInst *SI) {
+ StoreInst *I = nullptr;
+ DEBUG(dbgs() << "can Sink? : "; SI->dump(); dbgs() << "\n");
+ for (BasicBlock::reverse_iterator RBI = BB->rbegin(), RBE = BB->rend();
+ RBI != RBE; ++RBI) {
+ Instruction *Inst = &*RBI;
+
+ // Stop searching as soon as an instruction acts as a sink barrier.
+ if (isStoreSinkBarrier(Inst))
+ break;
+ if (isa<StoreInst>(Inst)) {
+ AliasAnalysis::Location LocSI = AA->getLocation(SI);
+ AliasAnalysis::Location LocInst = AA->getLocation((StoreInst *)Inst);
+ if (AA->isMustAlias(LocSI, LocInst)) {
+ I = (StoreInst *)Inst;
+ break;
+ }
+ }
+ }
+ return I;
+}
+
+///
+/// \brief Create a PHI node in BB for the operands of S0 and S1
+///
+PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
+ StoreInst *S1) {
+ // Create a phi if the values mismatch.
+ PHINode *NewPN = nullptr;
+ Value *Opd1 = S0->getValueOperand();
+ Value *Opd2 = S1->getValueOperand();
+ if (Opd1 != Opd2) {
+ NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
+ BB->begin());
+ NewPN->addIncoming(Opd1, S0->getParent());
+ NewPN->addIncoming(Opd2, S1->getParent());
+ if (NewPN->getType()->getScalarType()->isPointerTy()) {
+ // Notify AA of the new value.
+ AA->copyValue(Opd1, NewPN);
+ AA->copyValue(Opd2, NewPN);
+ // AA needs to be informed when a PHI-use of the pointer value is added
+ for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) {
+ unsigned J = PHINode::getOperandNumForIncomingValue(I);
+ AA->addEscapingUse(NewPN->getOperandUse(J));
+ }
+ if (MD)
+ MD->invalidateCachedPointerInfo(NewPN);
+ }
+ }
+ return NewPN;
+}
+
+///
+/// \brief Merge two stores to same address and sink into \p BB
+///
+/// Also sinks GEP instruction computing the store address
+///
+bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
+ StoreInst *S1) {
+ // Only one definition?
+ Instruction *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
+ Instruction *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
+ if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
+ (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
+ (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) {
+ DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
+ dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
+ dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
+ // Sink the instruction.
+ BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
+ // Intersect optional metadata.
+ S0->intersectOptionalDataWith(S1);
+ S0->dropUnknownMetadata();
+
+ // Create the new store to be inserted at the join point.
+ StoreInst *SNew = (StoreInst *)(S0->clone());
+ Instruction *ANew = A0->clone();
+ AA->copyValue(S0, SNew);
+ SNew->insertBefore(InsertPt);
+ ANew->insertBefore(SNew);
+
+ assert(S0->getParent() == A0->getParent());
+ assert(S1->getParent() == A1->getParent());
+
+ PHINode *NewPN = getPHIOperand(BB, S0, S1);
+ // New PHI operand? Use it.
+ if (NewPN)
+ SNew->setOperand(0, NewPN);
+ removeInstruction(S0);
+ removeInstruction(S1);
+ A0->replaceAllUsesWith(ANew);
+ removeInstruction(A0);
+ A1->replaceAllUsesWith(ANew);
+ removeInstruction(A1);
+ return true;
+ }
+ return false;
+}
+
+///
+/// \brief True when two equivalent stores were sunk and merged into the footer
+///
+/// Starting from a diamond tail block, iterate over the instructions in one
+/// predecessor block and try to match a store in the second predecessor.
+///
+bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
+
+ bool MergedStores = false;
+ assert(T && "Footer of a diamond cannot be empty");
+
+ pred_iterator PI = pred_begin(T), E = pred_end(T);
+ assert(PI != E);
+ BasicBlock *Pred0 = *PI;
+ ++PI;
+ BasicBlock *Pred1 = *PI;
+ ++PI;
+ // tail block of a diamond/hammock?
+ if (Pred0 == Pred1)
+ return false; // No.
+ if (PI != E)
+ return false; // No. More than 2 predecessors.
+
+ // #Instructions in Pred1 for compile-time control
+ int Size1 = Pred1->size();
+ int NStores = 0;
+
+ for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
+ RBI != RBE;) {
+
+ Instruction *I = &*RBI;
+ ++RBI;
+ if (isStoreSinkBarrier(I))
+ break;
+ // Don't sink non-simple (atomic, volatile) stores
+ if (!isa<StoreInst>(I))
+ continue;
+ StoreInst *S0 = (StoreInst *)I;
+ if (!S0->isSimple())
+ continue;
+
+ ++NStores;
+ if (NStores * Size1 >= MagicCompileTimeControl)
+ break;
+ if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
+ bool Res = sinkStore(T, S0, S1);
+ MergedStores |= Res;
+ // Don't attempt to sink below stores that had to stick around.
+ // But after removal of a store and some of its feeding
+ // instructions, search again from the beginning since the iterator
+ // is likely stale at this point.
+ if (!Res)
+ break;
+ else {
+ RBI = Pred0->rbegin();
+ RBE = Pred0->rend();
+ DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
+ }
+ }
+ }
+ return MergedStores;
+}
+
+///
+/// \brief Run the transformation for each function
+///
+bool MergedLoadStoreMotion::runOnFunction(Function &F) {
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool Changed = false;
+ DEBUG(dbgs() << "Instruction Merger\n");
+
+ // Merge unconditional branches, allowing PRE to catch more
+ // optimization opportunities.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+ BasicBlock *BB = FI++;
+
+ // Hoist equivalent loads and sink stores
+ // outside diamonds when possible
+ if (isDiamondHead(BB)) {
+ Changed |= mergeLoads(BB);
+ Changed |= mergeStores(getDiamondTail(BB));
+ }
+ }
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 7cce89e..5c8bed5 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -108,6 +108,10 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
if (Call->onlyReadsMemory())
return false;
+ // The call must have the expected result type.
+ if (!Call->getType()->isFloatingPointTy())
+ return false;
+
// Do the following transformation:
//
// (before)
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index ea2cf7c..1bbaaf3 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -176,6 +176,7 @@ namespace {
private:
void BuildRankMap(Function &F);
unsigned getRank(Value *V);
+ void canonicalizeOperands(Instruction *I);
void ReassociateExpression(BinaryOperator *I);
void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
Value *OptimizeExpression(BinaryOperator *I,
@@ -194,6 +195,7 @@ namespace {
Value *RemoveFactorFromExpression(Value *V, Value *Factor);
void EraseInst(Instruction *I);
void OptimizeInst(Instruction *I);
+ Instruction *canonicalizeNegConstExpr(Instruction *I);
};
}
@@ -235,7 +237,20 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
/// opcode and if it only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
- cast<Instruction>(V)->getOpcode() == Opcode)
+ cast<Instruction>(V)->getOpcode() == Opcode &&
+ (!isa<FPMathOperator>(V) ||
+ cast<Instruction>(V)->hasUnsafeAlgebra()))
+ return cast<BinaryOperator>(V);
+ return nullptr;
+}
+
+static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
+ unsigned Opcode2) {
+ if (V->hasOneUse() && isa<Instruction>(V) &&
+ (cast<Instruction>(V)->getOpcode() == Opcode1 ||
+ cast<Instruction>(V)->getOpcode() == Opcode2) &&
+ (!isa<FPMathOperator>(V) ||
+ cast<Instruction>(V)->hasUnsafeAlgebra()))
return cast<BinaryOperator>(V);
return nullptr;
}
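The hasUnsafeAlgebra() requirement above means callers have to opt their FP math in before Reassociate will touch it. A small sketch of building such IR with IRBuilder (illustrative names, assuming the FastMathFlags API of this revision):

    #include "llvm/IR/IRBuilder.h"

    // Create an fadd carrying the fast-math flags that isReassociableOp() checks.
    static llvm::Value *buildReassociableFAdd(llvm::IRBuilder<> &B,
                                              llvm::Value *X, llvm::Value *Y) {
      llvm::FastMathFlags FMF;
      FMF.setUnsafeAlgebra();   // without this flag the FP add stays untouched
      B.SetFastMathFlags(FMF);  // applies to the FP instructions created below
      return B.CreateFAdd(X, Y, "sum");
    }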
@@ -264,9 +279,11 @@ static bool isUnmovableInstruction(Instruction *I) {
void Reassociate::BuildRankMap(Function &F) {
unsigned i = 2;
- // Assign distinct ranks to function arguments
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ // Assign distinct ranks to function arguments.
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
ValueRankMap[&*I] = ++i;
+ DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
+ }
ReversePostOrderTraversal<Function*> RPOT(&F);
for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
@@ -304,24 +321,78 @@ unsigned Reassociate::getRank(Value *V) {
// If this is a not or neg instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
- if (!I->getType()->isIntegerTy() ||
- (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
+ Type *Ty = V->getType();
+ if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) ||
+ (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
+ !BinaryOperator::isFNeg(I)))
++Rank;
- //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
- // << Rank << "\n");
+ DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
return ValueRankMap[I] = Rank;
}
+// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
+void Reassociate::canonicalizeOperands(Instruction *I) {
+ assert(isa<BinaryOperator>(I) && "Expected binary operator.");
+ assert(I->isCommutative() && "Expected commutative operator.");
+
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ unsigned LHSRank = getRank(LHS);
+ unsigned RHSRank = getRank(RHS);
+
+ if (isa<Constant>(RHS))
+ return;
+
+ if (isa<Constant>(LHS) || RHSRank < LHSRank)
+ cast<BinaryOperator>(I)->swapOperands();
+}
+
+static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
+ Instruction *InsertBefore, Value *FlagsOp) {
+ if (S1->getType()->isIntegerTy())
+ return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
+ else {
+ BinaryOperator *Res =
+ BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
+ Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
+ return Res;
+ }
+}
+
+static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
+ Instruction *InsertBefore, Value *FlagsOp) {
+ if (S1->getType()->isIntegerTy())
+ return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
+ else {
+ BinaryOperator *Res =
+ BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
+ Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
+ return Res;
+ }
+}
+
+static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
+ Instruction *InsertBefore, Value *FlagsOp) {
+ if (S1->getType()->isIntegerTy())
+ return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
+ else {
+ BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
+ Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
+ return Res;
+ }
+}
+
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
- Constant *Cst = Constant::getAllOnesValue(Neg->getType());
+ Type *Ty = Neg->getType();
+ Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty)
+ : ConstantFP::get(Ty, -1.0);
- BinaryOperator *Res =
- BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
- Neg->setOperand(1, Constant::getNullValue(Neg->getType())); // Drop use of op.
+ BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
+ Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
Res->setDebugLoc(Neg->getDebugLoc());
@@ -377,13 +448,14 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
LHS = 0; // 1 + 1 === 0 modulo 2.
return;
}
- if (Opcode == Instruction::Add) {
+ if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
// TODO: Reduce the weight by exploiting nsw/nuw?
LHS += RHS;
return;
}
- assert(Opcode == Instruction::Mul && "Unknown associative operation!");
+ assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
+ "Unknown associative operation!");
unsigned Bitwidth = LHS.getBitWidth();
// If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
// can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
@@ -499,8 +571,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
unsigned Opcode = I->getOpcode();
- assert(Instruction::isAssociative(Opcode) &&
- Instruction::isCommutative(Opcode) &&
+ assert(I->isAssociative() && I->isCommutative() &&
"Expected an associative and commutative operation!");
// Visit all operands of the expression, keeping track of their weight (the
@@ -515,7 +586,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
// ways to get to it.
SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
- bool MadeChange = false;
+ bool Changed = false;
// Leaves of the expression are values that either aren't the right kind of
// operation (eg: a constant, or a multiply in an add tree), or are, but have
@@ -552,7 +623,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
// If this is a binary operation of the right kind with only one use then
// add its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
- assert(Visited.insert(Op) && "Not first visit!");
+ assert(Visited.insert(Op).second && "Not first visit!");
DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
Worklist.push_back(std::make_pair(BO, Weight));
continue;
@@ -562,7 +633,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
LeafMap::iterator It = Leaves.find(Op);
if (It == Leaves.end()) {
// Not in the leaf map. Must be the first time we saw this operand.
- assert(Visited.insert(Op) && "Not first visit!");
+ assert(Visited.insert(Op).second && "Not first visit!");
if (!Op->hasOneUse()) {
// This value has uses not accounted for by the expression, so it is
// not safe to modify. Mark it as being a leaf.
@@ -584,7 +655,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
// exactly one such use, drop this new use of the leaf.
assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
I->setOperand(OpIdx, UndefValue::get(I->getType()));
- MadeChange = true;
+ Changed = true;
// If the leaf is a binary operation of the right kind and we now see
// that its multiple original uses were in fact all by nodes belonging
@@ -613,21 +684,24 @@ static bool LinearizeExprTree(BinaryOperator *I,
// expression. This means that it can safely be modified. See if we
// can usefully morph it into an expression of the right kind.
assert((!isa<Instruction>(Op) ||
- cast<Instruction>(Op)->getOpcode() != Opcode) &&
+ cast<Instruction>(Op)->getOpcode() != Opcode
+ || (isa<FPMathOperator>(Op) &&
+ !cast<Instruction>(Op)->hasUnsafeAlgebra())) &&
"Should have been handled above!");
assert(Op->hasOneUse() && "Has uses outside the expression tree!");
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
- BinaryOperator *BO = dyn_cast<BinaryOperator>(Op);
- if (Opcode == Instruction::Mul && BO && BinaryOperator::isNeg(BO)) {
- DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
- BO = LowerNegateToMultiply(BO);
- DEBUG(dbgs() << *BO << 'n');
- Worklist.push_back(std::make_pair(BO, Weight));
- MadeChange = true;
- continue;
- }
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
+ if ((Opcode == Instruction::Mul && BinaryOperator::isNeg(BO)) ||
+ (Opcode == Instruction::FMul && BinaryOperator::isFNeg(BO))) {
+ DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
+ BO = LowerNegateToMultiply(BO);
+ DEBUG(dbgs() << *BO << '\n');
+ Worklist.push_back(std::make_pair(BO, Weight));
+ Changed = true;
+ continue;
+ }
// Failed to morph into an expression of the right type. This really is
// a leaf.
@@ -665,7 +739,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
}
- return MadeChange;
+ return Changed;
}
// RewriteExprTree - Now that the operands for this expression tree are
@@ -798,6 +872,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I);
+ if (NewOp->getType()->isFloatingPointTy())
+ NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
NewOp = NodesToRewrite.pop_back_val();
}
@@ -817,7 +893,14 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// expression tree is dominated by all of Ops.
if (ExpressionChanged)
do {
- ExpressionChanged->clearSubclassOptionalData();
+ // Preserve FastMathFlags.
+ if (isa<FPMathOperator>(I)) {
+ FastMathFlags Flags = I->getFastMathFlags();
+ ExpressionChanged->clearSubclassOptionalData();
+ ExpressionChanged->setFastMathFlags(Flags);
+ } else
+ ExpressionChanged->clearSubclassOptionalData();
+
if (ExpressionChanged == I)
break;
ExpressionChanged->moveBefore(I);
@@ -834,6 +917,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
static Value *NegateValue(Value *V, Instruction *BI) {
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V))
+ return ConstantExpr::getFNeg(C);
if (Constant *C = dyn_cast<Constant>(V))
return ConstantExpr::getNeg(C);
@@ -846,7 +931,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// the constants. We assume that instcombine will clean up the mess later if
// we introduce tons of unnecessary negation instructions.
//
- if (BinaryOperator *I = isReassociableOp(V, Instruction::Add)) {
+ if (BinaryOperator *I =
+ isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
// Push the negates through the add.
I->setOperand(0, NegateValue(I->getOperand(0), BI));
I->setOperand(1, NegateValue(I->getOperand(1), BI));
@@ -864,7 +950,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
for (User *U : V->users()) {
- if (!BinaryOperator::isNeg(U)) continue;
+ if (!BinaryOperator::isNeg(U) && !BinaryOperator::isFNeg(U))
+ continue;
// We found one! Now we have to make sure that the definition dominates
// this use. We do this by moving it to the entry block (if it is a
@@ -894,27 +981,34 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
- return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
+ return CreateNeg(V, V->getName() + ".neg", BI, BI);
}
/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
- if (BinaryOperator::isNeg(Sub))
+ if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub))
+ return false;
+
+ // Don't breakup X - undef.
+ if (isa<UndefValue>(Sub->getOperand(1)))
return false;
// Don't bother to break this up unless either the LHS is an associable add or
// subtract or if this is only used by one.
- if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
- isReassociableOp(Sub->getOperand(0), Instruction::Sub))
+ Value *V0 = Sub->getOperand(0);
+ if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
+ isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
return true;
- if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
- isReassociableOp(Sub->getOperand(1), Instruction::Sub))
+ Value *V1 = Sub->getOperand(1);
+ if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
+ isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
return true;
+ Value *VB = Sub->user_back();
if (Sub->hasOneUse() &&
- (isReassociableOp(Sub->user_back(), Instruction::Add) ||
- isReassociableOp(Sub->user_back(), Instruction::Sub)))
+ (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
+ isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
return true;
return false;
@@ -931,8 +1025,7 @@ static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
// and set it as the RHS of the add instruction we just made.
//
Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
- BinaryOperator *New =
- BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
+ BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
New->takeName(Sub);
@@ -956,8 +1049,19 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
Mul->takeName(Shl);
+
+ // Everyone now refers to the mul instruction.
Shl->replaceAllUsesWith(Mul);
Mul->setDebugLoc(Shl->getDebugLoc());
+
+ // We can safely preserve the nuw flag in all cases. It's also safe to turn a
+ // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
+ // handling.
+ bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
+ bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
+ if (NSW && NUW)
+ Mul->setHasNoSignedWrap(true);
+ Mul->setHasNoUnsignedWrap(NUW);
return Mul;
}
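One worked illustration of the nsw caveat above (an assumed example, not taken from the patch): for i8, shl nsw i8 -1, 7 shifts out only bits that agree with the result's sign bit, so it stays well defined and yields -128; ConvertShiftToMul would multiply by 1 << 7, which is -128 as an i8, and the exact product -1 * -128 = 128 overflows i8. A lone nsw therefore cannot simply be copied onto the new multiply:

    #include <cassert>
    #include <cstdint>

    // The arithmetic behind the nsw caveat, spelled out.
    static void shlNswVersusMulNswExample() {
      const int ShlResult = -128;      // shl nsw i8 -1, 7 stays well defined
      const int MulConstAsI8 = -128;   // 1 << 7 truncated to i8
      const int ExactProduct = -1 * MulConstAsI8;
      assert(ShlResult == -128 && MulConstAsI8 == -128);
      assert(ExactProduct == 128 && ExactProduct > INT8_MAX); // would overflow i8
    }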
@@ -969,13 +1073,23 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
Value *X) {
unsigned XRank = Ops[i].Rank;
unsigned e = Ops.size();
- for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
+ for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
if (Ops[j].Op == X)
return j;
+ if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
+ if (Instruction *I2 = dyn_cast<Instruction>(X))
+ if (I1->isIdenticalTo(I2))
+ return j;
+ }
// Scan backwards.
- for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
+ for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
if (Ops[j].Op == X)
return j;
+ if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
+ if (Instruction *I2 = dyn_cast<Instruction>(X))
+ if (I1->isIdenticalTo(I2))
+ return j;
+ }
return i;
}
@@ -988,15 +1102,16 @@ static Value *EmitAddTreeOfValues(Instruction *I,
Value *V1 = Ops.back();
Ops.pop_back();
Value *V2 = EmitAddTreeOfValues(I, Ops);
- return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
+ return CreateAdd(V2, V1, "tmp", I, I);
}
/// RemoveFactorFromExpression - If V is an expression tree that is a
/// multiplication sequence, and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
- BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
- if (!BO) return nullptr;
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
+ if (!BO)
+ return nullptr;
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(BO, Tree);
@@ -1018,13 +1133,25 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
}
// If this is a negative version of this factor, remove it.
- if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
+ if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
if (FC1->getValue() == -FC2->getValue()) {
FoundFactor = NeedsNegate = true;
Factors.erase(Factors.begin()+i);
break;
}
+ } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
+ if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
+ APFloat F1(FC1->getValueAPF());
+ APFloat F2(FC2->getValueAPF());
+ F2.changeSign();
+ if (F1.compare(F2) == APFloat::cmpEqual) {
+ FoundFactor = NeedsNegate = true;
+ Factors.erase(Factors.begin() + i);
+ break;
+ }
+ }
+ }
}
if (!FoundFactor) {
@@ -1046,7 +1173,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
}
if (NeedsNegate)
- V = BinaryOperator::CreateNeg(V, "neg", InsertPt);
+ V = CreateNeg(V, "neg", InsertPt, BO);
return V;
}
@@ -1058,7 +1185,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
static void FindSingleUseMultiplyFactors(Value *V,
SmallVectorImpl<Value*> &Factors,
const SmallVectorImpl<ValueEntry> &Ops) {
- BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
+ BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
if (!BO) {
Factors.push_back(V);
return;
@@ -1389,13 +1516,15 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
++NumFactor;
// Insert a new multiply.
- Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
- Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);
+ Type *Ty = TheOp->getType();
+ Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound)
+ : ConstantFP::get(Ty, NumFound);
+ Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
// Now that we have inserted a multiply, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
- RedoInsts.insert(cast<Instruction>(Mul));
+ RedoInsts.insert(Mul);
// If every add operand was a duplicate, return the multiply.
if (Ops.empty())
@@ -1412,11 +1541,12 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
}
// Check for X and -X or X and ~X in the operand list.
- if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isNot(TheOp))
+ if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isFNeg(TheOp) &&
+ !BinaryOperator::isNot(TheOp))
continue;
Value *X = nullptr;
- if (BinaryOperator::isNeg(TheOp))
+ if (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp))
X = BinaryOperator::getNegArgument(TheOp);
else if (BinaryOperator::isNot(TheOp))
X = BinaryOperator::getNotArgument(TheOp);
@@ -1426,7 +1556,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
continue;
// Remove X and -X from the operand list.
- if (Ops.size() == 2 && BinaryOperator::isNeg(TheOp))
+ if (Ops.size() == 2 &&
+ (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp)))
return Constant::getNullValue(X->getType());
// Remove X and ~X from the operand list.
@@ -1463,7 +1594,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
unsigned MaxOcc = 0;
Value *MaxOccVal = nullptr;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
+ BinaryOperator *BOp =
+ isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
if (!BOp)
continue;
@@ -1476,23 +1608,43 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
SmallPtrSet<Value*, 8> Duplicates;
for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
Value *Factor = Factors[i];
- if (!Duplicates.insert(Factor)) continue;
+ if (!Duplicates.insert(Factor).second)
+ continue;
unsigned Occ = ++FactorOccurrences[Factor];
- if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+ if (Occ > MaxOcc) {
+ MaxOcc = Occ;
+ MaxOccVal = Factor;
+ }
// If Factor is a negative constant, add the negated value as a factor
// because we can percolate the negate out. Watch for minint, which
// cannot be positivified.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
if (CI->isNegative() && !CI->isMinValue(true)) {
Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
assert(!Duplicates.count(Factor) &&
"Shouldn't have two constant factors, missed a canonicalize");
-
unsigned Occ = ++FactorOccurrences[Factor];
- if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
+ if (Occ > MaxOcc) {
+ MaxOcc = Occ;
+ MaxOccVal = Factor;
+ }
+ }
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
+ if (CF->isNegative()) {
+ APFloat F(CF->getValueAPF());
+ F.changeSign();
+ Factor = ConstantFP::get(CF->getContext(), F);
+ assert(!Duplicates.count(Factor) &&
+ "Shouldn't have two constant factors, missed a canonicalize");
+ unsigned Occ = ++FactorOccurrences[Factor];
+ if (Occ > MaxOcc) {
+ MaxOcc = Occ;
+ MaxOccVal = Factor;
+ }
}
+ }
}
}
@@ -1505,11 +1657,16 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// this, we could otherwise run into situations where removing a factor
// from an expression will drop a use of maxocc, and this can cause
// RemoveFactorFromExpression on successive values to behave differently.
- Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
+ Instruction *DummyInst =
+ I->getType()->isIntegerTy()
+ ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
+ : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
+
SmallVector<WeakVH, 4> NewMulOps;
for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
- BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
+ BinaryOperator *BOp =
+ isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
if (!BOp)
continue;
@@ -1542,7 +1699,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
RedoInsts.insert(VI);
// Create the multiply.
- Instruction *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+ Instruction *V2 = CreateMul(V, MaxOccVal, "tmp", I, I);
// Rerun associate on the multiply in case the inner expression turned into
// a multiply. We want to make sure that we keep things in canonical form.
@@ -1632,7 +1789,10 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder,
Value *LHS = Ops.pop_back_val();
do {
- LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
+ if (LHS->getType()->isIntegerTy())
+ LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
+ else
+ LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
} while (!Ops.empty());
return LHS;
@@ -1765,11 +1925,13 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
break;
case Instruction::Add:
+ case Instruction::FAdd:
if (Value *Result = OptimizeAdd(I, Ops))
return Result;
break;
case Instruction::Mul:
+ case Instruction::FMul:
if (Value *Result = OptimizeMul(I, Ops))
return Result;
break;
@@ -1797,12 +1959,104 @@ void Reassociate::EraseInst(Instruction *I) {
// and add that since that's where optimization actually happens.
unsigned Opcode = Op->getOpcode();
while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
- Visited.insert(Op))
+ Visited.insert(Op).second)
Op = Op->user_back();
RedoInsts.insert(Op);
}
}
+// Canonicalize expressions of the following form:
+// x + (-Constant * y) -> x - (Constant * y)
+// x - (-Constant * y) -> x + (Constant * y)
+Instruction *Reassociate::canonicalizeNegConstExpr(Instruction *I) {
+ if (!I->hasOneUse() || I->getType()->isVectorTy())
+ return nullptr;
+
+ // Must be a mul, fmul, or fdiv instruction.
+ unsigned Opcode = I->getOpcode();
+ if (Opcode != Instruction::Mul && Opcode != Instruction::FMul &&
+ Opcode != Instruction::FDiv)
+ return nullptr;
+
+ // Must have at least one constant operand.
+ Constant *C0 = dyn_cast<Constant>(I->getOperand(0));
+ Constant *C1 = dyn_cast<Constant>(I->getOperand(1));
+ if (!C0 && !C1)
+ return nullptr;
+
+ // Must be a negative ConstantInt or ConstantFP.
+ Constant *C = C0 ? C0 : C1;
+ unsigned ConstIdx = C0 ? 0 : 1;
+ if (auto *CI = dyn_cast<ConstantInt>(C)) {
+ if (!CI->isNegative())
+ return nullptr;
+ } else if (auto *CF = dyn_cast<ConstantFP>(C)) {
+ if (!CF->isNegative())
+ return nullptr;
+ } else
+ return nullptr;
+
+ // User must be a binary operator with one or more uses.
+ Instruction *User = I->user_back();
+ if (!isa<BinaryOperator>(User) || !User->getNumUses())
+ return nullptr;
+
+ unsigned UserOpcode = User->getOpcode();
+ if (UserOpcode != Instruction::Add && UserOpcode != Instruction::FAdd &&
+ UserOpcode != Instruction::Sub && UserOpcode != Instruction::FSub)
+ return nullptr;
+
+ // Subtraction is not commutative. Explicitly, the following transform is
+ // not valid: (-Constant * y) - x -> x + (Constant * y)
+ if (!User->isCommutative() && User->getOperand(1) != I)
+ return nullptr;
+
+ // Change the sign of the constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ I->setOperand(ConstIdx, ConstantInt::get(CI->getContext(), -CI->getValue()));
+ else {
+ ConstantFP *CF = cast<ConstantFP>(C);
+ APFloat Val = CF->getValueAPF();
+ Val.changeSign();
+ I->setOperand(ConstIdx, ConstantFP::get(CF->getContext(), Val));
+ }
+
+ // Canonicalize I to RHS to simplify the next bit of logic. E.g.,
+ // ((-Const*y) + x) -> (x + (-Const*y)).
+ if (User->getOperand(0) == I && User->isCommutative())
+ cast<BinaryOperator>(User)->swapOperands();
+
+ Value *Op0 = User->getOperand(0);
+ Value *Op1 = User->getOperand(1);
+ BinaryOperator *NI;
+ switch(UserOpcode) {
+ default:
+ llvm_unreachable("Unexpected Opcode!");
+ case Instruction::Add:
+ NI = BinaryOperator::CreateSub(Op0, Op1);
+ break;
+ case Instruction::Sub:
+ NI = BinaryOperator::CreateAdd(Op0, Op1);
+ break;
+ case Instruction::FAdd:
+ NI = BinaryOperator::CreateFSub(Op0, Op1);
+ NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
+ break;
+ case Instruction::FSub:
+ NI = BinaryOperator::CreateFAdd(Op0, Op1);
+ NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
+ break;
+ }
+
+ NI->insertBefore(User);
+ NI->setName(User->getName());
+ User->replaceAllUsesWith(NI);
+ NI->setDebugLoc(I->getDebugLoc());
+ RedoInsts.insert(I);
+ MadeChange = true;
+ return NI;
+}
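A tiny sanity check of the identity behind this canonicalization, with illustrative values rather than anything from the patch: flipping the constant's sign and swapping the add/sub user leaves the computed value unchanged.

    #include <cassert>

    // x + (-c)*y == x - c*y   and   x - (-c)*y == x + c*y
    static void negConstCanonicalizationCheck() {
      const int x = 7, y = 3, c = 5;
      assert(x + (-c) * y == x - c * y); // an Add user becomes a Sub
      assert(x - (-c) * y == x + c * y); // a Sub user becomes an Add
    }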
+
/// OptimizeInst - Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
void Reassociate::OptimizeInst(Instruction *I) {
@@ -1810,8 +2064,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (!isa<BinaryOperator>(I))
return;
- if (I->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(I->getOperand(1)))
+ if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
@@ -1824,29 +2077,23 @@ void Reassociate::OptimizeInst(Instruction *I) {
I = NI;
}
- // Floating point binary operators are not associative, but we can still
- // commute (some) of them, to canonicalize the order of their operands.
- // This can potentially expose more CSE opportunities, and makes writing
- // other transformations simpler.
- if ((I->getType()->isFloatingPointTy() || I->getType()->isVectorTy())) {
- // FAdd and FMul can be commuted.
- if (I->getOpcode() != Instruction::FMul &&
- I->getOpcode() != Instruction::FAdd)
- return;
+ // Canonicalize negative constants out of expressions.
+ if (Instruction *Res = canonicalizeNegConstExpr(I))
+ I = Res;
- Value *LHS = I->getOperand(0);
- Value *RHS = I->getOperand(1);
- unsigned LHSRank = getRank(LHS);
- unsigned RHSRank = getRank(RHS);
+ // Commute binary operators, to canonicalize the order of their operands.
+ // This can potentially expose more CSE opportunities, and makes writing other
+ // transformations simpler.
+ if (I->isCommutative())
+ canonicalizeOperands(I);
- // Sort the operands by rank.
- if (RHSRank < LHSRank) {
- I->setOperand(0, RHS);
- I->setOperand(1, LHS);
- }
+ // Don't optimize vector instructions.
+ if (I->getType()->isVectorTy())
+ return;
+ // Don't optimize floating point instructions that don't have unsafe algebra.
+ if (I->getType()->isFloatingPointTy() && !I->hasUnsafeAlgebra())
return;
- }
// Do not reassociate boolean (i1) expressions. We want to preserve the
// original order of evaluation for short-circuited comparisons that
@@ -1877,6 +2124,24 @@ void Reassociate::OptimizeInst(Instruction *I) {
I = NI;
}
}
+ } else if (I->getOpcode() == Instruction::FSub) {
+ if (ShouldBreakUpSubtract(I)) {
+ Instruction *NI = BreakUpSubtract(I);
+ RedoInsts.insert(I);
+ MadeChange = true;
+ I = NI;
+ } else if (BinaryOperator::isFNeg(I)) {
+ // Otherwise, this is a negation. See if the operand is a multiply tree
+ // and if this is not an inner node of a multiply tree.
+ if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
+ (!I->hasOneUse() ||
+ !isReassociableOp(I->user_back(), Instruction::FMul))) {
+ Instruction *NI = LowerNegateToMultiply(I);
+ RedoInsts.insert(I);
+ MadeChange = true;
+ I = NI;
+ }
+ }
}
// If this instruction is an associative binary operator, process it.
@@ -1894,11 +2159,16 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
return;
+ if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
+ cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
+ return;
ReassociateExpression(BO);
}
void Reassociate::ReassociateExpression(BinaryOperator *I) {
+ assert(!I->getType()->isVectorTy() &&
+ "Reassociation of vector instructions is not supported.");
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
@@ -1943,12 +2213,21 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
// this is a multiply tree used only by an add, and the immediate is a -1.
// In this case we reassociate to put the negation on the outside so that we
// can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
- if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
- cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
- isa<ConstantInt>(Ops.back().Op) &&
- cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
- ValueEntry Tmp = Ops.pop_back_val();
- Ops.insert(Ops.begin(), Tmp);
+ if (I->hasOneUse()) {
+ if (I->getOpcode() == Instruction::Mul &&
+ cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Ops.back().Op) &&
+ cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
+ ValueEntry Tmp = Ops.pop_back_val();
+ Ops.insert(Ops.begin(), Tmp);
+ } else if (I->getOpcode() == Instruction::FMul &&
+ cast<Instruction>(I->user_back())->getOpcode() ==
+ Instruction::FAdd &&
+ isa<ConstantFP>(Ops.back().Op) &&
+ cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
+ ValueEntry Tmp = Ops.pop_back_val();
+ Ops.insert(Ops.begin(), Tmp);
+ }
}
DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 90c3520..cfc9a8e 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -214,7 +214,8 @@ public:
///
/// This returns true if the block was not considered live before.
bool MarkBlockExecutable(BasicBlock *BB) {
- if (!BBExecutable.insert(BB)) return false;
+ if (!BBExecutable.insert(BB).second)
+ return false;
DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << '\n');
BBWorkList.push_back(BB); // Add the block to the work list!
return true;
@@ -1010,7 +1011,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
}
Constant *Ptr = Operands[0];
- ArrayRef<Constant *> Indices(Operands.begin() + 1, Operands.end());
+ auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
}
@@ -1107,6 +1108,9 @@ CallOverdefined:
Operands.push_back(State.getConstant());
}
+ if (getValueState(I).isOverdefined())
+ return;
+
// If we can constant fold this, mark the result of the call as a
// constant.
if (Constant *C = ConstantFoldCall(F, Operands, TLI))
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 8c7f253..6135114 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -224,36 +225,26 @@ public:
/// \brief Support for iterating over the slices.
/// @{
typedef SmallVectorImpl<Slice>::iterator iterator;
+ typedef iterator_range<iterator> range;
iterator begin() { return Slices.begin(); }
iterator end() { return Slices.end(); }
typedef SmallVectorImpl<Slice>::const_iterator const_iterator;
+ typedef iterator_range<const_iterator> const_range;
const_iterator begin() const { return Slices.begin(); }
const_iterator end() const { return Slices.end(); }
/// @}
- /// \brief Allow iterating the dead users for this alloca.
- ///
- /// These are instructions which will never actually use the alloca as they
- /// are outside the allocated range. They are safe to replace with undef and
- /// delete.
- /// @{
- typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator;
- dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); }
- dead_user_iterator dead_user_end() const { return DeadUsers.end(); }
- /// @}
+ /// \brief Access the dead users for this alloca.
+ ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
- /// \brief Allow iterating the dead expressions referring to this alloca.
+ /// \brief Access the dead operands referring to this alloca.
///
/// These are operands which cannot actually be used to refer to the
/// alloca as they are outside its range and the user doesn't correct for
/// that. These mostly consist of PHI node inputs and the like which we just
/// need to replace with undef.
- /// @{
- typedef SmallVectorImpl<Use *>::const_iterator dead_op_iterator;
- dead_op_iterator dead_op_begin() const { return DeadOperands.begin(); }
- dead_op_iterator dead_op_end() const { return DeadOperands.end(); }
- /// @}
+ ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
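
The new range/const_range typedefs and the ArrayRef-returning accessors above replace paired begin/end member functions with single objects that can be handed straight to range-based for loops. A plain-C++ sketch of the same shape, using a hand-rolled range type rather than LLVM's iterator_range (illustration only):

#include <cassert>
#include <vector>

// Minimal stand-in for an iterator_range: anything with begin()/end().
template <typename It> struct SimpleRange {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};

int main() {
  std::vector<int> Slices = {1, 2, 3, 4};
  SimpleRange<std::vector<int>::const_iterator> R{Slices.begin() + 1,
                                                  Slices.end()};
  int Sum = 0;
  for (int S : R) // callers iterate a sub-sequence without begin/end pairs
    Sum += S;
  assert(Sum == 9);
  return 0;
}
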
@@ -324,6 +315,15 @@ static Value *foldSelectInst(SelectInst &SI) {
return nullptr;
}
+/// \brief A helper that folds a PHI node or a select.
+static Value *foldPHINodeOrSelectInst(Instruction &I) {
+ if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ // If PN merges together the same value, return that value.
+ return PN->hasConstantValue();
+ }
+ return foldSelectInst(cast<SelectInst>(I));
+}
+
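
Conceptually, the new helper folds a PHI whose incoming values all agree down to that single value (via PHINode::hasConstantValue) and otherwise falls back to the existing select folding. A simplified standalone analogue of those two folds, not the LLVM API (illustration only):

#include <cassert>
#include <optional>
#include <vector>

// "PHI" fold: if every incoming value is the same, the node is that value.
std::optional<int> foldPhi(const std::vector<int> &Incoming) {
  for (int V : Incoming)
    if (V != Incoming.front())
      return std::nullopt; // incoming values disagree; nothing to fold
  return Incoming.front();
}

// "Select" fold: a known condition statically picks one operand.
int foldSelect(bool Cond, int TrueV, int FalseV) { return Cond ? TrueV : FalseV; }

int main() {
  assert(foldPhi({7, 7, 7}) == 7);
  assert(!foldPhi({7, 8, 7}).has_value());
  assert(foldSelect(true, 1, 2) == 1);
  return 0;
}
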
/// \brief Builder for the alloca slices.
///
/// This class builds a set of alloca slices by recursively visiting the uses
@@ -334,7 +334,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
typedef PtrUseVisitor<SliceBuilder> Base;
const uint64_t AllocSize;
- AllocaSlices &S;
+ AllocaSlices &AS;
SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
@@ -343,14 +343,14 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
- SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S)
+ SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
: PtrUseVisitor<SliceBuilder>(DL),
- AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {}
+ AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), AS(AS) {}
private:
void markAsDead(Instruction &I) {
- if (VisitedDeadInsts.insert(&I))
- S.DeadUsers.push_back(&I);
+ if (VisitedDeadInsts.insert(&I).second)
+ AS.DeadUsers.push_back(&I);
}
void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
@@ -361,7 +361,7 @@ private:
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
<< " which has zero size or starts outside of the "
<< AllocSize << " byte alloca:\n"
- << " alloca: " << S.AI << "\n"
+ << " alloca: " << AS.AI << "\n"
<< " use: " << I << "\n");
return markAsDead(I);
}
@@ -379,12 +379,12 @@ private:
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
<< " to remain within the " << AllocSize << " byte alloca:\n"
- << " alloca: " << S.AI << "\n"
+ << " alloca: " << AS.AI << "\n"
<< " use: " << I << "\n");
EndOffset = AllocSize;
}
- S.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
+ AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
}
void visitBitCastInst(BitCastInst &BC) {
@@ -485,7 +485,7 @@ private:
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
<< " which extends past the end of the " << AllocSize
<< " byte alloca:\n"
- << " alloca: " << S.AI << "\n"
+ << " alloca: " << AS.AI << "\n"
<< " use: " << SI << "\n");
return markAsDead(SI);
}
@@ -535,7 +535,7 @@ private:
if (Offset.uge(AllocSize)) {
SmallDenseMap<Instruction *, unsigned>::iterator MTPI = MemTransferSliceMap.find(&II);
if (MTPI != MemTransferSliceMap.end())
- S.Slices[MTPI->second].kill();
+ AS.Slices[MTPI->second].kill();
return markAsDead(II);
}
@@ -558,10 +558,10 @@ private:
bool Inserted;
SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
std::tie(MTPI, Inserted) =
- MemTransferSliceMap.insert(std::make_pair(&II, S.Slices.size()));
+ MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
unsigned PrevIdx = MTPI->second;
if (!Inserted) {
- Slice &PrevP = S.Slices[PrevIdx];
+ Slice &PrevP = AS.Slices[PrevIdx];
// Check if the begin offsets match and this is a non-volatile transfer.
// In that case, we can completely elide the transfer.
@@ -579,7 +579,7 @@ private:
insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
// Check that we ended up with a valid index in the map.
- assert(S.Slices[PrevIdx].getUse()->getUser() == &II &&
+ assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
"Map index doesn't point back to a slice with this user.");
}
@@ -639,64 +639,47 @@ private:
}
for (User *U : I->users())
- if (Visited.insert(cast<Instruction>(U)))
+ if (Visited.insert(cast<Instruction>(U)).second)
Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
return nullptr;
}
- void visitPHINode(PHINode &PN) {
- if (PN.use_empty())
- return markAsDead(PN);
- if (!IsOffsetKnown)
- return PI.setAborted(&PN);
-
- // See if we already have computed info on this node.
- uint64_t &PHISize = PHIOrSelectSizes[&PN];
- if (!PHISize) {
- // This is a new PHI node, check for an unsafe use of the PHI node.
- if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHISize))
- return PI.setAborted(UnsafeI);
- }
-
- // For PHI and select operands outside the alloca, we can't nuke the entire
- // phi or select -- the other side might still be relevant, so we special
- // case them here and use a separate structure to track the operands
- // themselves which should be replaced with undef.
- // FIXME: This should instead be escaped in the event we're instrumenting
- // for address sanitization.
- if (Offset.uge(AllocSize)) {
- S.DeadOperands.push_back(U);
- return;
- }
-
- insertUse(PN, Offset, PHISize);
- }
+ void visitPHINodeOrSelectInst(Instruction &I) {
+ assert(isa<PHINode>(I) || isa<SelectInst>(I));
+ if (I.use_empty())
+ return markAsDead(I);
- void visitSelectInst(SelectInst &SI) {
- if (SI.use_empty())
- return markAsDead(SI);
- if (Value *Result = foldSelectInst(SI)) {
+ // TODO: We could use SimplifyInstruction here to fold PHINodes and
+ // SelectInsts. However, doing so would require changing the current
+ // dead-operand-tracking mechanism. For instance, suppose neither loading
+ // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
+ // trap either. However, if we simply replace %U with undef using the
+ // current dead-operand-tracking mechanism, "load (select undef, undef,
+ // %other)" may trap because the select may return the first operand
+ // "undef".
+ if (Value *Result = foldPHINodeOrSelectInst(I)) {
if (Result == *U)
// If the result of the constant fold will be the pointer, recurse
- // through the select as if we had RAUW'ed it.
- enqueueUsers(SI);
+ // through the PHI/select as if we had RAUW'ed it.
+ enqueueUsers(I);
else
- // Otherwise the operand to the select is dead, and we can replace it
- // with undef.
- S.DeadOperands.push_back(U);
+ // Otherwise the operand to the PHI/select is dead, and we can replace
+ // it with undef.
+ AS.DeadOperands.push_back(U);
return;
}
+
if (!IsOffsetKnown)
- return PI.setAborted(&SI);
+ return PI.setAborted(&I);
// See if we already have computed info on this node.
- uint64_t &SelectSize = PHIOrSelectSizes[&SI];
- if (!SelectSize) {
- // This is a new Select, check for an unsafe use of it.
- if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectSize))
+ uint64_t &Size = PHIOrSelectSizes[&I];
+ if (!Size) {
+ // This is a new PHI/Select, check for an unsafe use of it.
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
return PI.setAborted(UnsafeI);
}
@@ -707,11 +690,19 @@ private:
// FIXME: This should instead be escaped in the event we're instrumenting
// for address sanitization.
if (Offset.uge(AllocSize)) {
- S.DeadOperands.push_back(U);
+ AS.DeadOperands.push_back(U);
return;
}
- insertUse(SI, Offset, SelectSize);
+ insertUse(I, Offset, Size);
+ }
+
+ void visitPHINode(PHINode &PN) {
+ visitPHINodeOrSelectInst(PN);
+ }
+
+ void visitSelectInst(SelectInst &SI) {
+ visitPHINodeOrSelectInst(SI);
}
/// \brief Disable SROA entirely if there are unhandled users of the alloca.
@@ -857,23 +848,18 @@ public:
else
return false;
- } while (Visited.insert(Ptr));
+ } while (Visited.insert(Ptr).second);
return false;
}
void updateDebugInfo(Instruction *Inst) const override {
- for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
- E = DDIs.end(); I != E; ++I) {
- DbgDeclareInst *DDI = *I;
+ for (DbgDeclareInst *DDI : DDIs)
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- }
- for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
- E = DVIs.end(); I != E; ++I) {
- DbgValueInst *DVI = *I;
+ for (DbgValueInst *DVI : DVIs) {
Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
@@ -890,8 +876,8 @@ public:
continue;
}
Instruction *DbgVal =
- DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
- Inst);
+ DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ DIExpression(DVI->getExpression()), Inst);
DbgVal->setDebugLoc(DVI->getDebugLoc());
}
}
@@ -924,6 +910,7 @@ class SROA : public FunctionPass {
LLVMContext *C;
const DataLayout *DL;
DominatorTree *DT;
+ AssumptionTracker *AT;
/// \brief Worklist of alloca instructions to simplify.
///
@@ -983,14 +970,14 @@ private:
friend class PHIOrSelectSpeculator;
friend class AllocaSliceRewriter;
- bool rewritePartition(AllocaInst &AI, AllocaSlices &S,
+ bool rewritePartition(AllocaInst &AI, AllocaSlices &AS,
AllocaSlices::iterator B, AllocaSlices::iterator E,
int64_t BeginOffset, int64_t EndOffset,
ArrayRef<AllocaSlices::iterator> SplitUses);
- bool splitAlloca(AllocaInst &AI, AllocaSlices &S);
+ bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
bool runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
- void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
+ void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
bool promoteAllocas(Function &F);
};
}
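
The deleteDeadInstructions signature change above is the usual LLVM idiom of accepting SmallPtrSetImpl<T> & so that callers may use any inline capacity N without baking it into the callee. A short sketch of that idiom, assuming the LLVM ADT headers (illustration only):

#include "llvm/ADT/SmallPtrSet.h"

// The callee only needs the size-erased base class.
static void noteDeleted(llvm::SmallPtrSetImpl<int *> &Deleted, int *P) {
  Deleted.insert(P);
}

int main() {
  int X = 0, Y = 0;
  llvm::SmallPtrSet<int *, 4> A;  // inline capacity of 4
  llvm::SmallPtrSet<int *, 16> B; // inline capacity of 16
  noteDeleted(A, &X);             // both call the same function,
  noteDeleted(B, &Y);             // regardless of the small size
  return (A.count(&X) + B.count(&Y) == 2) ? 0 : 1;
}
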
@@ -1003,6 +990,7 @@ FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
@@ -1148,10 +1136,12 @@ static void speculatePHINodeLoads(PHINode &PN) {
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
- // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // Get the AA tags and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ.
LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
- MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+
+ AAMDNodes AATags;
+ SomeLoad->getAAMetadata(AATags);
unsigned Align = SomeLoad->getAlignment();
// Rewrite all loads of the PN to use the new PHI.
@@ -1172,8 +1162,8 @@ static void speculatePHINodeLoads(PHINode &PN) {
InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
++NumLoadsSpeculated;
Load->setAlignment(Align);
- if (TBAATag)
- Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ if (AATags)
+ Load->setAAMetadata(AATags);
NewPN->addIncoming(Load, Pred);
}
@@ -1238,12 +1228,15 @@ static void speculateSelectInstLoads(SelectInst &SI) {
IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
- // Transfer alignment and TBAA info if present.
+ // Transfer alignment and AA info if present.
TL->setAlignment(LI->getAlignment());
FL->setAlignment(LI->getAlignment());
- if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
- TL->setMetadata(LLVMContext::MD_tbaa, Tag);
- FL->setMetadata(LLVMContext::MD_tbaa, Tag);
+
+ AAMDNodes Tags;
+ LI->getAAMetadata(Tags);
+ if (Tags) {
+ TL->setAAMetadata(Tags);
+ FL->setAAMetadata(Tags);
}
Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
@@ -1468,7 +1461,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
break;
Offset += GEPOffset;
Ptr = GEP->getPointerOperand();
- if (!Visited.insert(Ptr))
+ if (!Visited.insert(Ptr).second)
break;
}
@@ -1505,7 +1498,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
break;
}
assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
- } while (Visited.insert(Ptr));
+ } while (Visited.insert(Ptr).second);
if (!OffsetPtr) {
if (!Int8Ptr) {
@@ -1621,39 +1614,43 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
///
/// This function is called to test each entry in a partitioning which is slated
/// for a single slice.
-static bool isVectorPromotionViableForSlice(
- const DataLayout &DL, AllocaSlices &S, uint64_t SliceBeginOffset,
- uint64_t SliceEndOffset, VectorType *Ty, uint64_t ElementSize,
- AllocaSlices::const_iterator I) {
+static bool
+isVectorPromotionViableForSlice(const DataLayout &DL, uint64_t SliceBeginOffset,
+ uint64_t SliceEndOffset, VectorType *Ty,
+ uint64_t ElementSize, const Slice &S) {
// First validate the slice offsets.
uint64_t BeginOffset =
- std::max(I->beginOffset(), SliceBeginOffset) - SliceBeginOffset;
+ std::max(S.beginOffset(), SliceBeginOffset) - SliceBeginOffset;
uint64_t BeginIndex = BeginOffset / ElementSize;
if (BeginIndex * ElementSize != BeginOffset ||
BeginIndex >= Ty->getNumElements())
return false;
uint64_t EndOffset =
- std::min(I->endOffset(), SliceEndOffset) - SliceBeginOffset;
+ std::min(S.endOffset(), SliceEndOffset) - SliceBeginOffset;
uint64_t EndIndex = EndOffset / ElementSize;
if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->getNumElements())
return false;
assert(EndIndex > BeginIndex && "Empty vector!");
uint64_t NumElements = EndIndex - BeginIndex;
- Type *SliceTy =
- (NumElements == 1) ? Ty->getElementType()
- : VectorType::get(Ty->getElementType(), NumElements);
+ Type *SliceTy = (NumElements == 1)
+ ? Ty->getElementType()
+ : VectorType::get(Ty->getElementType(), NumElements);
Type *SplitIntTy =
Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
- Use *U = I->getUse();
+ Use *U = S.getUse();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (!I->isSplittable())
+ if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
} else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
@@ -1661,8 +1658,7 @@ static bool isVectorPromotionViableForSlice(
if (LI->isVolatile())
return false;
Type *LTy = LI->getType();
- if (SliceBeginOffset > I->beginOffset() ||
- SliceEndOffset < I->endOffset()) {
+ if (SliceBeginOffset > S.beginOffset() || SliceEndOffset < S.endOffset()) {
assert(LTy->isIntegerTy());
LTy = SplitIntTy;
}
@@ -1672,8 +1668,7 @@ static bool isVectorPromotionViableForSlice(
if (SI->isVolatile())
return false;
Type *STy = SI->getValueOperand()->getType();
- if (SliceBeginOffset > I->beginOffset() ||
- SliceEndOffset < I->endOffset()) {
+ if (SliceBeginOffset > S.beginOffset() || SliceEndOffset < S.endOffset()) {
assert(STy->isIntegerTy());
STy = SplitIntTy;
}
@@ -1695,39 +1690,113 @@ static bool isVectorPromotionViableForSlice(
/// SSA value. We only can ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
-static bool
-isVectorPromotionViable(const DataLayout &DL, Type *AllocaTy, AllocaSlices &S,
+static VectorType *
+isVectorPromotionViable(const DataLayout &DL, Type *AllocaTy,
uint64_t SliceBeginOffset, uint64_t SliceEndOffset,
- AllocaSlices::const_iterator I,
- AllocaSlices::const_iterator E,
+ AllocaSlices::const_range Slices,
ArrayRef<AllocaSlices::iterator> SplitUses) {
- VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
- if (!Ty)
- return false;
+ // Collect the candidate types for vector-based promotion. Also track whether
+ // we have different element types.
+ SmallVector<VectorType *, 4> CandidateTys;
+ Type *CommonEltTy = nullptr;
+ bool HaveCommonEltTy = true;
+ auto CheckCandidateType = [&](Type *Ty) {
+ if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+ CandidateTys.push_back(VTy);
+ if (!CommonEltTy)
+ CommonEltTy = VTy->getElementType();
+ else if (CommonEltTy != VTy->getElementType())
+ HaveCommonEltTy = false;
+ }
+ };
+ CheckCandidateType(AllocaTy);
+ // Consider any loads or stores that are the exact size of the slice.
+ for (const auto &S : Slices)
+ if (S.beginOffset() == SliceBeginOffset &&
+ S.endOffset() == SliceEndOffset) {
+ if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
+ CheckCandidateType(LI->getType());
+ else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
+ CheckCandidateType(SI->getValueOperand()->getType());
+ }
+
+ // If we didn't find a vector type, nothing to do here.
+ if (CandidateTys.empty())
+ return nullptr;
- uint64_t ElementSize = DL.getTypeSizeInBits(Ty->getScalarType());
+ // Remove non-integer vector types if we had multiple common element types.
+ // FIXME: It'd be nice to replace them with integer vector types, but we can't
+ // do that until all the backends are known to produce good code for all
+ // integer vector types.
+ if (!HaveCommonEltTy) {
+ CandidateTys.erase(std::remove_if(CandidateTys.begin(), CandidateTys.end(),
+ [](VectorType *VTy) {
+ return !VTy->getElementType()->isIntegerTy();
+ }),
+ CandidateTys.end());
+
+ // If there were no integer vector types, give up.
+ if (CandidateTys.empty())
+ return nullptr;
- // While the definition of LLVM vectors is bitpacked, we don't support sizes
- // that aren't byte sized.
- if (ElementSize % 8)
- return false;
- assert((DL.getTypeSizeInBits(Ty) % 8) == 0 &&
- "vector size not a multiple of element size?");
- ElementSize /= 8;
+ // Rank the remaining candidate vector types. This is easy because we know
+ // they're all integer vectors. We sort by ascending number of elements.
+ auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
+ assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) &&
+ "Cannot have vector types of different sizes!");
+ assert(RHSTy->getElementType()->isIntegerTy() &&
+ "All non-integer types eliminated!");
+ assert(LHSTy->getElementType()->isIntegerTy() &&
+ "All non-integer types eliminated!");
+ return RHSTy->getNumElements() < LHSTy->getNumElements();
+ };
+ std::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
+ CandidateTys.erase(
+ std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
+ CandidateTys.end());
+ } else {
+// The only way to have the same element type in every vector type is to
+// have the same vector type. Check that and remove all but one.
+#ifndef NDEBUG
+ for (VectorType *VTy : CandidateTys) {
+ assert(VTy->getElementType() == CommonEltTy &&
+ "Unaccounted for element type!");
+ assert(VTy == CandidateTys[0] &&
+ "Different vector types with the same element type!");
+ }
+#endif
+ CandidateTys.resize(1);
+ }
- for (; I != E; ++I)
- if (!isVectorPromotionViableForSlice(DL, S, SliceBeginOffset,
- SliceEndOffset, Ty, ElementSize, I))
- return false;
+ // Try each vector type, and return the one which works.
+ auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
+ uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType());
- for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
- SUE = SplitUses.end();
- SUI != SUE; ++SUI)
- if (!isVectorPromotionViableForSlice(DL, S, SliceBeginOffset,
- SliceEndOffset, Ty, ElementSize, *SUI))
+ // While the definition of LLVM vectors is bitpacked, we don't support sizes
+ // that aren't byte sized.
+ if (ElementSize % 8)
return false;
+ assert((DL.getTypeSizeInBits(VTy) % 8) == 0 &&
+ "vector size not a multiple of element size?");
+ ElementSize /= 8;
- return true;
+ for (const auto &S : Slices)
+ if (!isVectorPromotionViableForSlice(DL, SliceBeginOffset, SliceEndOffset,
+ VTy, ElementSize, S))
+ return false;
+
+ for (const auto &SI : SplitUses)
+ if (!isVectorPromotionViableForSlice(DL, SliceBeginOffset, SliceEndOffset,
+ VTy, ElementSize, *SI))
+ return false;
+
+ return true;
+ };
+ for (VectorType *VTy : CandidateTys)
+ if (CheckVectorTypeForPromotion(VTy))
+ return VTy;
+
+ return nullptr;
}
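
The candidate-type handling above follows a common prune-then-rank pattern: collect candidates, drop the ones that no longer qualify with remove_if, order the survivors with sort, and de-duplicate with unique. A simplified analogue over plain integers standing in for element counts (illustration only):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  // Pretend these are the element counts of candidate integer vector types.
  std::vector<int> Candidates = {8, 3, 4, 8, 16, 4};

  // Prune candidates that fail a predicate (here: odd element counts).
  Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(),
                                  [](int N) { return N % 2 != 0; }),
                   Candidates.end());

  // Rank by ascending element count, then remove duplicates.
  std::sort(Candidates.begin(), Candidates.end());
  Candidates.erase(std::unique(Candidates.begin(), Candidates.end()),
                   Candidates.end());

  assert((Candidates == std::vector<int>{4, 8, 16}));
  return 0;
}
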
/// \brief Test whether a slice of an alloca is valid for integer widening.
@@ -1737,23 +1806,26 @@ isVectorPromotionViable(const DataLayout &DL, Type *AllocaTy, AllocaSlices &S,
static bool isIntegerWideningViableForSlice(const DataLayout &DL,
Type *AllocaTy,
uint64_t AllocBeginOffset,
- uint64_t Size, AllocaSlices &S,
- AllocaSlices::const_iterator I,
+ uint64_t Size,
+ const Slice &S,
bool &WholeAllocaOp) {
- uint64_t RelBegin = I->beginOffset() - AllocBeginOffset;
- uint64_t RelEnd = I->endOffset() - AllocBeginOffset;
+ uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
+ uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
// We can't reasonably handle cases where the load or store extends past
// the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
- Use *U = I->getUse();
+ Use *U = S.getUse();
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
- if (RelBegin == 0 && RelEnd == Size)
+ // Note that we don't count vector loads or stores as whole-alloca
+ // operations which enable integer widening because we would prefer to use
+ // vector widening instead.
+ if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
@@ -1768,7 +1840,10 @@ static bool isIntegerWideningViableForSlice(const DataLayout &DL,
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
- if (RelBegin == 0 && RelEnd == Size)
+ // Note that we don't count vector loads or stores as whole-alloca
+ // operations which enable integer widening because we would prefer to use
+ // vector widening instead.
+ if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
@@ -1782,7 +1857,7 @@ static bool isIntegerWideningViableForSlice(const DataLayout &DL,
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (!I->isSplittable())
+ if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
@@ -1803,9 +1878,8 @@ static bool isIntegerWideningViableForSlice(const DataLayout &DL,
/// promote the resulting alloca.
static bool
isIntegerWideningViable(const DataLayout &DL, Type *AllocaTy,
- uint64_t AllocBeginOffset, AllocaSlices &S,
- AllocaSlices::const_iterator I,
- AllocaSlices::const_iterator E,
+ uint64_t AllocBeginOffset,
+ AllocaSlices::const_range Slices,
ArrayRef<AllocaSlices::iterator> SplitUses) {
uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
// Don't create integer types larger than the maximum bitwidth.
@@ -1831,18 +1905,17 @@ isIntegerWideningViable(const DataLayout &DL, Type *AllocaTy,
// promote due to some other unsplittable entry (which we may make splittable
// later). However, if there are only splittable uses, go ahead and assume
// that we cover the alloca.
- bool WholeAllocaOp = (I != E) ? false : DL.isLegalInteger(SizeInBits);
+ bool WholeAllocaOp =
+ Slices.begin() != Slices.end() ? false : DL.isLegalInteger(SizeInBits);
- for (; I != E; ++I)
+ for (const auto &S : Slices)
if (!isIntegerWideningViableForSlice(DL, AllocaTy, AllocBeginOffset, Size,
- S, I, WholeAllocaOp))
+ S, WholeAllocaOp))
return false;
- for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
- SUE = SplitUses.end();
- SUI != SUE; ++SUI)
+ for (const auto &SI : SplitUses)
if (!isIntegerWideningViableForSlice(DL, AllocaTy, AllocBeginOffset, Size,
- S, *SUI, WholeAllocaOp))
+ *SI, WholeAllocaOp))
return false;
return WholeAllocaOp;
@@ -1991,12 +2064,18 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
const DataLayout &DL;
- AllocaSlices &S;
+ AllocaSlices &AS;
SROA &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
Type *NewAllocaTy;
+ // This is a convenience and flag variable that will be null unless the new
+ // alloca's integer operations should be widened to this integer type due to
+ // passing isIntegerWideningViable above. If it is non-null, the desired
+ // integer type will be stored here for easy access during rewriting.
+ IntegerType *IntTy;
+
// If we are rewriting an alloca partition which can be written as pure
// vector operations, we stash extra information here. When VecTy is
// non-null, we have some strict guarantees about the rewritten alloca:
@@ -2010,12 +2089,6 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
Type *ElementTy;
uint64_t ElementSize;
- // This is a convenience and flag variable that will be null unless the new
- // alloca's integer operations should be widened to this integer type due to
- // passing isIntegerWideningViable above. If it is non-null, the desired
- // integer type will be stored here for easy access during rewriting.
- IntegerType *IntTy;
-
// The original offset of the slice currently being rewritten relative to
// the original alloca.
uint64_t BeginOffset, EndOffset;
@@ -2038,25 +2111,25 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
IRBuilderTy IRB;
public:
- AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &S, SROA &Pass,
+ AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
uint64_t NewAllocaBeginOffset,
- uint64_t NewAllocaEndOffset, bool IsVectorPromotable,
- bool IsIntegerPromotable,
+ uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
+ VectorType *PromotableVecTy,
SmallPtrSetImpl<PHINode *> &PHIUsers,
SmallPtrSetImpl<SelectInst *> &SelectUsers)
- : DL(DL), S(S), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
+ : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
- VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : nullptr),
- ElementTy(VecTy ? VecTy->getElementType() : nullptr),
- ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
IntTy(IsIntegerPromotable
? Type::getIntNTy(
NewAI.getContext(),
DL.getTypeSizeInBits(NewAI.getAllocatedType()))
: nullptr),
+ VecTy(PromotableVecTy),
+ ElementTy(VecTy ? VecTy->getElementType() : nullptr),
+ ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
@@ -2065,8 +2138,7 @@ public:
"Only multiple-of-8 sized vector elements are viable");
++NumVectorized;
}
- assert((!IsVectorPromotable && !IsIntegerPromotable) ||
- IsVectorPromotable != IsIntegerPromotable);
+ assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
}
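
With the IntTy declaration moved ahead of VecTy, the constructor's mem-initializer list above is reordered to match. As a general C++ reminder (not something the patch itself states), members are always initialized in declaration order regardless of the list order, and compilers warn (-Wreorder) when the two disagree; a tiny illustration:

struct S {
  int A;
  int B; // depends on A below
  // Members initialize in declaration order, so A is ready when B needs it;
  // keeping the mem-initializer list in the same order (as the patch does for
  // IntTy/VecTy) avoids -Wreorder warnings and confusion.
  S(int X) : A(X), B(A + 1) {}
};

int main() {
  S s(1);
  return (s.A == 1 && s.B == 2) ? 0 : 1;
}
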
bool visit(AllocaSlices::const_iterator I) {
@@ -2413,6 +2485,7 @@ private:
if (!VecTy && !IntTy &&
(BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset ||
+ SliceSize != DL.getTypeStoreSize(AllocaTy) ||
!AllocaTy->isSingleValueType() ||
!DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) ||
DL.getTypeSizeInBits(ScalarTy)%8 != 0)) {
@@ -2535,10 +2608,11 @@ private:
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memcpy.
- bool EmitMemCpy
- = !VecTy && !IntTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset ||
- !NewAI.getAllocatedType()->isSingleValueType());
+ bool EmitMemCpy =
+ !VecTy && !IntTy &&
+ (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
+ SliceSize != DL.getTypeStoreSize(NewAI.getAllocatedType()) ||
+ !NewAI.getAllocatedType()->isSingleValueType());
// If we're just going to emit a memcpy, the alloca hasn't changed, and the
// size hasn't been shrunk based on analysis of the viable range, this is
@@ -2697,7 +2771,10 @@ private:
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
IRBuilderTy PtrBuilder(IRB);
- PtrBuilder.SetInsertPoint(OldPtr);
+ if (isa<PHINode>(OldPtr))
+ PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
+ else
+ PtrBuilder.SetInsertPoint(OldPtr);
PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
@@ -2784,7 +2861,7 @@ private:
/// This uses a set to de-duplicate users.
void enqueueUsers(Instruction &I) {
for (Use &U : I.uses())
- if (Visited.insert(U.getUser()))
+ if (Visited.insert(U.getUser()).second)
Queue.push_back(&U);
}
@@ -3104,7 +3181,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
-bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
+bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
AllocaSlices::iterator B, AllocaSlices::iterator E,
int64_t BeginOffset, int64_t EndOffset,
ArrayRef<AllocaSlices::iterator> SplitUses) {
@@ -3130,12 +3207,16 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
SliceTy = ArrayType::get(Type::getInt8Ty(*C), SliceSize);
assert(DL->getTypeAllocSize(SliceTy) >= SliceSize);
- bool IsVectorPromotable = isVectorPromotionViable(
- *DL, SliceTy, S, BeginOffset, EndOffset, B, E, SplitUses);
+ bool IsIntegerPromotable = isIntegerWideningViable(
+ *DL, SliceTy, BeginOffset, AllocaSlices::const_range(B, E), SplitUses);
- bool IsIntegerPromotable =
- !IsVectorPromotable &&
- isIntegerWideningViable(*DL, SliceTy, BeginOffset, S, B, E, SplitUses);
+ VectorType *VecTy =
+ IsIntegerPromotable
+ ? nullptr
+ : isVectorPromotionViable(*DL, SliceTy, BeginOffset, EndOffset,
+ AllocaSlices::const_range(B, E), SplitUses);
+ if (VecTy)
+ SliceTy = VecTy;
// Check for the case where we're going to rewrite to a new alloca of the
// exact same type as the original, and with the same access offsets. In that
@@ -3161,8 +3242,9 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// the alloca's alignment unconstrained.
if (Alignment <= DL->getABITypeAlignment(SliceTy))
Alignment = 0;
- NewAI = new AllocaInst(SliceTy, nullptr, Alignment,
- AI.getName() + ".sroa." + Twine(B - S.begin()), &AI);
+ NewAI =
+ new AllocaInst(SliceTy, nullptr, Alignment,
+ AI.getName() + ".sroa." + Twine(B - AS.begin()), &AI);
++NumNewAllocas;
}
@@ -3178,21 +3260,19 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
SmallPtrSet<PHINode *, 8> PHIUsers;
SmallPtrSet<SelectInst *, 8> SelectUsers;
- AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
- EndOffset, IsVectorPromotable,
- IsIntegerPromotable, PHIUsers, SelectUsers);
+ AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, BeginOffset,
+ EndOffset, IsIntegerPromotable, VecTy, PHIUsers,
+ SelectUsers);
bool Promotable = true;
- for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
- SUE = SplitUses.end();
- SUI != SUE; ++SUI) {
+ for (auto & SplitUse : SplitUses) {
DEBUG(dbgs() << " rewriting split ");
- DEBUG(S.printSlice(dbgs(), *SUI, ""));
- Promotable &= Rewriter.visit(*SUI);
+ DEBUG(AS.printSlice(dbgs(), SplitUse, ""));
+ Promotable &= Rewriter.visit(SplitUse);
++NumUses;
}
for (AllocaSlices::iterator I = B; I != E; ++I) {
DEBUG(dbgs() << " rewriting ");
- DEBUG(S.printSlice(dbgs(), I, ""));
+ DEBUG(AS.printSlice(dbgs(), I, ""));
Promotable &= Rewriter.visit(I);
++NumUses;
}
@@ -3230,14 +3310,10 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// If we have either PHIs or Selects to speculate, add them to those
// worklists and re-queue the new alloca so that we promote in on the
// next iteration.
- for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
- E = PHIUsers.end();
- I != E; ++I)
- SpeculatablePHIs.insert(*I);
- for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
- E = SelectUsers.end();
- I != E; ++I)
- SpeculatableSelects.insert(*I);
+ for (PHINode *PHIUser : PHIUsers)
+ SpeculatablePHIs.insert(PHIUser);
+ for (SelectInst *SelectUser : SelectUsers)
+ SpeculatableSelects.insert(SelectUser);
Worklist.insert(NewAI);
}
} else {
@@ -3275,17 +3351,15 @@ removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
// Recompute the max. While this is linear, so is remove_if.
MaxSplitUseEndOffset = 0;
- for (SmallVectorImpl<AllocaSlices::iterator>::iterator
- SUI = SplitUses.begin(),
- SUE = SplitUses.end();
- SUI != SUE; ++SUI)
- MaxSplitUseEndOffset = std::max((*SUI)->endOffset(), MaxSplitUseEndOffset);
+ for (AllocaSlices::iterator SplitUse : SplitUses)
+ MaxSplitUseEndOffset =
+ std::max(SplitUse->endOffset(), MaxSplitUseEndOffset);
}
/// \brief Walks the slices of an alloca and form partitions based on them,
/// rewriting each of their uses.
-bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
- if (S.begin() == S.end())
+bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
+ if (AS.begin() == AS.end())
return false;
unsigned NumPartitions = 0;
@@ -3293,9 +3367,10 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
SmallVector<AllocaSlices::iterator, 4> SplitUses;
uint64_t MaxSplitUseEndOffset = 0;
- uint64_t BeginOffset = S.begin()->beginOffset();
+ uint64_t BeginOffset = AS.begin()->beginOffset();
- for (AllocaSlices::iterator SI = S.begin(), SJ = std::next(SI), SE = S.end();
+ for (AllocaSlices::iterator SI = AS.begin(), SJ = std::next(SI),
+ SE = AS.end();
SI != SE; SI = SJ) {
uint64_t MaxEndOffset = SI->endOffset();
@@ -3333,8 +3408,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
// we'll have to rewrite uses and erase old split uses.
if (BeginOffset < MaxEndOffset) {
// Rewrite a sequence of overlapping slices.
- Changed |=
- rewritePartition(AI, S, SI, SJ, BeginOffset, MaxEndOffset, SplitUses);
+ Changed |= rewritePartition(AI, AS, SI, SJ, BeginOffset, MaxEndOffset,
+ SplitUses);
++NumPartitions;
removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset, MaxEndOffset);
@@ -3373,8 +3448,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
uint64_t PostSplitEndOffset =
SJ == SE ? MaxSplitUseEndOffset : SJ->beginOffset();
- Changed |= rewritePartition(AI, S, SJ, SJ, MaxEndOffset, PostSplitEndOffset,
- SplitUses);
+ Changed |= rewritePartition(AI, AS, SJ, SJ, MaxEndOffset,
+ PostSplitEndOffset, SplitUses);
++NumPartitions;
if (SJ == SE)
@@ -3437,38 +3512,34 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
- AllocaSlices S(*DL, AI);
- DEBUG(S.print(dbgs()));
- if (S.isEscaped())
+ AllocaSlices AS(*DL, AI);
+ DEBUG(AS.print(dbgs()));
+ if (AS.isEscaped())
return Changed;
// Delete all the dead users of this alloca before splitting and rewriting it.
- for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(),
- DE = S.dead_user_end();
- DI != DE; ++DI) {
+ for (Instruction *DeadUser : AS.getDeadUsers()) {
// Free up everything used by this instruction.
- for (Use &DeadOp : (*DI)->operands())
+ for (Use &DeadOp : DeadUser->operands())
clobberUse(DeadOp);
// Now replace the uses of this instruction.
- (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
+ DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType()));
// And mark it for deletion.
- DeadInsts.insert(*DI);
+ DeadInsts.insert(DeadUser);
Changed = true;
}
- for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(),
- DE = S.dead_op_end();
- DO != DE; ++DO) {
- clobberUse(**DO);
+ for (Use *DeadOp : AS.getDeadOperands()) {
+ clobberUse(*DeadOp);
Changed = true;
}
// No slices to split. Leave the dead alloca for a later pass to clean up.
- if (S.begin() == S.end())
+ if (AS.begin() == AS.end())
return Changed;
- Changed |= splitAlloca(AI, S);
+ Changed |= splitAlloca(AI, AS);
DEBUG(dbgs() << " Speculating PHIs\n");
while (!SpeculatablePHIs.empty())
@@ -3490,7 +3561,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
-void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
+void SROA::deleteDeadInstructions(SmallPtrSetImpl<AllocaInst*> &DeletedAllocas) {
while (!DeadInsts.empty()) {
Instruction *I = DeadInsts.pop_back_val();
DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
@@ -3515,9 +3586,9 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
static void enqueueUsersInWorklist(Instruction &I,
SmallVectorImpl<Instruction *> &Worklist,
- SmallPtrSet<Instruction *, 8> &Visited) {
+ SmallPtrSetImpl<Instruction *> &Visited) {
for (User *U : I.users())
- if (Visited.insert(cast<Instruction>(U)))
+ if (Visited.insert(cast<Instruction>(U)).second)
Worklist.push_back(cast<Instruction>(U));
}
@@ -3537,7 +3608,7 @@ bool SROA::promoteAllocas(Function &F) {
if (DT && !ForceSSAUpdater) {
DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT);
+ PromoteMemToReg(PromotableAllocas, *DT, nullptr, AT);
PromotableAllocas.clear();
return true;
}
@@ -3619,6 +3690,7 @@ bool SROA::runOnFunction(Function &F) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ AT = &getAnalysis<AssumptionTracker>();
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
@@ -3662,6 +3734,7 @@ bool SROA::runOnFunction(Function &F) {
}
void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AssumptionTracker>();
if (RequiresDomTree)
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 73c97ff..179bbf7 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -26,7 +26,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
@@ -42,15 +41,14 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/LineIterator.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
+using namespace sampleprof;
#define DEBUG_TYPE "sample-profile"
@@ -65,76 +63,48 @@ static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
"sample block/edge weights through the CFG."));
namespace {
-/// \brief Represents the relative location of an instruction.
-///
-/// Instruction locations are specified by the line offset from the
-/// beginning of the function (marked by the line where the function
-/// header is) and the discriminator value within that line.
-///
-/// The discriminator value is useful to distinguish instructions
-/// that are on the same line but belong to different basic blocks
-/// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
-struct InstructionLocation {
- InstructionLocation(int L, unsigned D) : LineOffset(L), Discriminator(D) {}
- int LineOffset;
- unsigned Discriminator;
-};
-}
-
-namespace llvm {
-template <> struct DenseMapInfo<InstructionLocation> {
- typedef DenseMapInfo<int> OffsetInfo;
- typedef DenseMapInfo<unsigned> DiscriminatorInfo;
- static inline InstructionLocation getEmptyKey() {
- return InstructionLocation(OffsetInfo::getEmptyKey(),
- DiscriminatorInfo::getEmptyKey());
- }
- static inline InstructionLocation getTombstoneKey() {
- return InstructionLocation(OffsetInfo::getTombstoneKey(),
- DiscriminatorInfo::getTombstoneKey());
- }
- static inline unsigned getHashValue(InstructionLocation Val) {
- return DenseMapInfo<std::pair<int, unsigned>>::getHashValue(
- std::pair<int, unsigned>(Val.LineOffset, Val.Discriminator));
- }
- static inline bool isEqual(InstructionLocation LHS, InstructionLocation RHS) {
- return LHS.LineOffset == RHS.LineOffset &&
- LHS.Discriminator == RHS.Discriminator;
- }
-};
-}
-
-namespace {
-typedef DenseMap<InstructionLocation, unsigned> BodySampleMap;
typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
typedef std::pair<BasicBlock *, BasicBlock *> Edge;
typedef DenseMap<Edge, unsigned> EdgeWeightMap;
typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
-/// \brief Representation of the runtime profile for a function.
+/// \brief Sample profile pass.
///
-/// This data structure contains the runtime profile for a given
-/// function. It contains the total number of samples collected
-/// in the function and a map of samples collected in every statement.
-class SampleFunctionProfile {
+/// This pass reads profile data from the file specified by
+/// -sample-profile-file and annotates every affected function with the
+/// profile information found in that file.
+class SampleProfileLoader : public FunctionPass {
public:
- SampleFunctionProfile()
- : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(nullptr),
- PDT(nullptr), LI(nullptr), Ctx(nullptr) {}
+ // Class identification, replacement for typeinfo
+ static char ID;
+
+ SampleProfileLoader(StringRef Name = SampleProfileFile)
+ : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr),
+ Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) {
+ initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override;
+
+ void dump() { Reader->dump(); }
+
+ const char *getPassName() const override { return "Sample profile pass"; }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ }
+protected:
unsigned getFunctionLoc(Function &F);
- bool emitAnnotations(Function &F, DominatorTree *DomTree,
- PostDominatorTree *PostDomTree, LoopInfo *Loops);
+ bool emitAnnotations(Function &F);
unsigned getInstWeight(Instruction &I);
- unsigned getBlockWeight(BasicBlock *B);
- void addTotalSamples(unsigned Num) { TotalSamples += Num; }
- void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; }
- void addBodySamples(int LineOffset, unsigned Discriminator, unsigned Num) {
- assert(LineOffset >= 0);
- BodySamples[InstructionLocation(LineOffset, Discriminator)] += Num;
- }
- void print(raw_ostream &OS);
+ unsigned getBlockWeight(BasicBlock *BB);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
@@ -147,32 +117,11 @@ public:
unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F);
- bool empty() { return BodySamples.empty(); }
-protected:
- /// \brief Total number of samples collected inside this function.
- ///
- /// Samples are cumulative, they include all the samples collected
- /// inside this function and all its inlined callees.
- unsigned TotalSamples;
-
- /// \brief Total number of samples collected at the head of the function.
- /// FIXME: Use head samples to estimate a cold/hot attribute for the function.
- unsigned TotalHeadSamples;
-
- /// \brief Line number for the function header. Used to compute relative
- /// line numbers from the absolute line LOCs found in instruction locations.
- /// The relative line numbers are needed to address the samples from the
- /// profile file.
+ /// \brief Line number for the function header. Used to compute absolute
+ /// line numbers from the relative line numbers found in the profile.
unsigned HeaderLineno;
- /// \brief Map line offsets to collected samples.
- ///
- /// Each entry in this map contains the number of samples
- /// collected at the corresponding line offset. All line locations
- /// are an offset from the start of the function.
- BodySampleMap BodySamples;
-
/// \brief Map basic blocks to their computed weights.
///
/// The weight of a basic block is defined to be the maximum
@@ -212,105 +161,12 @@ protected:
/// \brief LLVM context holding the debug data we need.
LLVMContext *Ctx;
-};
-
-/// \brief Sample-based profile reader.
-///
-/// Each profile contains sample counts for all the functions
-/// executed. Inside each function, statements are annotated with the
-/// collected samples on all the instructions associated with that
-/// statement.
-///
-/// For this to produce meaningful data, the program needs to be
-/// compiled with some debug information (at minimum, line numbers:
-/// -gline-tables-only). Otherwise, it will be impossible to match IR
-/// instructions to the line numbers collected by the profiler.
-///
-/// From the profile file, we are interested in collecting the
-/// following information:
-///
-/// * A list of functions included in the profile (mangled names).
-///
-/// * For each function F:
-/// 1. The total number of samples collected in F.
-///
-/// 2. The samples collected at each line in F. To provide some
-/// protection against source code shuffling, line numbers should
-/// be relative to the start of the function.
-class SampleModuleProfile {
-public:
- SampleModuleProfile(const Module &M, StringRef F)
- : Profiles(0), Filename(F), M(M) {}
-
- void dump();
- bool loadText();
- void loadNative() { llvm_unreachable("not implemented"); }
- void printFunctionProfile(raw_ostream &OS, StringRef FName);
- void dumpFunctionProfile(StringRef FName);
- SampleFunctionProfile &getProfile(const Function &F) {
- return Profiles[F.getName()];
- }
- /// \brief Report a parse error message.
- void reportParseError(int64_t LineNumber, Twine Msg) const {
- DiagnosticInfoSampleProfile Diag(Filename.data(), LineNumber, Msg);
- M.getContext().diagnose(Diag);
- }
-
-protected:
- /// \brief Map every function to its associated profile.
- ///
- /// The profile of every function executed at runtime is collected
- /// in the structure SampleFunctionProfile. This maps function objects
- /// to their corresponding profiles.
- StringMap<SampleFunctionProfile> Profiles;
-
- /// \brief Path name to the file holding the profile data.
- ///
- /// The format of this file is defined by each profiler
- /// independently. If possible, the profiler should have a text
- /// version of the profile format to be used in constructing test
- /// cases and debugging.
- StringRef Filename;
-
- /// \brief Module being compiled. Used mainly to access the current
- /// LLVM context for diagnostics.
- const Module &M;
-};
-
-/// \brief Sample profile pass.
-///
-/// This pass reads profile data from the file specified by
-/// -sample-profile-file and annotates every affected function with the
-/// profile information found in that file.
-class SampleProfileLoader : public FunctionPass {
-public:
- // Class identification, replacement for typeinfo
- static char ID;
-
- SampleProfileLoader(StringRef Name = SampleProfileFile)
- : FunctionPass(ID), Profiler(), Filename(Name), ProfileIsValid(false) {
- initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
- }
-
- bool doInitialization(Module &M) override;
-
- void dump() { Profiler->dump(); }
-
- const char *getPassName() const override { return "Sample profile pass"; }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<LoopInfo>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTree>();
- }
-
-protected:
/// \brief Profile reader object.
- std::unique_ptr<SampleModuleProfile> Profiler;
+ std::unique_ptr<SampleProfileReader> Reader;
+
+ /// \brief Samples collected for the body of this function.
+ FunctionSamples *Samples;
/// \brief Name of the profile file to load.
StringRef Filename;
@@ -320,26 +176,11 @@ protected:
};
}
-/// \brief Print this function profile on stream \p OS.
-///
-/// \param OS Stream to emit the output to.
-void SampleFunctionProfile::print(raw_ostream &OS) {
- OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
- << " sampled lines\n";
- for (BodySampleMap::const_iterator SI = BodySamples.begin(),
- SE = BodySamples.end();
- SI != SE; ++SI)
- OS << "\tline offset: " << SI->first.LineOffset
- << ", discriminator: " << SI->first.Discriminator
- << ", number of samples: " << SI->second << "\n";
- OS << "\n";
-}
-
/// \brief Print the weight of edge \p E on stream \p OS.
///
/// \param OS Stream to emit the output to.
/// \param E Edge to print.
-void SampleFunctionProfile::printEdgeWeight(raw_ostream &OS, Edge E) {
+void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
OS << "weight[" << E.first->getName() << "->" << E.second->getName()
<< "]: " << EdgeWeights[E] << "\n";
}
@@ -348,8 +189,8 @@ void SampleFunctionProfile::printEdgeWeight(raw_ostream &OS, Edge E) {
///
/// \param OS Stream to emit the output to.
/// \param BB Block to print.
-void SampleFunctionProfile::printBlockEquivalence(raw_ostream &OS,
- BasicBlock *BB) {
+void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
+ BasicBlock *BB) {
BasicBlock *Equiv = EquivalenceClass[BB];
OS << "equivalence[" << BB->getName()
<< "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
@@ -359,174 +200,10 @@ void SampleFunctionProfile::printBlockEquivalence(raw_ostream &OS,
///
/// \param OS Stream to emit the output to.
/// \param BB Block to print.
-void SampleFunctionProfile::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
+void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
}
-/// \brief Print the function profile for \p FName on stream \p OS.
-///
-/// \param OS Stream to emit the output to.
-/// \param FName Name of the function to print.
-void SampleModuleProfile::printFunctionProfile(raw_ostream &OS,
- StringRef FName) {
- OS << "Function: " << FName << ":\n";
- Profiles[FName].print(OS);
-}
-
-/// \brief Dump the function profile for \p FName.
-///
-/// \param FName Name of the function to print.
-void SampleModuleProfile::dumpFunctionProfile(StringRef FName) {
- printFunctionProfile(dbgs(), FName);
-}
-
-/// \brief Dump all the function profiles found.
-void SampleModuleProfile::dump() {
- for (StringMap<SampleFunctionProfile>::const_iterator I = Profiles.begin(),
- E = Profiles.end();
- I != E; ++I)
- dumpFunctionProfile(I->getKey());
-}
-
-/// \brief Load samples from a text file.
-///
-/// The file contains a list of samples for every function executed at
-/// runtime. Each function profile has the following format:
-///
-/// function1:total_samples:total_head_samples
-/// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
-/// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
-/// ...
-/// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
-///
-/// Function names must be mangled in order for the profile loader to
-/// match them in the current translation unit. The two numbers in the
-/// function header specify how many total samples were accumulated in
-/// the function (first number), and the total number of samples accumulated
-/// at the prologue of the function (second number). This head sample
-/// count provides an indicator of how frequent is the function invoked.
-///
-/// Each sampled line may contain several items. Some are optional
-/// (marked below):
-///
-/// a- Source line offset. This number represents the line number
-/// in the function where the sample was collected. The line number
-/// is always relative to the line where symbol of the function
-/// is defined. So, if the function has its header at line 280,
-/// the offset 13 is at line 293 in the file.
-///
-/// b- [OPTIONAL] Discriminator. This is used if the sampled program
-/// was compiled with DWARF discriminator support
-/// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators)
-///
-/// c- Number of samples. This is the number of samples collected by
-/// the profiler at this source location.
-///
-/// d- [OPTIONAL] Potential call targets and samples. If present, this
-/// line contains a call instruction. This models both direct and
-/// indirect calls. Each called target is listed together with the
-/// number of samples. For example,
-///
-/// 130: 7 foo:3 bar:2 baz:7
-///
-/// The above means that at relative line offset 130 there is a
-/// call instruction that calls one of foo(), bar() and baz(). With
-/// baz() being the relatively more frequent call target.
-///
-/// FIXME: This is currently unhandled, but it has a lot of
-/// potential for aiding the inliner.
-///
-///
-/// Since this is a flat profile, a function that shows up more than
-/// once gets all its samples aggregated across all its instances.
-///
-/// FIXME: flat profiles are too imprecise to provide good optimization
-/// opportunities. Convert them to context-sensitive profile.
-///
-/// This textual representation is useful to generate unit tests and
-/// for debugging purposes, but it should not be used to generate
-/// profiles for large programs, as the representation is extremely
-/// inefficient.
-///
-/// \returns true if the file was loaded successfully, false otherwise.
-bool SampleModuleProfile::loadText() {
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
- MemoryBuffer::getFile(Filename);
- if (std::error_code EC = BufferOrErr.getError()) {
- std::string Msg(EC.message());
- M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
- return false;
- }
- std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
- line_iterator LineIt(*Buffer, '#');
-
- // Read the profile of each function. Since each function may be
- // mentioned more than once, and we are collecting flat profiles,
- // accumulate samples as we parse them.
- Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$");
- Regex LineSample("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$");
- while (!LineIt.is_at_eof()) {
- // Read the header of each function.
- //
- // Note that for function identifiers we are actually expecting
- // mangled names, but we may not always get them. This happens when
- // the compiler decides not to emit the function (e.g., it was inlined
- // and removed). In this case, the binary will not have the linkage
- // name for the function, so the profiler will emit the function's
- // unmangled name, which may contain characters like ':' and '>' in its
- // name (member functions, templates, etc).
- //
- // The only requirement we place on the identifier, then, is that it
- // should not begin with a number.
- SmallVector<StringRef, 3> Matches;
- if (!HeadRE.match(*LineIt, &Matches)) {
- reportParseError(LineIt.line_number(),
- "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
- return false;
- }
- assert(Matches.size() == 4);
- StringRef FName = Matches[1];
- unsigned NumSamples, NumHeadSamples;
- Matches[2].getAsInteger(10, NumSamples);
- Matches[3].getAsInteger(10, NumHeadSamples);
- Profiles[FName] = SampleFunctionProfile();
- SampleFunctionProfile &FProfile = Profiles[FName];
- FProfile.addTotalSamples(NumSamples);
- FProfile.addHeadSamples(NumHeadSamples);
- ++LineIt;
-
- // Now read the body. The body of the function ends when we reach
- // EOF or when we see the start of the next function.
- while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) {
- if (!LineSample.match(*LineIt, &Matches)) {
- reportParseError(
- LineIt.line_number(),
- "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt);
- return false;
- }
- assert(Matches.size() == 5);
- unsigned LineOffset, NumSamples, Discriminator = 0;
- Matches[1].getAsInteger(10, LineOffset);
- if (Matches[2] != "")
- Matches[2].getAsInteger(10, Discriminator);
- Matches[3].getAsInteger(10, NumSamples);
-
- // FIXME: Handle called targets (in Matches[4]).
-
- // When dealing with instruction weights, we use the value
- // zero to indicate the absence of a sample. If we read an
- // actual zero from the profile file, return it as 1 to
- // avoid the confusion later on.
- if (NumSamples == 0)
- NumSamples = 1;
- FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
- ++LineIt;
- }
- }
-
- return true;
-}
-
/// \brief Get the weight for an instruction.
///
/// The "weight" of an instruction \p Inst is the number of samples
@@ -538,7 +215,7 @@ bool SampleModuleProfile::loadText() {
/// \param Inst Instruction to query.
///
/// \returns The profiled weight of I.
-unsigned SampleFunctionProfile::getInstWeight(Instruction &Inst) {
+unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
DebugLoc DLoc = Inst.getDebugLoc();
unsigned Lineno = DLoc.getLine();
if (Lineno < HeaderLineno)
@@ -547,8 +224,7 @@ unsigned SampleFunctionProfile::getInstWeight(Instruction &Inst) {
DILocation DIL(DLoc.getAsMDNode(*Ctx));
int LOffset = Lineno - HeaderLineno;
unsigned Discriminator = DIL.getDiscriminator();
- unsigned Weight =
- BodySamples.lookup(InstructionLocation(LOffset, Discriminator));
+ unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
<< " (line offset: " << LOffset << "." << Discriminator
<< " - weight: " << Weight << ")\n");
@@ -557,24 +233,24 @@ unsigned SampleFunctionProfile::getInstWeight(Instruction &Inst) {
/// \brief Compute the weight of a basic block.
///
-/// The weight of basic block \p B is the maximum weight of all the
-/// instructions in B. The weight of \p B is computed and cached in
+/// The weight of basic block \p BB is the maximum weight of all the
+/// instructions in BB. The weight of \p BB is computed and cached in
/// the BlockWeights map.
///
-/// \param B The basic block to query.
+/// \param BB The basic block to query.
///
-/// \returns The computed weight of B.
-unsigned SampleFunctionProfile::getBlockWeight(BasicBlock *B) {
- // If we've computed B's weight before, return it.
+/// \returns The computed weight of BB.
+unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
+ // If we've computed BB's weight before, return it.
std::pair<BlockWeightMap::iterator, bool> Entry =
- BlockWeights.insert(std::make_pair(B, 0));
+ BlockWeights.insert(std::make_pair(BB, 0));
if (!Entry.second)
return Entry.first->second;
- // Otherwise, compute and cache B's weight.
+ // Otherwise, compute and cache BB's weight.
unsigned Weight = 0;
- for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
- unsigned InstWeight = getInstWeight(*I);
+ for (auto &I : BB->getInstList()) {
+ unsigned InstWeight = getInstWeight(I);
if (InstWeight > Weight)
Weight = InstWeight;
}
@@ -588,13 +264,13 @@ unsigned SampleFunctionProfile::getBlockWeight(BasicBlock *B) {
/// the weights of every basic block in the CFG.
///
/// \param F The function to query.
-bool SampleFunctionProfile::computeBlockWeights(Function &F) {
+bool SampleProfileLoader::computeBlockWeights(Function &F) {
bool Changed = false;
DEBUG(dbgs() << "Block weights\n");
- for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
- unsigned Weight = getBlockWeight(B);
+ for (auto &BB : F) {
+ unsigned Weight = getBlockWeight(&BB);
Changed |= (Weight > 0);
- DEBUG(printBlockWeight(dbgs(), B));
+ DEBUG(printBlockWeight(dbgs(), &BB));
}
return Changed;
@@ -623,16 +299,13 @@ bool SampleFunctionProfile::computeBlockWeights(Function &F) {
/// \param DomTree Opposite dominator tree. If \p Descendants is filled
/// with blocks from \p BB1's dominator tree, then
/// this is the post-dominator tree, and vice versa.
-void SampleFunctionProfile::findEquivalencesFor(
+void SampleProfileLoader::findEquivalencesFor(
BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
DominatorTreeBase<BasicBlock> *DomTree) {
- for (SmallVectorImpl<BasicBlock *>::iterator I = Descendants.begin(),
- E = Descendants.end();
- I != E; ++I) {
- BasicBlock *BB2 = *I;
+ for (auto *BB2 : Descendants) {
bool IsDomParent = DomTree->dominates(BB2, BB1);
bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
- if (BB1 != BB2 && VisitedBlocks.insert(BB2) && IsDomParent &&
+ if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
IsInSameLoop) {
EquivalenceClass[BB2] = BB1;
@@ -660,12 +333,12 @@ void SampleFunctionProfile::findEquivalencesFor(
/// dominates B2, B2 post-dominates B1 and both are in the same loop.
///
/// \param F The function to query.
-void SampleFunctionProfile::findEquivalenceClasses(Function &F) {
+void SampleProfileLoader::findEquivalenceClasses(Function &F) {
SmallVector<BasicBlock *, 8> DominatedBBs;
DEBUG(dbgs() << "\nBlock equivalence classes\n");
// Find equivalence sets based on dominance and post-dominance information.
- for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
- BasicBlock *BB1 = B;
+ for (auto &BB : F) {
+ BasicBlock *BB1 = &BB;
// Compute BB1's equivalence class once.
if (EquivalenceClass.count(BB1)) {
@@ -712,8 +385,8 @@ void SampleFunctionProfile::findEquivalenceClasses(Function &F) {
// each equivalence class has the largest weight, assign that weight
// to all the blocks in that equivalence class.
DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
- for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
- BasicBlock *BB = B;
+ for (auto &BI : F) {
+ BasicBlock *BB = &BI;
BasicBlock *EquivBB = EquivalenceClass[BB];
if (BB != EquivBB)
BlockWeights[BB] = BlockWeights[EquivBB];
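[Editor's note] The equivalence test applied in findEquivalencesFor above reduces to a single predicate over dominance, post-dominance and loop membership. A sketch of that predicate, assuming the usual analysis headers (this is an illustration, not the pass's code):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/PostDominators.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    // Illustrative sketch: two blocks share an equivalence class (and thus a
    // weight) when BB1 dominates BB2, BB2 post-dominates BB1, and both live in
    // the same loop.
    static bool areEquivalent(BasicBlock *BB1, BasicBlock *BB2,
                              DominatorTree &DT, PostDominatorTree &PDT,
                              LoopInfo &LI) {
      return DT.dominates(BB1, BB2) && PDT.dominates(BB2, BB1) &&
             LI.getLoopFor(BB1) == LI.getLoopFor(BB2);
    }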
@@ -731,8 +404,8 @@ void SampleFunctionProfile::findEquivalenceClasses(Function &F) {
/// \param UnknownEdge Set if E has not been visited before.
///
/// \returns E's weight, if known. Otherwise, return 0.
-unsigned SampleFunctionProfile::visitEdge(Edge E, unsigned *NumUnknownEdges,
- Edge *UnknownEdge) {
+unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
+ Edge *UnknownEdge) {
if (!VisitedEdges.count(E)) {
(*NumUnknownEdges)++;
*UnknownEdge = E;
@@ -753,11 +426,11 @@ unsigned SampleFunctionProfile::visitEdge(Edge E, unsigned *NumUnknownEdges,
/// \param F Function to process.
///
/// \returns True if new weights were assigned to edges or blocks.
-bool SampleFunctionProfile::propagateThroughEdges(Function &F) {
+bool SampleProfileLoader::propagateThroughEdges(Function &F) {
bool Changed = false;
DEBUG(dbgs() << "\nPropagation through edges\n");
- for (Function::iterator BI = F.begin(), EI = F.end(); BI != EI; ++BI) {
- BasicBlock *BB = BI;
+ for (auto &BI : F) {
+ BasicBlock *BB = &BI;
// Visit all the predecessor and successor edges to determine
// which ones have a weight assigned already. Note that it doesn't
@@ -771,16 +444,16 @@ bool SampleFunctionProfile::propagateThroughEdges(Function &F) {
if (i == 0) {
// First, visit all predecessor edges.
- for (size_t I = 0; I < Predecessors[BB].size(); I++) {
- Edge E = std::make_pair(Predecessors[BB][I], BB);
+ for (auto *Pred : Predecessors[BB]) {
+ Edge E = std::make_pair(Pred, BB);
TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
if (E.first == E.second)
SelfReferentialEdge = E;
}
} else {
// On the second round, visit all successor edges.
- for (size_t I = 0; I < Successors[BB].size(); I++) {
- Edge E = std::make_pair(BB, Successors[BB][I]);
+ for (auto *Succ : Successors[BB]) {
+ Edge E = std::make_pair(BB, Succ);
TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
}
}
@@ -821,7 +494,7 @@ bool SampleFunctionProfile::propagateThroughEdges(Function &F) {
<< " known. Set weight for block: ";
printBlockWeight(dbgs(), BB););
}
- if (VisitedBlocks.insert(BB))
+ if (VisitedBlocks.insert(BB).second)
Changed = true;
} else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
// If there is a single unknown edge and the block has been
@@ -857,9 +530,9 @@ bool SampleFunctionProfile::propagateThroughEdges(Function &F) {
///
/// We are interested in unique edges. If a block B1 has multiple
/// edges to another block B2, we only add a single B1->B2 edge.
-void SampleFunctionProfile::buildEdges(Function &F) {
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *B1 = I;
+void SampleProfileLoader::buildEdges(Function &F) {
+ for (auto &BI : F) {
+ BasicBlock *B1 = &BI;
// Add predecessors for B1.
SmallPtrSet<BasicBlock *, 16> Visited;
@@ -867,7 +540,7 @@ void SampleFunctionProfile::buildEdges(Function &F) {
llvm_unreachable("Found a stale predecessors list in a basic block.");
for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
BasicBlock *B2 = *PI;
- if (Visited.insert(B2))
+ if (Visited.insert(B2).second)
Predecessors[B1].push_back(B2);
}
@@ -877,7 +550,7 @@ void SampleFunctionProfile::buildEdges(Function &F) {
llvm_unreachable("Found a stale successors list in a basic block.");
for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
BasicBlock *B2 = *SI;
- if (Visited.insert(B2))
+ if (Visited.insert(B2).second)
Successors[B1].push_back(B2);
}
}
@@ -885,22 +558,22 @@ void SampleFunctionProfile::buildEdges(Function &F) {
/// \brief Propagate weights into edges
///
-/// The following rules are applied to every block B in the CFG:
+/// The following rules are applied to every block BB in the CFG:
///
-/// - If B has a single predecessor/successor, then the weight
+/// - If BB has a single predecessor/successor, then the weight
/// of that edge is the weight of the block.
///
/// - If all incoming or outgoing edges are known except one, and the
/// weight of the block is already known, the weight of the unknown
/// edge will be the weight of the block minus the sum of all the known
-/// edges. If the sum of all the known edges is larger than B's weight,
+/// edges. If the sum of all the known edges is larger than BB's weight,
/// we set the unknown edge weight to zero.
///
/// - If there is a self-referential edge, and the weight of the block is
/// known, the weight for that edge is set to the weight of the block
/// minus the weight of the other incoming edges to that block (if
/// known).
-void SampleFunctionProfile::propagateWeights(Function &F) {
+void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
unsigned i = 0;
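[Editor's note] The second rule in the comment above (one unknown edge, block weight already known) is a clamped subtraction. Standalone, with none of the pass's data structures:

    #include <cstdint>
    #include <vector>

    // Hypothetical helper: given a block's weight and the weights of its known
    // incoming (or outgoing) edges, compute the weight of the single unknown edge.
    uint64_t unknownEdgeWeight(uint64_t BlockWeight,
                               const std::vector<uint64_t> &KnownEdgeWeights) {
      uint64_t KnownSum = 0;
      for (uint64_t W : KnownEdgeWeights)
        KnownSum += W;
      // If the known edges already exceed the block weight, clamp to zero.
      return KnownSum >= BlockWeight ? 0 : BlockWeight - KnownSum;
    }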
@@ -920,9 +593,9 @@ void SampleFunctionProfile::propagateWeights(Function &F) {
// edge weights computed during propagation.
DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
MDBuilder MDB(F.getContext());
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *B = I;
- TerminatorInst *TI = B->getTerminator();
+ for (auto &BI : F) {
+ BasicBlock *BB = &BI;
+ TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
continue;
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
@@ -934,7 +607,7 @@ void SampleFunctionProfile::propagateWeights(Function &F) {
bool AllWeightsZero = true;
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
- Edge E = std::make_pair(B, Succ);
+ Edge E = std::make_pair(BB, Succ);
unsigned Weight = EdgeWeights[E];
DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
Weights.push_back(Weight);
@@ -965,22 +638,17 @@ void SampleFunctionProfile::propagateWeights(Function &F) {
///
/// \returns the line number where \p F is defined. If it returns 0,
/// it means that there is no debug information available for \p F.
-unsigned SampleFunctionProfile::getFunctionLoc(Function &F) {
- NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
- if (CUNodes) {
- for (unsigned I = 0, E1 = CUNodes->getNumOperands(); I != E1; ++I) {
- DICompileUnit CU(CUNodes->getOperand(I));
- DIArray Subprograms = CU.getSubprograms();
- for (unsigned J = 0, E2 = Subprograms.getNumElements(); J != E2; ++J) {
- DISubprogram Subprogram(Subprograms.getElement(J));
- if (Subprogram.describes(&F))
- return Subprogram.getLineNumber();
- }
- }
- }
+unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
+ DISubprogram S = getDISubprogram(&F);
+ if (S.isSubprogram())
+ return S.getLineNumber();
+  // If we could not find the start of \p F, emit a diagnostic to inform the user
+ // about the missed opportunity.
F.getContext().diagnose(DiagnosticInfoSampleProfile(
- "No debug information found in function " + F.getName()));
+ "No debug information found in function " + F.getName() +
+ ": Function profile not used",
+ DS_Warning));
return 0;
}
@@ -1002,15 +670,15 @@ unsigned SampleFunctionProfile::getFunctionLoc(Function &F) {
///
/// 3- Propagation of block weights into edges. This uses a simple
/// propagation heuristic. The following rules are applied to every
-/// block B in the CFG:
+/// block BB in the CFG:
///
-/// - If B has a single predecessor/successor, then the weight
+/// - If BB has a single predecessor/successor, then the weight
/// of that edge is the weight of the block.
///
/// - If all the edges are known except one, and the weight of the
/// block is already known, the weight of the unknown edge will
/// be the weight of the block minus the sum of all the known
-/// edges. If the sum of all the known edges is larger than B's weight,
+/// edges. If the sum of all the known edges is larger than BB's weight,
/// we set the unknown edge weight to zero.
///
/// - If there is a self-referential edge, and the weight of the block is
@@ -1028,14 +696,12 @@ unsigned SampleFunctionProfile::getFunctionLoc(Function &F) {
/// work here.
///
/// Once all the branch weights are computed, we emit the MD_prof
-/// metadata on B using the computed values for each of its branches.
+/// metadata on BB using the computed values for each of its branches.
///
/// \param F The function to query.
///
/// \returns true if \p F was modified. Returns false, otherwise.
-bool SampleFunctionProfile::emitAnnotations(Function &F, DominatorTree *DomTree,
- PostDominatorTree *PostDomTree,
- LoopInfo *Loops) {
+bool SampleProfileLoader::emitAnnotations(Function &F) {
bool Changed = false;
// Initialize invariants used during computation and propagation.
@@ -1045,10 +711,6 @@ bool SampleFunctionProfile::emitAnnotations(Function &F, DominatorTree *DomTree,
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << HeaderLineno << "\n");
- DT = DomTree;
- PDT = PostDomTree;
- LI = Loops;
- Ctx = &F.getParent()->getContext();
// Compute basic block weights.
Changed |= computeBlockWeights(F);
@@ -1075,8 +737,14 @@ INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
bool SampleProfileLoader::doInitialization(Module &M) {
- Profiler.reset(new SampleModuleProfile(M, Filename));
- ProfileIsValid = Profiler->loadText();
+ auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
+ return false;
+ }
+ Reader = std::move(ReaderOrErr.get());
+ ProfileIsValid = (Reader->read() == sampleprof_error::success);
return true;
}
@@ -1091,11 +759,13 @@ FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnFunction(Function &F) {
if (!ProfileIsValid)
return false;
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- PostDominatorTree *PDT = &getAnalysis<PostDominatorTree>();
- LoopInfo *LI = &getAnalysis<LoopInfo>();
- SampleFunctionProfile &FunctionProfile = Profiler->getProfile(F);
- if (!FunctionProfile.empty())
- return FunctionProfile.emitAnnotations(F, DT, PDT, LI);
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ PDT = &getAnalysis<PostDominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ Ctx = &F.getParent()->getContext();
+ Samples = Reader->getSamplesFor(F);
+ if (!Samples->empty())
+ return emitAnnotations(F);
return false;
}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index edf012d..a16e9e2 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -28,6 +28,7 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
+ initializeAlignmentFromAssumptionsPass(Registry);
initializeSampleProfileLoaderPass(Registry);
initializeConstantHoistingPass(Registry);
initializeConstantPropagationPass(Registry);
@@ -38,6 +39,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDSEPass(Registry);
initializeGVNPass(Registry);
initializeEarlyCSEPass(Registry);
+ initializeFlattenCFGPassPass(Registry);
initializeIndVarSimplifyPass(Registry);
initializeJumpThreadingPass(Registry);
initializeLICMPass(Registry);
@@ -52,6 +54,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerAtomicPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
+ initializeMergedLoadStoreMotionPass(Registry);
initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
@@ -76,6 +79,10 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAggressiveDCEPass());
}
+void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAlignmentFromAssumptionsPass());
+}
+
void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCFGSimplificationPass());
}
@@ -92,6 +99,10 @@ void LLVMAddGVNPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createGVNPass());
}
+void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createMergedLoadStoreMotionPass());
+}
+
void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createIndVarSimplifyPass());
}
@@ -140,6 +151,10 @@ void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPartiallyInlineLibCallsPass());
}
+void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLowerSwitchPass());
+}
+
void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPromoteMemoryToRegisterPass());
}
@@ -198,6 +213,10 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
}
+void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScopedNoAliasAAPass());
+}
+
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBasicAliasAnalysisPass());
}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e2a24a7..f7fa917 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
@@ -197,6 +198,7 @@ namespace {
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
@@ -214,6 +216,7 @@ namespace {
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.setPreservesCFG();
}
};
@@ -225,12 +228,14 @@ char SROA_SSAUp::ID = 0;
INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
"Scalar Replacement of Aggregates (DT)", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
"Scalar Replacement of Aggregates (DT)", false, false)
INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
"Scalar Replacement of Aggregates (SSAUp)", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
"Scalar Replacement of Aggregates (SSAUp)", false, false)
@@ -1119,9 +1124,9 @@ public:
} else {
continue;
}
- Instruction *DbgVal =
- DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
- Inst);
+ Instruction *DbgVal = DIB->insertDbgValueIntrinsic(
+ Arg, 0, DIVariable(DVI->getVariable()),
+ DIExpression(DVI->getExpression()), Inst);
DbgVal->setDebugLoc(DVI->getDebugLoc());
}
}
@@ -1333,12 +1338,15 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
LoadInst *FalseLoad =
Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
- // Transfer alignment and TBAA info if present.
+ // Transfer alignment and AA info if present.
TrueLoad->setAlignment(LI->getAlignment());
FalseLoad->setAlignment(LI->getAlignment());
- if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
- TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
- FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+
+ AAMDNodes Tags;
+ LI->getAAMetadata(Tags);
+ if (Tags) {
+ TrueLoad->setAAMetadata(Tags);
+ FalseLoad->setAAMetadata(Tags);
}
Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
@@ -1364,10 +1372,12 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
PN->getName()+".ld", PN);
- // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // Get the AA tags and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ, it doesn't matter.
LoadInst *SomeLoad = cast<LoadInst>(PN->user_back());
- MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+
+ AAMDNodes AATags;
+ SomeLoad->getAAMetadata(AATags);
unsigned Align = SomeLoad->getAlignment();
// Rewrite all loads of the PN to use the new PHI.
@@ -1389,7 +1399,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
PN->getName() + "." + Pred->getName(),
Pred->getTerminator());
Load->setAlignment(Align);
- if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ if (AATags) Load->setAAMetadata(AATags);
}
NewPN->addIncoming(Load, Pred);
@@ -1407,6 +1417,7 @@ bool SROA::performPromotion(Function &F) {
DominatorTree *DT = nullptr;
if (HasDomTree)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
DIBuilder DIB(*F.getParent());
@@ -1425,7 +1436,7 @@ bool SROA::performPromotion(Function &F) {
if (Allocas.empty()) break;
if (HasDomTree)
- PromoteMemToReg(Allocas, *DT);
+ PromoteMemToReg(Allocas, *DT, nullptr, AT);
else {
SSAUpdater SSA;
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
@@ -1658,7 +1669,7 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
// If we've already checked this PHI, don't do it again.
if (PHINode *PN = dyn_cast<PHINode>(I))
- if (!Info.CheckedPHIs.insert(PN))
+ if (!Info.CheckedPHIs.insert(PN).second)
return;
for (User *U : I->users()) {
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 7a73f11..6036c09 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -150,6 +150,16 @@ public:
bool visitLoadInst(LoadInst &);
bool visitStoreInst(StoreInst &);
+ static void registerOptions() {
+ // This is disabled by default because having separate loads and stores
+ // makes it more likely that the -combiner-alias-analysis limits will be
+ // reached.
+ OptionRegistry::registerOption<bool, Scalarizer,
+ &Scalarizer::ScalarizeLoadStore>(
+ "scalarize-load-store",
+ "Allow the scalarizer pass to scalarize loads and store", false);
+ }
+
private:
Scatterer scatter(Instruction *, Value *);
void gather(Instruction *, const ValueVector &);
@@ -164,19 +174,14 @@ private:
GatherList Gathered;
unsigned ParallelLoopAccessMDKind;
const DataLayout *DL;
+ bool ScalarizeLoadStore;
};
char Scalarizer::ID = 0;
} // end anonymous namespace
-// This is disabled by default because having separate loads and stores makes
-// it more likely that the -combiner-alias-analysis limits will be reached.
-static cl::opt<bool> ScalarizeLoadStore
- ("scalarize-load-store", cl::Hidden, cl::init(false),
- cl::desc("Allow the scalarizer pass to scalarize loads and store"));
-
-INITIALIZE_PASS(Scalarizer, "scalarizer", "Scalarize vector operations",
- false, false)
+INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
+ "Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
ValueVector *cachePtr)
@@ -236,7 +241,9 @@ Value *Scatterer::operator[](unsigned I) {
bool Scalarizer::doInitialization(Module &M) {
ParallelLoopAccessMDKind =
- M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
+ M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
+ ScalarizeLoadStore =
+ M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>();
return false;
}
@@ -312,6 +319,8 @@ bool Scalarizer::canTransferMetadata(unsigned Tag) {
|| Tag == LLVMContext::MD_fpmath
|| Tag == LLVMContext::MD_tbaa_struct
|| Tag == LLVMContext::MD_invariant_load
+ || Tag == LLVMContext::MD_alias_scope
+ || Tag == LLVMContext::MD_noalias
|| Tag == ParallelLoopAccessMDKind);
}
@@ -322,8 +331,10 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
Op->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned I = 0, E = CV.size(); I != E; ++I) {
if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
- for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
- MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *>>::iterator
+ MI = MDs.begin(),
+ ME = MDs.end();
+ MI != ME; ++MI)
if (canTransferMetadata(MI->first))
New->setMetadata(MI->first, MI->second);
New->setDebugLoc(Op->getDebugLoc());
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 62f2026..6157746 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -79,6 +79,81 @@
// ld.global.f32 %f3, [%rl6+128]; // much better
// ld.global.f32 %f4, [%rl6+132]; // much better
//
+// Another improvement enabled by the LowerGEP flag is to lower a GEP with
+// multiple indices to either multiple GEPs with a single index or arithmetic
+// operations (depending on whether the target uses alias analysis in codegen).
+// Such a transformation can have the following benefits:
+// (1) It can always extract constants in the indices of structure type.
+// (2) After such lowering, there are more optimization opportunities such as
+//     CSE, LICM and CGP.
+//
+// E.g. The following GEPs have multiple indices:
+// BB1:
+// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
+// load %p
+// ...
+// BB2:
+//   %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
+// load %p2
+// ...
+//
+// We cannot do CSE on the common part related to index "i64 %i". Lowering
+// GEPs can achieve such goals.
+// If the target does not use alias analysis in codegen, this pass will
+// lower a GEP with multiple indices into arithmetic operations:
+// BB1:
+// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
+// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
+// %3 = add i64 %1, %2 ; CSE opportunity
+// %4 = mul i64 %j1, length_of_struct
+// %5 = add i64 %3, %4
+// %6 = add i64 %3, struct_field_3 ; Constant offset
+// %p = inttoptr i64 %6 to i32*
+// load %p
+// ...
+// BB2:
+// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
+// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
+// %9 = add i64 %7, %8 ; CSE opportunity
+// %10 = mul i64 %j2, length_of_struct
+// %11 = add i64 %9, %10
+// %12 = add i64 %11, struct_field_2 ; Constant offset
+//   %p2 = inttoptr i64 %12 to i32*
+// load %p2
+// ...
+//
+// If the target uses alias analysis in codegen, this pass will lower a GEP
+// with multiple indices into multiple GEPs with a single index:
+// BB1:
+// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
+// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
+// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
+// %4 = mul i64 %j1, length_of_struct
+// %5 = getelementptr i8* %3, i64 %4
+// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
+// %p = bitcast i8* %6 to i32*
+// load %p
+// ...
+// BB2:
+// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
+// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
+// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
+// %10 = mul i64 %j2, length_of_struct
+// %11 = getelementptr i8* %9, i64 %10
+// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
+// %p2 = bitcast i8* %12 to i32*
+// load %p2
+// ...
+//
+// Lowering GEPs can also benefit other passes such as LICM and CGP.
+// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
+// indices if one of the indices is variant. If we lower such a GEP into invariant
+// parts and variant parts, LICM can hoist/sink those invariant parts.
+// CGP (CodeGen Prepare) tries to sink address calculations that match the
+// target's addressing modes. A GEP with multiple indices may not match and will
+// not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
+// them. So we end up with a better addressing mode.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -92,6 +167,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
using namespace llvm;
@@ -117,18 +195,17 @@ namespace {
/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
class ConstantOffsetExtractor {
public:
- /// Extracts a constant offset from the given GEP index. It outputs the
- /// numeric value of the extracted constant offset (0 if failed), and a
+ /// Extracts a constant offset from the given GEP index. It returns the
/// new index representing the remainder (equal to the original index minus
- /// the constant offset).
+ /// the constant offset), or nullptr if we cannot extract a constant offset.
/// \p Idx The given GEP index
- /// \p NewIdx The new index to replace (output)
/// \p DL The datalayout of the module
/// \p GEP The given GEP
- static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
- GetElementPtrInst *GEP);
- /// Looks for a constant offset without extracting it. The meaning of the
- /// arguments and the return value are the same as Extract.
+ static Value *Extract(Value *Idx, const DataLayout *DL,
+ GetElementPtrInst *GEP);
+ /// Looks for a constant offset from the given GEP index without extracting
+  /// it. It returns the numeric value of the constant offset found (0 if
+  /// none). The meanings of the arguments are the same as in Extract.
static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
private:
@@ -228,7 +305,9 @@ class ConstantOffsetExtractor {
class SeparateConstOffsetFromGEP : public FunctionPass {
public:
static char ID;
- SeparateConstOffsetFromGEP() : FunctionPass(ID) {
+ SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr,
+ bool LowerGEP = false)
+ : FunctionPass(ID), TM(TM), LowerGEP(LowerGEP) {
initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
}
@@ -251,10 +330,29 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
/// Tries to split the given GEP into a variadic base and a constant offset,
/// and returns true if the splitting succeeds.
bool splitGEP(GetElementPtrInst *GEP);
- /// Finds the constant offset within each index, and accumulates them. This
- /// function only inspects the GEP without changing it. The output
- /// NeedsExtraction indicates whether we can extract a non-zero constant
- /// offset from any index.
+ /// Lower a GEP with multiple indices into multiple GEPs with a single index.
+ /// Function splitGEP already split the original GEP into a variadic part and
+ /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
+ /// variadic part into a set of GEPs with a single index and applies
+ /// AccumulativeByteOffset to it.
+ /// \p Variadic The variadic part of the original GEP.
+ /// \p AccumulativeByteOffset The constant offset.
+ void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
+ int64_t AccumulativeByteOffset);
+ /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
+ /// Function splitGEP already split the original GEP into a variadic part and
+ /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
+ /// variadic part into a set of arithmetic operations and applies
+ /// AccumulativeByteOffset to it.
+ /// \p Variadic The variadic part of the original GEP.
+ /// \p AccumulativeByteOffset The constant offset.
+ void lowerToArithmetics(GetElementPtrInst *Variadic,
+ int64_t AccumulativeByteOffset);
+  /// Finds the constant offset within each index and accumulates them. If
+  /// LowerGEP is true, it looks into indices of both sequential and structure
+  /// types; otherwise it only looks into sequential indices. The output
+  /// NeedsExtraction indicates whether a non-zero constant offset was found.
int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
/// Canonicalize array indices to pointer-size integers. This helps to
/// simplify the logic of splitting a GEP. For example, if a + b is a
@@ -272,25 +370,12 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
///
/// Verified in @i32_add in split-gep.ll
bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
- /// For each array index that is in the form of zext(a), convert it to sext(a)
- /// if we can prove zext(a) <= max signed value of typeof(a). We prefer
- /// sext(a) to zext(a), because in the special case where x + y >= 0 and
- /// (x >= 0 or y >= 0), function CanTraceInto can split sext(x + y),
- /// while no such case exists for zext(x + y).
- ///
- /// Note that
- /// zext(x + y) = zext(x) + zext(y)
- /// is wrong, e.g.,
- /// zext i32(UINT_MAX + 1) to i64 !=
- /// (zext i32 UINT_MAX to i64) + (zext i32 1 to i64)
- ///
- /// Returns true if the module changes.
- ///
- /// Verified in @inbounds_zext_add in split-gep.ll and @sum_of_array3 in
- /// split-gep-and-gvn.ll
- bool convertInBoundsZExtToSExt(GetElementPtrInst *GEP);
const DataLayout *DL;
+ const TargetMachine *TM;
+ /// Whether to lower a GEP with multiple indices into arithmetic operations or
+ /// multiple GEPs with a single index.
+ bool LowerGEP;
};
} // anonymous namespace
@@ -306,8 +391,10 @@ INITIALIZE_PASS_END(
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
-FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
- return new SeparateConstOffsetFromGEP();
+FunctionPass *
+llvm::createSeparateConstOffsetFromGEPPass(const TargetMachine *TM,
+ bool LowerGEP) {
+ return new SeparateConstOffsetFromGEP(TM, LowerGEP);
}
bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
@@ -536,8 +623,13 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
//
// Replacing the "or" with "add" is fine, because
// a | (b + 5) = a + (b + 5) = (a + b) + 5
- return BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1),
- BO->getName(), IP);
+ if (OpNo == 0) {
+ return BinaryOperator::CreateAdd(NextInChain, TheOther, BO->getName(),
+ IP);
+ } else {
+ return BinaryOperator::CreateAdd(TheOther, NextInChain, BO->getName(),
+ IP);
+ }
}
// We can reuse BO in this case, because the new expression shares the same
@@ -554,19 +646,17 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
return BO;
}
-int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
- const DataLayout *DL,
- GetElementPtrInst *GEP) {
+Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
+ GetElementPtrInst *GEP) {
ConstantOffsetExtractor Extractor(DL, GEP);
// Find a non-zero constant offset first.
APInt ConstantOffset =
Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds());
- if (ConstantOffset != 0) {
- // Separates the constant offset from the GEP index.
- NewIdx = Extractor.rebuildWithoutConstOffset();
- }
- return ConstantOffset.getSExtValue();
+ if (ConstantOffset == 0)
+ return nullptr;
+ // Separates the constant offset from the GEP index.
+ return Extractor.rebuildWithoutConstOffset();
}
int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
@@ -613,43 +703,6 @@ bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
return Changed;
}
-bool
-SeparateConstOffsetFromGEP::convertInBoundsZExtToSExt(GetElementPtrInst *GEP) {
- if (!GEP->isInBounds())
- return false;
-
- // TODO: consider alloca
- GlobalVariable *UnderlyingObject =
- dyn_cast<GlobalVariable>(GEP->getPointerOperand());
- if (UnderlyingObject == nullptr)
- return false;
-
- uint64_t ObjectSize =
- DL->getTypeAllocSize(UnderlyingObject->getType()->getElementType());
- gep_type_iterator GTI = gep_type_begin(*GEP);
- bool Changed = false;
- for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
- ++I, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
- if (ZExtInst *Extended = dyn_cast<ZExtInst>(*I)) {
- unsigned SrcBitWidth =
- cast<IntegerType>(Extended->getSrcTy())->getBitWidth();
- // For GEP operand zext(a), if a <= max signed value of typeof(a), then
- // the sign bit of a is zero and sext(a) = zext(a). Because the GEP is
- // in bounds, we know a <= ObjectSize, so the condition can be reduced
- // to ObjectSize <= max signed value of typeof(a).
- if (ObjectSize <=
- APInt::getSignedMaxValue(SrcBitWidth).getZExtValue()) {
- *I = new SExtInst(Extended->getOperand(0), Extended->getType(),
- Extended->getName(), GEP);
- Changed = true;
- }
- }
- }
- }
- return Changed;
-}
-
int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
bool &NeedsExtraction) {
@@ -669,11 +722,116 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
AccumulativeByteOffset +=
ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
}
+ } else if (LowerGEP) {
+ StructType *StTy = cast<StructType>(*GTI);
+ uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
+ // Skip field 0 as the offset is always 0.
+ if (Field != 0) {
+ NeedsExtraction = true;
+ AccumulativeByteOffset +=
+ DL->getStructLayout(StTy)->getElementOffset(Field);
+ }
}
}
return AccumulativeByteOffset;
}
+void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
+ GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
+ IRBuilder<> Builder(Variadic);
+ Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
+
+ Type *I8PtrTy =
+ Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
+ Value *ResultPtr = Variadic->getOperand(0);
+ if (ResultPtr->getType() != I8PtrTy)
+ ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+
+ gep_type_iterator GTI = gep_type_begin(*Variadic);
+ // Create an ugly GEP for each sequential index. We don't create GEPs for
+ // structure indices, as they are accumulated in the constant offset index.
+ for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ Value *Idx = Variadic->getOperand(I);
+ // Skip zero indices.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
+ if (CI->isZero())
+ continue;
+
+ APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
+ DL->getTypeAllocSize(GTI.getIndexedType()));
+ // Scale the index by element size.
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ Idx = Builder.CreateShl(
+ Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
+ } else {
+ Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
+ }
+ }
+ // Create an ugly GEP with a single index for each index.
+ ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep");
+ }
+ }
+
+ // Create a GEP with the constant offset index.
+ if (AccumulativeByteOffset != 0) {
+ Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
+ ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep");
+ }
+ if (ResultPtr->getType() != Variadic->getType())
+ ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
+
+ Variadic->replaceAllUsesWith(ResultPtr);
+ Variadic->eraseFromParent();
+}
+
+void
+SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
+ int64_t AccumulativeByteOffset) {
+ IRBuilder<> Builder(Variadic);
+ Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
+
+ Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
+ gep_type_iterator GTI = gep_type_begin(*Variadic);
+  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
+  // don't create arithmetic operations for structure indices, as they are
+  // accumulated into the constant offset.
+ for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ Value *Idx = Variadic->getOperand(I);
+ // Skip zero indices.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
+ if (CI->isZero())
+ continue;
+
+ APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
+ DL->getTypeAllocSize(GTI.getIndexedType()));
+ // Scale the index by element size.
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ Idx = Builder.CreateShl(
+ Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
+ } else {
+ Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
+ }
+ }
+ // Create an ADD for each index.
+ ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
+ }
+ }
+
+ // Create an ADD for the constant offset index.
+ if (AccumulativeByteOffset != 0) {
+ ResultPtr = Builder.CreateAdd(
+ ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
+ }
+
+ ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
+ Variadic->replaceAllUsesWith(ResultPtr);
+ Variadic->eraseFromParent();
+}
+
bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// Skip vector GEPs.
if (GEP->getType()->isVectorTy())
@@ -684,41 +842,49 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (GEP->hasAllConstantIndices())
return false;
- bool Changed = false;
- Changed |= canonicalizeArrayIndicesToPointerSize(GEP);
- Changed |= convertInBoundsZExtToSExt(GEP);
+ bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);
bool NeedsExtraction;
int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
if (!NeedsExtraction)
return Changed;
- // Before really splitting the GEP, check whether the backend supports the
- // addressing mode we are about to produce. If no, this splitting probably
- // won't be beneficial.
- TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
- if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
- /*BaseGV=*/nullptr, AccumulativeByteOffset,
- /*HasBaseReg=*/true, /*Scale=*/0)) {
- return Changed;
+  // If LowerGEP is disabled, check whether the backend supports the addressing
+  // mode we are about to produce before really splitting the GEP. If not, this
+  // splitting probably won't be beneficial.
+  // If LowerGEP is enabled, even if the extracted constant offset cannot match
+  // the addressing mode, we can still optimize the other lowered parts of the
+  // variable indices. Therefore, we don't check for addressing modes in that
+  // case.
+ if (!LowerGEP) {
+ TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
+ /*BaseGV=*/nullptr, AccumulativeByteOffset,
+ /*HasBaseReg=*/true, /*Scale=*/0)) {
+ return Changed;
+ }
}
- // Remove the constant offset in each GEP index. The resultant GEP computes
- // the variadic base.
+ // Remove the constant offset in each sequential index. The resultant GEP
+ // computes the variadic base.
+ // Notice that we don't remove struct field indices here. If LowerGEP is
+ // disabled, a structure index is not accumulated and we still use the old
+ // one. If LowerGEP is enabled, a structure index is accumulated in the
+ // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
+ // handle the constant offset and won't need a new structure index.
gep_type_iterator GTI = gep_type_begin(*GEP);
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (isa<SequentialType>(*GTI)) {
- Value *NewIdx = nullptr;
- // Tries to extract a constant offset from this GEP index.
- int64_t ConstantOffset =
- ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
- if (ConstantOffset != 0) {
- assert(NewIdx != nullptr &&
- "ConstantOffset != 0 implies NewIdx is set");
+ // Splits this GEP index into a variadic part and a constant offset, and
+ // uses the variadic part as the new index.
+ Value *NewIdx =
+ ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP);
+ if (NewIdx != nullptr) {
GEP->setOperand(I, NewIdx);
}
}
}
+
// Clear the inbounds attribute because the new index may be off-bound.
// e.g.,
//
@@ -740,6 +906,21 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// possible. GEPs with inbounds are more friendly to alias analysis.
GEP->setIsInBounds(false);
+ // Lowers a GEP to either GEPs with a single index or arithmetic operations.
+ if (LowerGEP) {
+ // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
+ // arithmetic operations if the target uses alias analysis in codegen.
+ if (TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())
+ lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
+ else
+ lowerToArithmetics(GEP, AccumulativeByteOffset);
+ return true;
+ }
+
+ // No need to create another GEP if the accumulative byte offset is 0.
+ if (AccumulativeByteOffset == 0)
+ return true;
+
// Offsets the base with the accumulative byte offset.
//
// %gep ; the base
@@ -771,16 +952,16 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Instruction *NewGEP = GEP->clone();
NewGEP->insertBefore(GEP);
- uint64_t ElementTypeSizeOfGEP =
- DL->getTypeAllocSize(GEP->getType()->getElementType());
+  // Per the C standard, signed / unsigned = unsigned and signed % unsigned =
+  // unsigned. Since AccumulativeByteOffset is signed and may be negative, cast
+  // ElementTypeSizeOfGEP to a signed type so the division and remainder below
+  // stay signed.
+ int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
+ DL->getTypeAllocSize(GEP->getType()->getElementType()));
Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
// Very likely. As long as %gep is natually aligned, the byte offset we
// extracted should be a multiple of sizeof(*%gep).
- // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we
- // cast ElementTypeSizeOfGEP to signed.
- int64_t Index =
- AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
+ int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
NewGEP = GetElementPtrInst::Create(
NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
} else {
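[Editor's note] The cast discussed in the hunk above is easy to gloss over. The following standalone program (an illustration only, not code from the pass) shows what happens to a negative accumulated offset when the element size is left unsigned:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t AccumulativeByteOffset = -8;   // a negative constant offset
      uint64_t UnsignedSize = 4;             // what getTypeAllocSize() returns
      int64_t SignedSize = static_cast<int64_t>(UnsignedSize);

      // Mixed signed/unsigned division: -8 wraps to 0xFFFFFFFFFFFFFFF8 first.
      std::printf("%llu\n",
                  (unsigned long long)(AccumulativeByteOffset / UnsignedSize));
      // With the cast the division stays signed and gives the intended -2.
      std::printf("%lld\n",
                  (long long)(AccumulativeByteOffset / SignedSize));
      return 0;
    }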
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 5d5606b..046a7cb 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
@@ -34,22 +35,30 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "simplifycfg"
+static cl::opt<unsigned>
+UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1),
+ cl::desc("Control the number of bonus instructions (default = 1)"));
+
STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(ID) {
+ unsigned BonusInstThreshold;
+ CFGSimplifyPass(int T = -1) : FunctionPass(ID) {
+ BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetTransformInfo>();
}
};
@@ -59,12 +68,13 @@ char CFGSimplifyPass::ID = 0;
INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
false)
// Public interface to the CFGSimplification pass
-FunctionPass *llvm::createCFGSimplificationPass() {
- return new CFGSimplifyPass();
+FunctionPass *llvm::createCFGSimplificationPass(int Threshold) {
+ return new CFGSimplifyPass(Threshold);
}
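[Editor's note] With the new parameter, callers can override the bonus-instruction threshold per pass instance instead of relying on the -bonus-inst-threshold flag. A hypothetical usage sketch, assuming the legacy pass manager headers in this tree (header names may differ elsewhere):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"

    void addSimplifyCFG(llvm::PassManager &PM) {
      // Passing -1 keeps whatever -bonus-inst-threshold was given on the command
      // line (default 1); any non-negative value overrides it for this instance.
      PM.add(llvm::createCFGSimplificationPass(/*Threshold=*/2));
    }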
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
@@ -146,7 +156,9 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
- const DataLayout *DL) {
+ const DataLayout *DL,
+ AssumptionTracker *AT,
+ unsigned BonusInstThreshold) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -155,7 +167,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, DL)) {
+ if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, DL, AT)) {
LocalChange = true;
++NumSimpl;
}
@@ -172,12 +184,13 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, DL);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, DL, AT, BonusInstThreshold);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -191,7 +204,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, DL);
+ EverChanged = iterativelySimplifyCFG(F, TTI, DL, AT, BonusInstThreshold);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 7348c45..903b675 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -56,7 +56,7 @@ namespace {
}
private:
bool ProcessBlock(BasicBlock &BB);
- bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
+ bool SinkInstruction(Instruction *I, SmallPtrSetImpl<Instruction*> &Stores);
bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const;
};
@@ -157,7 +157,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) {
}
static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
- SmallPtrSet<Instruction *, 8> &Stores) {
+ SmallPtrSetImpl<Instruction *> &Stores) {
if (Inst->mayWriteToMemory()) {
Stores.insert(Inst);
@@ -166,9 +166,8 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
AliasAnalysis::Location Loc = AA->getLocation(L);
- for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
- E = Stores.end(); I != E; ++I)
- if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod)
+ for (Instruction *S : Stores)
+ if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod)
return false;
}
@@ -220,7 +219,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool Sinking::SinkInstruction(Instruction *Inst,
- SmallPtrSet<Instruction *, 8> &Stores) {
+ SmallPtrSetImpl<Instruction *> &Stores) {
// Don't sink static alloca instructions. CodeGen assumes allocas outside the
// entry block are dynamically sized stack objects.
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 7b77ae1..b9673ed 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -260,7 +260,7 @@ INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(RegionInfo)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
@@ -406,11 +406,11 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
} else {
// It's an exit from a sub region
- while(R->getParent() != ParentRegion)
+ while (R->getParent() != ParentRegion)
R = R->getParent();
// Edge from inside a subregion to its entry, ignore it
- if (R == N)
+ if (*R == *N)
continue;
BasicBlock *Entry = R->getEntry();
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 05b9892..f3c3e30 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -63,6 +63,7 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -86,6 +87,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
const TargetTransformInfo *TTI;
+ const DataLayout *DL;
static char ID; // Pass identification, replacement for typeid
TailCallElim() : FunctionPass(ID) {
@@ -157,6 +159,8 @@ bool TailCallElim::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
+ DL = F.getParent()->getDataLayout();
+
bool AllCallsAreTailCalls = false;
bool Modified = markTails(F, AllCallsAreTailCalls);
if (AllCallsAreTailCalls)
@@ -175,7 +179,7 @@ struct AllocaDerivedValueTracker {
auto AddUsesToWorklist = [&](Value *V) {
for (auto &U : V->uses()) {
- if (!Visited.insert(&U))
+ if (!Visited.insert(&U).second)
continue;
Worklist.push_back(&U);
}
@@ -227,12 +231,10 @@ struct AllocaDerivedValueTracker {
}
void callUsesLocalStack(CallSite CS, bool IsNocapture) {
- // Add it to the list of alloca users. If it's already there, skip further
- // processing.
- if (!AllocaUsers.insert(CS.getInstruction()))
- return;
+ // Add it to the list of alloca users.
+ AllocaUsers.insert(CS.getInstruction());
- // If it's nocapture then it can't capture the alloca.
+ // If it's nocapture then it can't capture this alloca.
if (IsNocapture)
return;
@@ -402,18 +404,28 @@ bool TailCallElim::runTRE(Function &F) {
// alloca' is changed from being a static alloca to being a dynamic alloca.
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
+ SmallVector<BasicBlock*, 8> BBToErase;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret) {
Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
TailCallsAreMarkedTail, ArgumentPHIs,
!CanTRETailMarkedCall);
+ // FoldReturnAndProcessPred may have emptied some basic blocks. Remember to
+ // erase them.
+ if (Change && BB->empty())
+ BBToErase.push_back(BB);
+
+ }
MadeChange |= Change;
}
}
+ for (auto BB: BBToErase)
+ BB->eraseFromParent();
+
// If we eliminated any tail recursions, it's possible that we inserted some
// silly PHI nodes which just merge an initial value (the incoming operand)
// with themselves. Check to see if we did and clean up our mess if so. This
@@ -452,7 +464,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// being loaded from.
if (CI->mayWriteToMemory() ||
!isSafeToLoadUnconditionally(L->getPointerOperand(), L,
- L->getAlignment()))
+ L->getAlignment(), DL))
return false;
}
}
@@ -821,8 +833,20 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
DEBUG(dbgs() << "FOLDING: " << *BB
<< "INTO UNCOND BRANCH PRED: " << *Pred);
- EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
- OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
+
+ // Cleanup: if all predecessors of BB have been eliminated by
+ // FoldReturnIntoUncondBranch, we would like to delete it, but we
+ // can not just nuke it as it is being used as an iterator by our caller.
+ // Just empty it, and the caller will erase it when it is safe to do so.
+ // It is important to empty it, because the ret instruction in there is
+ // still using a value which EliminateRecursiveTailCall will attempt
+ // to remove.
+ if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
+ BB->getInstList().clear();
+
+ EliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,
CannotTailCallElimCallsMarkedTail);
++NumRetDuped;
Change = true;
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 196ac79..f8e5af5 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -193,13 +193,11 @@ bool AddDiscriminators::runOnFunction(Function &F) {
// Create a new lexical scope and compute a new discriminator
// number for it.
StringRef Filename = FirstDIL.getFilename();
- unsigned LineNumber = FirstDIL.getLineNumber();
- unsigned ColumnNumber = FirstDIL.getColumnNumber();
DIScope Scope = FirstDIL.getScope();
DIFile File = Builder.createFile(Filename, Scope.getDirectory());
unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx);
- DILexicalBlock NewScope = Builder.createLexicalBlock(
- Scope, File, LineNumber, ColumnNumber, Discriminator);
+ DILexicalBlockFile NewScope =
+ Builder.createLexicalBlockFile(Scope, File, Discriminator);
DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope);
DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL);
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index 2390027..e20dc0a 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -13,6 +13,7 @@ transforms_utils_SRC_FILES := \
CodeExtractor.cpp \
CtorUtils.cpp \
DemoteRegToStack.cpp \
+ FlattenCFG.cpp \
GlobalStatus.cpp \
InlineFunction.cpp \
InstructionNamer.cpp \
@@ -33,6 +34,7 @@ transforms_utils_SRC_FILES := \
SimplifyIndVar.cpp \
SimplifyInstructions.cpp \
SimplifyLibCalls.cpp \
+ SymbolRewriter.cpp \
UnifyFunctionExitNodes.cpp \
Utils.cpp \
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 80b7e22..983f025 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -265,6 +265,18 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
return SplitBlock(BB, BB->getTerminator(), P);
}
+unsigned llvm::SplitAllCriticalEdges(Function &F, Pass *P) {
+ unsigned NumBroken = 0;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ TerminatorInst *TI = I->getTerminator();
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ if (SplitCriticalEdge(TI, i, P))
+ ++NumBroken;
+ }
+ return NumBroken;
+}
+
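A minimal sketch of how a pass can use the new SplitAllCriticalEdges helper, assuming the matching declaration was added to BasicBlockUtils.h; this mirrors the BreakCriticalEdges::runOnFunction rewrite later in this patch:

#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

bool splitAllEdgesSketch(Function &F, Pass *P) {
  // P lets SplitCriticalEdge update analyses such as DominatorTree/LoopInfo.
  unsigned NumBroken = SplitAllCriticalEdges(F, P);
  return NumBroken > 0;  // report whether the CFG changed
}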
/// SplitBlock - Split the specified block at the specified instruction - every
/// thing before SplitPt stays in Old and everything starting with SplitPt moves
/// to a new block. The two blocks are joined by an unconditional branch and
@@ -673,7 +685,8 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
Instruction *SplitBefore,
bool Unreachable,
- MDNode *BranchWeights) {
+ MDNode *BranchWeights,
+ DominatorTree *DT) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
TerminatorInst *HeadOldTerm = Head->getTerminator();
@@ -690,6 +703,20 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc());
HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+
+ if (DT) {
+ if (DomTreeNode *OldNode = DT->getNode(Head)) {
+ std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+ DomTreeNode *NewNode = DT->addNewBlock(Tail, Head);
+ for (auto Child : Children)
+ DT->changeImmediateDominator(Child, NewNode);
+
+ // Head dominates ThenBlock.
+ DT->addNewBlock(ThenBlock, Head);
+ }
+ }
+
return CheckTerm;
}
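A hedged usage sketch of the new optional DominatorTree parameter added above; the function and variable names here are illustrative only:

#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Split before an instruction and emit guarded code while keeping the
// DominatorTree up to date via the new optional parameter.
void guardWithCheck(Value *Cond, Instruction *SplitBefore, DominatorTree &DT) {
  TerminatorInst *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, SplitBefore, /*Unreachable=*/false,
                                /*BranchWeights=*/nullptr, &DT);
  IRBuilder<> Builder(ThenTerm);  // insert the conditionally executed code here
  (void)Builder;
}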
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 80bd516..eda22cf 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -40,7 +40,11 @@ namespace {
initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F) override;
+ bool runOnFunction(Function &F) override {
+ unsigned N = SplitAllCriticalEdges(F, this);
+ NumBroken += N;
+ return N > 0;
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -62,24 +66,6 @@ FunctionPass *llvm::createBreakCriticalEdgesPass() {
return new BreakCriticalEdges();
}
-// runOnFunction - Loop over all of the edges in the CFG, breaking critical
-// edges as they are found.
-//
-bool BreakCriticalEdges::runOnFunction(Function &F) {
- bool Changed = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- TerminatorInst *TI = I->getTerminator();
- if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (SplitCriticalEdge(TI, i, this)) {
- ++NumBroken;
- Changed = true;
- }
- }
-
- return Changed;
-}
-
//===----------------------------------------------------------------------===//
// Implementation of the external critical edge manipulation functions
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index be00b69..112d26c 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -42,8 +42,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
AttributeSet AS[2];
AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen",
@@ -51,7 +50,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
AS),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
- NULL);
+ nullptr);
CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
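The repeated simplification in this file relies on ArrayRef's implicit constructor from a C array, which deduces the length at compile time. A small self-contained sketch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Attributes.h"
using namespace llvm;

void arrayRefFromCArray() {
  Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
  // Same as ArrayRef<Attribute::AttrKind>(AVs, 2); the size is deduced.
  ArrayRef<Attribute::AttrKind> Ref = AVs;
  (void)Ref;
}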
@@ -71,8 +70,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
AttributeSet AS[2];
AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrNLen = M->getOrInsertFunction("strnlen",
@@ -81,7 +79,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
- NULL);
+ nullptr);
CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -100,15 +98,14 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
AttributeSet AS =
- AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
Constant *StrChr = M->getOrInsertFunction("strchr",
AttributeSet::get(M->getContext(),
AS),
- I8Ptr, I8Ptr, I32Ty, NULL);
+ I8Ptr, I8Ptr, I32Ty, nullptr);
CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
ConstantInt::get(I32Ty, C), "strchr");
if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
@@ -128,8 +125,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp",
@@ -138,7 +134,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Context), nullptr);
CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
CastToCStr(Ptr2, B), Len, "strncmp");
@@ -164,7 +160,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
Type *I8Ptr = B.getInt8PtrTy();
Value *StrCpy = M->getOrInsertFunction(Name,
AttributeSet::get(M->getContext(), AS),
- I8Ptr, I8Ptr, I8Ptr, NULL);
+ I8Ptr, I8Ptr, I8Ptr, nullptr);
CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
Name);
if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
@@ -190,7 +186,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
AttributeSet::get(M->getContext(),
AS),
I8Ptr, I8Ptr, I8Ptr,
- Len->getType(), NULL);
+ Len->getType(), nullptr);
CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
Len, "strncpy");
if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
@@ -218,7 +214,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
B.getInt8PtrTy(),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Context), nullptr);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
@@ -238,8 +234,7 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS;
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr",
AttributeSet::get(M->getContext(), AS),
@@ -247,7 +242,7 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
B.getInt8PtrTy(),
B.getInt32Ty(),
TD->getIntPtrType(Context),
- NULL);
+ nullptr);
CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
@@ -268,8 +263,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp",
@@ -277,7 +271,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
- TD->getIntPtrType(Context), NULL);
+ TD->getIntPtrType(Context), nullptr);
CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
Len, "memcmp");
@@ -313,7 +307,7 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(), NULL);
+ Op->getType(), nullptr);
CallInst *CI = B.CreateCall(Callee, Op, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
@@ -334,7 +328,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op1->getType(),
- Op1->getType(), Op2->getType(), NULL);
+ Op1->getType(), Op2->getType(), nullptr);
CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
@@ -352,7 +346,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
- B.getInt32Ty(), NULL);
+ B.getInt32Ty(), nullptr);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
B.getInt32Ty(),
@@ -382,7 +376,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
- NULL);
+ nullptr);
CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -407,12 +401,12 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt32Ty(), File->getType(),
- NULL);
+ nullptr);
else
F = M->getOrInsertFunction("fputc",
B.getInt32Ty(),
B.getInt32Ty(),
- File->getType(), NULL);
+ File->getType(), nullptr);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
"chari");
CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
@@ -442,11 +436,11 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
- File->getType(), NULL);
+ File->getType(), nullptr);
else
F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
B.getInt8PtrTy(),
- File->getType(), NULL);
+ File->getType(), nullptr);
CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
@@ -478,13 +472,13 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
TD->getIntPtrType(Context),
- File->getType(), NULL);
+ File->getType(), nullptr);
else
F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
TD->getIntPtrType(Context),
- File->getType(), NULL);
+ File->getType(), nullptr);
CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
ConstantInt::get(TD->getIntPtrType(Context), 1), File);
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index fcf548f..6ce22b1 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,16 +1,17 @@
add_llvm_library(LLVMTransformUtils
- AddDiscriminators.cpp
ASanStackFrameLayout.cpp
+ AddDiscriminators.cpp
BasicBlockUtils.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
BypassSlowDivision.cpp
- CtorUtils.cpp
CloneFunction.cpp
CloneModule.cpp
CmpInstAnalysis.cpp
CodeExtractor.cpp
+ CtorUtils.cpp
DemoteRegToStack.cpp
+ FlattenCFG.cpp
GlobalStatus.cpp
InlineFunction.cpp
InstructionNamer.cpp
@@ -29,10 +30,10 @@ add_llvm_library(LLVMTransformUtils
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
SimplifyCFG.cpp
- FlattenCFG.cpp
SimplifyIndVar.cpp
SimplifyInstructions.cpp
SimplifyLibCalls.cpp
+ SymbolRewriter.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 3f75b3e..d078c96 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm-c/Core.h"
using namespace llvm;
/// CloneModule - Return an exact copy of the specified module. This is not as
@@ -122,3 +123,11 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
return New;
}
+
+extern "C" {
+
+LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) {
+ return wrap(CloneModule(unwrap(M)));
+}
+
+}
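A short usage sketch of the new C API entry point added above:

#include "llvm-c/Core.h"

// LLVMCloneModule deep-copies a module; the caller owns the result and should
// release it with LLVMDisposeModule when done.
LLVMModuleRef cloneModuleSketch(LLVMModuleRef M) {
  return LLVMCloneModule(M);
}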
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
index a359424..26875e8 100644
--- a/lib/Transforms/Utils/CtorUtils.cpp
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -24,41 +25,22 @@
namespace llvm {
namespace {
-/// Given a specified llvm.global_ctors list, install the
-/// specified array.
-void installGlobalCtors(GlobalVariable *GCL,
- const std::vector<Function *> &Ctors) {
- // If we made a change, reassemble the initializer list.
- Constant *CSVals[3];
-
- StructType *StructTy =
- cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
-
- // Create the new init list.
- std::vector<Constant *> CAList;
- for (Function *F : Ctors) {
- Type *Int32Ty = Type::getInt32Ty(GCL->getContext());
- if (F) {
- CSVals[0] = ConstantInt::get(Int32Ty, 65535);
- CSVals[1] = F;
- } else {
- CSVals[0] = ConstantInt::get(Int32Ty, 0x7fffffff);
- CSVals[1] = Constant::getNullValue(StructTy->getElementType(1));
- }
- // FIXME: Only allow the 3-field form in LLVM 4.0.
- size_t NumElts = StructTy->getNumElements();
- if (NumElts > 2)
- CSVals[2] = Constant::getNullValue(StructTy->getElementType(2));
- CAList.push_back(
- ConstantStruct::get(StructTy, makeArrayRef(CSVals, NumElts)));
- }
-
- // Create the array initializer.
- Constant *CA =
- ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList);
+/// Given a specified llvm.global_ctors list, remove the listed elements.
+void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) {
+ // Filter out the initializer elements to remove.
+ ConstantArray *OldCA = cast<ConstantArray>(GCL->getInitializer());
+ SmallVector<Constant *, 10> CAList;
+ for (unsigned I = 0, E = OldCA->getNumOperands(); I < E; ++I)
+ if (!CtorsToRemove.test(I))
+ CAList.push_back(OldCA->getOperand(I));
+
+ // Create the new array initializer.
+ ArrayType *ATy =
+ ArrayType::get(OldCA->getType()->getElementType(), CAList.size());
+ Constant *CA = ConstantArray::get(ATy, CAList);
// If we didn't change the number of elements, don't create a new GV.
- if (CA->getType() == GCL->getInitializer()->getType()) {
+ if (CA->getType() == OldCA->getType()) {
GCL->setInitializer(CA);
return;
}
@@ -82,7 +64,7 @@ void installGlobalCtors(GlobalVariable *GCL,
/// Given a llvm.global_ctors list that we can understand,
/// return a list of the functions and null terminator as a vector.
-std::vector<Function*> parseGlobalCtors(GlobalVariable *GV) {
+std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
if (GV->getInitializer()->isNullValue())
return std::vector<Function *>();
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
@@ -147,17 +129,15 @@ bool optimizeGlobalCtorsList(Module &M,
bool MadeChange = false;
// Loop over global ctors, optimizing them when we can.
- for (unsigned i = 0; i != Ctors.size(); ++i) {
+ unsigned NumCtors = Ctors.size();
+ BitVector CtorsToRemove(NumCtors);
+ for (unsigned i = 0; i != Ctors.size() && NumCtors > 0; ++i) {
Function *F = Ctors[i];
// Found a null terminator in the middle of the list, prune off the rest of
// the list.
- if (!F) {
- if (i != Ctors.size() - 1) {
- Ctors.resize(i + 1);
- MadeChange = true;
- }
- break;
- }
+ if (!F)
+ continue;
+
DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
// We cannot simplify external ctor functions.
@@ -166,9 +146,10 @@ bool optimizeGlobalCtorsList(Module &M,
// If we can evaluate the ctor at compile time, do.
if (ShouldRemove(F)) {
- Ctors.erase(Ctors.begin() + i);
+ Ctors[i] = nullptr;
+ CtorsToRemove.set(i);
+ NumCtors--;
MadeChange = true;
- --i;
continue;
}
}
@@ -176,7 +157,7 @@ bool optimizeGlobalCtorsList(Module &M,
if (!MadeChange)
return false;
- installGlobalCtors(GlobalCtors, Ctors);
+ removeGlobalCtors(GlobalCtors, CtorsToRemove);
return true;
}
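The CtorUtils change above switches from erasing entries during the scan to marking them in a BitVector and rebuilding once, which keeps indices stable. A generic sketch of that idiom (names are illustrative, not part of the patch):

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

template <typename T, typename Pred>
void filterInPlace(SmallVectorImpl<T> &Items, Pred ShouldRemove) {
  // Record removals during the scan; indices never shift.
  BitVector ToRemove(Items.size());
  for (unsigned I = 0, E = Items.size(); I != E; ++I)
    if (ShouldRemove(Items[I]))
      ToRemove.set(I);

  // Rebuild the container once, keeping only the unmarked elements.
  SmallVector<T, 8> Kept;
  for (unsigned I = 0, E = Items.size(); I != E; ++I)
    if (!ToRemove.test(I))
      Kept.push_back(Items[I]);
  Items.clear();
  Items.append(Kept.begin(), Kept.end());
}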
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 51ead40..4eb3e3d 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -238,9 +238,13 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
// Do branch inversion.
BasicBlock *CurrBlock = LastCondBlock;
bool EverChanged = false;
- while (1) {
+ for (;CurrBlock != FirstCondBlock;
+ CurrBlock = CurrBlock->getSinglePredecessor()) {
BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI)
+ continue;
+
CmpInst::Predicate Predicate = CI->getPredicate();
// Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
@@ -248,9 +252,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
BI->swapSuccessors();
EverChanged = true;
}
- if (CurrBlock == FirstCondBlock)
- break;
- CurrBlock = CurrBlock->getSinglePredecessor();
}
return EverChanged;
}
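The loop rewritten above walks single-predecessor blocks and canonicalizes inequality compares. A minimal sketch of that canonicalization step, as a standalone helper with hypothetical names:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Turn an inequality compare into an equality compare and swap the branch
// successors so the overall control flow is unchanged.
void canonicalizeBranchCompare(BranchInst *BI, CmpInst *CI) {
  CmpInst::Predicate P = CI->getPredicate();
  if (P == CmpInst::ICMP_NE || P == CmpInst::FCMP_ONE) {
    CI->setPredicate(CmpInst::getInversePredicate(P));
    BI->swapSuccessors();
  }
}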
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 12057e4..52e2d59 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -35,6 +35,9 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C))
return false;
+ if (isa<ConstantInt>(C) || isa<ConstantFP>(C))
+ return false;
+
for (const User *U : C->users())
if (const Constant *CU = dyn_cast<Constant>(U)) {
if (!isSafeToDestroyConstant(CU))
@@ -45,7 +48,7 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
}
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
- SmallPtrSet<const PHINode *, 16> &PhiUsers) {
+ SmallPtrSetImpl<const PHINode *> &PhiUsers) {
for (const Use &U : V->uses()) {
const User *UR = U.getUser();
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
@@ -130,7 +133,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
} else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
// PHI nodes we can check just like select or GEP instructions, but we
// have to be careful about infinite recursion.
- if (PhiUsers.insert(PN)) // Not already visited.
+ if (PhiUsers.insert(PN).second) // Not already visited.
if (analyzeGlobalAux(I, GS, PhiUsers))
return true;
} else if (isa<CmpInst>(I)) {
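The `.second` additions throughout this patch reflect SmallPtrSet::insert now returning std::pair<iterator, bool>, matching the standard containers. A minimal sketch of the visited-set idiom:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Value.h"
using namespace llvm;

bool markVisited(SmallPtrSetImpl<const Value *> &Visited, const Value *V) {
  return Visited.insert(V).second;  // true only on first insertion
}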
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index f0a9f2b..2d0b7dc 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -13,10 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CFG.h"
@@ -24,14 +30,28 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
using namespace llvm;
+static cl::opt<bool>
+EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
+ cl::Hidden,
+ cl::desc("Convert noalias attributes to metadata during inlining."));
+
+static cl::opt<bool>
+PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
+ cl::init(true), cl::Hidden,
+ cl::desc("Convert align attributes to assumptions during inlining."));
+
bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
bool InsertLifetime) {
return InlineFunction(CallSite(CI), IFI, InsertLifetime);
@@ -84,7 +104,7 @@ namespace {
/// split the landing pad block after the landingpad instruction and jump
/// to there.
void forwardResume(ResumeInst *RI,
- SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
+ SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
/// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
/// destination block for the given basic block, using the values for the
@@ -143,7 +163,7 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void InvokeInliningInfo::forwardResume(ResumeInst *RI,
- SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
+ SmallPtrSetImpl<LandingPadInst*> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
BasicBlock *Src = RI->getParent();
@@ -233,9 +253,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// Append the clauses from the outer landing pad instruction into the inlined
// landing pad instructions.
LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
- for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
- E = InlinedLPads.end(); I != E; ++I) {
- LandingPadInst *InlinedLPad = *I;
+ for (LandingPadInst *InlinedLPad : InlinedLPads) {
unsigned OuterNum = OuterLPad->getNumClauses();
InlinedLPad->reserveClauses(OuterNum);
for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
@@ -260,6 +278,385 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
InvokeDest->removePredecessor(II->getParent());
}
+/// CloneAliasScopeMetadata - When inlining a function that contains noalias
+/// scope metadata, this metadata needs to be cloned so that the inlined blocks
+/// have different "unique scopes" at every call site. Were this not done, then
+/// aliasing scopes from a function inlined into a caller multiple times could
+/// not be differentiated (and this would lead to miscompiles because the
+/// non-aliasing property communicated by the metadata could have
+/// call-site-specific control dependencies).
+static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
+ const Function *CalledFunc = CS.getCalledFunction();
+ SetVector<const MDNode *> MD;
+
+ // Note: We could only clone the metadata if it is already used in the
+ // caller. I'm omitting that check here because it might confuse
+ // inter-procedural alias analysis passes. We can revisit this if it becomes
+ // an efficiency or overhead problem.
+
+ for (Function::const_iterator I = CalledFunc->begin(), IE = CalledFunc->end();
+ I != IE; ++I)
+ for (BasicBlock::const_iterator J = I->begin(), JE = I->end(); J != JE; ++J) {
+ if (const MDNode *M = J->getMetadata(LLVMContext::MD_alias_scope))
+ MD.insert(M);
+ if (const MDNode *M = J->getMetadata(LLVMContext::MD_noalias))
+ MD.insert(M);
+ }
+
+ if (MD.empty())
+ return;
+
+ // Walk the existing metadata, adding the complete (perhaps cyclic) chain to
+ // the set.
+ SmallVector<const Value *, 16> Queue(MD.begin(), MD.end());
+ while (!Queue.empty()) {
+ const MDNode *M = cast<MDNode>(Queue.pop_back_val());
+ for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i)
+ if (const MDNode *M1 = dyn_cast<MDNode>(M->getOperand(i)))
+ if (MD.insert(M1))
+ Queue.push_back(M1);
+ }
+
+ // Now we have a complete set of all metadata in the chains used to specify
+ // the noalias scopes and the lists of those scopes.
+ SmallVector<MDNode *, 16> DummyNodes;
+ DenseMap<const MDNode *, TrackingVH<MDNode> > MDMap;
+ for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
+ I != IE; ++I) {
+ MDNode *Dummy = MDNode::getTemporary(CalledFunc->getContext(), None);
+ DummyNodes.push_back(Dummy);
+ MDMap[*I] = Dummy;
+ }
+
+ // Create new metadata nodes to replace the dummy nodes, replacing old
+ // metadata references with either a dummy node or an already-created new
+ // node.
+ for (SetVector<const MDNode *>::iterator I = MD.begin(), IE = MD.end();
+ I != IE; ++I) {
+ SmallVector<Value *, 4> NewOps;
+ for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) {
+ const Value *V = (*I)->getOperand(i);
+ if (const MDNode *M = dyn_cast<MDNode>(V))
+ NewOps.push_back(MDMap[M]);
+ else
+ NewOps.push_back(const_cast<Value *>(V));
+ }
+
+ MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps),
+ *TempM = MDMap[*I];
+
+ TempM->replaceAllUsesWith(NewM);
+ }
+
+ // Now replace the metadata in the new inlined instructions with the
+ // replacements from the map.
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+ VMI != VMIE; ++VMI) {
+ if (!VMI->second)
+ continue;
+
+ Instruction *NI = dyn_cast<Instruction>(VMI->second);
+ if (!NI)
+ continue;
+
+ if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) {
+ MDNode *NewMD = MDMap[M];
+ // If the call site also had alias scope metadata (a list of scopes to
+ // which instructions inside it might belong), propagate those scopes to
+ // the inlined instructions.
+ if (MDNode *CSM =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
+ NewMD = MDNode::concatenate(NewMD, CSM);
+ NI->setMetadata(LLVMContext::MD_alias_scope, NewMD);
+ } else if (NI->mayReadOrWriteMemory()) {
+ if (MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope))
+ NI->setMetadata(LLVMContext::MD_alias_scope, M);
+ }
+
+ if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) {
+ MDNode *NewMD = MDMap[M];
+ // If the call site also had noalias metadata (a list of scopes with
+ // which instructions inside it don't alias), propagate those scopes to
+ // the inlined instructions.
+ if (MDNode *CSM =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
+ NewMD = MDNode::concatenate(NewMD, CSM);
+ NI->setMetadata(LLVMContext::MD_noalias, NewMD);
+ } else if (NI->mayReadOrWriteMemory()) {
+ if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias))
+ NI->setMetadata(LLVMContext::MD_noalias, M);
+ }
+ }
+
+ // Now that everything has been replaced, delete the dummy nodes.
+ for (unsigned i = 0, ie = DummyNodes.size(); i != ie; ++i)
+ MDNode::deleteTemporary(DummyNodes[i]);
+}
+
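CloneAliasScopeMetadata uses temporary nodes as placeholders so that possibly cyclic metadata chains can be rebuilt. A small hedged sketch of that general technique, reduced to a single self-referencing node (names are illustrative only):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// A temporary node stands in for the final node while operands are rebuilt,
// then all placeholder uses are redirected and the temporary is deleted.
MDNode *rebuildWithPlaceholder(LLVMContext &Ctx, MDNode *Old) {
  MDNode *Temp = MDNode::getTemporary(Ctx, None);
  SmallVector<Value *, 4> Ops;
  for (unsigned i = 0, e = Old->getNumOperands(); i != e; ++i)
    Ops.push_back(Old->getOperand(i) == Old ? Temp : Old->getOperand(i));
  MDNode *New = MDNode::get(Ctx, Ops);  // the real, uniqued replacement
  Temp->replaceAllUsesWith(New);        // patch any uses of the placeholder
  MDNode::deleteTemporary(Temp);
  return New;
}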
+/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then
+/// add new alias scopes for each noalias argument, tag the mapped noalias
+/// parameters with noalias metadata specifying the new scope, and tag all
+/// non-derived loads, stores and memory intrinsics with the new alias scopes.
+static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
+ const DataLayout *DL, AliasAnalysis *AA) {
+ if (!EnableNoAliasConversion)
+ return;
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ SmallVector<const Argument *, 4> NoAliasArgs;
+
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I) {
+ if (I->hasNoAliasAttr() && !I->hasNUses(0))
+ NoAliasArgs.push_back(I);
+ }
+
+ if (NoAliasArgs.empty())
+ return;
+
+ // To do a good job, if a noalias variable is captured, we need to know if
+ // the capture point dominates the particular use we're considering.
+ DominatorTree DT;
+ DT.recalculate(const_cast<Function&>(*CalledFunc));
+
+ // noalias indicates that pointer values based on the argument do not alias
+ // pointer values which are not based on it. So we add a new "scope" for each
+ // noalias function argument. Accesses using pointers based on that argument
+ // become part of that alias scope, accesses using pointers not based on that
+ // argument are tagged as noalias with that scope.
+
+ DenseMap<const Argument *, MDNode *> NewScopes;
+ MDBuilder MDB(CalledFunc->getContext());
+
+ // Create a new scope domain for this function.
+ MDNode *NewDomain =
+ MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
+ for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
+ const Argument *A = NoAliasArgs[i];
+
+ std::string Name = CalledFunc->getName();
+ if (A->hasName()) {
+ Name += ": %";
+ Name += A->getName();
+ } else {
+ Name += ": argument ";
+ Name += utostr(i);
+ }
+
+ // Note: We always create a new anonymous root here. This is true regardless
+ // of the linkage of the callee because the aliasing "scope" is not just a
+ // property of the callee, but also all control dependencies in the caller.
+ MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+ NewScopes.insert(std::make_pair(A, NewScope));
+ }
+
+ // Iterate over all new instructions in the map; for all memory-access
+ // instructions, add the alias scope metadata.
+ for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
+ VMI != VMIE; ++VMI) {
+ if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
+ if (!VMI->second)
+ continue;
+
+ Instruction *NI = dyn_cast<Instruction>(VMI->second);
+ if (!NI)
+ continue;
+
+ bool IsArgMemOnlyCall = false, IsFuncCall = false;
+ SmallVector<const Value *, 2> PtrArgs;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+ PtrArgs.push_back(LI->getPointerOperand());
+ else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ PtrArgs.push_back(SI->getPointerOperand());
+ else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ PtrArgs.push_back(VAAI->getPointerOperand());
+ else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
+ PtrArgs.push_back(CXI->getPointerOperand());
+ else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
+ PtrArgs.push_back(RMWI->getPointerOperand());
+ else if (ImmutableCallSite ICS = ImmutableCallSite(I)) {
+ // If we know that the call does not access memory, then we'll still
+ // know that about the inlined clone of this call site, and we don't
+ // need to add metadata.
+ if (ICS.doesNotAccessMemory())
+ continue;
+
+ IsFuncCall = true;
+ if (AA) {
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(ICS);
+ if (MRB == AliasAnalysis::OnlyAccessesArgumentPointees ||
+ MRB == AliasAnalysis::OnlyReadsArgumentPointees)
+ IsArgMemOnlyCall = true;
+ }
+
+ for (ImmutableCallSite::arg_iterator AI = ICS.arg_begin(),
+ AE = ICS.arg_end(); AI != AE; ++AI) {
+ // We need to check the underlying objects of all arguments, not just
+ // the pointer arguments, because we might be passing pointers as
+ // integers, etc.
+ // However, if we know that the call only accesses pointer arguments,
+ // then we only need to check the pointer arguments.
+ if (IsArgMemOnlyCall && !(*AI)->getType()->isPointerTy())
+ continue;
+
+ PtrArgs.push_back(*AI);
+ }
+ }
+
+ // If we found no pointers, then this instruction is not suitable for
+ // pairing with an instruction to receive aliasing metadata.
+ // However, if this is a call, then we might just alias with none of the
+ // noalias arguments.
+ if (PtrArgs.empty() && !IsFuncCall)
+ continue;
+
+ // It is possible that there is only one underlying object, but you
+ // need to go through several PHIs to see it, and thus it could be
+ // repeated in the Objects list.
+ SmallPtrSet<const Value *, 4> ObjSet;
+ SmallVector<Value *, 4> Scopes, NoAliases;
+
+ SmallSetVector<const Argument *, 4> NAPtrArgs;
+ for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) {
+ SmallVector<Value *, 4> Objects;
+ GetUnderlyingObjects(const_cast<Value*>(PtrArgs[i]),
+ Objects, DL, /* MaxLookup = */ 0);
+
+ for (Value *O : Objects)
+ ObjSet.insert(O);
+ }
+
+ // Figure out if we're derived from anything that is not a noalias
+ // argument.
+ bool CanDeriveViaCapture = false, UsesAliasingPtr = false;
+ for (const Value *V : ObjSet) {
+ // Is this value a constant that cannot be derived from any pointer
+ // value (we need to exclude constant expressions, for example, that
+ // are formed from arithmetic on global symbols).
+ bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
+ isa<ConstantPointerNull>(V) ||
+ isa<ConstantDataVector>(V) || isa<UndefValue>(V);
+ if (IsNonPtrConst)
+ continue;
+
+ // If this is anything other than a noalias argument, then we cannot
+ // completely describe the aliasing properties using alias.scope
+ // metadata (and, thus, won't add any).
+ if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (!A->hasNoAliasAttr())
+ UsesAliasingPtr = true;
+ } else {
+ UsesAliasingPtr = true;
+ }
+
+ // If this is not some identified function-local object (which cannot
+ // directly alias a noalias argument), or some other argument (which,
+ // by definition, also cannot alias a noalias argument), then we could
+ // alias a noalias argument that has been captured.
+ if (!isa<Argument>(V) &&
+ !isIdentifiedFunctionLocal(const_cast<Value*>(V)))
+ CanDeriveViaCapture = true;
+ }
+
+ // A function call can always get captured noalias pointers (via other
+ // parameters, globals, etc.).
+ if (IsFuncCall && !IsArgMemOnlyCall)
+ CanDeriveViaCapture = true;
+
+ // First, we want to figure out all of the sets with which we definitely
+ // don't alias. Iterate over all noalias sets, and add those for which:
+ // 1. The noalias argument is not in the set of objects from which we
+ // definitely derive.
+ // 2. The noalias argument has not yet been captured.
+ // An arbitrary function that might load pointers could see captured
+ // noalias arguments via other noalias arguments or globals, and so we
+ // must always check for prior capture.
+ for (const Argument *A : NoAliasArgs) {
+ if (!ObjSet.count(A) && (!CanDeriveViaCapture ||
+ // It might be tempting to skip the
+ // PointerMayBeCapturedBefore check if
+ // A->hasNoCaptureAttr() is true, but this is
+ // incorrect because nocapture only guarantees
+ // that no copies outlive the function, not
+ // that the value cannot be locally captured.
+ !PointerMayBeCapturedBefore(A,
+ /* ReturnCaptures */ false,
+ /* StoreCaptures */ false, I, &DT)))
+ NoAliases.push_back(NewScopes[A]);
+ }
+
+ if (!NoAliases.empty())
+ NI->setMetadata(LLVMContext::MD_noalias,
+ MDNode::concatenate(
+ NI->getMetadata(LLVMContext::MD_noalias),
+ MDNode::get(CalledFunc->getContext(), NoAliases)));
+
+ // Next, we want to figure out all of the sets to which we might belong.
+ // We might belong to a set if the noalias argument is in the set of
+ // underlying objects. If there is some non-noalias argument in our list
+ // of underlying objects, then we cannot add a scope because the fact
+ // that some access does not alias with any set of our noalias arguments
+ // cannot itself guarantee that it does not alias with this access
+ // (because there is some pointer of unknown origin involved and the
+ // other access might also depend on this pointer). We also cannot add
+ // scopes to arbitrary functions unless we know they don't access any
+ // non-parameter pointer-values.
+ bool CanAddScopes = !UsesAliasingPtr;
+ if (CanAddScopes && IsFuncCall)
+ CanAddScopes = IsArgMemOnlyCall;
+
+ if (CanAddScopes)
+ for (const Argument *A : NoAliasArgs) {
+ if (ObjSet.count(A))
+ Scopes.push_back(NewScopes[A]);
+ }
+
+ if (!Scopes.empty())
+ NI->setMetadata(
+ LLVMContext::MD_alias_scope,
+ MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
+ MDNode::get(CalledFunc->getContext(), Scopes)));
+ }
+ }
+}
+
+/// If the inlined function has non-byval align arguments, then
+/// add @llvm.assume-based alignment assumptions to preserve this information.
+static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
+ if (!PreserveAlignmentAssumptions || !IFI.DL)
+ return;
+
+ // To avoid inserting redundant assumptions, we should check for assumptions
+ // already in the caller. To do this, we might need a DT of the caller.
+ DominatorTree DT;
+ bool DTCalculated = false;
+
+ const Function *CalledFunc = CS.getCalledFunction();
+ for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ E = CalledFunc->arg_end(); I != E; ++I) {
+ unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0;
+ if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) {
+ if (!DTCalculated) {
+ DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent()
+ ->getParent()));
+ DTCalculated = true;
+ }
+
+ // If we can already prove the asserted alignment in the context of the
+ // caller, then don't bother inserting the assumption.
+ Value *Arg = CS.getArgument(I->getArgNo());
+ if (getKnownAlignment(Arg, IFI.DL, IFI.AT, CS.getInstruction(),
+ &DT) >= Align)
+ continue;
+
+ IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg,
+ Align);
+ }
+ }
+}
+
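For reference, a rough hand-rolled equivalent of the alignment assumption emitted above, built only from IRBuilder calls that appear in this era of the API (sketch only; names are illustrative): assert that the low bits of the pointer value are zero so computeKnownBits, via the AssumptionTracker, can recover the alignment.

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

void emitAlignmentAssumption(IRBuilder<> &B, Module *M, Value *Ptr,
                             uint64_t Align, Type *IntPtrTy) {
  Value *PtrInt = B.CreatePtrToInt(Ptr, IntPtrTy);
  Value *LowBits = B.CreateAnd(PtrInt, ConstantInt::get(IntPtrTy, Align - 1));
  Value *Cond = B.CreateICmpEQ(LowBits, ConstantInt::get(IntPtrTy, 0));
  Function *AssumeFn = Intrinsic::getDeclaration(M, Intrinsic::assume);
  B.CreateCall(AssumeFn, Cond);  // @llvm.assume(i1 %cond)
}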
/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
/// into the caller, update the specified callgraph to reflect the changes we
/// made. Note that it's possible that not all code was copied over, so only
@@ -327,31 +724,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
BasicBlock *InsertBlock,
InlineFunctionInfo &IFI) {
- LLVMContext &Context = Src->getContext();
- Type *VoidPtrTy = Type::getInt8PtrTy(Context);
Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
- Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) };
- Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
- IRBuilder<> builder(InsertBlock->begin());
- Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp");
- Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp");
+ IRBuilder<> Builder(InsertBlock->begin());
Value *Size;
if (IFI.DL == nullptr)
Size = ConstantExpr::getSizeOf(AggTy);
else
- Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.DL->getTypeStoreSize(AggTy));
+ Size = Builder.getInt64(IFI.DL->getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
// better alignment.
- Value *CallArgs[] = {
- DstCast, SrcCast, Size,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- ConstantInt::getFalse(Context) // isVolatile
- };
- builder.CreateCall(MemCpyFn, CallArgs);
+ Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1);
}
/// HandleByValArgument - When inlining a call site that has a byval argument,
@@ -376,7 +761,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
- IFI.DL) >= ByValAlignment)
+ IFI.DL, IFI.AT, TheCall) >= ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
@@ -472,6 +857,12 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
// originates from the call location. This is important for
// ((__always_inline__, __nodebug__)) functions which must use caller
// location for all instructions in their function body.
+
+ // Don't update static allocas, as they may get moved later.
+ if (auto *AI = dyn_cast<AllocaInst>(BI))
+ if (isa<Constant>(AI->getArraySize()))
+ continue;
+
BI->setDebugLoc(TheCallDL);
} else {
BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
@@ -486,33 +877,6 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
-/// Returns a musttail call instruction if one immediately precedes the given
-/// return instruction with an optional bitcast instruction between them.
-static CallInst *getPrecedingMustTailCall(ReturnInst *RI) {
- Instruction *Prev = RI->getPrevNode();
- if (!Prev)
- return nullptr;
-
- if (Value *RV = RI->getReturnValue()) {
- if (RV != Prev)
- return nullptr;
-
- // Look through the optional bitcast.
- if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
- RV = BI->getOperand(0);
- Prev = BI->getPrevNode();
- if (!Prev || RV != Prev)
- return nullptr;
- }
- }
-
- if (auto *CI = dyn_cast<CallInst>(Prev)) {
- if (CI->isMustTailCall())
- return CI;
- }
- return nullptr;
-}
-
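The helper removed above is superseded by a BasicBlock member with the same semantics (a musttail call immediately preceding the return, possibly through a bitcast), as the later hunks show. Minimal sketch of the replacement usage:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

bool returnFollowsMustTailCall(ReturnInst *RI) {
  return RI->getParent()->getTerminatingMustTailCall() != nullptr;
}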
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
@@ -626,6 +990,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
VMap[I] = ActualArg;
}
+ // Add alignment assumptions if necessary. We do this before the inlined
+ // instructions are actually cloned into the caller so that we can easily
+ // check what will be known at the start of the inlined code.
+ AddAlignmentAssumptions(CS, IFI);
+
// We want the inliner to prune the code as it copies. We would LOVE to
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
@@ -648,6 +1017,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Update inlined instructions' line number information.
fixupLineNumbers(Caller, FirstNewBlock, TheCall);
+
+ // Clone existing noalias metadata if necessary.
+ CloneAliasScopeMetadata(CS, VMap);
+
+ // Add noalias metadata if necessary.
+ AddAliasScopeMetadata(CS, VMap, IFI.DL, IFI.AA);
+
+ // FIXME: We could register any cloned assumptions instead of clearing the
+ // whole function's cache.
+ if (IFI.AT)
+ IFI.AT->forgetCachedAssumptions(Caller);
}
// If there are any alloca instructions in the block that used to be the entry
@@ -765,7 +1145,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
for (ReturnInst *RI : Returns) {
// Don't insert llvm.lifetime.end calls between a musttail call and a
// return. The return kills all local allocas.
- if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+ if (InlinedMustTailCalls &&
+ RI->getParent()->getTerminatingMustTailCall())
continue;
IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
}
@@ -789,7 +1170,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
for (ReturnInst *RI : Returns) {
// Don't insert llvm.stackrestore calls between a musttail call and a
// return. The return will restore the stack pointer.
- if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+ if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
continue;
IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
}
@@ -812,7 +1193,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Handle the returns preceded by musttail calls separately.
SmallVector<ReturnInst *, 8> NormalReturns;
for (ReturnInst *RI : Returns) {
- CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI);
+ CallInst *ReturnedMustTail =
+ RI->getParent()->getTerminatingMustTailCall();
if (!ReturnedMustTail) {
NormalReturns.push_back(RI);
continue;
@@ -1016,7 +1398,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
- if (Value *V = SimplifyInstruction(PHI, IFI.DL)) {
+ if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr, IFI.AT)) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 9f91eeb..0ae746c 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -398,11 +398,13 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
Rem->dropAllReferences();
Rem->eraseFromParent();
- // If we didn't actually generate a udiv instruction, we're done
- BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
- if (!BO || BO->getOpcode() != Instruction::URem)
+ // If we didn't actually generate a urem instruction, we're done.
+ // This happens, for example, if the input was constant. In that case the
+ // Builder insertion point is unchanged.
+ if (Rem == Builder.GetInsertPoint())
return true;
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
Rem = BO;
}
@@ -456,11 +458,13 @@ bool llvm::expandDivision(BinaryOperator *Div) {
Div->dropAllReferences();
Div->eraseFromParent();
- // If we didn't actually generate a udiv instruction, we're done
- BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
- if (!BO || BO->getOpcode() != Instruction::UDiv)
+ // If we didn't actually generate a udiv instruction, we're done.
+ // This happens, for example, if the input was constant. In that case the
+ // Builder insertion point is unchanged.
+ if (Div == Builder.GetInsertPoint())
return true;
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
Div = BO;
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index aedd787..c963c51 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -128,7 +128,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
if (i.getCaseSuccessor() == DefaultDest) {
- MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
unsigned NCases = SI->getNumCases();
// Fold the case metadata into the default if there will be any branches
// left, unless the metadata doesn't match the switch.
@@ -206,7 +206,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
BranchInst *NewBr = Builder.CreateCondBr(Cond,
FirstCase.getCaseSuccessor(),
SI->getDefaultDest());
- MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
if (MD && MD->getNumOperands() == 3) {
ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
@@ -301,6 +301,14 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end)
return isa<UndefValue>(II->getArgOperand(1));
+
+ // Assumptions are dead if their condition is trivially true.
+ if (II->getIntrinsicID() == Intrinsic::assume) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ return !Cond->isZero();
+
+ return false;
+ }
}
if (isAllocLikeFn(I, TLI)) return true;
@@ -384,7 +392,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
// If we find an instruction more than once, we're on a cycle that
// won't prove fruitful.
- if (!Visited.insert(I)) {
+ if (!Visited.insert(I).second) {
// Break the cycle and delete the instruction and its operands.
I->replaceAllUsesWith(UndefValue::get(I->getType()));
(void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
@@ -509,6 +517,11 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PredBB->getTerminator()->eraseFromParent();
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+ // If the PredBB is the entry block of the function, move DestBB up to
+ // become the entry block after we erase PredBB.
+ if (PredBB == &DestBB->getParent()->getEntryBlock())
+ DestBB->moveAfter(PredBB);
+
if (P) {
if (DominatorTreeWrapperPass *DTWP =
P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
@@ -926,13 +939,16 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
/// and it is more than the alignment of the ultimate object, see if we can
/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const DataLayout *DL) {
+ const DataLayout *DL,
+ AssumptionTracker *AT,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL);
+ computeKnownBits(V, KnownZero, KnownOne, DL, 0, AT, CxtI, DT);
unsigned TrailZ = KnownZero.countTrailingOnes();
// Avoid trouble with ridiculously large TrailZ values, such as
@@ -977,6 +993,7 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
DIVariable DIVar(DDI->getVariable());
+ DIExpression DIExpr(DDI->getExpression());
assert((!DIVar || DIVar.isVariable()) &&
"Variable in DbgDeclareInst should be either null or a DIVariable.");
if (!DIVar)
@@ -994,9 +1011,10 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
if (ExtendedArg)
- DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI);
+ DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI);
else
- DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI);
+ DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar,
+ DIExpr, SI);
DbgVal->setDebugLoc(DDI->getDebugLoc());
return true;
}
@@ -1006,6 +1024,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
DIVariable DIVar(DDI->getVariable());
+ DIExpression DIExpr(DDI->getExpression());
assert((!DIVar || DIVar.isVariable()) &&
"Variable in DbgDeclareInst should be either null or a DIVariable.");
if (!DIVar)
@@ -1015,8 +1034,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
return true;
Instruction *DbgVal =
- Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
- DIVar, LI);
+ Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI);
DbgVal->setDebugLoc(DDI->getDebugLoc());
return true;
}
@@ -1056,14 +1074,14 @@ bool llvm::LowerDbgDeclare(Function &F) {
else if (LoadInst *LI = dyn_cast<LoadInst>(U))
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
else if (CallInst *CI = dyn_cast<CallInst>(U)) {
- // This is a call by-value or some other instruction that
- // takes a pointer to the variable. Insert a *value*
- // intrinsic that describes the alloca.
- auto DbgVal =
- DIB.insertDbgValueIntrinsic(AI, 0,
- DIVariable(DDI->getVariable()), CI);
- DbgVal->setDebugLoc(DDI->getDebugLoc());
- }
+ // This is a call by-value or some other instruction that
+ // takes a pointer to the variable. Insert a *value*
+ // intrinsic that describes the alloca.
+ auto DbgVal = DIB.insertDbgValueIntrinsic(
+ AI, 0, DIVariable(DDI->getVariable()),
+ DIExpression(DDI->getExpression()), CI);
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ }
DDI->eraseFromParent();
}
}
@@ -1087,6 +1105,7 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
if (!DDI)
return false;
DIVariable DIVar(DDI->getVariable());
+ DIExpression DIExpr(DDI->getExpression());
assert((!DIVar || DIVar.isVariable()) &&
"Variable in DbgDeclareInst should be either null or a DIVariable.");
if (!DIVar)
@@ -1096,24 +1115,19 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
// "deref" operation to a list of address elements, as new llvm.dbg.declare
// will take a value storing address of the memory for variable, not
// alloca itself.
- Type *Int64Ty = Type::getInt64Ty(AI->getContext());
- SmallVector<Value*, 4> NewDIVarAddress;
- if (DIVar.hasComplexAddress()) {
- for (unsigned i = 0, n = DIVar.getNumAddrElements(); i < n; ++i) {
- NewDIVarAddress.push_back(
- ConstantInt::get(Int64Ty, DIVar.getAddrElement(i)));
+ SmallVector<int64_t, 4> NewDIExpr;
+ if (DIExpr) {
+ for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i) {
+ NewDIExpr.push_back(DIExpr.getElement(i));
}
}
- NewDIVarAddress.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref));
- DIVariable NewDIVar = Builder.createComplexVariable(
- DIVar.getTag(), DIVar.getContext(), DIVar.getName(),
- DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(),
- NewDIVarAddress, DIVar.getArgNumber());
+ NewDIExpr.push_back(dwarf::DW_OP_deref);
// Insert llvm.dbg.declare in the same basic block as the original alloca,
// and remove old llvm.dbg.declare.
BasicBlock *BB = AI->getParent();
- Builder.insertDeclare(NewAllocaAddress, NewDIVar, BB);
+ Builder.insertDeclare(NewAllocaAddress, DIVar,
+ Builder.createExpression(NewDIExpr), BB);
DDI->eraseFromParent();
return true;
}
@@ -1165,7 +1179,7 @@ static void changeToCall(InvokeInst *II) {
}
static bool markAliveBlocks(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 128> &Reachable) {
+ SmallPtrSetImpl<BasicBlock*> &Reachable) {
SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
@@ -1178,6 +1192,26 @@ static bool markAliveBlocks(BasicBlock *BB,
// instructions into LLVM unreachable insts. The instruction combining pass
// canonicalizes unreachable insts into stores to null or undef.
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ // Assumptions that are known to be false are equivalent to unreachable.
+ // Also, if the condition is undefined, then we make the choice most
+ // beneficial to the optimizer, and choose that to also be unreachable.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
+ if (II->getIntrinsicID() == Intrinsic::assume) {
+ bool MakeUnreachable = false;
+ if (isa<UndefValue>(II->getArgOperand(0)))
+ MakeUnreachable = true;
+ else if (ConstantInt *Cond =
+ dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ MakeUnreachable = Cond->isZero();
+
+ if (MakeUnreachable) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(BBI, false);
+ Changed = true;
+ break;
+ }
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
if (CI->doesNotReturn()) {
// If we found a call to a no-return function, insert an unreachable
@@ -1232,7 +1266,7 @@ static bool markAliveBlocks(BasicBlock *BB,
Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.insert(*SI))
+ if (Reachable.insert(*SI).second)
Worklist.push_back(*SI);
} while (!Worklist.empty());
return Changed;
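The new markAliveBlocks logic above treats an @llvm.assume whose condition is provably false (or undef) as a marker of unreachable code. A standalone sketch of that test, assuming the IntrinsicInst API of this revision; the helper name is illustrative:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"

// True when the assumption can never hold, so everything after it is dead.
static bool assumeNeverHolds(const llvm::IntrinsicInst *II) {
  if (II->getIntrinsicID() != llvm::Intrinsic::assume)
    return false;
  const llvm::Value *Cond = II->getArgOperand(0);
  if (llvm::isa<llvm::UndefValue>(Cond))
    return true;                 // undef: choose the value that helps most
  if (const auto *C = llvm::dyn_cast<llvm::ConstantInt>(Cond))
    return C->isZero();          // assume(false): the block cannot execute
  return false;
}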
@@ -1272,3 +1306,43 @@ bool llvm::removeUnreachableBlocks(Function &F) {
return true;
}
+
+void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
+ K->dropUnknownMetadata(KnownIDs);
+ K->getAllMetadataOtherThanDebugLoc(Metadata);
+ for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
+ unsigned Kind = Metadata[i].first;
+ MDNode *JMD = J->getMetadata(Kind);
+ MDNode *KMD = Metadata[i].second;
+
+ switch (Kind) {
+ default:
+ K->setMetadata(Kind, nullptr); // Remove unknown metadata
+ break;
+ case LLVMContext::MD_dbg:
+ llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
+ case LLVMContext::MD_tbaa:
+ K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
+ break;
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
+ break;
+ case LLVMContext::MD_range:
+ K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
+ break;
+ case LLVMContext::MD_fpmath:
+ K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
+ break;
+ case LLVMContext::MD_invariant_load:
+ // Only set the !invariant.load if it is present in both instructions.
+ K->setMetadata(Kind, JMD);
+ break;
+ case LLVMContext::MD_nonnull:
+ // Only set the !nonnull if it is present in both instructions.
+ K->setMetadata(Kind, JMD);
+ break;
+ }
+ }
+}
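A sketch of how a transform is expected to drive the new combineMetadata() when it keeps instruction K and drops an equivalent instruction J. Only the combineMetadata call and the metadata-kind IDs come from the patch; the wrapper is illustrative and assumes the matching declaration lands in llvm/Transforms/Utils/Local.h:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Transforms/Utils/Local.h"

// List the kinds we know how to merge; combineMetadata() intersects or
// generalizes those and strips everything else from K.
static void mergeAndErase(llvm::Instruction *K, llvm::Instruction *J) {
  unsigned KnownIDs[] = {llvm::LLVMContext::MD_tbaa,
                         llvm::LLVMContext::MD_alias_scope,
                         llvm::LLVMContext::MD_noalias,
                         llvm::LLVMContext::MD_range,
                         llvm::LLVMContext::MD_fpmath,
                         llvm::LLVMContext::MD_invariant_load,
                         llvm::LLVMContext::MD_nonnull};
  llvm::combineMetadata(K, J, KnownIDs);
  J->replaceAllUsesWith(K);
  J->eraseFromParent();
}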
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index ef42291..af0501f 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -44,6 +44,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -173,8 +174,7 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
if (Exit->isLandingPad()) {
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0],
- LoopBlocks.size()),
+ SplitLandingPadPredecessors(Exit, LoopBlocks,
".loopexit", ".nonloopexit",
PP, NewBBs);
NewExitBB = NewBBs[0];
@@ -209,11 +209,12 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
/// \brief The first part of loop-nestification is to find a PHI node that tells
/// us how to partition the loops.
static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
- DominatorTree *DT) {
+ DominatorTree *DT,
+ AssumptionTracker *AT) {
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -252,7 +253,8 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
///
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
AliasAnalysis *AA, DominatorTree *DT,
- LoopInfo *LI, ScalarEvolution *SE, Pass *PP) {
+ LoopInfo *LI, ScalarEvolution *SE, Pass *PP,
+ AssumptionTracker *AT) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return nullptr;
@@ -261,7 +263,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
assert(!L->getHeader()->isLandingPad() &&
"Can't insert backedge to landing pad");
- PHINode *PN = findPHIToPartitionLoops(L, AA, DT);
+ PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AT);
if (!PN) return nullptr; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
@@ -475,7 +477,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, Pass *PP,
- const DataLayout *DL) {
+ const DataLayout *DL, AssumptionTracker *AT) {
bool Changed = false;
ReprocessLoop:
@@ -496,20 +498,19 @@ ReprocessLoop:
}
// Delete each unique out-of-loop (and thus dead) predecessor.
- for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
- E = BadPreds.end(); I != E; ++I) {
+ for (BasicBlock *P : BadPreds) {
DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
- << (*I)->getName() << "\n");
+ << P->getName() << "\n");
// Inform each successor of each dead pred.
- for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
- (*SI)->removePredecessor(*I);
+ for (succ_iterator SI = succ_begin(P), SE = succ_end(P); SI != SE; ++SI)
+ (*SI)->removePredecessor(P);
// Zap the dead pred's terminator and replace it with unreachable.
- TerminatorInst *TI = (*I)->getTerminator();
+ TerminatorInst *TI = P->getTerminator();
TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- (*I)->getTerminator()->eraseFromParent();
- new UnreachableInst((*I)->getContext(), *I);
+ P->getTerminator()->eraseFromParent();
+ new UnreachableInst(P->getContext(), P);
Changed = true;
}
}
@@ -582,7 +583,8 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP)) {
+ if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE,
+ PP, AT)) {
++NumNested;
// Enqueue the outer loop as it should be processed next in our
// depth-first nest walk.
@@ -612,7 +614,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -712,7 +714,7 @@ ReprocessLoop:
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
AliasAnalysis *AA, ScalarEvolution *SE,
- const DataLayout *DL) {
+ const DataLayout *DL, AssumptionTracker *AT) {
bool Changed = false;
// Worklist maintains our depth-first queue of loops in this nest to process.
@@ -730,7 +732,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
while (!Worklist.empty())
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
- SE, PP, DL);
+ SE, PP, DL, AT);
return Changed;
}
@@ -749,10 +751,13 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
const DataLayout *DL;
+ AssumptionTracker *AT;
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
+
// We need loop information to identify the loops...
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -773,11 +778,12 @@ namespace {
char LoopSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
- "Canonicalize natural loops", true, false)
+ "Canonicalize natural loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
- "Canonicalize natural loops", true, false)
+ "Canonicalize natural loops", false, false)
// Publicly exposed interface to pass...
char &llvm::LoopSimplifyID = LoopSimplify::ID;
@@ -794,10 +800,11 @@ bool LoopSimplify::runOnFunction(Function &F) {
SE = getAnalysisIfAvailable<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
+ AT = &getAnalysis<AssumptionTracker>();
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL);
+ Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AT);
return Changed;
}
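The LoopSimplify hunks above show a pattern this patch repeats across many passes: require AssumptionTracker and thread it into SimplifyInstruction so @llvm.assume facts take part in simplification. A minimal sketch of the call shape, with an illustrative helper name:

#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"

// Returns the simplified value, or null if the PHI cannot be simplified.
static llvm::Value *simplifyPhi(llvm::PHINode *PN,
                                const llvm::DominatorTree *DT,
                                llvm::AssumptionTracker *AT) {
  return llvm::SimplifyInstruction(PN, /*DL=*/nullptr, /*TLI=*/nullptr, DT, AT);
}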
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index c86b82c..0e1baa1 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -17,7 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
@@ -64,10 +66,15 @@ static inline void RemapInstruction(Instruction *I,
/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
/// only has one predecessor, and that predecessor only has one successor.
-/// The LoopInfo Analysis that is passed will be kept consistent.
-/// Returns the new combined block.
-static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
- LPPassManager *LPM) {
+/// The LoopInfo Analysis that is passed will be kept consistent. If folding is
+/// successful, references to the containing loop must be removed from
+/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have
+/// references to the eliminated BB. The argument ForgottenLoops contains a set
+/// of loops that have already been forgotten to prevent redundant, expensive
+/// calls to ScalarEvolution::forgetLoop. Returns the new combined block.
+static BasicBlock *
+FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
+ SmallPtrSetImpl<Loop *> &ForgottenLoops) {
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
@@ -104,8 +111,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
// ScalarEvolution holds references to loop exit blocks.
if (LPM) {
if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) {
- if (Loop *L = LI->getLoopFor(BB))
- SE->forgetLoop(L);
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (ForgottenLoops.insert(L).second)
+ SE->forgetLoop(L);
+ }
}
}
LI->removeBlock(BB);
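The ForgottenLoops set used above exists so that ScalarEvolution::forgetLoop, which is expensive, runs at most once per loop while latches are being folded. A small sketch of that idiom, using the pair-returning insert(); the helper name is illustrative:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"

// Forget L in SE only on the first call for that loop.
static void forgetOnce(llvm::ScalarEvolution *SE, llvm::Loop *L,
                       llvm::SmallPtrSetImpl<llvm::Loop *> &ForgottenLoops) {
  if (SE && L && ForgottenLoops.insert(L).second)
    SE->forgetLoop(L);
}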
@@ -146,7 +155,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
bool AllowRuntime, unsigned TripMultiple,
- LoopInfo *LI, Pass *PP, LPPassManager *LPM) {
+ LoopInfo *LI, Pass *PP, LPPassManager *LPM,
+ AssumptionTracker *AT) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -214,11 +224,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
- if (PP) {
- ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE)
- SE->forgetLoop(L);
- }
+ ScalarEvolution *SE =
+ PP ? PP->getAnalysisIfAvailable<ScalarEvolution>() : nullptr;
+ if (SE)
+ SE->forgetLoop(L);
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
@@ -292,15 +301,45 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
+ SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
+ NewLoops[L] = L;
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
ValueToValueMapTy VMap;
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
- // Loop over all of the PHI nodes in the block, changing them to use the
- // incoming values from the previous block.
+ // Tell LI about New.
+ if (*BB == Header) {
+ assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
+ L->addBasicBlockToLoop(New, LI->getBase());
+ } else {
+ // Figure out which loop New is in.
+ const Loop *OldLoop = LI->getLoopFor(*BB);
+ assert(OldLoop && "Should (at least) be in the loop being unrolled!");
+
+ Loop *&NewLoop = NewLoops[OldLoop];
+ if (!NewLoop) {
+ // Found a new sub-loop.
+ assert(*BB == OldLoop->getHeader() &&
+ "Header should be first in RPO");
+
+ Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
+ assert(NewLoopParent &&
+ "Expected parent loop before sub-loop in RPO");
+ NewLoop = new Loop;
+ NewLoopParent->addChildLoop(NewLoop);
+
+ // Forget the old loop, since its inputs may have changed.
+ if (SE)
+ SE->forgetLoop(OldLoop);
+ }
+ NewLoop->addBasicBlockToLoop(New, LI->getBase());
+ }
+
if (*BB == Header)
+ // Loop over all of the PHI nodes in the block, changing them to use
+ // the incoming values from the previous block.
for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
@@ -317,8 +356,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
VI != VE; ++VI)
LastValueMap[VI->first] = VI->second;
- L->addBasicBlockToLoop(New, LI->getBase());
-
// Add phi entries for newly created values to all exit blocks.
for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
SI != SE; ++SI) {
@@ -423,15 +460,21 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
}
// Merge adjacent basic blocks, if possible.
+ SmallPtrSet<Loop *, 4> ForgottenLoops;
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
if (Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM,
+ ForgottenLoops))
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
}
}
+ // FIXME: We could register any cloned assumptions instead of clearing the
+ // whole function's cache.
+ AT->forgetCachedAssumptions(F);
+
DominatorTree *DT = nullptr;
if (PP) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
@@ -443,7 +486,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
}
// Simplify any new induction variables in the partially unrolled loop.
- ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
if (SE && !CompletelyUnroll) {
SmallVector<WeakVH, 16> DeadInsts;
simplifyLoopIVs(L, SE, LPM, DeadInsts);
@@ -492,8 +534,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (OuterL) {
DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL);
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AT);
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index a96c46a..3d91336 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -57,7 +58,7 @@ STATISTIC(NumRuntimeUnrolled,
static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
- ValueToValueMapTy &LVMap, Pass *P) {
+ ValueToValueMapTy &VMap, Pass *P) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
@@ -86,7 +87,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
Value *V = PN->getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (L->contains(I)) {
- V = LVMap[I];
+ V = VMap[I];
}
}
// Adding a value to the new PHI node from the last prolog block
@@ -127,76 +128,122 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
}
/// Create a clone of the blocks in a loop and connect them together.
-/// This function doesn't create a clone of the loop structure.
+/// If UnrollProlog is true, the loop structure is not cloned; otherwise a new
+/// loop is created that contains all of the cloned blocks and whose induction
+/// variable counts NewIter down to 0.
///
-/// There are two value maps that are defined and used. VMap is
-/// for the values in the current loop instance. LVMap contains
-/// the values from the last loop instance. We need the LVMap values
-/// to update the initial values for the current loop instance.
-///
-static void CloneLoopBlocks(Loop *L,
- bool FirstCopy,
- BasicBlock *InsertTop,
- BasicBlock *InsertBot,
+static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
+ BasicBlock *InsertTop, BasicBlock *InsertBot,
std::vector<BasicBlock *> &NewBlocks,
- LoopBlocksDFS &LoopBlocks,
- ValueToValueMapTy &VMap,
- ValueToValueMapTy &LVMap,
+ LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
LoopInfo *LI) {
-
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
Function *F = Header->getParent();
LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ Loop *NewLoop = 0;
+ Loop *ParentLoop = L->getParentLoop();
+ if (!UnrollProlog) {
+ NewLoop = new Loop();
+ if (ParentLoop)
+ ParentLoop->addChildLoop(NewLoop);
+ else
+ LI->addTopLevelLoop(NewLoop);
+ }
+
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
- BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
NewBlocks.push_back(NewBB);
- if (Loop *ParentLoop = L->getParentLoop())
+ if (NewLoop)
+ NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ else if (ParentLoop)
ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
VMap[*BB] = NewBB;
if (Header == *BB) {
// For the first block, add a CFG connection to this newly
- // created block
+ // created block.
InsertTop->getTerminator()->setSuccessor(0, NewBB);
- // Change the incoming values to the ones defined in the
- // previously cloned loop.
- for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
- PHINode *NewPHI = cast<PHINode>(VMap[I]);
- if (FirstCopy) {
- // We replace the first phi node with the value from the preheader
- VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
- NewBB->getInstList().erase(NewPHI);
- } else {
- // Update VMap with values from the previous block
- unsigned idx = NewPHI->getBasicBlockIndex(Latch);
- Value *InVal = NewPHI->getIncomingValue(idx);
- if (Instruction *I = dyn_cast<Instruction>(InVal))
- if (L->contains(I))
- InVal = LVMap[InVal];
- NewPHI->setIncomingValue(idx, InVal);
- NewPHI->setIncomingBlock(idx, InsertTop);
- }
- }
}
-
if (Latch == *BB) {
+ // For the last block, if UnrollProlog is true, create a direct jump to
+ // InsertBot. If not, create a loop back to cloned head.
VMap.erase((*BB)->getTerminator());
- NewBB->getTerminator()->eraseFromParent();
- BranchInst::Create(InsertBot, NewBB);
+ BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
+ BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
+ if (UnrollProlog) {
+ LatchBR->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ } else {
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
+ FirstLoopBB->getFirstNonPHI());
+ IRBuilder<> Builder(LatchBR);
+ Value *IdxSub =
+ Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+ NewIdx->getName() + ".sub");
+ Value *IdxCmp =
+ Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
+ BranchInst::Create(FirstLoopBB, InsertBot, IdxCmp, NewBB);
+ NewIdx->addIncoming(NewIter, InsertTop);
+ NewIdx->addIncoming(IdxSub, NewBB);
+ LatchBR->eraseFromParent();
+ }
}
}
- // LastValueMap is updated with the values for the current loop
- // which are used the next time this function is called.
- for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
- VI != VE; ++VI) {
- LVMap[VI->first] = VI->second;
+
+ // Change the incoming values to the ones defined in the preheader or
+ // cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (UnrollProlog) {
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ } else {
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ NewPHI->setIncomingBlock(idx, NewLatch);
+ if (VMap[InVal])
+ NewPHI->setIncomingValue(idx, VMap[InVal]);
+ }
+ }
+ if (NewLoop) {
+ // Add unroll disable metadata to disable future unrolling for this loop.
+ SmallVector<Value *, 4> Vals;
+ // Reserve first location for self reference to the LoopID metadata node.
+ Vals.push_back(nullptr);
+ MDNode *LoopID = NewLoop->getLoopID();
+ if (LoopID) {
+ // First remove any existing loop unrolling metadata.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ bool IsUnrollMetadata = false;
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
+ }
+ if (!IsUnrollMetadata) Vals.push_back(LoopID->getOperand(i));
+ }
+ }
+
+ LLVMContext &Context = NewLoop->getHeader()->getContext();
+ SmallVector<Value *, 1> DisableOperands;
+ DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ Vals.push_back(DisableNode);
+
+ MDNode *NewLoopID = MDNode::get(Context, Vals);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ NewLoop->setLoopID(NewLoopID);
}
}
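The metadata rewrite at the end of CloneLoopBlocks keeps every existing loop-id operand except previous unroll hints and then appends a disable hint, so the freshly created prolog loop is never unrolled again. A toy model of that filtering step, using plain strings in place of MDNode operands; it sketches the selection logic only, not the metadata API:

#include <string>
#include <vector>

// Keep all hints except prior "llvm.loop.unroll.*" ones, then add the
// disable hint; 17 == strlen("llvm.loop.unroll.").
static std::vector<std::string>
rebuildLoopHints(const std::vector<std::string> &OldHints) {
  std::vector<std::string> Hints;
  for (const std::string &H : OldHints)
    if (H.compare(0, 17, "llvm.loop.unroll.") != 0)
      Hints.push_back(H);
  Hints.push_back("llvm.loop.unroll.disable");
  return Hints;
}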
@@ -212,18 +259,16 @@ static void CloneLoopBlocks(Loop *L,
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
/// the switch instruction is generated.
///
-/// extraiters = tripcount % loopfactor
-/// if (extraiters == 0) jump Loop:
-/// if (extraiters == loopfactor) jump L1
-/// if (extraiters == loopfactor-1) jump L2
-/// ...
-/// L1: LoopBody;
-/// L2: LoopBody;
-/// ...
-/// if tripcount < loopfactor jump End
-/// Loop:
-/// ...
-/// End:
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// else jump Prol
+/// Prol: LoopBody;
+/// extraiters -= 1 // Omitted if unroll factor is 2.
+/// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
+/// if (tripcount < loopfactor) jump End
+/// Loop:
+/// ...
+/// End:
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
LPPassManager *LPM) {
@@ -250,6 +295,10 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
return false;
+ // If BECount is INT_MAX, we can't compute trip-count without overflow.
+ if (BECount->isAllOnesValue())
+ return false;
+
// Add 1 since the backedge count doesn't include the first loop iteration
const SCEV *TripCountSC =
SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
@@ -284,26 +333,21 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
IRBuilder<> B(PreHeaderBR);
Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
- // Check if for no extra iterations, then jump to unrolled loop. We have to
- // check that the trip count computation didn't overflow when adding one to
- // the backedge taken count.
+  // If there are no extra iterations, jump to the cloned/unrolled loop.
+ // We have to check that the trip count computation didn't overflow when
+ // adding one to the backedge taken count.
Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
- // Branch to either the extra iterations or the unrolled loop
+ // Branch to either the extra iterations or the cloned/unrolled loop
// We will fix up the true branch label when adding loop body copies
BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
assert(PreHeaderBR->isUnconditional() &&
PreHeaderBR->getSuccessor(0) == PEnd &&
"CFG edges in Preheader are not correct");
PreHeaderBR->eraseFromParent();
-
- ValueToValueMapTy LVMap;
Function *F = Header->getParent();
- // These variables are used to update the CFG links in each iteration
- BasicBlock *CompareBB = nullptr;
- BasicBlock *LastLoopBB = PH;
// Get an ordered list of blocks in the loop to help with the ordering of the
// cloned blocks in the prolog code
LoopBlocksDFS LoopBlocks(L);
@@ -314,62 +358,39 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// and generate a condition that branches to the copy depending on the
// number of 'left over' iterations.
//
- for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
- std::vector<BasicBlock*> NewBlocks;
- ValueToValueMapTy VMap;
-
- // Clone all the basic blocks in the loop, but we don't clone the loop
- // This function adds the appropriate CFG connections.
- CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
- LoopBlocks, VMap, LVMap, LI);
- LastLoopBB = cast<BasicBlock>(VMap[Latch]);
-
- // Insert the cloned blocks into function just before the original loop
- F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
- NewBlocks[0], F->end());
-
- // Generate the code for the comparison which determines if the loop
- // prolog code needs to be executed.
- if (leftOverIters == Count-1) {
- // There is no compare block for the fall-thru case when for the last
- // left over iteration
- CompareBB = NewBlocks[0];
- } else {
- // Create a new block for the comparison
- BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
- F, CompareBB);
- if (Loop *ParentLoop = L->getParentLoop()) {
- // Add the new block to the parent loop, if needed
- ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
- }
-
- // The comparison w/ the extra iteration value and branch
- Type *CountTy = TripCount->getType();
- Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
- ConstantInt::get(CountTy, leftOverIters),
- "un.tmp");
- // Branch to either the extra iterations or the unrolled loop
- BranchInst::Create(NewBlocks[0], CompareBB,
- BranchVal, NewBB);
- CompareBB = NewBB;
- PH->getTerminator()->setSuccessor(0, NewBB);
- VMap[NewPH] = CompareBB;
- }
-
- // Rewrite the cloned instruction operands to use the values
- // created when the clone is created.
- for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
- for (BasicBlock::iterator I = NewBlocks[i]->begin(),
- E = NewBlocks[i]->end(); I != E; ++I) {
- RemapInstruction(I, VMap,
- RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
- }
+ std::vector<BasicBlock *> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // If unroll count is 2 and we can't overflow in tripcount computation (which
+  // If the unroll count is 2 and the trip-count computation (BECount + 1)
+  // cannot overflow, we don't need a loop for the prologue and can unroll it
+  // directly. Overflow is ruled out only when the trip count is a constant.
+
+ // Clone all the basic blocks in the loop. If Count is 2, we don't clone
+ // the loop, otherwise we create a cloned loop to execute the extra
+ // iterations. This function adds the appropriate CFG connections.
+ CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
+ VMap, LI);
+
+ // Insert the cloned blocks into function just before the original loop
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0],
+ F->end());
+
+ // Rewrite the cloned instruction operands to use the values
+ // created when the clone is created.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end();
+ I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
}
}
// Connect the prolog code to the original loop and update the
// PHI functions.
- ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
LPM->getAsPass());
NumRuntimeUnrolled++;
return true;
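The prolog arithmetic above computes the leftover iteration count as a mask (Count is a power of two) and branches to the prolog either when there are leftovers or when TripCount wrapped to zero because BECount + 1 overflowed. A standalone sketch of that arithmetic; the function names are illustrative:

#include <cstdint>

// xtraiter = TripCount % Count, computed as an AND with Count - 1.
static uint64_t prologIterations(uint64_t TripCount, uint64_t Count) {
  return TripCount & (Count - 1);
}

// Mirrors the "lcmp.or" branch condition: take the prolog when there are
// leftover iterations, or when TripCount wrapped to 0 on the +1 overflow.
static bool takePrologFirst(uint64_t TripCount, uint64_t Count) {
  return prologIterations(TripCount, Count) != 0 || TripCount == 0;
}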
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index eac693b..a0105c2 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -67,8 +67,8 @@ namespace {
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
- Value *Val, BasicBlock *OrigBlock,
- BasicBlock *Default);
+ Value *Val, BasicBlock *Predecessor,
+ BasicBlock *OrigBlock, BasicBlock *Default);
BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
BasicBlock *Default);
unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
@@ -131,6 +131,28 @@ static raw_ostream& operator<<(raw_ostream &O,
return O << "]";
}
+/// \brief Update the first occurrence of the "switch statement" BB in the PHI
+/// node with the "new" BB. The other occurrences will be updated by subsequent
+/// calls to this function.
+///
+/// Switch statements may have more than one incoming edge into the same BB if
+/// they all have the same value. When the switch statement is converted these
+/// incoming edges are now coming from multiple BBs.
+static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB) {
+ for (BasicBlock::iterator I = SuccBB->begin(), E = SuccBB->getFirstNonPHI();
+ I != E; ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+    // Only update the first occurrence.
+ for (unsigned Idx = 0, E = PN->getNumIncomingValues(); Idx != E; ++Idx) {
+ if (PN->getIncomingBlock(Idx) == OrigBB) {
+ PN->setIncomingBlock(Idx, NewBB);
+ break;
+ }
+ }
+ }
+}
+
// switchConvert - Convert the switch statement into a binary lookup of
// the case values. The function recursively builds this tree.
// LowerBound and UpperBound are used to keep track of the bounds for Val
@@ -139,6 +161,7 @@ static raw_ostream& operator<<(raw_ostream &O,
BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound,
ConstantInt *UpperBound, Value *Val,
+ BasicBlock *Predecessor,
BasicBlock *OrigBlock,
BasicBlock *Default) {
unsigned Size = End - Begin;
@@ -149,6 +172,7 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
// emitting the code that checks if the value actually falls in the range
// because the bounds already tell us so.
if (Begin->Low == LowerBound && Begin->High == UpperBound) {
+ fixPhis(Begin->BB, OrigBlock, Predecessor);
return Begin->BB;
}
return newLeafBlock(*Begin, Val, OrigBlock, Default);
@@ -200,21 +224,25 @@ BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
dbgs() << "NONE\n";
});
- BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
- NewUpperBound, Val, OrigBlock, Default);
- BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
- UpperBound, Val, OrigBlock, Default);
-
// Create a new node that checks if the value is < pivot. Go to the
// left branch if it is and right branch if not.
Function* F = OrigBlock->getParent();
BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");
- Function::iterator FI = OrigBlock;
- F->getBasicBlockList().insert(++FI, NewNode);
ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
Val, Pivot.Low, "Pivot");
+
+ BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
+ NewUpperBound, Val, NewNode, OrigBlock,
+ Default);
+ BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
+ UpperBound, Val, NewNode, OrigBlock,
+ Default);
+
+ Function::iterator FI = OrigBlock;
+ F->getBasicBlockList().insert(++FI, NewNode);
NewNode->getInstList().push_back(Comp);
+
BranchInst::Create(LBranch, RBranch, Comp, NewNode);
return NewNode;
}
@@ -386,7 +414,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
}
BasicBlock *SwitchBlock =
switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
- OrigBlock, NewDefault);
+ OrigBlock, OrigBlock, NewDefault);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
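switchConvert builds a balanced binary search over the sorted case ranges: each interior node compares the switched value against the pivot's low bound and recurses into one half. A compact standalone sketch of that shape, with plain indices standing in for basic blocks; everything here is illustrative:

#include <cstdint>
#include <vector>

struct CaseRangeSk { int64_t Low, High; int Target; };

// Return the target selected for Val among Cases[Begin, End), falling back to
// DefaultTarget when no range contains it.
static int lowerSwitch(const std::vector<CaseRangeSk> &Cases, size_t Begin,
                       size_t End, int64_t Val, int DefaultTarget) {
  if (End - Begin == 1)
    return (Val >= Cases[Begin].Low && Val <= Cases[Begin].High)
               ? Cases[Begin].Target
               : DefaultTarget;
  size_t Mid = Begin + (End - Begin) / 2; // pivot: first case of the right half
  if (Val < Cases[Mid].Low)               // the "NodeBlock" comparison
    return lowerSwitch(Cases, Begin, Mid, Val, DefaultTarget);
  return lowerSwitch(Cases, Mid, End, Val, DefaultTarget);
}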
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 189caa7..477ee7a 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -38,6 +39,7 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
// This is a cluster of orthogonal Transforms
@@ -51,6 +53,7 @@ namespace {
char PromotePass::ID = 0;
INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
@@ -63,6 +66,7 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
while (1) {
Allocas.clear();
@@ -76,7 +80,7 @@ bool PromotePass::runOnFunction(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT);
+ PromoteMemToReg(Allocas, DT, nullptr, AT);
NumPromoted += Allocas.size();
Changed = true;
}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index d9dbbca..35c701e 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -78,7 +78,7 @@ void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
}
GlobalVariable *
-llvm::collectUsedGlobalVariables(Module &M, SmallPtrSet<GlobalValue *, 8> &Set,
+llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
bool CompilerUsed) {
const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
GlobalVariable *GV = M.getGlobalVariable(Name);
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 06d73fe..1fd7071 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -238,6 +238,9 @@ struct PromoteMem2Reg {
/// An AliasSetTracker object to update. If null, don't update it.
AliasSetTracker *AST;
+ /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
+ AssumptionTracker *AT;
+
/// Reverse mapping of Allocas.
DenseMap<AllocaInst *, unsigned> AllocaLookup;
@@ -279,9 +282,9 @@ struct PromoteMem2Reg {
public:
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST)
+ AliasSetTracker *AST, AssumptionTracker *AT)
: Allocas(Allocas.begin(), Allocas.end()), DT(DT),
- DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {}
+ DIB(*DT.getRoot()->getParent()->getParent()), AST(AST), AT(AT) {}
void run();
@@ -302,8 +305,8 @@ private:
void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
AllocaInfo &Info);
void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
- SmallPtrSet<BasicBlock *, 32> &LiveInBlocks);
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncVals,
std::vector<RenamePassData> &Worklist);
@@ -685,7 +688,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
@@ -766,8 +769,8 @@ void PromoteMem2Reg::run() {
/// inserted phi nodes would be dead).
void PromoteMem2Reg::ComputeLiveInBlocks(
AllocaInst *AI, AllocaInfo &Info,
- const SmallPtrSet<BasicBlock *, 32> &DefBlocks,
- SmallPtrSet<BasicBlock *, 32> &LiveInBlocks) {
+ const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
+ SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
// To determine liveness, we must iterate through the predecessors of blocks
// where the def is live. Blocks are added to the worklist if we need to
@@ -816,7 +819,7 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
// The block really is live in here, insert it into the set. If already in
// the set, then it has already been processed.
- if (!LiveInBlocks.insert(BB))
+ if (!LiveInBlocks.insert(BB).second)
continue;
// Since the value is live into BB, it is either defined in a predecessor or
@@ -857,10 +860,8 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
less_second> IDFPriorityQueue;
IDFPriorityQueue PQ;
- for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
- E = DefBlocks.end();
- I != E; ++I) {
- if (DomTreeNode *Node = DT.getNode(*I))
+ for (BasicBlock *BB : DefBlocks) {
+ if (DomTreeNode *Node = DT.getNode(BB))
PQ.push(std::make_pair(Node, DomLevels[Node]));
}
@@ -898,7 +899,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
if (SuccLevel > RootLevel)
continue;
- if (!Visited.insert(SuccNode))
+ if (!Visited.insert(SuccNode).second)
continue;
BasicBlock *SuccBB = SuccNode->getBlock();
@@ -1003,7 +1004,7 @@ NextIteration:
}
// Don't revisit blocks.
- if (!Visited.insert(BB))
+ if (!Visited.insert(BB).second)
return;
for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
@@ -1060,17 +1061,17 @@ NextIteration:
++I;
for (; I != E; ++I)
- if (VisitedSuccs.insert(*I))
+ if (VisitedSuccs.insert(*I).second)
Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
goto NextIteration;
}
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST) {
+ AliasSetTracker *AST, AssumptionTracker *AT) {
// If there is nothing to do, bail out...
if (Allocas.empty())
return;
- PromoteMem2Reg(Allocas, DT, AST).run();
+ PromoteMem2Reg(Allocas, DT, AST, AT).run();
}
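The SmallPtrSet-to-SmallPtrSetImpl signature changes in this file follow the usual size-erasure pattern: interfaces take SmallPtrSetImpl<T*>& so callers are free to choose the inline size. A minimal sketch, with an illustrative helper:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"

// The callee does not care what small size the caller picked.
static void noteBlock(llvm::SmallPtrSetImpl<llvm::BasicBlock *> &Set,
                      llvm::BasicBlock *BB) {
  Set.insert(BB);
}

// A caller chooses the inline size:
//   llvm::SmallPtrSet<llvm::BasicBlock *, 32> LiveInBlocks;
//   noteBlock(LiveInBlocks, BB);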
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 960b198..92fd56a 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -43,6 +43,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <map>
#include <set>
@@ -68,12 +70,23 @@ static cl::opt<bool> HoistCondStores(
cl::desc("Hoist conditional stores if an unconditional store precedes"));
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
+STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
+ // The first field contains the value that the switch produces when a certain
+ // case group is selected, and the second field is a vector containing the cases
+ // composing the case group.
+ typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
+ SwitchCaseResultVectorTy;
+ // The first field contains the phi node that generates a result of the switch
+ // and the second field contains the value generated for a certain case in the switch
+ // for that PHI.
+ typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
+
/// ValueEqualityComparisonCase - Represents a case of a switch.
struct ValueEqualityComparisonCase {
ConstantInt *Value;
@@ -92,7 +105,9 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
+ unsigned BonusInstThreshold;
const DataLayout *const DL;
+ AssumptionTracker *AT;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<ValueEqualityComparisonCase> &Cases);
@@ -111,8 +126,9 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *DL)
- : TTI(TTI), DL(DL) {}
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
+ const DataLayout *DL, AssumptionTracker *AT)
+ : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AT(AT) {}
bool run(BasicBlock *BB);
};
}
@@ -256,7 +272,7 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) {
/// V plus its non-dominating operands. If that cost is greater than
/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- SmallPtrSet<Instruction*, 4> *AggressiveInsts,
+ SmallPtrSetImpl<Instruction*> *AggressiveInsts,
unsigned &CostRemaining,
const DataLayout *DL) {
Instruction *I = dyn_cast<Instruction>(V);
@@ -341,114 +357,177 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
return nullptr;
}
-/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
-/// collection of icmp eq/ne instructions that compare a value against a
-/// constant, return the value being compared, and stick the constant into the
-/// Values vector.
-static Value *
-GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
- const DataLayout *DL, bool isEQ, unsigned &UsedICmps) {
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return nullptr;
-
- // If this is an icmp against a constant, handle this as one of the cases.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
- if (ConstantInt *C = GetConstantInt(I->getOperand(1), DL)) {
- Value *RHSVal;
- ConstantInt *RHSC;
-
- if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
- // (x & ~2^x) == y --> x == y || x == y|2^x
- // This undoes a transformation done by instcombine to fuse 2 compares.
- if (match(ICI->getOperand(0),
- m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
- APInt Not = ~RHSC->getValue();
- if (Not.isPowerOf2()) {
- Vals.push_back(C);
- Vals.push_back(
- ConstantInt::get(C->getContext(), C->getValue() | Not));
- UsedICmps++;
- return RHSVal;
- }
- }
+namespace {
+
+/// Given a chain of or (||) or and (&&) comparisons of a value against a
+/// constant, this will try to recover the information required for a switch
+/// structure.
+/// It will depth-first traverse the chain of comparisons, looking for patterns
+/// like %a == 12 or %a < 4 and combine them to produce a set of integers
+/// representing the different cases for the switch.
+/// Note that if the chain is composed of '||' it will build the set of elements
+/// that match the comparisons (i.e. any of these values validates the chain)
+/// while for a chain of '&&' it will build the set of elements that make the
+/// test fail.
+struct ConstantComparesGatherer {
+
+ Value *CompValue; /// Value found for the switch comparison
+ Value *Extra; /// Extra clause to be checked before the switch
+ SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch
+ unsigned UsedICmps; /// Number of comparisons matched in the and/or chain
+
+ /// Construct and compute the result for the comparison instruction Cond
+ ConstantComparesGatherer(Instruction *Cond, const DataLayout *DL)
+ : CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
+ gather(Cond, DL);
+ }
- UsedICmps++;
- Vals.push_back(C);
- return I->getOperand(0);
+ /// Prevent copy
+ ConstantComparesGatherer(const ConstantComparesGatherer &)
+ LLVM_DELETED_FUNCTION;
+ ConstantComparesGatherer &
+ operator=(const ConstantComparesGatherer &) LLVM_DELETED_FUNCTION;
+
+private:
+
+  /// Try to set the current value used for the comparison; it succeeds only if
+  /// it wasn't set before or if the new value is the same as the old one.
+ bool setValueOnce(Value *NewVal) {
+ if(CompValue && CompValue != NewVal) return false;
+ CompValue = NewVal;
+ return (CompValue != nullptr);
+ }
+
+  /// Try to match Instruction "I" as a comparison against a constant and
+  /// populate the array Vals with the set of values that match (or do not
+  /// match, depending on isEQ).
+ /// Return false on failure. On success, the Value the comparison matched
+ /// against is placed in CompValue.
+ /// If CompValue is already set, the function is expected to fail if a match
+ /// is found but the value compared to is different.
+ bool matchInstruction(Instruction *I, const DataLayout *DL, bool isEQ) {
+ // If this is an icmp against a constant, handle this as one of the cases.
+ ICmpInst *ICI;
+ ConstantInt *C;
+ if (!((ICI = dyn_cast<ICmpInst>(I)) &&
+ (C = GetConstantInt(I->getOperand(1), DL)))) {
+ return false;
+ }
+
+ Value *RHSVal;
+ ConstantInt *RHSC;
+
+ // Pattern match a special case
+ // (x & ~2^x) == y --> x == y || x == y|2^x
+ // This undoes a transformation done by instcombine to fuse 2 compares.
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ if (match(ICI->getOperand(0),
+ m_And(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
+ APInt Not = ~RHSC->getValue();
+ if (Not.isPowerOf2()) {
+ // If we already have a value for the switch, it has to match!
+ if(!setValueOnce(RHSVal))
+ return false;
+
+ Vals.push_back(C);
+ Vals.push_back(ConstantInt::get(C->getContext(),
+ C->getValue() | Not));
+ UsedICmps++;
+ return true;
+ }
}
- // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to
- // the set.
- ConstantRange Span =
- ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
-
- // Shift the range if the compare is fed by an add. This is the range
- // compare idiom as emitted by instcombine.
- bool hasAdd =
- match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)));
- if (hasAdd)
- Span = Span.subtract(RHSC->getValue());
-
- // If this is an and/!= check then we want to optimize "x ugt 2" into
- // x != 0 && x != 1.
- if (!isEQ)
- Span = Span.inverse();
-
- // If there are a ton of values, we don't want to make a ginormous switch.
- if (Span.getSetSize().ugt(8) || Span.isEmptySet())
- return nullptr;
-
- for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
- Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+ // If we already have a value for the switch, it has to match!
+ if(!setValueOnce(ICI->getOperand(0)))
+ return false;
+
UsedICmps++;
- return hasAdd ? RHSVal : I->getOperand(0);
+ Vals.push_back(C);
+ return ICI->getOperand(0);
}
- return nullptr;
- }
- // Otherwise, we can only handle an | or &, depending on isEQ.
- if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
- return nullptr;
+ // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
+ ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(),
+ C->getValue());
- unsigned NumValsBeforeLHS = Vals.size();
- unsigned UsedICmpsBeforeLHS = UsedICmps;
- if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, DL,
- isEQ, UsedICmps)) {
- unsigned NumVals = Vals.size();
- unsigned UsedICmpsBeforeRHS = UsedICmps;
- if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
- isEQ, UsedICmps)) {
- if (LHS == RHS)
- return LHS;
- Vals.resize(NumVals);
- UsedICmps = UsedICmpsBeforeRHS;
+ // Shift the range if the compare is fed by an add. This is the range
+ // compare idiom as emitted by instcombine.
+ Value *CandidateVal = I->getOperand(0);
+ if(match(I->getOperand(0), m_Add(m_Value(RHSVal), m_ConstantInt(RHSC)))) {
+ Span = Span.subtract(RHSC->getValue());
+ CandidateVal = RHSVal;
}
- // The RHS of the or/and can't be folded in and we haven't used "Extra" yet,
- // set it and return success.
- if (Extra == nullptr || Extra == I->getOperand(1)) {
- Extra = I->getOperand(1);
- return LHS;
+ // If this is an and/!= check, then we are looking to build the set of
+    // values that *don't* pass the and chain, i.e. to turn "x ugt 2" into
+ // x != 0 && x != 1.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values, we don't want to make a ginormous switch.
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet()) {
+ return false;
}
- Vals.resize(NumValsBeforeLHS);
- UsedICmps = UsedICmpsBeforeLHS;
- return nullptr;
+ // If we already have a value for the switch, it has to match!
+ if(!setValueOnce(CandidateVal))
+ return false;
+
+ // Add all values from the range to the set
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
+
+ UsedICmps++;
+ return true;
+
}
- // If the LHS can't be folded in, but Extra is available and RHS can, try to
- // use LHS as Extra.
- if (Extra == nullptr || Extra == I->getOperand(0)) {
- Value *OldExtra = Extra;
- Extra = I->getOperand(0);
- if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
- isEQ, UsedICmps))
- return RHS;
- assert(Vals.size() == NumValsBeforeLHS);
- Extra = OldExtra;
+ /// gather - Given a potentially 'or'd or 'and'd together collection of icmp
+ /// eq/ne/lt/gt instructions that compare a value against a constant, extract
+  /// the value being compared, and stick the constants into the Vals
+  /// vector.
+  /// One "Extra" case is allowed to differ from the others.
+ void gather(Value *V, const DataLayout *DL) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ bool isEQ = (I->getOpcode() == Instruction::Or);
+
+ // Keep a stack (SmallVector for efficiency) for depth-first traversal
+ SmallVector<Value *, 8> DFT;
+
+ // Initialize
+ DFT.push_back(V);
+
+ while(!DFT.empty()) {
+ V = DFT.pop_back_val();
+
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If it is a || (or && depending on isEQ), process the operands.
+ if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
+ DFT.push_back(I->getOperand(1));
+ DFT.push_back(I->getOperand(0));
+ continue;
+ }
+
+ // Try to match the current instruction
+ if (matchInstruction(I, DL, isEQ))
+ // Match succeed, continue the loop
+ continue;
+ }
+
+      // One element of the sequence of || (or &&) could not be matched as a
+ // comparison against the same value as the others.
+ // We allow only one "Extra" case to be checked before the switch
+ if (!Extra) {
+ Extra = V;
+ continue;
+ }
+ // Failed to parse a proper sequence, abort now
+ CompValue = nullptr;
+ break;
+ }
}
+};
- return nullptr;
}
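ConstantComparesGatherer above replaces the old recursive GatherConstantCompares with an explicit-stack depth-first walk: it descends the ||/&& chain, records every comparison against one common value, and tolerates a single non-matching "Extra" clause. A toy model of that control flow with plain structs standing in for LLVM instructions; it sketches the traversal only:

#include <cstdint>
#include <vector>

struct Node {
  enum KindTy { Or, EqConst, Other } Kind;
  const Node *L = nullptr, *R = nullptr; // children when Kind == Or
  int Var = -1;                          // variable id when Kind == EqConst
  int64_t C = 0;                         // constant when Kind == EqConst
};

struct Gathered {
  int CompVar = -1;                      // the single compared variable
  std::vector<int64_t> Vals;             // constants feeding the switch cases
  const Node *Extra = nullptr;           // at most one leftover clause
  bool OK = true;
};

static Gathered gatherOrChain(const Node *Root) {
  Gathered G;
  std::vector<const Node *> DFT;
  DFT.push_back(Root);
  while (!DFT.empty()) {
    const Node *N = DFT.back();
    DFT.pop_back();
    if (N->Kind == Node::Or) {           // descend both sides of the ||
      DFT.push_back(N->R);
      DFT.push_back(N->L);
      continue;
    }
    if (N->Kind == Node::EqConst &&
        (G.CompVar == -1 || G.CompVar == N->Var)) {
      G.CompVar = N->Var;                // setValueOnce(): first or same value
      G.Vals.push_back(N->C);
      continue;
    }
    if (!G.Extra) {                      // one "Extra" clause is tolerated
      G.Extra = N;
      continue;
    }
    G.OK = false;                        // second mismatch: abandon the gather
    break;
  }
  return G;
}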
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
@@ -628,7 +707,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// Collect branch weights into a vector.
SmallVector<uint32_t, 8> Weights;
- MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
if (HasWeight)
for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
@@ -723,7 +802,7 @@ static int ConstantIntSortPredicate(ConstantInt *const *P1,
}
static inline bool HasBranchWeights(const Instruction* I) {
- MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof);
+ MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
if (ProfMD && ProfMD->getOperand(0))
if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
return MDS->getString().equals("branch_weights");
@@ -736,7 +815,7 @@ static inline bool HasBranchWeights(const Instruction* I) {
/// metadata.
static void GetBranchWeights(TerminatorInst *TI,
SmallVectorImpl<uint64_t> &Weights) {
- MDNode* MD = TI->getMetadata(LLVMContext::MD_prof);
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
assert(MD);
for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
ConstantInt *CI = cast<ConstantInt>(MD->getOperand(i));
@@ -995,6 +1074,8 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
return true;
}
+static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
+
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
@@ -1040,6 +1121,14 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) {
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull
+ };
+ combineMetadata(I1, I2, KnownIDs);
I2->eraseFromParent();
Changed = true;
@@ -1072,6 +1161,12 @@ HoistTerminator:
if (BB1V == BB2V)
continue;
+ // Check for passingValueIsAlwaysUndefined here because we would rather
+ // eliminate undefined control flow than convert it to a select.
+ if (passingValueIsAlwaysUndefined(BB1V, PN) ||
+ passingValueIsAlwaysUndefined(BB2V, PN))
+ return Changed;
+
if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL))
return Changed;
if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL))
@@ -1281,6 +1376,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
+ // TODO: Use combineMetadata here to preserve what metadata we can
+ // (analogous to the hoisting case above).
I2->eraseFromParent();
if (UpdateRE1)
@@ -1486,6 +1583,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (ThenV == OrigV)
continue;
+ // Don't convert to selects if we could remove undefined behavior instead.
+ if (passingValueIsAlwaysUndefined(OrigV, PN) ||
+ passingValueIsAlwaysUndefined(ThenV, PN))
+ return false;
+
HaveRewritablePHIs = true;
ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
@@ -1963,7 +2065,8 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
/// predecessor branches to us and one of our successors, fold the block into
/// the predecessor and use logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL,
+ unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
Instruction *Cond = nullptr;
@@ -2000,33 +2103,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) {
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
- // Only allow this if the condition is a simple instruction that can be
- // executed unconditionally. It must be in the same block as the branch, and
- // must be at the front of the block.
- BasicBlock::iterator FrontIt = BB->front();
-
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
-
- // Allow a single instruction to be hoisted in addition to the compare
- // that feeds the branch. We later ensure that any values that _it_ uses
- // were also live in the predecessor, so that we don't unnecessarily create
- // register pressure or inhibit out-of-order execution.
- Instruction *BonusInst = nullptr;
- if (&*FrontIt != Cond &&
- FrontIt->hasOneUse() && FrontIt->user_back() == Cond &&
- isSafeToSpeculativelyExecute(FrontIt, DL)) {
- BonusInst = &*FrontIt;
- ++FrontIt;
-
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
- }
-
- // Only a single bonus inst is allowed.
- if (&*FrontIt != Cond)
- return false;
-
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = Cond; ++CondIt;
@@ -2036,6 +2112,31 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) {
if (&*CondIt != BI)
return false;
+ // Only allow this transformation if computing the condition doesn't involve
+ // too many instructions and those instructions can be executed
+ // unconditionally. We denote all involved instructions except the condition
+ // as "bonus instructions", and only allow this transformation when the
+ // number of bonus instructions does not exceed a certain threshold.
+ unsigned NumBonusInsts = 0;
+ for (auto I = BB->begin(); Cond != I; ++I) {
+ // Ignore dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I, DL))
+ return false;
+ // I has only one use and can be executed unconditionally.
+ Instruction *User = dyn_cast<Instruction>(I->user_back());
+ if (User == nullptr || User->getParent() != BB)
+ return false;
+ // I is used in the same BB. Since BI uses Cond and doesn't have more slots
+ // to use any other instruction, User must be an instruction between next(I)
+ // and Cond.
+ ++NumBonusInsts;
+ // Exit early once we reach the limit.
+ if (NumBonusInsts > BonusInstThreshold)
+ return false;
+ }
+
// Cond is known to be a compare or binary operator. Check to make sure that
// neither operand is a potentially-trapping constant expression.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
@@ -2086,49 +2187,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) {
continue;
}
- // Ensure that any values used in the bonus instruction are also used
- // by the terminator of the predecessor. This means that those values
- // must already have been resolved, so we won't be inhibiting the
- // out-of-order core by speculating them earlier. We also allow
- // instructions that are used by the terminator's condition because it
- // exposes more merging opportunities.
- bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() &&
- BonusInst->user_back() == Cond);
-
- if (BonusInst && !UsedByBranch) {
- // Collect the values used by the bonus inst
- SmallPtrSet<Value*, 4> UsedValues;
- for (Instruction::op_iterator OI = BonusInst->op_begin(),
- OE = BonusInst->op_end(); OI != OE; ++OI) {
- Value *V = *OI;
- if (!isa<Constant>(V) && !isa<Argument>(V))
- UsedValues.insert(V);
- }
-
- SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
- Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
-
- // Walk up to four levels back up the use-def chain of the predecessor's
- // terminator to see if all those values were used. The choice of four
- // levels is arbitrary, to provide a compile-time-cost bound.
- while (!Worklist.empty()) {
- std::pair<Value*, unsigned> Pair = Worklist.back();
- Worklist.pop_back();
-
- if (Pair.second >= 4) continue;
- UsedValues.erase(Pair.first);
- if (UsedValues.empty()) break;
-
- if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
- for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
- }
- }
-
- if (!UsedValues.empty()) return false;
- }
-
DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
IRBuilder<> Builder(PBI);
@@ -2148,30 +2206,41 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) {
PBI->swapSuccessors();
}
- // If we have a bonus inst, clone it into the predecessor block.
- Instruction *NewBonus = nullptr;
- if (BonusInst) {
- NewBonus = BonusInst->clone();
+ // If we have bonus instructions, clone them into the predecessor block.
+ // Note that there may be multiple predecessor blocks, so we cannot move
+ // bonus instructions to a predecessor block.
+ ValueToValueMapTy VMap; // maps original values to cloned values
+ // We already made sure Cond is the last instruction before BI. Therefore,
+ // every instruction before Cond, other than DbgInfoIntrinsic, is a bonus
+ // instruction.
+ for (auto BonusInst = BB->begin(); Cond != BonusInst; ++BonusInst) {
+ if (isa<DbgInfoIntrinsic>(BonusInst))
+ continue;
+ Instruction *NewBonusInst = BonusInst->clone();
+ RemapInstruction(NewBonusInst, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+ VMap[BonusInst] = NewBonusInst;
// If we moved a load, we cannot any longer claim any knowledge about
// its potential value. The previous information might have been valid
// only given the branch precondition.
// For an analogous reason, we must also drop all the metadata whose
// semantics we don't understand.
- NewBonus->dropUnknownMetadata(LLVMContext::MD_dbg);
+ NewBonusInst->dropUnknownMetadata(LLVMContext::MD_dbg);
- PredBlock->getInstList().insert(PBI, NewBonus);
- NewBonus->takeName(BonusInst);
- BonusInst->setName(BonusInst->getName()+".old");
+ PredBlock->getInstList().insert(PBI, NewBonusInst);
+ NewBonusInst->takeName(BonusInst);
+ BonusInst->setName(BonusInst->getName() + ".old");
}
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
Instruction *New = Cond->clone();
- if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
+ RemapInstruction(New, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
PredBlock->getInstList().insert(PBI, New);
New->takeName(Cond);
- Cond->setName(New->getName()+".old");
+ Cond->setName(New->getName() + ".old");
if (BI->isConditional()) {
Instruction *NewCond =
@@ -2649,7 +2718,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(
ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
- const DataLayout *DL) {
+ unsigned BonusInstThreshold, const DataLayout *DL, AssumptionTracker *AT) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2682,7 +2751,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -2698,7 +2767,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -2759,24 +2828,17 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (!Cond) return false;
-
// Change br (X == 0 | X == 1), T, F into a switch instruction.
// If this is a bunch of seteq's or'd together, or if it's a bunch of
// 'setne's and'ed together, collect them.
- Value *CompVal = nullptr;
- std::vector<ConstantInt*> Values;
- bool TrueWhenEqual = true;
- Value *ExtraCase = nullptr;
- unsigned UsedICmps = 0;
-
- if (Cond->getOpcode() == Instruction::Or) {
- CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, true,
- UsedICmps);
- } else if (Cond->getOpcode() == Instruction::And) {
- CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, false,
- UsedICmps);
- TrueWhenEqual = false;
- }
+
+ // Try to gather values from a chain of and/or to be turned into a switch
+ ConstantComparesGatherer ConstantCompare(Cond, DL);
+ // Unpack the result
+ SmallVectorImpl<ConstantInt*> &Values = ConstantCompare.Vals;
+ Value *CompVal = ConstantCompare.CompValue;
+ unsigned UsedICmps = ConstantCompare.UsedICmps;
+ Value *ExtraCase = ConstantCompare.Extra;
// If we didn't have a multiply compared value, fail.
if (!CompVal) return false;
@@ -2785,6 +2847,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
if (UsedICmps <= 1)
return false;
+ bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or);
+
// There might be duplicate constants in the list, which the switch
// instruction can't handle, remove them now.
array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
@@ -3208,11 +3272,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
-static bool EliminateDeadSwitchCases(SwitchInst *SI) {
+static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL,
+ AssumptionTracker *AT) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
- computeKnownBits(Cond, KnownZero, KnownOne);
+ computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AT, SI);
// Gather dead cases.
SmallVector<ConstantInt*, 8> DeadCases;
@@ -3460,6 +3525,163 @@ GetCaseResults(SwitchInst *SI,
return Res.size() > 0;
}
+// MapCaseToResult - Helper function used to
+// add CaseVal to the list of cases that generate Result.
+static void MapCaseToResult(ConstantInt *CaseVal,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *Result) {
+ for (auto &I : UniqueResults) {
+ if (I.first == Result) {
+ I.second.push_back(CaseVal);
+ return;
+ }
+ }
+ UniqueResults.push_back(std::make_pair(Result,
+ SmallVector<ConstantInt*, 4>(1, CaseVal)));
+}
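
A rough self-contained sketch of this grouping step, using plain ints and std::vector in place of Constant*/ConstantInt* (the names are illustrative assumptions):

#include <cstdio>
#include <utility>
#include <vector>

// Group case values by the result they produce, preserving insertion order
// (a stand-in for the UniqueResults vector used above).
using ResultVector = std::vector<std::pair<int /*Result*/, std::vector<int> /*Cases*/>>;

static void mapCaseToResult(int CaseVal, ResultVector &UniqueResults, int Result) {
  for (auto &Entry : UniqueResults)
    if (Entry.first == Result) {          // existing result: append the case
      Entry.second.push_back(CaseVal);
      return;
    }
  UniqueResults.push_back({Result, {CaseVal}}); // first case for this result
}

int main() {
  // switch (a) { case 1: case 2: return 10; case 7: return 20; }
  ResultVector UR;
  mapCaseToResult(1, UR, 10);
  mapCaseToResult(2, UR, 10);
  mapCaseToResult(7, UR, 20);
  for (auto &Entry : UR)
    std::printf("result %d has %zu case(s)\n", Entry.first, Entry.second.size());
  return 0;
}

Two unique results is exactly the shape the select conversion below requires.
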
+
+// InitializeUniqueCases - Helper function that initializes a map containing
+// results for the PHI node of the common destination block for a switch
+// instruction. Returns false if multiple PHI nodes have been found or if
+// there is not a common destination block for the switch.
+static bool InitializeUniqueCases(
+ SwitchInst *SI, const DataLayout *DL, PHINode *&PHI,
+ BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult) {
+ for (auto &I : SI->cases()) {
+ ConstantInt *CaseVal = I.getCaseValue();
+
+ // Resulting value at phi nodes for this case value.
+ SwitchCaseResultsTy Results;
+ if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
+ DL))
+ return false;
+
+ // Only one value per case is permitted
+ if (Results.size() > 1)
+ return false;
+ MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
+
+ // Check the PHI consistency.
+ if (!PHI)
+ PHI = Results[0].first;
+ else if (PHI != Results[0].first)
+ return false;
+ }
+ // Find the default result value.
+ SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
+ DL);
+ // If the default value is not found, abort unless the default destination
+ // is unreachable.
+ DefaultResult =
+ DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
+ if ((!DefaultResult &&
+ !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
+ return false;
+
+ return true;
+}
+
+// ConvertTwoCaseSwitch - Helper function that checks if it is possible to
+// transform a switch with only two cases (or two cases + default)
+// that produces a result into a value select.
+// Example:
+// switch (a) {
+// case 10: %0 = icmp eq i32 %a, 10
+// return 10; %1 = select i1 %0, i32 10, i32 4
+// case 20: ----> %2 = icmp eq i32 %a, 20
+// return 2; %3 = select i1 %2, i32 2, i32 %1
+// default:
+// return 4;
+// }
+static Value *
+ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
+ Constant *DefaultResult, Value *Condition,
+ IRBuilder<> &Builder) {
+ assert(ResultVector.size() == 2 &&
+ "We should have exactly two unique results at this point");
+ // If we are selecting between only two cases, transform into a simple
+ // select, or into a two-way select if the default can trigger.
+ if (ResultVector[0].second.size() == 1 &&
+ ResultVector[1].second.size() == 1) {
+ ConstantInt *const FirstCase = ResultVector[0].second[0];
+ ConstantInt *const SecondCase = ResultVector[1].second[0];
+
+ bool DefaultCanTrigger = DefaultResult;
+ Value *SelectValue = ResultVector[1].first;
+ if (DefaultCanTrigger) {
+ Value *const ValueCompare =
+ Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
+ SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
+ DefaultResult, "switch.select");
+ }
+ Value *const ValueCompare =
+ Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
+ return Builder.CreateSelect(ValueCompare, ResultVector[0].first, SelectValue,
+ "switch.select");
+ }
+
+ return nullptr;
+}
+
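
The same conversion, written out as ordinary C++ ternaries rather than IR, behaves like this (a sketch of the example in the comment above, not generated code):

#include <cstdio>

// Equivalent of the converted switch: two chained selects, where the inner
// one folds in the default result.
static int selectForSwitch(int A) {
  int Inner = (A == 20) ? 2 : 4;      // second case vs. default
  return (A == 10) ? 10 : Inner;      // first case vs. the rest
}

int main() {
  std::printf("%d %d %d\n", selectForSwitch(10), selectForSwitch(20),
              selectForSwitch(33));   // prints: 10 2 4
  return 0;
}
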
+// RemoveSwitchAfterSelectConversion - Helper function to cleanup a switch
+// instruction that has been converted into a select, fixing up PHI nodes and
+// basic blocks.
+static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
+ Value *SelectValue,
+ IRBuilder<> &Builder) {
+ BasicBlock *SelectBB = SI->getParent();
+ while (PHI->getBasicBlockIndex(SelectBB) >= 0)
+ PHI->removeIncomingValue(SelectBB);
+ PHI->addIncoming(SelectValue, SelectBB);
+
+ Builder.CreateBr(PHI->getParent());
+
+ // Remove the switch.
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
+ BasicBlock *Succ = SI->getSuccessor(i);
+
+ if (Succ == PHI->getParent())
+ continue;
+ Succ->removePredecessor(SelectBB);
+ }
+ SI->eraseFromParent();
+}
+
+/// SwitchToSelect - If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with only two different
+/// constant values, replace the switch with a select.
+static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout *DL, AssumptionTracker *AT) {
+ Value *const Cond = SI->getCondition();
+ PHINode *PHI = nullptr;
+ BasicBlock *CommonDest = nullptr;
+ Constant *DefaultResult;
+ SwitchCaseResultVectorTy UniqueResults;
+ // Collect all the cases that will deliver the same value from the switch.
+ if (!InitializeUniqueCases(SI, DL, PHI, CommonDest, UniqueResults,
+ DefaultResult))
+ return false;
+ // A select can choose between at most two values.
+ if (UniqueResults.size() != 2)
+ return false;
+ assert(PHI != nullptr && "PHI for value select not found");
+
+ Builder.SetInsertPoint(SI);
+ Value *SelectValue = ConvertTwoCaseSwitch(
+ UniqueResults,
+ DefaultResult, Cond, Builder);
+ if (SelectValue) {
+ RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
+ return true;
+ }
+ // The switch couldn't be converted into a select.
+ return false;
+}
+
namespace {
/// SwitchLookupTable - This class represents a lookup table that can be used
/// to replace a switch.
@@ -3493,6 +3715,11 @@ namespace {
// store that single value and return it for each lookup.
SingleValueKind,
+ // For tables where there is a linear relationship between table index
+ // and values, we calculate the result with a simple multiplication
+ // and addition instead of a table lookup.
+ LinearMapKind,
+
// For small tables with integer elements, we can pack them into a bitmap
// that fits into a target-legal register. Values are retrieved by
// shift and mask operations.
@@ -3510,6 +3737,10 @@ namespace {
ConstantInt *BitMap;
IntegerType *BitMapElementTy;
+ // For LinearMapKind, these are the constants used to derive the value.
+ ConstantInt *LinearOffset;
+ ConstantInt *LinearMultiplier;
+
// For ArrayKind, this is the array.
GlobalVariable *Array;
};
@@ -3522,7 +3753,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
Constant *DefaultValue,
const DataLayout *DL)
: SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
- Array(nullptr) {
+ LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
@@ -3567,6 +3798,43 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
return;
}
+ // Check if we can derive the value with a linear transformation from the
+ // table index.
+ if (isa<IntegerType>(ValueType)) {
+ bool LinearMappingPossible = true;
+ APInt PrevVal;
+ APInt DistToPrev;
+ assert(TableSize >= 2 && "Should be a SingleValue table.");
+ // Check if there is the same distance between two consecutive values.
+ for (uint64_t I = 0; I < TableSize; ++I) {
+ ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
+ if (!ConstVal) {
+ // This is an undef. We could deal with it, but undefs in lookup tables
+ // are very rare. It's probably not worth the additional complexity.
+ LinearMappingPossible = false;
+ break;
+ }
+ APInt Val = ConstVal->getValue();
+ if (I != 0) {
+ APInt Dist = Val - PrevVal;
+ if (I == 1) {
+ DistToPrev = Dist;
+ } else if (Dist != DistToPrev) {
+ LinearMappingPossible = false;
+ break;
+ }
+ }
+ PrevVal = Val;
+ }
+ if (LinearMappingPossible) {
+ LinearOffset = cast<ConstantInt>(TableContents[0]);
+ LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
+ Kind = LinearMapKind;
+ ++NumLinearMaps;
+ return;
+ }
+ }
+
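
A rough standalone illustration of this linear-mapping check, using plain integers instead of APInt (the function name and types are assumptions for the sketch):

#include <cstdint>
#include <cstdio>
#include <vector>

// Return true and fill Offset/Multiplier when Values[i] == Offset + i * Multiplier
// for every index, i.e. consecutive entries have a constant distance.
static bool isLinearTable(const std::vector<int64_t> &Values,
                          int64_t &Offset, int64_t &Multiplier) {
  if (Values.size() < 2)
    return false;
  Offset = Values[0];
  Multiplier = Values[1] - Values[0];
  for (size_t I = 2; I < Values.size(); ++I)
    if (Values[I] - Values[I - 1] != Multiplier)
      return false;                       // distances differ: no linear map
  return true;
}

int main() {
  // Table {2, 5, 8, 11}: lookup(i) can be computed as 2 + 3 * i
  // instead of loading from a table.
  std::vector<int64_t> Table{2, 5, 8, 11};
  int64_t Off, Mul;
  if (isLinearTable(Table, Off, Mul))
    std::printf("value(i) = %lld + %lld * i\n", (long long)Off, (long long)Mul);
  return 0;
}
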
// If the type is integer and the table fits in a register, build a bitmap.
if (WouldFitInRegister(DL, TableSize, ValueType)) {
IntegerType *IT = cast<IntegerType>(ValueType);
@@ -3602,6 +3870,16 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
switch (Kind) {
case SingleValueKind:
return SingleValue;
+ case LinearMapKind: {
+ // Derive the result value from the input value.
+ Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
+ false, "switch.idx.cast");
+ if (!LinearMultiplier->isOne())
+ Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
+ if (!LinearOffset->isZero())
+ Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
+ return Result;
+ }
case BitMapKind: {
// Type of the bitmap (e.g. i59).
IntegerType *MapTy = BitMap->getType();
@@ -3624,6 +3902,16 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
"switch.masked");
}
case ArrayKind: {
+ // Make sure the table index will not overflow when treated as signed.
+ IntegerType *IT = cast<IntegerType>(Index->getType());
+ uint64_t TableSize = Array->getInitializer()->getType()
+ ->getArrayNumElements();
+ if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
+ Index = Builder.CreateZExt(Index,
+ IntegerType::get(IT->getContext(),
+ IT->getBitWidth() + 1),
+ "switch.tableidx.zext");
+
Value *GEPIndices[] = { Builder.getInt32(0), Index };
Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
"switch.gep");
@@ -3663,9 +3951,8 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
bool AllTablesFitInRegister = true;
bool HasIllegalType = false;
- for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
- E = ResultTypes.end(); I != E; ++I) {
- Type *Ty = I->second;
+ for (const auto &I : ResultTypes) {
+ Type *Ty = I.second;
// Saturate this flag to true.
HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
@@ -3749,16 +4036,17 @@ static bool SwitchToLookupTable(SwitchInst *SI,
return false;
// Append the result from this case to the list for each phi.
- for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
- if (!ResultLists.count(I->first))
- PHIs.push_back(I->first);
- ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+ for (const auto &I : Results) {
+ PHINode *PHI = I.first;
+ Constant *Value = I.second;
+ if (!ResultLists.count(PHI))
+ PHIs.push_back(PHI);
+ ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
}
}
// Keep track of the result types.
- for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
- PHINode *PHI = PHIs[I];
+ for (PHINode *PHI : PHIs) {
ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
}
@@ -3775,6 +4063,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
&CommonDest, DefaultResultsList, DL);
}
+
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
@@ -3784,9 +4073,9 @@ static bool SwitchToLookupTable(SwitchInst *SI,
return false;
}
- for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
- PHINode *PHI = DefaultResultsList[I].first;
- Constant *Result = DefaultResultsList[I].second;
+ for (const auto &I : DefaultResultsList) {
+ PHINode *PHI = I.first;
+ Constant *Result = I.second;
DefaultResults[PHI] = Result;
}
@@ -3820,10 +4109,13 @@ static bool SwitchToLookupTable(SwitchInst *SI,
const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize;
if (GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
- SI->getDefaultDest()->removePredecessor(SI->getParent());
+ // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
+ // do not delete PHINodes here.
+ SI->getDefaultDest()->removePredecessor(SI->getParent(),
+ true/*DontDeleteUselessPHIs*/);
} else {
Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
- MinCaseVal->getType(), TableSize));
+ MinCaseVal->getType(), TableSize));
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
}
@@ -3841,9 +4133,12 @@ static bool SwitchToLookupTable(SwitchInst *SI,
CommonDest->getParent(),
CommonDest);
+ // Make the mask's bitwidth at least 8 bits and a power of 2 to avoid
+ // unnecessary illegal types.
+ uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
+ APInt MaskInt(TableSizePowOf2, 0);
+ APInt One(TableSizePowOf2, 1);
// Build bitmask; fill in a 1 bit for every case.
- APInt MaskInt(TableSize, 0);
- APInt One(TableSize, 1);
const ResultListTy &ResultList = ResultLists[PHIs[0]];
for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
uint64_t Idx = (ResultList[I].first->getValue() -
@@ -3919,12 +4214,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -3934,22 +4229,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
// Remove unreachable cases.
- if (EliminateDeadSwitchCases(SI))
- return SimplifyCFG(BB, TTI, DL) | true;
+ if (EliminateDeadSwitchCases(SI, DL, AT))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
+
+ if (SwitchToSelect(SI, Builder, DL, AT))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
if (SwitchToLookupTable(SI, Builder, TTI, DL))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
return false;
}
@@ -3962,7 +4260,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
SmallPtrSet<Value *, 8> Succs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
BasicBlock *Dest = IBI->getDestination(i);
- if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
Dest->removePredecessor(BB);
IBI->removeDestination(i);
--i; --e;
@@ -3986,7 +4284,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
}
return Changed;
}
@@ -3998,7 +4296,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
- BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
@@ -4010,7 +4308,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, DL))
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI,
+ BonusInstThreshold, DL, AT))
return true;
}
@@ -4018,8 +4317,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, DL))
- return SimplifyCFG(BB, TTI, DL) | true;
+ if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
return false;
}
@@ -4034,7 +4333,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -4044,14 +4343,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())){
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
}
}
@@ -4062,8 +4361,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, DL))
- return SimplifyCFG(BB, TTI, DL) | true;
+ if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -4072,7 +4371,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
if (HoistThenElseCodeToIf(BI, DL))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
@@ -4080,7 +4379,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -4089,7 +4388,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
}
// If this is a branch on a phi node in the current block, thread control
@@ -4097,14 +4396,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, DL))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB, TTI, DL) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true;
return false;
}
@@ -4248,6 +4547,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- const DataLayout *DL) {
- return SimplifyCFGOpt(TTI, DL).run(BB);
+ unsigned BonusInstThreshold,
+ const DataLayout *DL, AssumptionTracker *AT) {
+ return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AT).run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index b284e6f..a4fdd55 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -40,7 +40,7 @@ STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
namespace {
- /// SimplifyIndvar - This is a utility for simplifying induction variables
+ /// This is a utility for simplifying induction variables
/// based on ScalarEvolution. It is the primary instrument of the
/// IndvarSimplify pass, but it may also be directly invoked to cleanup after
/// other loop passes that preserve SCEV.
@@ -86,7 +86,7 @@ namespace {
};
}
-/// foldIVUser - Fold an IV operand into its use. This removes increments of an
+/// Fold an IV operand into its use. This removes increments of an
/// aligned IV when used by a instruction that ignores the low bits.
///
/// IVOperand is guaranteed SCEVable, but UseInst may not be.
@@ -152,7 +152,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
return IVSrc;
}
-/// eliminateIVComparison - SimplifyIVUsers helper for eliminating useless
+/// SimplifyIVUsers helper for eliminating useless
/// comparisons against an induction variable.
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
unsigned IVOperIdx = 0;
@@ -188,7 +188,7 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
DeadInsts.push_back(ICmp);
}
-/// eliminateIVRemainder - SimplifyIVUsers helper for eliminating useless
+/// SimplifyIVUsers helper for eliminating useless
/// remainder operations operating on an induction variable.
void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
Value *IVOperand,
@@ -239,7 +239,7 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
DeadInsts.push_back(Rem);
}
-/// eliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// Eliminate an operation that consumes a simple IV and has
/// no observable side-effect given the range of IV values.
/// IVOperand is guaranteed SCEVable, but UseInst may not be.
bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
@@ -334,8 +334,7 @@ Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
return AddInst;
}
-/// pushIVUsers - Add all uses of Def to the current IV's worklist.
-///
+/// Add all uses of Def to the current IV's worklist.
static void pushIVUsers(
Instruction *Def,
SmallPtrSet<Instruction*,16> &Simplified,
@@ -348,12 +347,12 @@ static void pushIVUsers(
// Also ensure unique worklist users.
// If Def is a LoopPhi, it may not be in the Simplified set, so check for
// self edges first.
- if (UI != Def && Simplified.insert(UI))
+ if (UI != Def && Simplified.insert(UI).second)
SimpleIVUsers.push_back(std::make_pair(UI, Def));
}
}
-/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// Return true if this instruction generates a simple SCEV
/// expression in terms of that IV.
///
/// This is similar to IVUsers' isInteresting() but processes each instruction
@@ -374,7 +373,7 @@ static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
return false;
}
-/// simplifyUsers - Iteratively perform simplification on a worklist of users
+/// Iteratively perform simplification on a worklist of users
/// of the specified induction variable. Each successive simplification may push
/// more users which may themselves be candidates for simplification.
///
@@ -446,7 +445,7 @@ namespace llvm {
void IVVisitor::anchor() { }
-/// simplifyUsersOfIV - Simplify instructions that use this induction variable
+/// Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
SmallVectorImpl<WeakVH> &Dead, IVVisitor *V)
@@ -457,7 +456,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
return SIV.hasChanged();
}
-/// simplifyLoopIVs - Simplify users of induction variables within this
+/// Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
SmallVectorImpl<WeakVH> &Dead) {
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 33b3637..5632095 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -41,6 +42,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -52,6 +54,7 @@ namespace {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -68,7 +71,7 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AT)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
Next->insert(cast<Instruction>(U));
@@ -101,6 +104,7 @@ namespace {
char InstSimplifier::ID = 0;
INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 3b61bb5..a39f128 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -27,65 +27,43 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
+using namespace PatternMatch;
static cl::opt<bool>
-ColdErrorCalls("error-reporting-is-cold", cl::init(true),
- cl::Hidden, cl::desc("Treat error-reporting calls as cold"));
-
-/// This class is the abstract base class for the set of optimizations that
-/// corresponds to one library call.
-namespace {
-class LibCallOptimization {
-protected:
- Function *Caller;
- const DataLayout *DL;
- const TargetLibraryInfo *TLI;
- const LibCallSimplifier *LCS;
- LLVMContext* Context;
-public:
- LibCallOptimization() { }
- virtual ~LibCallOptimization() {}
-
- /// callOptimizer - This pure virtual method is implemented by base classes to
- /// do various optimizations. If this returns null then no transformation was
- /// performed. If it returns CI, then it transformed the call and CI is to be
- /// deleted. If it returns something else, replace CI with the new value and
- /// delete CI.
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
- =0;
-
- /// ignoreCallingConv - Returns false if this transformation could possibly
- /// change the calling convention.
- virtual bool ignoreCallingConv() { return false; }
-
- Value *optimizeCall(CallInst *CI, const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- const LibCallSimplifier *LCS, IRBuilder<> &B) {
- Caller = CI->getParent()->getParent();
- this->DL = DL;
- this->TLI = TLI;
- this->LCS = LCS;
- if (CI->getCalledFunction())
- Context = &CI->getCalledFunction()->getContext();
+ ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden,
+ cl::desc("Treat error-reporting calls as cold"));
- // We never change the calling convention.
- if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
- return nullptr;
+static cl::opt<bool>
+ EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
- return callOptimizer(CI->getCalledFunction(), CI, B);
- }
-};
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
+static bool ignoreCallingConv(LibFunc::Func Func) {
+ switch (Func) {
+ case LibFunc::abs:
+ case LibFunc::labs:
+ case LibFunc::llabs:
+ case LibFunc::strlen:
+ return true;
+ default:
+ return false;
+ }
+ llvm_unreachable("All cases should be covered in the switch.");
+}
+
/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
/// value is equal or not-equal to zero.
static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
@@ -142,967 +120,912 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
// Fortified Library Call Optimizations
//===----------------------------------------------------------------------===//
-struct FortifiedLibCallOptimization : public LibCallOptimization {
-protected:
- virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
- bool isString) const = 0;
-};
-
-struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
- CallInst *CI;
-
- bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
- bool isString) const override {
- if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+static bool isFortifiedCallFoldable(CallInst *CI, unsigned SizeCIOp, unsigned SizeArgOp,
+ bool isString) {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+ if (SizeCI->isAllOnesValue())
return true;
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
- if (SizeCI->isAllOnesValue())
- return true;
- if (isString) {
- uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
- // If the length is 0 we don't know how long it is and so we can't
- // remove the check.
- if (Len == 0) return false;
- return SizeCI->getZExtValue() >= Len;
- }
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(
- CI->getArgOperand(SizeArgOp)))
- return SizeCI->getZExtValue() >= Arg->getZExtValue();
+ if (isString) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+ // If the length is 0 we don't know how long it is and so we can't
+ // remove the check.
+ if (Len == 0)
+ return false;
+ return SizeCI->getZExtValue() >= Len;
}
- return false;
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp)))
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
}
-};
-
-struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- this->CI = CI;
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
-
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(Context) ||
- FT->getParamType(3) != DL->getIntPtrType(Context))
- return nullptr;
+ return false;
+}
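
Put plainly, the helper above says a fortified _chk call can be folded when the compiler-known object size is the all-ones "unknown" sentinel or provably covers the amount written. A hedged sketch of that decision with made-up names (not the LLVM API):

#include <cstdint>
#include <cstdio>

// ObjectSize is the value passed for the __builtin_object_size argument of a
// *_chk call; WriteLen is the number of bytes (or string length) being copied.
// UINT64_MAX plays the role of the "size unknown" sentinel.
static bool canDropFortifiedCheck(uint64_t ObjectSize, uint64_t WriteLen) {
  if (ObjectSize == UINT64_MAX)   // no object-size information: check is a no-op
    return true;
  return ObjectSize >= WriteLen;  // destination provably large enough
}

int main() {
  // __memcpy_chk(dst, src, 16, 32) -> memcpy(dst, src, 16)
  std::printf("%d\n", canDropFortifiedCheck(32, 16)); // 1: fold to plain memcpy
  // __memcpy_chk(dst, src, 64, 32) -> keep the runtime check
  std::printf("%d\n", canDropFortifiedCheck(32, 64)); // 0
  return 0;
}
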
- if (isFoldable(3, 2, false)) {
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
+Value *LibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return nullptr;
- }
-};
-
-struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- this->CI = CI;
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(Context) ||
- FT->getParamType(3) != DL->getIntPtrType(Context))
- return nullptr;
-
- if (isFoldable(3, 2, false)) {
- B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
- return nullptr;
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
}
-};
-
-struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- this->CI = CI;
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
-
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(Context) ||
- FT->getParamType(3) != DL->getIntPtrType(Context))
- return nullptr;
+ return nullptr;
+}
- if (isFoldable(3, 2, false)) {
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
- false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
+Value *LibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return nullptr;
+
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
}
-};
+ return nullptr;
+}
-struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- this->CI = CI;
- StringRef Name = Callee->getName();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
+Value *LibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
+ return nullptr;
- // Check if this has the right signature.
- if (FT->getNumParams() != 3 ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != DL->getIntPtrType(Context))
- return nullptr;
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+ }
+ return nullptr;
+}
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // __strcpy_chk(x,x) -> x
- return Src;
-
- // If a) we don't have any length information, or b) we know this will
- // fit then just lower to a plain strcpy. Otherwise we'll keep our
- // strcpy_chk call which may fail at runtime if the size is too long.
- // TODO: It might be nice to get a maximum length out of the possible
- // string lengths for varying.
- if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
- return Ret;
- } else {
- // Maybe we can stil fold __strcpy_chk to __memcpy_chk.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return nullptr;
-
- // This optimization require DataLayout.
- if (!DL) return nullptr;
-
- Value *Ret =
- EmitMemCpyChk(Dst, Src,
- ConstantInt::get(DL->getIntPtrType(Context), Len),
- CI->getArgOperand(2), B, DL, TLI);
- return Ret;
- }
+Value *LibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != DL->getIntPtrType(Context))
return nullptr;
- }
-};
-struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- this->CI = CI;
- StringRef Name = Callee->getName();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // __strcpy_chk(x,x) -> x
+ return Src;
+
+ // If a) we don't have any length information, or b) we know this will
+ // fit, then just lower to a plain strcpy. Otherwise we'll keep our
+ // strcpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths for varying.
+ if (isFortifiedCallFoldable(CI, 2, 1, true)) {
+ Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+ // Maybe we can still fold __strcpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
- // Check if this has the right signature.
- if (FT->getNumParams() != 3 ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
+ // This optimization requires DataLayout.
+ if (!DL)
return nullptr;
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
- }
+ Value *Ret = EmitMemCpyChk(
+ Dst, Src, ConstantInt::get(DL->getIntPtrType(Context), Len),
+ CI->getArgOperand(2), B, DL, TLI);
+ return Ret;
+ }
+ return nullptr;
+}
- // If a) we don't have any length information, or b) we know this will
- // fit then just lower to a plain stpcpy. Otherwise we'll keep our
- // stpcpy_chk call which may fail at runtime if the size is too long.
- // TODO: It might be nice to get a maximum length out of the possible
- // string lengths for varying.
- if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
- return Ret;
- } else {
- // Maybe we can stil fold __stpcpy_chk to __memcpy_chk.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return nullptr;
-
- // This optimization require DataLayout.
- if (!DL) return nullptr;
-
- Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
- Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(DL->getIntPtrType(PT),
- Len - 1));
- if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI))
- return nullptr;
- return DstEnd;
- }
+Value *LibCallSimplifier::optimizeStpCpyChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
return nullptr;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, DL, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
}
-};
-struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- this->CI = CI;
- StringRef Name = Callee->getName();
- FunctionType *FT = Callee->getFunctionType();
- LLVMContext &Context = CI->getParent()->getContext();
+ // If a) we don't have any length information, or b) we know this will
+ // fit, then just lower to a plain stpcpy. Otherwise we'll keep our
+ // stpcpy_chk call which may fail at runtime if the size is too long.
+ // TODO: It might be nice to get a maximum length out of the possible
+ // string lengths for varying.
+ if (isFortifiedCallFoldable(CI, 2, 1, true)) {
+ Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+ // Maybe we can still fold __stpcpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+
+ // This optimization requires DataLayout.
+ if (!DL)
+ return nullptr;
- // Check if this has the right signature.
- if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- !FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != DL->getIntPtrType(Context))
+ Type *PT = FT->getParamType(0);
+ Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
+ Value *DstEnd =
+ B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1));
+ if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI))
return nullptr;
+ return DstEnd;
+ }
+ return nullptr;
+}
- if (isFoldable(3, 2, false)) {
- Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, DL, TLI,
- Name.substr(2, 7));
- return Ret;
- }
+Value *LibCallSimplifier::optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ StringRef Name = Callee->getName();
+ FunctionType *FT = Callee->getFunctionType();
+ LLVMContext &Context = CI->getContext();
+
+ // Check if this has the right signature.
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return nullptr;
+
+ if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ Value *Ret =
+ EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7));
+ return Ret;
}
-};
+ return nullptr;
+}
//===----------------------------------------------------------------------===//
// String and Memory Library Call Optimizations
//===----------------------------------------------------------------------===//
-struct StrCatOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Verify the "strcat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType())
- return nullptr;
+Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Verify the "strcat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType())
+ return nullptr;
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return nullptr;
- --Len; // Unbias length.
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
+ --Len; // Unbias length.
- // Handle the simple, do-nothing case: strcat(x, "") -> x
- if (Len == 0)
- return Dst;
+ // Handle the simple, do-nothing case: strcat(x, "") -> x
+ if (Len == 0)
+ return Dst;
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
- return emitStrLenMemCpy(Src, Dst, Len, B);
- }
+ return emitStrLenMemCpy(Src, Dst, Len, B);
+}
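
For illustration only (not from the patch): emitStrLenMemCpy, shared by the strcat and strncat folds, lowers a concatenation with a constant source of length Len to strlen(dst) followed by a memcpy of Len + 1 bytes; a small check of that equivalence with arbitrary example buffers:

  #include <cassert>
  #include <cstring>

  int main() {
    char A[16] = "ab", B[16] = "ab";
    const char *Src = "cd";                                     // constant source, strlen == 2
    std::strcat(A, Src);                                        // library semantics
    std::memcpy(B + std::strlen(B), Src, std::strlen(Src) + 1); // strlen(dst) + memcpy(Len + 1)
    assert(std::strcmp(A, B) == 0 && std::strcmp(A, "abcd") == 0);
  }
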
- Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
- IRBuilder<> &B) {
- // We need to find the end of the destination string. That's where the
- // memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, DL, TLI);
- if (!DstLen)
- return nullptr;
+Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+ IRBuilder<> &B) {
+ // We need to find the end of the destination string. That's where the
+ // memory is to be moved to. We just generate a call to strlen.
+ Value *DstLen = EmitStrLen(Dst, B, DL, TLI);
+ if (!DstLen)
+ return nullptr;
- // Now that we have the destination's length, we must index into the
- // destination's pointer to get the actual memcpy destination (end of
- // the string .. we're concatenating).
- Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+ // Now that we have the destination's length, we must index into the
+ // destination's pointer to get the actual memcpy destination (end of
+ // the string we're concatenating to).
+ Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+ // We have enough information to now generate the memcpy call to do the
+ // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(
+ CpyDst, Src,
+ ConstantInt::get(DL->getIntPtrType(Src->getContext()), Len + 1), 1);
+ return Dst;
+}
- // We have enough information to now generate the memcpy call to do the
- // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(DL->getIntPtrType(*Context), Len + 1), 1);
- return Dst;
- }
-};
-
-struct StrNCatOpt : public StrCatOpt {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Verify the "strncat" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- FT->getParamType(1) != FT->getReturnType() ||
- !FT->getParamType(2)->isIntegerTy())
- return nullptr;
+Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Verify the "strncat" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ FT->getParamType(1) != FT->getReturnType() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return nullptr;
- // Extract some information from the instruction
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
- uint64_t Len;
+ // Extract some information from the instruction
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ uint64_t Len;
- // We don't do anything if length is not constant
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Len = LengthArg->getZExtValue();
- else
- return nullptr;
+ // We don't do anything if length is not constant
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Len = LengthArg->getZExtValue();
+ else
+ return nullptr;
- // See if we can get the length of the input string.
- uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return nullptr;
- --SrcLen; // Unbias length.
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0)
+ return nullptr;
+ --SrcLen; // Unbias length.
- // Handle the simple, do-nothing cases:
- // strncat(x, "", c) -> x
- // strncat(x, c, 0) -> x
- if (SrcLen == 0 || Len == 0) return Dst;
+ // Handle the simple, do-nothing cases:
+ // strncat(x, "", c) -> x
+ // strncat(x, c, 0) -> x
+ if (SrcLen == 0 || Len == 0)
+ return Dst;
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
-
- // We don't optimize this case
- if (Len < SrcLen) return nullptr;
-
- // strncat(x, s, c) -> strcat(x, s)
- // s is constant so the strcat can be optimized further
- return emitStrLenMemCpy(Src, Dst, SrcLen, B);
- }
-};
-
-struct StrChrOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Verify the "strchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return nullptr;
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
- Value *SrcStr = CI->getArgOperand(0);
+ // We don't optimize this case
+ if (Len < SrcLen)
+ return nullptr;
- // If the second operand is non-constant, see if we can compute the length
- // of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (!CharC) {
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+ // strncat(x, s, c) -> strcat(x, s)
+ // s is constant so the strcat can be optimized further
+ return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+}
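
A standalone sketch (illustrative, not part of the patch) of why strncat(x, s, c) can be treated as strcat(x, s) once c is known to be at least strlen(s): no truncation can occur, so the two calls append exactly the same bytes:

  #include <cassert>
  #include <cstring>

  int main() {
    char A[16] = "ab", B[16] = "ab";
    std::strncat(A, "cd", 8);  // bound 8 >= strlen("cd"), so nothing is truncated
    std::strcat(B, "cd");      // the strcat form the simplifier rewrites to
    assert(std::strcmp(A, B) == 0);
  }
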
- uint64_t Len = GetStringLength(SrcStr);
- if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
- return nullptr;
+Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Verify the "strchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return nullptr;
- return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(DL->getIntPtrType(*Context), Len),
- B, DL, TLI);
- }
+ Value *SrcStr = CI->getArgOperand(0);
- // Otherwise, the character is a constant, see if the first argument is
- // a string literal. If so, we can constant fold.
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str)) {
- if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
- return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+ // If the second operand is non-constant, see if we can compute the length
+ // of the input string and turn this into memchr.
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!CharC) {
+ // These optimizations require DataLayout.
+ if (!DL)
return nullptr;
- }
- // Compute the offset, make sure to handle the case when we're searching for
- // zero (a weird way to spell strlen).
- size_t I = (0xFF & CharC->getSExtValue()) == 0 ?
- Str.size() : Str.find(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. strchr returns null.
- return Constant::getNullValue(CI->getType());
+ uint64_t Len = GetStringLength(SrcStr);
+ if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
+ return nullptr;
- // strchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ return EmitMemChr(
+ SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), B, DL, TLI);
}
-};
-struct StrRChrOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Verify the "strrchr" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != B.getInt8PtrTy() ||
- FT->getParamType(0) != FT->getReturnType() ||
- !FT->getParamType(1)->isIntegerTy(32))
- return nullptr;
+ // Otherwise, the character is a constant, see if the first argument is
+ // a string literal. If so, we can constant fold.
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
+ return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+ return nullptr;
+ }
- Value *SrcStr = CI->getArgOperand(0);
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ // Compute the offset; make sure to handle the case when we're searching for
+ // zero (a weird way to spell strlen).
+ size_t I = (0xFF & CharC->getSExtValue()) == 0
+ ? Str.size()
+ : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
- // Cannot fold anything if we're not looking for a constant.
- if (!CharC)
- return nullptr;
+ // strchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+}
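
Illustrative sketch only (not from the patch) of the strchr identities encoded above, using an arbitrary constant string:

  #include <cassert>
  #include <cstring>

  int main() {
    const char *S = "abcd";
    assert(std::strchr(S, 'c') == S + 2);                // constant fold: GEP at the found offset
    assert(std::strchr(S, 'x') == nullptr);              // char not found: strchr returns null
    assert(std::strchr(S, '\0') == S + std::strlen(S));  // strchr(p, 0) -> p + strlen(p)
  }
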
- StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str)) {
- // strrchr(s, 0) -> strchr(s, 0)
- if (DL && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, DL, TLI);
- return nullptr;
- }
+Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Verify the "strrchr" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return nullptr;
- // Compute the offset.
- size_t I = (0xFF & CharC->getSExtValue()) == 0 ?
- Str.size() : Str.rfind(CharC->getSExtValue());
- if (I == StringRef::npos) // Didn't find the char. Return null.
- return Constant::getNullValue(CI->getType());
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- // strrchr(s+n,c) -> gep(s+n+i,c)
- return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return nullptr;
+
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (DL && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, DL, TLI);
+ return nullptr;
}
-};
-struct StrCmpOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Verify the "strcmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return nullptr;
+ // Compute the offset.
+ size_t I = (0xFF & CharC->getSExtValue()) == 0
+ ? Str.size()
+ : Str.rfind(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strcmp(x,x) -> 0
- return ConstantInt::get(CI->getType(), 0);
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+}
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Verify the "strcmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return nullptr;
- // strcmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(), Str1.compare(Str2));
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strcmp(x,x) -> 0
+ return ConstantInt::get(CI->getType(), 0);
- if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
- return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
- CI->getType()));
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
- if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ // strcmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2)
+ return ConstantInt::get(CI->getType(), Str1.compare(Str2));
- // strcmp(P, "x") -> memcmp(P, "x", 2)
- uint64_t Len1 = GetStringLength(Str1P);
- uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 && Len2) {
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+ if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+ return B.CreateNeg(
+ B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
- return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(DL->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, DL, TLI);
- }
+ if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
- return nullptr;
- }
-};
-
-struct StrNCmpOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Verify the "strncmp" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getParamType(2)->isIntegerTy())
+ // strcmp(P, "x") -> memcmp(P, "x", 2)
+ uint64_t Len1 = GetStringLength(Str1P);
+ uint64_t Len2 = GetStringLength(Str2P);
+ if (Len1 && Len2) {
+ // These optimizations require DataLayout.
+ if (!DL)
return nullptr;
- Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
- if (Str1P == Str2P) // strncmp(x,x,n) -> 0
- return ConstantInt::get(CI->getType(), 0);
+ return EmitMemCmp(Str1P, Str2P,
+ ConstantInt::get(DL->getIntPtrType(CI->getContext()),
+ std::min(Len1, Len2)),
+ B, DL, TLI);
+ }
- // Get the length argument if it is constant.
- uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
- Length = LengthArg->getZExtValue();
- else
- return nullptr;
+ return nullptr;
+}
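
A small standalone check (illustrative only) of the strcmp identities used above; note that only the sign of strcmp's result is specified, which is all these folds need to preserve:

  #include <cassert>
  #include <cstring>

  int main() {
    const char *X = "abc";
    assert(std::strcmp(X, X) == 0);                            // strcmp(x, x) -> 0
    assert(std::strcmp("", X) < 0 && std::strcmp(X, "") > 0);  // signs of -*x and *x
    assert(std::strcmp("ab", "ac") < 0);                       // constant fold via Str1.compare(Str2)
  }
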
- if (Length == 0) // strncmp(x,y,0) -> 0
- return ConstantInt::get(CI->getType(), 0);
+Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Verify the "strncmp" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getReturnType()->isIntegerTy(32) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return nullptr;
- if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+ if (Str1P == Str2P) // strncmp(x,x,n) -> 0
+ return ConstantInt::get(CI->getType(), 0);
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+ // Get the length argument if it is constant.
+ uint64_t Length;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ Length = LengthArg->getZExtValue();
+ else
+ return nullptr;
- // strncmp(x, y) -> cnst (if both x and y are constant strings)
- if (HasStr1 && HasStr2) {
- StringRef SubStr1 = Str1.substr(0, Length);
- StringRef SubStr2 = Str2.substr(0, Length);
- return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
- }
+ if (Length == 0) // strncmp(x,y,0) -> 0
+ return ConstantInt::get(CI->getType(), 0);
- if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
- return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
- CI->getType()));
+ if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
- if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
- return nullptr;
+ // strncmp(x, y) -> cnst (if both x and y are constant strings)
+ if (HasStr1 && HasStr2) {
+ StringRef SubStr1 = Str1.substr(0, Length);
+ StringRef SubStr2 = Str2.substr(0, Length);
+ return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
}
-};
-struct StrCpyOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Verify the "strcpy" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return nullptr;
+ if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
+ return B.CreateNeg(
+ B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // strcpy(x,x) -> x
- return Src;
+ if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
+ return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return nullptr;
-
- // We have enough information to now generate the memcpy call to do the
- // copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, Src,
- ConstantInt::get(DL->getIntPtrType(*Context), Len), 1);
- return Dst;
- }
-};
-
-struct StpCpyOpt: public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Verify the "stpcpy" function prototype.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return nullptr;
+ return nullptr;
+}
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Verify the "strcpy" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return nullptr;
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
- }
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return nullptr;
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
- Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
- Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(DL->getIntPtrType(PT),
- Len - 1));
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
- // We have enough information to now generate the memcpy call to do the
- // copy for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(Dst, Src, LenV, 1);
- return DstEnd;
- }
-};
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), 1);
+ return Dst;
+}
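
For illustration (not part of the patch): with a constant source, strcpy is just a memcpy of the string plus its terminating NUL, which is exactly the intrinsic the code above emits:

  #include <cassert>
  #include <cstring>

  int main() {
    char A[8], B[8];
    std::strcpy(A, "hi");
    std::memcpy(B, "hi", 3);  // Len from GetStringLength: two chars plus the NUL
    assert(std::strcmp(A, B) == 0);
  }
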
-struct StrNCpyOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getParamType(2)->isIntegerTy())
- return nullptr;
+Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Verify the "stpcpy" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return nullptr;
- Value *Dst = CI->getArgOperand(0);
- Value *Src = CI->getArgOperand(1);
- Value *LenOp = CI->getArgOperand(2);
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
- // See if we can get the length of the input string.
- uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return nullptr;
- --SrcLen;
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, DL, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ }
- if (SrcLen == 0) {
- // strncpy(x, "", y) -> memset(x, '\0', y, 1)
- B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
- return Dst;
- }
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0)
+ return nullptr;
- uint64_t Len;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
- Len = LengthArg->getZExtValue();
- else
- return nullptr;
+ Type *PT = FT->getParamType(0);
+ Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
+ Value *DstEnd =
+ B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1));
- if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(Dst, Src, LenV, 1);
+ return DstEnd;
+}
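
An illustrative sketch (not from the patch, and it assumes a POSIX stpcpy is available) of the return-value contract the DstEnd computation above mirrors: stpcpy returns a pointer to the NUL it wrote, i.e. Dst plus the source length:

  #include <cassert>
  #include <cstring>
  #include <string.h>  // stpcpy is POSIX, not ISO C++; assumed present here

  int main() {
    char Dst[8];
    const char *Src = "hi";
    char *End = ::stpcpy(Dst, Src);         // returns a pointer to the written NUL
    assert(End == Dst + std::strlen(Src));  // the Dst + (Len - 1) GEP built above
  }
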
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getParamType(2)->isIntegerTy())
+ return nullptr;
- // Let strncpy handle the zero padding
- if (Len > SrcLen+1) return nullptr;
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
- Type *PT = FT->getParamType(0);
- // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- B.CreateMemCpy(Dst, Src,
- ConstantInt::get(DL->getIntPtrType(PT), Len), 1);
+ // See if we can get the length of the input string.
+ uint64_t SrcLen = GetStringLength(Src);
+ if (SrcLen == 0)
+ return nullptr;
+ --SrcLen;
+ if (SrcLen == 0) {
+ // strncpy(x, "", y) -> memset(x, '\0', y, 1)
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
return Dst;
}
-};
-
-struct StrLenOpt : public LibCallOptimization {
- bool ignoreCallingConv() override { return true; }
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
- Value *Src = CI->getArgOperand(0);
-
- // Constant folding: strlen("xyz") -> 3
- if (uint64_t Len = GetStringLength(Src))
- return ConstantInt::get(CI->getType(), Len-1);
-
- // strlen(x?"foo":"bars") --> x ? 3 : 4
- if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
- uint64_t LenTrue = GetStringLength(SI->getTrueValue());
- uint64_t LenFalse = GetStringLength(SI->getFalseValue());
- if (LenTrue && LenFalse) {
- emitOptimizationRemark(*Context, "simplify-libcalls", *Caller,
- SI->getDebugLoc(),
- "folded strlen(select) to select of constants");
- return B.CreateSelect(SI->getCondition(),
- ConstantInt::get(CI->getType(), LenTrue-1),
- ConstantInt::get(CI->getType(), LenFalse-1));
- }
- }
+ uint64_t Len;
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
+ Len = LengthArg->getZExtValue();
+ else
+ return nullptr;
- // strlen(x) != 0 --> *x != 0
- // strlen(x) == 0 --> *x == 0
- if (isOnlyUsedInZeroEqualityComparison(CI))
- return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ if (Len == 0)
+ return Dst; // strncpy(x, y, 0) -> x
+ // These optimizations require DataLayout.
+ if (!DL)
return nullptr;
- }
-};
-struct StrPBrkOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- FT->getReturnType() != FT->getParamType(0))
- return nullptr;
+ // Let strncpy handle the zero padding
+ if (Len > SrcLen + 1)
+ return nullptr;
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+ Type *PT = FT->getParamType(0);
+ // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
+ B.CreateMemCpy(Dst, Src, ConstantInt::get(DL->getIntPtrType(PT), Len), 1);
- // strpbrk(s, "") -> NULL
- // strpbrk("", s) -> NULL
- if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
- return Constant::getNullValue(CI->getType());
+ return Dst;
+}
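
Standalone illustration (not part of the patch) of the two strncpy cases handled above: an empty source zero-fills the whole bound, and a bound no larger than strlen(src) + 1 copies exactly that many source bytes:

  #include <cassert>
  #include <cstring>

  int main() {
    char A[8], B[8];
    std::strncpy(A, "", 8);      // strncpy(x, "", n) zero-fills n bytes
    std::memset(B, '\0', 8);     // the memset the simplifier emits instead
    assert(std::memcmp(A, B, 8) == 0);

    std::strncpy(A, "abcd", 3);  // n <= strlen(src) + 1: no zero padding is needed
    std::memcpy(B, "abcd", 3);   // so a plain n-byte memcpy is equivalent
    assert(std::memcmp(A, B, 3) == 0);
  }
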
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t I = S1.find_first_of(S2);
- if (I == StringRef::npos) // No match.
- return Constant::getNullValue(CI->getType());
+Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return nullptr;
- return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ Value *Src = CI->getArgOperand(0);
+
+ // Constant folding: strlen("xyz") -> 3
+ if (uint64_t Len = GetStringLength(Src))
+ return ConstantInt::get(CI->getType(), Len - 1);
+
+ // strlen(x?"foo":"bars") --> x ? 3 : 4
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue());
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue());
+ if (LenTrue && LenFalse) {
+ Function *Caller = CI->getParent()->getParent();
+ emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller,
+ SI->getDebugLoc(),
+ "folded strlen(select) to select of constants");
+ return B.CreateSelect(SI->getCondition(),
+ ConstantInt::get(CI->getType(), LenTrue - 1),
+ ConstantInt::get(CI->getType(), LenFalse - 1));
}
-
- // strpbrk(s, "a") -> strchr(s, 'a')
- if (DL && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
-
- return nullptr;
}
-};
-struct StrToOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy())
- return nullptr;
+ // strlen(x) != 0 --> *x != 0
+ // strlen(x) == 0 --> *x == 0
+ if (isOnlyUsedInZeroEqualityComparison(CI))
+ return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
- Value *EndPtr = CI->getArgOperand(1);
- if (isa<ConstantPointerNull>(EndPtr)) {
- // With a null EndPtr, this function won't capture the main argument.
- // It would be readonly too, except that it still may write to errno.
- CI->addAttribute(1, Attribute::NoCapture);
- }
+ return nullptr;
+}
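
A minimal check (illustrative only; argc is just used as an opaque runtime condition) of the strlen folds above: constant folding, folding strlen of a select into a select of constants, and the zero-equality rewrite:

  #include <cassert>
  #include <cstring>

  int main(int argc, char **) {
    assert(std::strlen("xyz") == 3);                 // constant fold
    const char *S = argc > 1 ? "foo" : "bars";
    assert(std::strlen(S) == (argc > 1 ? 3u : 4u));  // strlen(select) -> select of constants
    const char *E = "";
    assert((std::strlen(E) == 0) == (*E == '\0'));   // strlen(x) == 0  <=>  *x == 0
  }
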
+Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ FT->getReturnType() != FT->getParamType(0))
return nullptr;
- }
-};
-struct StrSpnOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+ // strpbrk(s, "") -> nullptr
+ // strpbrk("", s) -> nullptr
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
- // strspn(s, "") -> 0
- // strspn("", s) -> 0
- if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t Pos = S1.find_first_not_of(S2);
- if (Pos == StringRef::npos) Pos = S1.size();
- return ConstantInt::get(CI->getType(), Pos);
- }
-
- return nullptr;
+ return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
}
-};
-struct StrCSpnOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- FT->getParamType(0) != B.getInt8PtrTy() ||
- FT->getParamType(1) != FT->getParamType(0) ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (DL && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
- StringRef S1, S2;
- bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+ return nullptr;
+}
- // strcspn("", s) -> 0
- if (HasS1 && S1.empty())
- return Constant::getNullValue(CI->getType());
+Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
+ return nullptr;
- // Constant folding.
- if (HasS1 && HasS2) {
- size_t Pos = S1.find_first_of(S2);
- if (Pos == StringRef::npos) Pos = S1.size();
- return ConstantInt::get(CI->getType(), Pos);
- }
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr)) {
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
+ CI->addAttribute(1, Attribute::NoCapture);
+ }
- // strcspn(s, "") -> strlen(s)
- if (DL && HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
+ return nullptr;
+}
+Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
return nullptr;
+
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_not_of(S2);
+ if (Pos == StringRef::npos)
+ Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
}
-};
-struct StrStrOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isPointerTy())
- return nullptr;
+ return nullptr;
+}
- // fold strstr(x, x) -> x.
- if (CI->getArgOperand(0) == CI->getArgOperand(1))
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return nullptr;
- // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
- if (!StrLen)
- return nullptr;
- Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, DL, TLI);
- if (!StrNCmp)
- return nullptr;
- for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
- ICmpInst *Old = cast<ICmpInst>(*UI++);
- Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
- ConstantInt::getNullValue(StrNCmp->getType()),
- "cmp");
- LCS->replaceAllUsesWith(Old, Cmp);
- }
- return CI;
- }
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
- // See if either input string is a constant string.
- StringRef SearchStr, ToFindStr;
- bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
- bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
- // fold strstr(x, "") -> x.
- if (HasStr2 && ToFindStr.empty())
- return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_of(S2);
+ if (Pos == StringRef::npos)
+ Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
- // If both strings are known, constant fold it.
- if (HasStr1 && HasStr2) {
- size_t Offset = SearchStr.find(ToFindStr);
+ // strcspn(s, "") -> strlen(s)
+ if (DL && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
- if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
- return Constant::getNullValue(CI->getType());
+ return nullptr;
+}
- // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = CastToCStr(CI->getArgOperand(0), B);
- Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
- return B.CreateBitCast(Result, CI->getType());
- }
+Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return nullptr;
+
+ // fold strstr(x, x) -> x.
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
- // fold strstr(x, "y") -> strchr(x, 'y').
- if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
- return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
+ if (!StrLen)
+ return nullptr;
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, DL, TLI);
+ if (!StrNCmp)
+ return nullptr;
+ for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
+ Value *Cmp =
+ B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()), "cmp");
+ replaceAllUsesWith(Old, Cmp);
}
- return nullptr;
+ return CI;
}
-};
-struct MemCmpOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy(32))
- return nullptr;
+ // See if either input string is a constant string.
+ StringRef SearchStr, ToFindStr;
+ bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+
+ // fold strstr(x, "") -> x.
+ if (HasStr2 && ToFindStr.empty())
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
- Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+ // If both strings are known, constant fold it.
+ if (HasStr1 && HasStr2) {
+ size_t Offset = SearchStr.find(ToFindStr);
- if (LHS == RHS) // memcmp(s,s,x) -> 0
+ if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
return Constant::getNullValue(CI->getType());
- // Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!LenC) return nullptr;
- uint64_t Len = LenC->getZExtValue();
+ // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
+ Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ return B.CreateBitCast(Result, CI->getType());
+ }
- if (Len == 0) // memcmp(s1,s2,0) -> 0
- return Constant::getNullValue(CI->getType());
+ // fold strstr(x, "y") -> strchr(x, 'y').
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
+ }
+ return nullptr;
+}
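
Illustrative sketch (not from the patch) of the strstr rewrites above on an arbitrary constant string:

  #include <cassert>
  #include <cstring>

  int main() {
    const char *S = "abcd";
    assert(std::strstr(S, S) == S);                      // strstr(x, x) -> x
    assert(std::strstr(S, "") == S);                     // strstr(x, "") -> x
    assert(std::strstr(S, "bc") == S + 1);               // constant fold: GEP at the found offset
    assert(std::strstr(S, "c") == std::strchr(S, 'c'));  // strstr(x, "y") -> strchr(x, 'y')
  }
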
- // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
- if (Len == 1) {
- Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
- CI->getType(), "lhsv");
- Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
- CI->getType(), "rhsv");
- return B.CreateSub(LHSV, RHSV, "chardiff");
- }
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy(32))
+ return nullptr;
- // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
- StringRef LHSStr, RHSStr;
- if (getConstantStringInfo(LHS, LHSStr) &&
- getConstantStringInfo(RHS, RHSStr)) {
- // Make sure we're not reading out-of-bounds memory.
- if (Len > LHSStr.size() || Len > RHSStr.size())
- return nullptr;
- // Fold the memcmp and normalize the result. This way we get consistent
- // results across multiple platforms.
- uint64_t Ret = 0;
- int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
- if (Cmp < 0)
- Ret = -1;
- else if (Cmp > 0)
- Ret = 1;
- return ConstantInt::get(CI->getType(), Ret);
- }
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+ // Make sure we have a constant length.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!LenC)
return nullptr;
+ uint64_t Len = LenC->getZExtValue();
+
+ if (Len == 0) // memcmp(s1,s2,0) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
+ }
+
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ StringRef LHSStr, RHSStr;
+ if (getConstantStringInfo(LHS, LHSStr) &&
+ getConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.size() || Len > RHSStr.size())
+ return nullptr;
+ // Fold the memcmp and normalize the result. This way we get consistent
+ // results across multiple platforms.
+ uint64_t Ret = 0;
+ int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ if (Cmp < 0)
+ Ret = -1;
+ else if (Cmp > 0)
+ Ret = 1;
+ return ConstantInt::get(CI->getType(), Ret);
}
-};
-struct MemCpyOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+ return nullptr;
+}
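
A small standalone check (illustrative only) of the memcmp folds above; as with strcmp, only the sign of the result is specified, which the normalization to -1/0/1 preserves:

  #include <cassert>
  #include <cstring>

  int main() {
    const unsigned char L = 'a', R = 'c';
    assert((std::memcmp(&L, &R, 1) < 0) == (int(L) - int(R) < 0));  // length 1: byte difference
    assert(std::memcmp("ab", "ab", 2) == 0);                        // constant fold, equal
    assert(std::memcmp("ab", "ac", 2) < 0);                         // constant fold, normalized sign
  }
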
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(*Context))
- return nullptr;
+Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
- // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
-};
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != DL->getIntPtrType(CI->getContext()))
+ return nullptr;
-struct MemMoveOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+ // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+}
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(*Context))
- return nullptr;
+Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
- // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
-};
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ FT->getParamType(2) != DL->getIntPtrType(CI->getContext()))
+ return nullptr;
-struct MemSetOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+ // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+}
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
- return nullptr;
+Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
- // memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
- B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
- return CI->getArgOperand(0);
- }
-};
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
+ return nullptr;
+
+ // memset(p, v, n) -> llvm.memset(p, v, n, 1)
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+ return CI->getArgOperand(0);
+}
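
Illustrative only (not part of the patch): the three lowerings above can return CI->getArgOperand(0) because memcpy, memmove and memset are all specified to return their destination pointer:

  #include <cassert>
  #include <cstring>

  int main() {
    char A[4], B[4] = "abc";
    assert(std::memcpy(A, B, 4) == A);   // memcpy returns its destination
    assert(std::memmove(A, B, 4) == A);  // so does memmove
    assert(std::memset(A, 0, 4) == A);   // and memset
  }
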
//===----------------------------------------------------------------------===//
// Math Library Optimizations
@@ -1111,935 +1034,959 @@ struct MemSetOpt : public LibCallOptimization {
//===----------------------------------------------------------------------===//
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-struct UnaryDoubleFPOpt : public LibCallOptimization {
- bool CheckRetType;
- UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
- !FT->getParamType(0)->isDoubleTy())
- return nullptr;
+Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
+ bool CheckRetType) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return nullptr;
- if (CheckRetType) {
- // Check if all the uses for function like 'sin' are converted to float.
- for (User *U : CI->users()) {
- FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
- if (!Cast || !Cast->getType()->isFloatTy())
- return nullptr;
- }
+ if (CheckRetType) {
+ // Check if all the uses for function like 'sin' are converted to float.
+ for (User *U : CI->users()) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
+ if (!Cast || !Cast->getType()->isFloatTy())
+ return nullptr;
}
+ }
- // If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy())
- return nullptr;
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy())
+ return nullptr;
- // floor((double)floatval) -> (double)floorf(floatval)
- Value *V = Cast->getOperand(0);
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ if (Callee->isIntrinsic()) {
+ Module *M = CI->getParent()->getParent()->getParent();
+ Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID();
+ Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
+ V = B.CreateCall(F, V);
+ } else {
+ // The call is a library call rather than an intrinsic.
V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
}
-};
+
+ return B.CreateFPExt(V, B.getDoubleTy());
+}
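
A standalone sketch (illustrative, not from the patch) of the shrinking idea for a call like floor: when the double result is only ever truncated back to float, computing in float directly gives the same value. That holds exactly for floor; for other shrunk calls it is only an accuracy trade-off, hence the CheckRetType / UnsafeFPShrink gating:

  #include <cassert>
  #include <cmath>

  int main() {
    volatile float X = 2.75f;                        // volatile just to keep it a runtime value
    float Narrow = std::floor(X);                    // the float overload, i.e. the floorf form
    float ViaDouble = (float)std::floor((double)X);  // the original double-width pattern
    assert(Narrow == ViaDouble);
  }
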
// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax'
-struct BinaryDoubleFPOpt : public LibCallOptimization {
- bool CheckRetType;
- BinaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return nullptr;
+Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return nullptr;
- if (CheckRetType) {
- // Check if all the uses for function like 'fmin/fmax' are converted to
- // float.
- for (User *U : CI->users()) {
- FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
- if (!Cast || !Cast->getType()->isFloatTy())
- return nullptr;
- }
- }
+ // If this is something like 'fmin((double)floatval1, (double)floatval2)',
+ // we convert it to fminf.
+ FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1));
+ if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() || !Cast2 ||
+ !Cast2->getOperand(0)->getType()->isFloatTy())
+ return nullptr;
- // If this is something like 'fmin((double)floatval1, (double)floatval2)',
- // we convert it to fminf.
- FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1));
- if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() ||
- !Cast2 || !Cast2->getOperand(0)->getType()->isFloatTy())
- return nullptr;
+ // fmin((double)floatval1, (double)floatval2)
+ // -> (double)fmin(floatval1, floatval2)
+ Value *V = nullptr;
+ Value *V1 = Cast1->getOperand(0);
+ Value *V2 = Cast2->getOperand(0);
+ // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP().
+ V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
+ Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+}
- // fmin((double)floatval1, (double)floatval2)
- // -> (double)fmin(floatval1, floatval2)
- Value *V = nullptr;
- Value *V1 = Cast1->getOperand(0);
- Value *V2 = Cast2->getOperand(0);
- V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
- Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
- }
-};
-
-struct UnsafeFPLibCallOptimization : public LibCallOptimization {
- bool UnsafeFPShrink;
- UnsafeFPLibCallOptimization(bool UnsafeFPShrink) {
- this->UnsafeFPShrink = UnsafeFPShrink;
- }
-};
-
-struct CosOpt : public UnsafeFPLibCallOptimization {
- CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "cos" &&
- TLI->has(LibFunc::cosf)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
- }
+Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Value *Ret = nullptr;
+ if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) {
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+ }
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
- // cos(-x) -> cos(x)
- Value *Op1 = CI->getArgOperand(0);
- if (BinaryOperator::isFNeg(Op1)) {
- BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
- return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
- }
+ // cos(-x) -> cos(x)
+ Value *Op1 = CI->getArgOperand(0);
+ if (BinaryOperator::isFNeg(Op1)) {
+ BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
+ return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+ }
+ return Ret;
+}
+
+Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+
+ Value *Ret = nullptr;
+ if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) {
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ // pow(1.0, x) -> 1.0
+ if (Op1C->isExactlyValue(1.0))
+ return Op1C;
+ // pow(2.0, x) -> exp2(x)
+ if (Op1C->isExactlyValue(2.0) &&
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
+ LibFunc::exp2l))
+ return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ // pow(10.0, x) -> exp10(x)
+ if (Op1C->isExactlyValue(10.0) &&
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
+ LibFunc::exp10l))
+ return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B,
+ Callee->getAttributes());
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (!Op2C)
return Ret;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
+ LibFunc::sqrtl) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
+ LibFunc::fabsl)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow, and still handles negative zero
+ // and negative infinity correctly.
+ // TODO: In fast-math mode, this could be just sqrt(x).
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
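+    // Roughly (illustrative sketch for a double argument), the emitted IR is:
+    //   %sqrt = call double @sqrt(double %x)
+    //   %fabs = call double @fabs(double %sqrt)
+    //   %cmp  = fcmp oeq double %x, 0xFFF0000000000000   ; x == -inf
+    //   %res  = select i1 %cmp, double 0x7FF0000000000000, double %fabs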
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
+ Value *FAbs =
+ EmitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes());
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
+ return Sel;
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateFMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip");
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ Function *Caller = CI->getParent()->getParent();
+
+ Value *Ret = nullptr;
+ if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+ TLI->has(LibFunc::exp2f)) {
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
}
-};
-
-struct PowOpt : public UnsafeFPLibCallOptimization {
- PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "pow" &&
- TLI->has(LibFunc::powf)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
- }
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
- Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- // pow(1.0, x) -> 1.0
- if (Op1C->isExactlyValue(1.0))
- return Op1C;
- // pow(2.0, x) -> exp2(x)
- if (Op1C->isExactlyValue(2.0) &&
- hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
- LibFunc::exp2l))
- return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
- // pow(10.0, x) -> exp10(x)
- if (Op1C->isExactlyValue(10.0) &&
- hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
- LibFunc::exp10l))
- return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B,
- Callee->getAttributes());
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
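+  // For example (sketch): exp2(sitofp i16 %n to double) becomes
+  //   ldexp(1.0, sext i16 %n to i32)
+  // The unsigned case requires strictly fewer than 32 bits because a full
+  // 32-bit unsigned value might not fit in ldexp's signed int exponent.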
+ LibFunc::Func LdExp = LibFunc::ldexpl;
+ if (Op->getType()->isFloatTy())
+ LdExp = LibFunc::ldexpf;
+ else if (Op->getType()->isDoubleTy())
+ LdExp = LibFunc::ldexp;
+
+ if (TLI->has(LdExp)) {
+ Value *LdExpArg = nullptr;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
}
- ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (!Op2C) return Ret;
-
- if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
- return ConstantFP::get(CI->getType(), 1.0);
-
- if (Op2C->isExactlyValue(0.5) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
- LibFunc::sqrtl) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
- LibFunc::fabsl)) {
- // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
- // This is faster than calling pow, and still handles negative zero
- // and negative infinity correctly.
- // TODO: In fast-math mode, this could be just sqrt(x).
- // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
- Value *Inf = ConstantFP::getInfinity(CI->getType());
- Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
- Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
- Callee->getAttributes());
- Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
- Callee->getAttributes());
- Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
- Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
- return Sel;
- }
+ if (LdExpArg) {
+ Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee =
+ M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
+ Op->getType(), B.getInt32Ty(), nullptr);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
- if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
- return Op1;
- if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
- return B.CreateFMul(Op1, Op1, "pow2");
- if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
- return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
- Op1, "powrecip");
- return nullptr;
- }
-};
-
-struct Exp2Opt : public UnsafeFPLibCallOptimization {
- Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- Value *Ret = nullptr;
- if (UnsafeFPShrink && Callee->getName() == "exp2" &&
- TLI->has(LibFunc::exp2f)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ return CI;
}
+ }
+ return Ret;
+}
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
+Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
- Value *Op = CI->getArgOperand(0);
- // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
- // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- LibFunc::Func LdExp = LibFunc::ldexpl;
- if (Op->getType()->isFloatTy())
- LdExp = LibFunc::ldexpf;
- else if (Op->getType()->isDoubleTy())
- LdExp = LibFunc::ldexp;
-
- if (TLI->has(LdExp)) {
- Value *LdExpArg = nullptr;
- if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
- } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
- }
+ Value *Ret = nullptr;
+ if (Callee->getName() == "fabs" && TLI->has(LibFunc::fabsf)) {
+ Ret = optimizeUnaryDoubleFP(CI, B, false);
+ }
- if (LdExpArg) {
- Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
- if (!Op->getType()->isFloatTy())
- One = ConstantExpr::getFPExtend(One, Op->getType());
+ FunctionType *FT = Callee->getFunctionType();
+ // Make sure this has 1 argument of FP type which matches the result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
- Module *M = Caller->getParent();
- Value *Callee =
- M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
- Op->getType(), B.getInt32Ty(), NULL);
- CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
+ Value *Op = CI->getArgOperand(0);
+ if (Instruction *I = dyn_cast<Instruction>(Op)) {
+ // Fold fabs(x * x) -> x * x; any squared FP value must already be positive.
+ if (I->getOpcode() == Instruction::FMul)
+ if (I->getOperand(0) == I->getOperand(1))
+ return Op;
+ }
+ return Ret;
+}
- return CI;
+Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+
+ Value *Ret = nullptr;
+ if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
+ Callee->getIntrinsicID() == Intrinsic::sqrt))
+ Ret = optimizeUnaryDoubleFP(CI, B, true);
+
+ // FIXME: For finer-grain optimization, we need intrinsics to have the same
+ // fast-math flag decorations that are applied to FP instructions. For now,
+ // we have to rely on the function-level unsafe-fp-math attribute to do this
+ // optimization because there's no other way to express that the sqrt can be
+ // reassociated.
+ Function *F = CI->getParent()->getParent();
+ if (F->hasFnAttribute("unsafe-fp-math")) {
+ // Check for unsafe-fp-math = true.
+ Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+ if (Attr.getValueAsString() != "true")
+ return Ret;
+ }
+ Value *Op = CI->getArgOperand(0);
+ if (Instruction *I = dyn_cast<Instruction>(Op)) {
+ if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) {
+ // We're looking for a repeated factor in a multiplication tree,
+ // so we can do this fold: sqrt(x * x) -> fabs(x);
+ // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y).
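+      // For example (illustrative IR sketch, reassociation allowed):
+      //   %m = fmul fast double %x, %x
+      //   %r = call double @sqrt(double %m)
+      // becomes
+      //   %r = call double @llvm.fabs.f64(double %x)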
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+ Value *RepeatOp = nullptr;
+ Value *OtherOp = nullptr;
+ if (Op0 == Op1) {
+ // Simple match: the operands of the multiply are identical.
+ RepeatOp = Op0;
+ } else {
+ // Look for a more complicated pattern: one of the operands is itself
+ // a multiply, so search for a common factor in that multiply.
+ // Note: We don't bother looking any deeper than this first level or for
+ // variations of this pattern because instcombine's visitFMUL and/or the
+ // reassociation pass should give us this form.
+ Value *OtherMul0, *OtherMul1;
+ if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
+ // Pattern: sqrt((x * y) * z)
+ if (OtherMul0 == OtherMul1) {
+ // Matched: sqrt((x * x) * z)
+ RepeatOp = OtherMul0;
+ OtherOp = Op1;
+ }
+ }
+ }
+ if (RepeatOp) {
+ // Fast math flags for any created instructions should match the sqrt
+ // and multiply.
+ // FIXME: We're not checking the sqrt because it doesn't have
+ // fast-math-flags (see earlier comment).
+ IRBuilder<true, ConstantFolder,
+ IRBuilderDefaultInserter<true> >::FastMathFlagGuard Guard(B);
+ B.SetFastMathFlags(I->getFastMathFlags());
+ // If we found a repeated factor, hoist it out of the square root and
+ // replace it with the fabs of that factor.
+ Module *M = Callee->getParent();
+ Type *ArgType = Op->getType();
+ Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+ Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
+ if (OtherOp) {
+ // If we found a non-repeated factor, we still need to get its square
+ // root. We then multiply that by the value that was simplified out
+ // of the square root calculation.
+ Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+ Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
+ return B.CreateFMul(FabsCall, SqrtCall);
+ }
+ return FabsCall;
}
}
- return Ret;
}
-};
+ return Ret;
+}
-struct SinCosPiOpt : public LibCallOptimization {
- SinCosPiOpt() {}
+static bool isTrigLibCall(CallInst *CI);
+static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
+ bool UseFloat, Value *&Sin, Value *&Cos,
+ Value *&SinCos);
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Make sure the prototype is as expected, otherwise the rest of the
- // function is probably invalid and likely to abort.
- if (!isTrigLibCall(CI))
- return nullptr;
+Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
- Value *Arg = CI->getArgOperand(0);
- SmallVector<CallInst *, 1> SinCalls;
- SmallVector<CallInst *, 1> CosCalls;
- SmallVector<CallInst *, 1> SinCosCalls;
+  // Make sure the prototype is as expected; otherwise the rest of the
+  // function is probably invalid and likely to abort.
+ if (!isTrigLibCall(CI))
+ return nullptr;
- bool IsFloat = Arg->getType()->isFloatTy();
+ Value *Arg = CI->getArgOperand(0);
+ SmallVector<CallInst *, 1> SinCalls;
+ SmallVector<CallInst *, 1> CosCalls;
+ SmallVector<CallInst *, 1> SinCosCalls;
- // Look for all compatible sinpi, cospi and sincospi calls with the same
- // argument. If there are enough (in some sense) we can make the
- // substitution.
- for (User *U : Arg->users())
- classifyArgUse(U, CI->getParent(), IsFloat, SinCalls, CosCalls,
- SinCosCalls);
+ bool IsFloat = Arg->getType()->isFloatTy();
- // It's only worthwhile if both sinpi and cospi are actually used.
- if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
- return nullptr;
+ // Look for all compatible sinpi, cospi and sincospi calls with the same
+ // argument. If there are enough (in some sense) we can make the
+ // substitution.
+ for (User *U : Arg->users())
+ classifyArgUse(U, CI->getParent(), IsFloat, SinCalls, CosCalls,
+ SinCosCalls);
- Value *Sin, *Cos, *SinCos;
- insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
- SinCos);
-
- replaceTrigInsts(SinCalls, Sin);
- replaceTrigInsts(CosCalls, Cos);
- replaceTrigInsts(SinCosCalls, SinCos);
-
- return nullptr;
- }
-
- bool isTrigLibCall(CallInst *CI) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
-
- // We can only hope to do anything useful if we can ignore things like errno
- // and floating-point exceptions.
- bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) &&
- CI->hasFnAttr(Attribute::ReadNone);
-
- // Other than that we need float(float) or double(double)
- return AttributesSafe && FT->getNumParams() == 1 &&
- FT->getReturnType() == FT->getParamType(0) &&
- (FT->getParamType(0)->isFloatTy() ||
- FT->getParamType(0)->isDoubleTy());
- }
-
- void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
- SmallVectorImpl<CallInst *> &SinCalls,
- SmallVectorImpl<CallInst *> &CosCalls,
- SmallVectorImpl<CallInst *> &SinCosCalls) {
- CallInst *CI = dyn_cast<CallInst>(Val);
-
- if (!CI)
- return;
-
- Function *Callee = CI->getCalledFunction();
- StringRef FuncName = Callee->getName();
- LibFunc::Func Func;
- if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) ||
- !isTrigLibCall(CI))
- return;
-
- if (IsFloat) {
- if (Func == LibFunc::sinpif)
- SinCalls.push_back(CI);
- else if (Func == LibFunc::cospif)
- CosCalls.push_back(CI);
- else if (Func == LibFunc::sincospif_stret)
- SinCosCalls.push_back(CI);
- } else {
- if (Func == LibFunc::sinpi)
- SinCalls.push_back(CI);
- else if (Func == LibFunc::cospi)
- CosCalls.push_back(CI);
- else if (Func == LibFunc::sincospi_stret)
- SinCosCalls.push_back(CI);
- }
- }
+ // It's only worthwhile if both sinpi and cospi are actually used.
+ if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
+ return nullptr;
- void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) {
- for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(),
- E = Calls.end();
- I != E; ++I) {
- LCS->replaceAllUsesWith(*I, Res);
- }
- }
+ Value *Sin, *Cos, *SinCos;
+ insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos);
- void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
- bool UseFloat, Value *&Sin, Value *&Cos,
- Value *&SinCos) {
- Type *ArgTy = Arg->getType();
- Type *ResTy;
- StringRef Name;
-
- Triple T(OrigCallee->getParent()->getTargetTriple());
- if (UseFloat) {
- Name = "__sincospif_stret";
-
- assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
- // x86_64 can't use {float, float} since that would be returned in both
- // xmm0 and xmm1, which isn't what a real struct would do.
- ResTy = T.getArch() == Triple::x86_64
- ? static_cast<Type *>(VectorType::get(ArgTy, 2))
- : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL));
- } else {
- Name = "__sincospi_stret";
- ResTy = StructType::get(ArgTy, ArgTy, NULL);
- }
+ replaceTrigInsts(SinCalls, Sin);
+ replaceTrigInsts(CosCalls, Cos);
+ replaceTrigInsts(SinCosCalls, SinCos);
- Module *M = OrigCallee->getParent();
- Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
- ResTy, ArgTy, NULL);
-
- if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
- // If the argument is an instruction, it must dominate all uses so put our
- // sincos call there.
- BasicBlock::iterator Loc = ArgInst;
- B.SetInsertPoint(ArgInst->getParent(), ++Loc);
- } else {
- // Otherwise (e.g. for a constant) the beginning of the function is as
- // good a place as any.
- BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
- B.SetInsertPoint(&EntryBB, EntryBB.begin());
- }
+ return nullptr;
+}
- SinCos = B.CreateCall(Callee, Arg, "sincospi");
+static bool isTrigLibCall(CallInst *CI) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+
+ // We can only hope to do anything useful if we can ignore things like errno
+ // and floating-point exceptions.
+ bool AttributesSafe =
+ CI->hasFnAttr(Attribute::NoUnwind) && CI->hasFnAttr(Attribute::ReadNone);
+
+  // Other than that, we need float(float) or double(double).
+ return AttributesSafe && FT->getNumParams() == 1 &&
+ FT->getReturnType() == FT->getParamType(0) &&
+ (FT->getParamType(0)->isFloatTy() ||
+ FT->getParamType(0)->isDoubleTy());
+}
- if (SinCos->getType()->isStructTy()) {
- Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
- Cos = B.CreateExtractValue(SinCos, 1, "cospi");
- } else {
- Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
- "sinpi");
- Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
- "cospi");
- }
+void
+LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
+ SmallVectorImpl<CallInst *> &SinCalls,
+ SmallVectorImpl<CallInst *> &CosCalls,
+ SmallVectorImpl<CallInst *> &SinCosCalls) {
+ CallInst *CI = dyn_cast<CallInst>(Val);
+
+ if (!CI)
+ return;
+
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+ LibFunc::Func Func;
+ if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || !isTrigLibCall(CI))
+ return;
+
+ if (IsFloat) {
+ if (Func == LibFunc::sinpif)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc::cospif)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc::sincospif_stret)
+ SinCosCalls.push_back(CI);
+ } else {
+ if (Func == LibFunc::sinpi)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc::cospi)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc::sincospi_stret)
+ SinCosCalls.push_back(CI);
}
+}
-};
+void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl<CallInst *> &Calls,
+ Value *Res) {
+ for (SmallVectorImpl<CallInst *>::iterator I = Calls.begin(), E = Calls.end();
+ I != E; ++I) {
+ replaceAllUsesWith(*I, Res);
+ }
+}
+
+void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
+ bool UseFloat, Value *&Sin, Value *&Cos, Value *&SinCos) {
+ Type *ArgTy = Arg->getType();
+ Type *ResTy;
+ StringRef Name;
+
+ Triple T(OrigCallee->getParent()->getTargetTriple());
+ if (UseFloat) {
+ Name = "__sincospif_stret";
+
+ assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
+ // x86_64 can't use {float, float} since that would be returned in both
+ // xmm0 and xmm1, which isn't what a real struct would do.
+ ResTy = T.getArch() == Triple::x86_64
+ ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+ : static_cast<Type *>(StructType::get(ArgTy, ArgTy, nullptr));
+ } else {
+ Name = "__sincospi_stret";
+ ResTy = StructType::get(ArgTy, ArgTy, nullptr);
+ }
+
+ Module *M = OrigCallee->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
+ ResTy, ArgTy, nullptr);
+
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+ // If the argument is an instruction, it must dominate all uses so put our
+ // sincos call there.
+ BasicBlock::iterator Loc = ArgInst;
+ B.SetInsertPoint(ArgInst->getParent(), ++Loc);
+ } else {
+ // Otherwise (e.g. for a constant) the beginning of the function is as
+ // good a place as any.
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
+ B.SetInsertPoint(&EntryBB, EntryBB.begin());
+ }
+
+ SinCos = B.CreateCall(Callee, Arg, "sincospi");
+
+ if (SinCos->getType()->isStructTy()) {
+ Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
+ Cos = B.CreateExtractValue(SinCos, 1, "cospi");
+ } else {
+ Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
+ "sinpi");
+ Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
+ "cospi");
+ }
+}
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
-struct FFSOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- if (FT->getNumParams() != 1 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- !FT->getParamType(0)->isIntegerTy())
- return nullptr;
+Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+  // Just make sure this has 1 integer argument and an i32 result, matching
+  // the ffs() prototype.
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getParamType(0)->isIntegerTy())
+ return nullptr;
- Value *Op = CI->getArgOperand(0);
+ Value *Op = CI->getArgOperand(0);
- // Constant fold.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- if (CI->isZero()) // ffs(0) -> 0.
- return B.getInt32(0);
- // ffs(c) -> cttz(c)+1
- return B.getInt32(CI->getValue().countTrailingZeros() + 1);
- }
+ // Constant fold.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (CI->isZero()) // ffs(0) -> 0.
+ return B.getInt32(0);
+ // ffs(c) -> cttz(c)+1
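+    // For example, ffs(8) -> cttz(8) + 1 = 3 + 1 = 4 (the lowest set bit of
+    // 0b1000, counted from 1).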
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
+ }
- // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
- Type *ArgType = Op->getType();
- Value *F = Intrinsic::getDeclaration(Callee->getParent(),
- Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
- V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
- V = B.CreateIntCast(V, B.getInt32Ty(), false);
-
- Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
- return B.CreateSelect(Cond, V, B.getInt32(0));
- }
-};
-
-struct AbsOpt : public LibCallOptimization {
- bool ignoreCallingConv() override { return true; }
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(integer) where the types agree.
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- FT->getParamType(0) != FT->getReturnType())
- return nullptr;
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ Type *ArgType = Op->getType();
+ Value *F =
+ Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, B.getInt32Ty(), false);
- // abs(x) -> x >s -1 ? x : -x
- Value *Op = CI->getArgOperand(0);
- Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
- "ispos");
- Value *Neg = B.CreateNeg(Op, "neg");
- return B.CreateSelect(Pos, Op, Neg);
- }
-};
-
-struct IsDigitOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
- return nullptr;
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
+}
- // isdigit(c) -> (c-'0') <u 10
- Value *Op = CI->getArgOperand(0);
- Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
- Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
- return B.CreateZExt(Op, CI->getType());
- }
-};
-
-struct IsAsciiOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
- return nullptr;
+Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ FT->getParamType(0) != FT->getReturnType())
+ return nullptr;
- // isascii(c) -> c <u 128
- Value *Op = CI->getArgOperand(0);
- Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
- return B.CreateZExt(Op, CI->getType());
- }
-};
+ // abs(x) -> x >s -1 ? x : -x
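+  // Note that for the minimum signed value the negation wraps back to the
+  // same value; that is acceptable because abs() is undefined on that input.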
+ Value *Op = CI->getArgOperand(0);
+ Value *Pos =
+ B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+}
-struct ToAsciiOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- FunctionType *FT = Callee->getFunctionType();
- // We require i32(i32)
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isIntegerTy(32))
- return nullptr;
+Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return nullptr;
- // toascii(c) -> c & 0x7f
- return B.CreateAnd(CI->getArgOperand(0),
- ConstantInt::get(CI->getType(),0x7F));
- }
-};
+ // isdigit(c) -> (c-'0') <u 10
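+  // The single unsigned compare also rejects c < '0': the subtraction wraps
+  // to a large unsigned value, which fails the <u 10 test.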
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+}
+
+Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return nullptr;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+}
+
+Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return nullptr;
+
+ // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(), 0x7F));
+}
//===----------------------------------------------------------------------===//
// Formatting and IO Library Call Optimizations
//===----------------------------------------------------------------------===//
-struct ErrorReportingOpt : public LibCallOptimization {
- ErrorReportingOpt(int S = -1) : StreamArg(S) {}
+static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &) override {
- // Error reporting calls should be cold, mark them as such.
- // This applies even to non-builtin calls: it is only a hint and applies to
- // functions that the frontend might not understand as builtins.
+Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
+ int StreamArg) {
+  // Error reporting calls should be cold; mark them as such.
+ // This applies even to non-builtin calls: it is only a hint and applies to
+ // functions that the frontend might not understand as builtins.
- // This heuristic was suggested in:
- // Improving Static Branch Prediction in a Compiler
- // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
- // Proceedings of PACT'98, Oct. 1998, IEEE
-
- if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI)) {
- CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
- }
+ // This heuristic was suggested in:
+ // Improving Static Branch Prediction in a Compiler
+ // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
+ // Proceedings of PACT'98, Oct. 1998, IEEE
+ Function *Callee = CI->getCalledFunction();
- return nullptr;
+ if (!CI->hasFnAttr(Attribute::Cold) &&
+ isReportingError(Callee, CI, StreamArg)) {
+ CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
}
-protected:
- bool isReportingError(Function *Callee, CallInst *CI) {
- if (!ColdErrorCalls)
- return false;
-
- if (!Callee || !Callee->isDeclaration())
- return false;
-
- if (StreamArg < 0)
- return true;
+ return nullptr;
+}
- // These functions might be considered cold, but only if their stream
- // argument is stderr.
-
- if (StreamArg >= (int) CI->getNumArgOperands())
- return false;
- LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
- if (!LI)
- return false;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
- if (!GV || !GV->isDeclaration())
- return false;
- return GV->getName() == "stderr";
- }
-
- int StreamArg;
-};
-
-struct PrintFOpt : public LibCallOptimization {
- Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
- IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
- return nullptr;
+static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
+ if (!ColdErrorCalls)
+ return false;
- // Empty format string -> noop.
- if (FormatStr.empty()) // Tolerate printf's declared void.
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 0);
+ if (!Callee || !Callee->isDeclaration())
+ return false;
- // Do not do any of the following transformations if the printf return value
- // is used, in general the printf return value is not compatible with either
- // putchar() or puts().
- if (!CI->use_empty())
- return nullptr;
+ if (StreamArg < 0)
+ return true;
- // printf("x") -> putchar('x'), even for '%'.
- if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI);
- if (CI->use_empty() || !Res) return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
+ // These functions might be considered cold, but only if their stream
+ // argument is stderr.
- // printf("foo\n") --> puts("foo")
- if (FormatStr[FormatStr.size()-1] == '\n' &&
- FormatStr.find('%') == StringRef::npos) { // No format characters.
- // Create a string literal with no \n on it. We expect the constant merge
- // pass to be run after this pass, to merge duplicate strings.
- FormatStr = FormatStr.drop_back();
- Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, DL, TLI);
- return (CI->use_empty() || !NewCI) ?
- NewCI :
- ConstantInt::get(CI->getType(), FormatStr.size()+1);
- }
+ if (StreamArg >= (int)CI->getNumArgOperands())
+ return false;
+ LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
+ if (!LI)
+ return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+ if (!GV || !GV->isDeclaration())
+ return false;
+ return GV->getName() == "stderr";
+}
- // Optimize specific format strings.
- // printf("%c", chr) --> putchar(chr)
- if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI);
+Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return nullptr;
- if (CI->use_empty() || !Res) return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value *)CI : ConstantInt::get(CI->getType(), 0);
- // printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
- }
+ // Do not do any of the following transformations if the printf return value
+ // is used, in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
return nullptr;
- }
-
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Require one fixed pointer argument and an integer/void result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() ||
- FT->getReturnType()->isVoidTy()))
- return nullptr;
- if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
- return V;
- }
+ // printf("x") -> putchar('x'), even for '%'.
+ if (FormatStr.size() == 1) {
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI);
+ if (CI->use_empty() || !Res)
+ return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
- // printf(format, ...) -> iprintf(format, ...) if no floating point
- // arguments.
- if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *IPrintFFn =
- M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(IPrintFFn);
- B.Insert(New);
- return New;
- }
- return nullptr;
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size() - 1] == '\n' &&
+ FormatStr.find('%') == StringRef::npos) { // No format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr = FormatStr.drop_back();
+ Value *GV = B.CreateGlobalString(FormatStr, "str");
+ Value *NewCI = EmitPutS(GV, B, DL, TLI);
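+    // printf returns the number of characters written, including the '\n'
+    // that was dropped above, hence the +1 below.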
+ return (CI->use_empty() || !NewCI)
+ ? NewCI
+ : ConstantInt::get(CI->getType(), FormatStr.size() + 1);
}
-};
-struct SPrintFOpt : public LibCallOptimization {
- Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
- IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return nullptr;
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI);
- // If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumArgOperands() == 2) {
- // Make sure there's no % in the constant array. We could try to handle
- // %% -> % in the future if we cared.
- for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
- if (FormatStr[i] == '%')
- return nullptr; // we found a format specifier, bail out.
-
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
-
- // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(*Context), // Copy the
- FormatStr.size() + 1), 1); // nul byte.
- return ConstantInt::get(CI->getType(), FormatStr.size());
- }
+ if (CI->use_empty() || !Res)
+ return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
- // The remaining optimizations require the format string to be "%s" or "%c"
- // and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
- return nullptr;
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
+ return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
+ }
+ return nullptr;
+}
- // Decode the second character of the format string.
- if (FormatStr[1] == 'c') {
- // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr;
- Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
- Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
- B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
- B.CreateStore(B.getInt8(0), Ptr);
-
- return ConstantInt::get(CI->getType(), 1);
- }
+Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
- if (FormatStr[1] == 's') {
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+ Function *Callee = CI->getCalledFunction();
+ // Require one fixed pointer argument and an integer/void result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy()))
+ return nullptr;
- // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr;
+ if (Value *V = optimizePrintFString(CI, B)) {
+ return V;
+ }
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI);
- if (!Len)
- return nullptr;
- Value *IncLen = B.CreateAdd(Len,
- ConstantInt::get(Len->getType(), 1),
- "leninc");
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return nullptr;
+}
- // The sprintf result is the unincremented number of bytes in the string.
- return B.CreateIntCast(Len, CI->getType(), false);
- }
+Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return nullptr;
- }
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Require two fixed pointer arguments and an integer result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->getNumArgOperands() == 2) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return nullptr; // we found a format specifier, bail out.
+
+ // These optimizations require DataLayout.
+ if (!DL)
return nullptr;
- if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
- return V;
- }
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ B.CreateMemCpy(
+ CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(DL->getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1),
+ 1); // Copy the null byte.
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
- // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
- // point arguments.
- if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *SIPrintFFn =
- M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(SIPrintFFn);
- B.Insert(New);
- return New;
- }
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
return nullptr;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy())
+ return nullptr;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
}
-};
-struct FPrintFOpt : public LibCallOptimization {
- Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
- IRBuilder<> &B) {
- ErrorReportingOpt ER(/* StreamArg = */ 0);
- (void) ER.callOptimizer(Callee, CI, B);
+ if (FormatStr[1] == 's') {
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
- // All the optimizations depend on the format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
- // Do not do any of the following transformations if the fprintf return
- // value is used, in general the fprintf return value is not compatible
- // with fwrite(), fputc() or fputs().
- if (!CI->use_empty())
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI);
+ if (!Len)
return nullptr;
+ Value *IncLen =
+ B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
- // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
- if (CI->getNumArgOperands() == 2) {
- for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
- if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
- return nullptr; // We found a format specifier.
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return nullptr;
+}
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Require two fixed pointer arguments and an integer result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return nullptr;
- return EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, DL, TLI);
- }
+ if (Value *V = optimizeSPrintFString(CI, B)) {
+ return V;
+ }
- // The remaining optimizations require the format string to be "%s" or "%c"
- // and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
- return nullptr;
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return nullptr;
+}
- // Decode the second character of the format string.
- if (FormatStr[1] == 'c') {
- // fprintf(F, "%c", chr) --> fputc(chr, F)
- if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr;
- return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
- }
+Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
+ optimizeErrorReporting(CI, B, 0);
- if (FormatStr[1] == 's') {
- // fprintf(F, "%s", str) --> fputs(str, F)
- if (!CI->getArgOperand(2)->getType()->isPointerTy())
- return nullptr;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
- }
+ // All the optimizations depend on the format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return nullptr;
+
+ // Do not do any of the following transformations if the fprintf return
+ // value is used, in general the fprintf return value is not compatible
+ // with fwrite(), fputc() or fputs().
+ if (!CI->use_empty())
+ return nullptr;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumArgOperands() == 2) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return nullptr; // We found a format specifier.
+
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
+
+ return EmitFWrite(
+ CI->getArgOperand(1),
+ ConstantInt::get(DL->getIntPtrType(CI->getContext()), FormatStr.size()),
+ CI->getArgOperand(0), B, DL, TLI);
}
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Require two fixed paramters as pointers and integer result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return nullptr;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ }
- if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
- return V;
- }
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
+ return nullptr;
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ }
+ return nullptr;
+}
- // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
- // floating point arguments.
- if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *FIPrintFFn =
- M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(FIPrintFFn);
- B.Insert(New);
- return New;
- }
+Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+  // Require two fixed pointer parameters and an integer result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
return nullptr;
+
+ if (Value *V = optimizeFPrintFString(CI, B)) {
+ return V;
}
-};
-struct FWriteOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- ErrorReportingOpt ER(/* StreamArg = */ 3);
- (void) ER.callOptimizer(Callee, CI, B);
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return nullptr;
+}
- // Require a pointer, an integer, an integer, a pointer, returning integer.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- !FT->getParamType(2)->isIntegerTy() ||
- !FT->getParamType(3)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return nullptr;
+Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
+ optimizeErrorReporting(CI, B, 3);
- // Get the element size and count.
- ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!SizeC || !CountC) return nullptr;
- uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
-
- // If this is writing zero records, remove the call (it's a noop).
- if (Bytes == 0)
- return ConstantInt::get(CI->getType(), 0);
-
- // If this is writing one byte, turn it into fputc.
- // This optimisation is only valid, if the return value is unused.
- if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
- }
+ Function *Callee = CI->getCalledFunction();
+ // Require a pointer, an integer, an integer, a pointer, returning integer.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getParamType(3)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return nullptr;
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeC || !CountC)
return nullptr;
- }
-};
+ uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
-struct FPutsOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- ErrorReportingOpt ER(/* StreamArg = */ 1);
- (void) ER.callOptimizer(Callee, CI, B);
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
- // These optimizations require DataLayout.
- if (!DL) return nullptr;
+ // If this is writing one byte, turn it into fputc.
+ // This optimisation is only valid, if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
+ }
- // Require two pointers. Also, we can't optimize if return value is used.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !CI->use_empty())
- return nullptr;
+ return nullptr;
+}
- // fputs(s,F) --> fwrite(s,1,strlen(s),F)
- uint64_t Len = GetStringLength(CI->getArgOperand(0));
- if (!Len) return nullptr;
- // Known to have no uses (see above).
- return EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(DL->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, DL, TLI);
- }
-};
-
-struct PutsOpt : public LibCallOptimization {
- Value *callOptimizer(Function *Callee, CallInst *CI,
- IRBuilder<> &B) override {
- // Require one fixed pointer argument and an integer/void result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() ||
- FT->getReturnType()->isVoidTy()))
- return nullptr;
+Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
+ optimizeErrorReporting(CI, B, 1);
- // Check for a constant string.
- StringRef Str;
- if (!getConstantStringInfo(CI->getArgOperand(0), Str))
- return nullptr;
+ Function *Callee = CI->getCalledFunction();
- if (Str.empty() && CI->use_empty()) {
- // puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI);
- if (CI->use_empty() || !Res) return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
+ // These optimizations require DataLayout.
+ if (!DL)
+ return nullptr;
+ // Require two pointers. Also, we can't optimize if return value is used.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() || !CI->use_empty())
return nullptr;
- }
-};
-} // End anonymous namespace.
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
+ if (!Len)
+ return nullptr;
+
+ // Known to have no uses (see above).
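+  // GetStringLength includes the terminating nul, so write Len - 1 bytes.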
+ return EmitFWrite(
+ CI->getArgOperand(0),
+ ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len - 1),
+ CI->getArgOperand(1), B, DL, TLI);
+}
-namespace llvm {
+Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ // Require one fixed pointer argument and an integer/void result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy()))
+ return nullptr;
-class LibCallSimplifierImpl {
- const DataLayout *DL;
- const TargetLibraryInfo *TLI;
- const LibCallSimplifier *LCS;
- bool UnsafeFPShrink;
+ // Check for a constant string.
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ return nullptr;
- // Math library call optimizations.
- CosOpt Cos;
- PowOpt Pow;
- Exp2Opt Exp2;
-public:
- LibCallSimplifierImpl(const DataLayout *DL, const TargetLibraryInfo *TLI,
- const LibCallSimplifier *LCS,
- bool UnsafeFPShrink = false)
- : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
- this->DL = DL;
- this->TLI = TLI;
- this->LCS = LCS;
- this->UnsafeFPShrink = UnsafeFPShrink;
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI);
+ if (CI->use_empty() || !Res)
+ return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
- Value *optimizeCall(CallInst *CI);
- LibCallOptimization *lookupOptimization(CallInst *CI);
- bool hasFloatVersion(StringRef FuncName);
-};
+ return nullptr;
+}
-bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
+bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
LibFunc::Func Func;
SmallString<20> FloatFuncName = FuncName;
FloatFuncName += 'f';
@@ -2048,263 +1995,219 @@ bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
return false;
}
-// Fortified library call optimizations.
-static MemCpyChkOpt MemCpyChk;
-static MemMoveChkOpt MemMoveChk;
-static MemSetChkOpt MemSetChk;
-static StrCpyChkOpt StrCpyChk;
-static StpCpyChkOpt StpCpyChk;
-static StrNCpyChkOpt StrNCpyChk;
-
-// String library call optimizations.
-static StrCatOpt StrCat;
-static StrNCatOpt StrNCat;
-static StrChrOpt StrChr;
-static StrRChrOpt StrRChr;
-static StrCmpOpt StrCmp;
-static StrNCmpOpt StrNCmp;
-static StrCpyOpt StrCpy;
-static StpCpyOpt StpCpy;
-static StrNCpyOpt StrNCpy;
-static StrLenOpt StrLen;
-static StrPBrkOpt StrPBrk;
-static StrToOpt StrTo;
-static StrSpnOpt StrSpn;
-static StrCSpnOpt StrCSpn;
-static StrStrOpt StrStr;
-
-// Memory library call optimizations.
-static MemCmpOpt MemCmp;
-static MemCpyOpt MemCpy;
-static MemMoveOpt MemMove;
-static MemSetOpt MemSet;
-
-// Math library call optimizations.
-static UnaryDoubleFPOpt UnaryDoubleFP(false);
-static BinaryDoubleFPOpt BinaryDoubleFP(false);
-static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
-static SinCosPiOpt SinCosPi;
-
- // Integer library call optimizations.
-static FFSOpt FFS;
-static AbsOpt Abs;
-static IsDigitOpt IsDigit;
-static IsAsciiOpt IsAscii;
-static ToAsciiOpt ToAscii;
-
-// Formatting and IO library call optimizations.
-static ErrorReportingOpt ErrorReporting;
-static ErrorReportingOpt ErrorReporting0(0);
-static ErrorReportingOpt ErrorReporting1(1);
-static PrintFOpt PrintF;
-static SPrintFOpt SPrintF;
-static FPrintFOpt FPrintF;
-static FWriteOpt FWrite;
-static FPutsOpt FPuts;
-static PutsOpt Puts;
-
-LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ if (CI->isNoBuiltin())
+ return nullptr;
+
LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
+ IRBuilder<> Builder(CI);
+ bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
- // Next check for intrinsics.
+ // Command-line parameter overrides function attribute.
+ if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
+ UnsafeFPShrink = EnableUnsafeFPShrink;
+ else if (Callee->hasFnAttribute("unsafe-fp-math")) {
+ // FIXME: This is the same problem as described in optimizeSqrt().
+ // If calls gain access to IR-level FMF, then use that instead of a
+ // function attribute.
+
+ // Check for unsafe-fp-math = true.
+ Attribute Attr = Callee->getFnAttribute("unsafe-fp-math");
+ if (Attr.getValueAsString() == "true")
+ UnsafeFPShrink = true;
+ }
+
+ // First, check for intrinsics.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ if (!isCallingConvC)
+ return nullptr;
switch (II->getIntrinsicID()) {
case Intrinsic::pow:
- return &Pow;
+ return optimizePow(CI, Builder);
case Intrinsic::exp2:
- return &Exp2;
+ return optimizeExp2(CI, Builder);
+ case Intrinsic::fabs:
+ return optimizeFabs(CI, Builder);
+ case Intrinsic::sqrt:
+ return optimizeSqrt(CI, Builder);
default:
- return nullptr;
+ return nullptr;
}
}
// Then check for known library functions.
if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ // We never change the calling convention.
+ if (!ignoreCallingConv(Func) && !isCallingConvC)
+ return nullptr;
switch (Func) {
- case LibFunc::strcat:
- return &StrCat;
- case LibFunc::strncat:
- return &StrNCat;
- case LibFunc::strchr:
- return &StrChr;
- case LibFunc::strrchr:
- return &StrRChr;
- case LibFunc::strcmp:
- return &StrCmp;
- case LibFunc::strncmp:
- return &StrNCmp;
- case LibFunc::strcpy:
- return &StrCpy;
- case LibFunc::stpcpy:
- return &StpCpy;
- case LibFunc::strncpy:
- return &StrNCpy;
- case LibFunc::strlen:
- return &StrLen;
- case LibFunc::strpbrk:
- return &StrPBrk;
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
- return &StrTo;
- case LibFunc::strspn:
- return &StrSpn;
- case LibFunc::strcspn:
- return &StrCSpn;
- case LibFunc::strstr:
- return &StrStr;
- case LibFunc::memcmp:
- return &MemCmp;
- case LibFunc::memcpy:
- return &MemCpy;
- case LibFunc::memmove:
- return &MemMove;
- case LibFunc::memset:
- return &MemSet;
- case LibFunc::cosf:
- case LibFunc::cos:
- case LibFunc::cosl:
- return &Cos;
- case LibFunc::sinpif:
- case LibFunc::sinpi:
- case LibFunc::cospif:
- case LibFunc::cospi:
- return &SinCosPi;
- case LibFunc::powf:
- case LibFunc::pow:
- case LibFunc::powl:
- return &Pow;
- case LibFunc::exp2l:
- case LibFunc::exp2:
- case LibFunc::exp2f:
- return &Exp2;
- case LibFunc::ffs:
- case LibFunc::ffsl:
- case LibFunc::ffsll:
- return &FFS;
- case LibFunc::abs:
- case LibFunc::labs:
- case LibFunc::llabs:
- return &Abs;
- case LibFunc::isdigit:
- return &IsDigit;
- case LibFunc::isascii:
- return &IsAscii;
- case LibFunc::toascii:
- return &ToAscii;
- case LibFunc::printf:
- return &PrintF;
- case LibFunc::sprintf:
- return &SPrintF;
- case LibFunc::fprintf:
- return &FPrintF;
- case LibFunc::fwrite:
- return &FWrite;
- case LibFunc::fputs:
- return &FPuts;
- case LibFunc::puts:
- return &Puts;
- case LibFunc::perror:
- return &ErrorReporting;
- case LibFunc::vfprintf:
- case LibFunc::fiprintf:
- return &ErrorReporting0;
- case LibFunc::fputc:
- return &ErrorReporting1;
- case LibFunc::ceil:
- case LibFunc::fabs:
- case LibFunc::floor:
- case LibFunc::rint:
- case LibFunc::round:
- case LibFunc::nearbyint:
- case LibFunc::trunc:
- if (hasFloatVersion(FuncName))
- return &UnaryDoubleFP;
- return nullptr;
- case LibFunc::acos:
- case LibFunc::acosh:
- case LibFunc::asin:
- case LibFunc::asinh:
- case LibFunc::atan:
- case LibFunc::atanh:
- case LibFunc::cbrt:
- case LibFunc::cosh:
- case LibFunc::exp:
- case LibFunc::exp10:
- case LibFunc::expm1:
- case LibFunc::log:
- case LibFunc::log10:
- case LibFunc::log1p:
- case LibFunc::log2:
- case LibFunc::logb:
- case LibFunc::sin:
- case LibFunc::sinh:
- case LibFunc::sqrt:
- case LibFunc::tan:
- case LibFunc::tanh:
- if (UnsafeFPShrink && hasFloatVersion(FuncName))
- return &UnsafeUnaryDoubleFP;
- return nullptr;
- case LibFunc::fmin:
- case LibFunc::fmax:
- if (hasFloatVersion(FuncName))
- return &BinaryDoubleFP;
- return nullptr;
- case LibFunc::memcpy_chk:
- return &MemCpyChk;
- default:
- return nullptr;
- }
- }
-
- // Finally check for fortified library calls.
- if (FuncName.endswith("_chk")) {
- if (FuncName == "__memmove_chk")
- return &MemMoveChk;
- else if (FuncName == "__memset_chk")
- return &MemSetChk;
- else if (FuncName == "__strcpy_chk")
- return &StrCpyChk;
- else if (FuncName == "__stpcpy_chk")
- return &StpCpyChk;
- else if (FuncName == "__strncpy_chk")
- return &StrNCpyChk;
- else if (FuncName == "__stpncpy_chk")
- return &StrNCpyChk;
+ case LibFunc::strcat:
+ return optimizeStrCat(CI, Builder);
+ case LibFunc::strncat:
+ return optimizeStrNCat(CI, Builder);
+ case LibFunc::strchr:
+ return optimizeStrChr(CI, Builder);
+ case LibFunc::strrchr:
+ return optimizeStrRChr(CI, Builder);
+ case LibFunc::strcmp:
+ return optimizeStrCmp(CI, Builder);
+ case LibFunc::strncmp:
+ return optimizeStrNCmp(CI, Builder);
+ case LibFunc::strcpy:
+ return optimizeStrCpy(CI, Builder);
+ case LibFunc::stpcpy:
+ return optimizeStpCpy(CI, Builder);
+ case LibFunc::strncpy:
+ return optimizeStrNCpy(CI, Builder);
+ case LibFunc::strlen:
+ return optimizeStrLen(CI, Builder);
+ case LibFunc::strpbrk:
+ return optimizeStrPBrk(CI, Builder);
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ return optimizeStrTo(CI, Builder);
+ case LibFunc::strspn:
+ return optimizeStrSpn(CI, Builder);
+ case LibFunc::strcspn:
+ return optimizeStrCSpn(CI, Builder);
+ case LibFunc::strstr:
+ return optimizeStrStr(CI, Builder);
+ case LibFunc::memcmp:
+ return optimizeMemCmp(CI, Builder);
+ case LibFunc::memcpy:
+ return optimizeMemCpy(CI, Builder);
+ case LibFunc::memmove:
+ return optimizeMemMove(CI, Builder);
+ case LibFunc::memset:
+ return optimizeMemSet(CI, Builder);
+ case LibFunc::cosf:
+ case LibFunc::cos:
+ case LibFunc::cosl:
+ return optimizeCos(CI, Builder);
+ case LibFunc::sinpif:
+ case LibFunc::sinpi:
+ case LibFunc::cospif:
+ case LibFunc::cospi:
+ return optimizeSinCosPi(CI, Builder);
+ case LibFunc::powf:
+ case LibFunc::pow:
+ case LibFunc::powl:
+ return optimizePow(CI, Builder);
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ return optimizeExp2(CI, Builder);
+ case LibFunc::fabsf:
+ case LibFunc::fabs:
+ case LibFunc::fabsl:
+ return optimizeFabs(CI, Builder);
+ case LibFunc::sqrtf:
+ case LibFunc::sqrt:
+ case LibFunc::sqrtl:
+ return optimizeSqrt(CI, Builder);
+ case LibFunc::ffs:
+ case LibFunc::ffsl:
+ case LibFunc::ffsll:
+ return optimizeFFS(CI, Builder);
+ case LibFunc::abs:
+ case LibFunc::labs:
+ case LibFunc::llabs:
+ return optimizeAbs(CI, Builder);
+ case LibFunc::isdigit:
+ return optimizeIsDigit(CI, Builder);
+ case LibFunc::isascii:
+ return optimizeIsAscii(CI, Builder);
+ case LibFunc::toascii:
+ return optimizeToAscii(CI, Builder);
+ case LibFunc::printf:
+ return optimizePrintF(CI, Builder);
+ case LibFunc::sprintf:
+ return optimizeSPrintF(CI, Builder);
+ case LibFunc::fprintf:
+ return optimizeFPrintF(CI, Builder);
+ case LibFunc::fwrite:
+ return optimizeFWrite(CI, Builder);
+ case LibFunc::fputs:
+ return optimizeFPuts(CI, Builder);
+ case LibFunc::puts:
+ return optimizePuts(CI, Builder);
+ case LibFunc::perror:
+ return optimizeErrorReporting(CI, Builder);
+ case LibFunc::vfprintf:
+ case LibFunc::fiprintf:
+ return optimizeErrorReporting(CI, Builder, 0);
+ case LibFunc::fputc:
+ return optimizeErrorReporting(CI, Builder, 1);
+ case LibFunc::ceil:
+ case LibFunc::floor:
+ case LibFunc::rint:
+ case LibFunc::round:
+ case LibFunc::nearbyint:
+ case LibFunc::trunc:
+ if (hasFloatVersion(FuncName))
+ return optimizeUnaryDoubleFP(CI, Builder, false);
+ return nullptr;
+ case LibFunc::acos:
+ case LibFunc::acosh:
+ case LibFunc::asin:
+ case LibFunc::asinh:
+ case LibFunc::atan:
+ case LibFunc::atanh:
+ case LibFunc::cbrt:
+ case LibFunc::cosh:
+ case LibFunc::exp:
+ case LibFunc::exp10:
+ case LibFunc::expm1:
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ case LibFunc::sin:
+ case LibFunc::sinh:
+ case LibFunc::tan:
+ case LibFunc::tanh:
+ if (UnsafeFPShrink && hasFloatVersion(FuncName))
+ return optimizeUnaryDoubleFP(CI, Builder, true);
+ return nullptr;
+ case LibFunc::fmin:
+ case LibFunc::fmax:
+ if (hasFloatVersion(FuncName))
+ return optimizeBinaryDoubleFP(CI, Builder);
+ return nullptr;
+ case LibFunc::memcpy_chk:
+ return optimizeMemCpyChk(CI, Builder);
+ case LibFunc::memmove_chk:
+ return optimizeMemMoveChk(CI, Builder);
+ case LibFunc::memset_chk:
+ return optimizeMemSetChk(CI, Builder);
+ case LibFunc::strcpy_chk:
+ return optimizeStrCpyChk(CI, Builder);
+ case LibFunc::stpcpy_chk:
+ return optimizeStpCpyChk(CI, Builder);
+ case LibFunc::stpncpy_chk:
+ case LibFunc::strncpy_chk:
+ return optimizeStrNCpyChk(CI, Builder);
+ default:
+ return nullptr;
+ }
}
return nullptr;
-
-}
-
-Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
- LibCallOptimization *LCO = lookupOptimization(CI);
- if (LCO) {
- IRBuilder<> Builder(CI);
- return LCO->optimizeCall(CI, DL, TLI, LCS, Builder);
- }
- return nullptr;
}
LibCallSimplifier::LibCallSimplifier(const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- bool UnsafeFPShrink) {
- Impl = new LibCallSimplifierImpl(DL, TLI, this, UnsafeFPShrink);
-}
-
-LibCallSimplifier::~LibCallSimplifier() {
- delete Impl;
-}
-
-Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->isNoBuiltin()) return nullptr;
- return Impl->optimizeCall(CI);
+ const TargetLibraryInfo *TLI) :
+ DL(DL),
+ TLI(TLI),
+ UnsafeFPShrink(false) {
}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
@@ -2312,8 +2215,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
I->eraseFromParent();
}
-}
-
// TODO:
// Additional cases that we need to add to this file:
//
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
new file mode 100644
index 0000000..aacc945
--- /dev/null
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -0,0 +1,525 @@
+//===- SymbolRewriter.cpp - Symbol Rewriter ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SymbolRewriter is an LLVM pass which can rewrite symbols transparently within
+// existing code. It is implemented as a compiler pass and is configured via a
+// YAML configuration file.
+//
+// The YAML configuration file format is as follows:
+//
+// RewriteMapFile := RewriteDescriptors
+// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors
+// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}'
+// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields
+// RewriteDescriptorField := FieldIdentifier ':' FieldValue ','
+// RewriteDescriptorType := Identifier
+// FieldIdentifier := Identifier
+// FieldValue := Identifier
+// Identifier := [0-9a-zA-Z]+
+//
+// Currently, the following descriptor types are supported:
+//
+// - function: (function rewriting)
+// + Source (original name of the function)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+// + Naked (boolean, whether the function is undecorated)
+// - global variable: (external linkage global variable rewriting)
+// + Source (original name of externally visible variable)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+// - global alias: (global alias rewriting)
+// + Source (original name of the aliased name)
+// + Target (explicit transformation)
+// + Transform (pattern transformation)
+//
+// Note that Source and exactly one of [Target, Transform] must be provided.
+//
+// New rewrite descriptors can be created. Adding a new rewrite descriptor
+// involves:
+//
+// a) extending the rewrite descriptor kind enumeration
+// (<anonymous>::RewriteDescriptor::RewriteDescriptorType)
+// b) implementing the new descriptor
+// (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor)
+// c) extending the rewrite map parser
+// (<anonymous>::RewriteMapParser::parseEntry)
+//
+// Enable symbol rewriting with the `-rewrite-symbols` option, and
+// specify the map file to use for the rewriting via the `-rewrite-map-file`
+// option.
+//
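+// As a sketch only, a map that follows the grammar above might combine an
+// explicit function rewrite with a pattern-based global variable rewrite
+// (all of the names below are made up purely for illustration):
+//
+//   function: {
+//     source: foo,
+//     target: bar,
+//   }
+//   global variable: {
+//     source: legacy_(.*),
+//     transform: modern_\1,
+//   }
+//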
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "symbol-rewriter"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
+
+using namespace llvm;
+
+static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
+ cl::desc("Symbol Rewrite Map"),
+ cl::value_desc("filename"));
+
+namespace llvm {
+namespace SymbolRewriter {
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(llvm::Module::*Get)(StringRef) const>
+class ExplicitRewriteDescriptor : public RewriteDescriptor {
+public:
+ const std::string Source;
+ const std::string Target;
+
+ ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
+ : RewriteDescriptor(DT), Source(Naked ? StringRef("\01" + S.str()) : S),
+ Target(T) {}
+
+ bool performOnModule(Module &M) override;
+
+ static bool classof(const RewriteDescriptor *RD) {
+ return RD->getType() == DT;
+ }
+};
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(llvm::Module::*Get)(StringRef) const>
+bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
+ bool Changed = false;
+ if (ValueType *S = (M.*Get)(Source)) {
+ if (Value *T = (M.*Get)(Target))
+ S->setValueName(T->getValueName());
+ else
+ S->setName(Target);
+ Changed = true;
+ }
+ return Changed;
+}
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(llvm::Module::*Get)(StringRef) const,
+ iterator_range<typename iplist<ValueType>::iterator> (llvm::Module::*Iterator)()>
+class PatternRewriteDescriptor : public RewriteDescriptor {
+public:
+ const std::string Pattern;
+ const std::string Transform;
+
+ PatternRewriteDescriptor(StringRef P, StringRef T)
+ : RewriteDescriptor(DT), Pattern(P), Transform(T) { }
+
+ bool performOnModule(Module &M) override;
+
+ static bool classof(const RewriteDescriptor *RD) {
+ return RD->getType() == DT;
+ }
+};
+
+template <RewriteDescriptor::Type DT, typename ValueType,
+ ValueType *(llvm::Module::*Get)(StringRef) const,
+ iterator_range<typename iplist<ValueType>::iterator> (llvm::Module::*Iterator)()>
+bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
+performOnModule(Module &M) {
+ bool Changed = false;
+ for (auto &C : (M.*Iterator)()) {
+ std::string Error;
+
+ std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
+ if (!Error.empty())
+    report_fatal_error("unable to transform " + C.getName() + " in " +
+ M.getModuleIdentifier() + ": " + Error);
+
+ if (Value *V = (M.*Get)(Name))
+ C.setValueName(V->getValueName());
+ else
+ C.setName(Name);
+
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// Represents a rewrite for an explicitly named (function) symbol. Both the
+/// source function name and target function name of the transformation are
+/// explicitly spelt out.
+typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function,
+ llvm::Function, &llvm::Module::getFunction>
+ ExplicitRewriteFunctionDescriptor;
+
+/// Represents a rewrite for an explicitly named (global variable) symbol. Both
+/// the source variable name and target variable name are spelt out. This
+/// applies only to module level variables.
+typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+ llvm::GlobalVariable,
+ &llvm::Module::getGlobalVariable>
+ ExplicitRewriteGlobalVariableDescriptor;
+
+/// Represents a rewrite for an explicitly named global alias. Both the source
+/// and target name are explicitly spelt out.
+typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
+ llvm::GlobalAlias,
+ &llvm::Module::getNamedAlias>
+ ExplicitRewriteNamedAliasDescriptor;
+
+/// Represents a rewrite for a regular expression based pattern for functions.
+/// A pattern for the function name and a transformation that derives the
+/// target function name from it together form the rewrite rule.
+typedef PatternRewriteDescriptor<RewriteDescriptor::Type::Function,
+ llvm::Function, &llvm::Module::getFunction,
+ &llvm::Module::functions>
+ PatternRewriteFunctionDescriptor;
+
+/// Represents a rewrite for a global variable based upon a matching pattern.
+/// Each global variable matching the provided pattern will be transformed as
+/// described in the transformation pattern for the target. Applies only to
+/// module level variables.
+typedef PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+ llvm::GlobalVariable,
+ &llvm::Module::getGlobalVariable,
+ &llvm::Module::globals>
+ PatternRewriteGlobalVariableDescriptor;
+
+/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
+/// aliases which match a given pattern. The provided transformation will be
+/// applied to each of the matching names.
+typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
+ llvm::GlobalAlias,
+ &llvm::Module::getNamedAlias,
+ &llvm::Module::aliases>
+ PatternRewriteNamedAliasDescriptor;
+
+bool RewriteMapParser::parse(const std::string &MapFile,
+ RewriteDescriptorList *DL) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping =
+ MemoryBuffer::getFile(MapFile);
+
+ if (!Mapping)
+ report_fatal_error("unable to read rewrite map '" + MapFile + "': " +
+ Mapping.getError().message());
+
+ if (!parse(*Mapping, DL))
+ report_fatal_error("unable to parse rewrite map '" + MapFile + "'");
+
+ return true;
+}
+
+bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile,
+ RewriteDescriptorList *DL) {
+ SourceMgr SM;
+ yaml::Stream YS(MapFile->getBuffer(), SM);
+
+ for (auto &Document : YS) {
+ yaml::MappingNode *DescriptorList;
+
+ // ignore empty documents
+ if (isa<yaml::NullNode>(Document.getRoot()))
+ continue;
+
+ DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot());
+ if (!DescriptorList) {
+ YS.printError(Document.getRoot(), "DescriptorList node must be a map");
+ return false;
+ }
+
+ for (auto &Descriptor : *DescriptorList)
+ if (!parseEntry(YS, Descriptor, DL))
+ return false;
+ }
+
+ return true;
+}
+
+bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
+ RewriteDescriptorList *DL) {
+ yaml::ScalarNode *Key;
+ yaml::MappingNode *Value;
+ SmallString<32> KeyStorage;
+ StringRef RewriteType;
+
+ Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
+ if (!Key) {
+ YS.printError(Entry.getKey(), "rewrite type must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
+ if (!Value) {
+ YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
+ return false;
+ }
+
+ RewriteType = Key->getValue(KeyStorage);
+ if (RewriteType.equals("function"))
+ return parseRewriteFunctionDescriptor(YS, Key, Value, DL);
+ else if (RewriteType.equals("global variable"))
+ return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL);
+ else if (RewriteType.equals("global alias"))
+ return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL);
+
+ YS.printError(Entry.getKey(), "unknown rewrite type");
+ return false;
+}
+
+bool RewriteMapParser::
+parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ bool Naked = false;
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+ YS.printError(Field.getKey(), "descriptor key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = Value->getValue(ValueStorage);
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = Value->getValue(ValueStorage);
+ } else if (KeyValue.equals("transform")) {
+ Transform = Value->getValue(ValueStorage);
+ } else if (KeyValue.equals("naked")) {
+ std::string Undecorated;
+
+ Undecorated = Value->getValue(ValueStorage);
+ Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
+ } else {
+ YS.printError(Field.getKey(), "unknown key for function");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ // TODO see if there is a more elegant solution to selecting the rewrite
+ // descriptor type
+ if (!Target.empty())
+ DL->push_back(new ExplicitRewriteFunctionDescriptor(Source, Target, Naked));
+ else
+ DL->push_back(new PatternRewriteFunctionDescriptor(Source, Transform));
+
+ return true;
+}
+
+bool RewriteMapParser::
+parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+      YS.printError(Field.getKey(), "descriptor key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = Value->getValue(ValueStorage);
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = Value->getValue(ValueStorage);
+ } else if (KeyValue.equals("transform")) {
+ Transform = Value->getValue(ValueStorage);
+ } else {
+      YS.printError(Field.getKey(), "unknown key for global variable");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ if (!Target.empty())
+ DL->push_back(new ExplicitRewriteGlobalVariableDescriptor(Source, Target,
+ /*Naked*/false));
+ else
+ DL->push_back(new PatternRewriteGlobalVariableDescriptor(Source,
+ Transform));
+
+ return true;
+}
+
+bool RewriteMapParser::
+parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
+ yaml::MappingNode *Descriptor,
+ RewriteDescriptorList *DL) {
+ std::string Source;
+ std::string Target;
+ std::string Transform;
+
+ for (auto &Field : *Descriptor) {
+ yaml::ScalarNode *Key;
+ yaml::ScalarNode *Value;
+ SmallString<32> KeyStorage;
+ SmallString<32> ValueStorage;
+ StringRef KeyValue;
+
+ Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+ if (!Key) {
+ YS.printError(Field.getKey(), "descriptor key must be a scalar");
+ return false;
+ }
+
+ Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+ if (!Value) {
+ YS.printError(Field.getValue(), "descriptor value must be a scalar");
+ return false;
+ }
+
+ KeyValue = Key->getValue(KeyStorage);
+ if (KeyValue.equals("source")) {
+ std::string Error;
+
+ Source = Value->getValue(ValueStorage);
+ if (!Regex(Source).isValid(Error)) {
+ YS.printError(Field.getKey(), "invalid regex: " + Error);
+ return false;
+ }
+ } else if (KeyValue.equals("target")) {
+ Target = Value->getValue(ValueStorage);
+ } else if (KeyValue.equals("transform")) {
+ Transform = Value->getValue(ValueStorage);
+ } else {
+      YS.printError(Field.getKey(), "unknown key for global alias");
+ return false;
+ }
+ }
+
+ if (Transform.empty() == Target.empty()) {
+ YS.printError(Descriptor,
+ "exactly one of transform or target must be specified");
+ return false;
+ }
+
+ if (!Target.empty())
+ DL->push_back(new ExplicitRewriteNamedAliasDescriptor(Source, Target,
+ /*Naked*/false));
+ else
+ DL->push_back(new PatternRewriteNamedAliasDescriptor(Source, Transform));
+
+ return true;
+}
+}
+}
+
+namespace {
+class RewriteSymbols : public ModulePass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ RewriteSymbols();
+ RewriteSymbols(SymbolRewriter::RewriteDescriptorList &DL);
+
+ bool runOnModule(Module &M) override;
+
+private:
+ void loadAndParseMapFiles();
+
+ SymbolRewriter::RewriteDescriptorList Descriptors;
+};
+
+char RewriteSymbols::ID = 0;
+
+RewriteSymbols::RewriteSymbols() : ModulePass(ID) {
+ initializeRewriteSymbolsPass(*PassRegistry::getPassRegistry());
+ loadAndParseMapFiles();
+}
+
+RewriteSymbols::RewriteSymbols(SymbolRewriter::RewriteDescriptorList &DL)
+ : ModulePass(ID) {
+ std::swap(Descriptors, DL);
+}
+
+bool RewriteSymbols::runOnModule(Module &M) {
+ bool Changed;
+
+ Changed = false;
+ for (auto &Descriptor : Descriptors)
+ Changed |= Descriptor.performOnModule(M);
+
+ return Changed;
+}
+
+void RewriteSymbols::loadAndParseMapFiles() {
+ const std::vector<std::string> MapFiles(RewriteMapFiles);
+ SymbolRewriter::RewriteMapParser parser;
+
+ for (const auto &MapFile : MapFiles)
+ parser.parse(MapFile, &Descriptors);
+}
+}
+
+INITIALIZE_PASS(RewriteSymbols, "rewrite-symbols", "Rewrite Symbols", false,
+ false)
+
+ModulePass *llvm::createRewriteSymbolsPass() { return new RewriteSymbols(); }
+
+ModulePass *
+llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) {
+ return new RewriteSymbols(DL);
+}
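
As a usage sketch only (none of this is part of the patch itself), the new pass
can also be driven programmatically through the legacy pass manager of this
release; the helper name rewriteModule is invented here, and it is assumed that
createRewriteSymbolsPass and the RewriteDescriptorList type are exposed by the
SymbolRewriter.h header included above:

  // Minimal sketch: run the symbol rewriter over a module using a descriptor
  // list that has already been populated (e.g. via RewriteMapParser::parse).
  // Note that the RewriteSymbols constructor swaps the list into the pass, so
  // DL is consumed by this call.
  #include "llvm/IR/Module.h"
  #include "llvm/PassManager.h"
  #include "llvm/Transforms/Utils/SymbolRewriter.h"

  static bool rewriteModule(llvm::Module &M,
                            llvm::SymbolRewriter::RewriteDescriptorList &DL) {
    llvm::PassManager PM;
    PM.add(llvm::createRewriteSymbolsPass(DL));
    return PM.run(M);
  }

From the command line, as the header comment of the new file notes, the same
effect is obtained by combining -rewrite-symbols with -rewrite-map-file=<map>.
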
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 0f20e6d..a2f69d1 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -210,8 +210,10 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
// Remap attached metadata.
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
I->getAllMetadata(MDs);
- for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
- MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *>>::iterator
+ MI = MDs.begin(),
+ ME = MDs.end();
+ MI != ME; ++MI) {
MDNode *Old = MI->second;
MDNode *New = MapValue(Old, VMap, Flags, TypeMapper, Materializer);
if (New != Old)
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 28ec83b..b4991bc 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -391,8 +391,6 @@ namespace {
Instruction *&InsertionPt,
Instruction *I, Instruction *J);
- void combineMetadata(Instruction *K, const Instruction *J);
-
bool vectorizeBB(BasicBlock &BB) {
if (skipOptnoneFunction(BB))
return false;
@@ -687,6 +685,8 @@ namespace {
case Intrinsic::trunc:
case Intrinsic::floor:
case Intrinsic::fabs:
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
return Config.VectorizeMath;
case Intrinsic::bswap:
case Intrinsic::ctpop:
@@ -2964,31 +2964,6 @@ namespace {
}
}
- // When the first instruction in each pair is cloned, it will inherit its
- // parent's metadata. This metadata must be combined with that of the other
- // instruction in a safe way.
- void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) {
- SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata;
- K->getAllMetadataOtherThanDebugLoc(Metadata);
- for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
- unsigned Kind = Metadata[i].first;
- MDNode *JMD = J->getMetadata(Kind);
- MDNode *KMD = Metadata[i].second;
-
- switch (Kind) {
- default:
- K->setMetadata(Kind, nullptr); // Remove unknown metadata
- break;
- case LLVMContext::MD_tbaa:
- K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
- break;
- case LLVMContext::MD_fpmath:
- K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
- break;
- }
- }
- }
-
// This function fuses the chosen instruction pairs into vector instructions,
  // taking care to preserve any needed scalar outputs, and then it reorders the
// remaining instructions as needed (users of the first member of the pair
@@ -3138,7 +3113,13 @@ namespace {
if (!isa<StoreInst>(K))
K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
- combineMetadata(K, H);
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_fpmath
+ };
+ combineMetadata(K, H, KnownIDs);
K->intersectOptionalDataWith(H);
for (unsigned o = 0; o < NumOperands; ++o)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index cb8a41d..35b2ecf 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -54,7 +54,10 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
@@ -107,8 +110,8 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."));
static cl::opt<unsigned>
-VectorizationUnroll("force-vector-unroll", cl::init(0), cl::Hidden,
- cl::desc("Sets the vectorization unroll count. "
+VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
"Zero is autoselect."));
static cl::opt<bool>
@@ -156,17 +159,17 @@ static cl::opt<unsigned> ForceTargetNumVectorRegs(
"force-target-num-vector-regs", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's number of vector registers."));
-/// Maximum vectorization unroll count.
-static const unsigned MaxUnrollFactor = 16;
+/// Maximum vectorization interleave count.
+static const unsigned MaxInterleaveFactor = 16;
-static cl::opt<unsigned> ForceTargetMaxScalarUnrollFactor(
- "force-target-max-scalar-unroll", cl::init(0), cl::Hidden,
- cl::desc("A flag that overrides the target's max unroll factor for scalar "
- "loops."));
+static cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor(
+ "force-target-max-scalar-interleave", cl::init(0), cl::Hidden,
+ cl::desc("A flag that overrides the target's max interleave factor for "
+ "scalar loops."));
-static cl::opt<unsigned> ForceTargetMaxVectorUnrollFactor(
- "force-target-max-vector-unroll", cl::init(0), cl::Hidden,
- cl::desc("A flag that overrides the target's max unroll factor for "
+static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
+ "force-target-max-vector-interleave", cl::init(0), cl::Hidden,
+ cl::desc("A flag that overrides the target's max interleave factor for "
"vectorized loops."));
static cl::opt<unsigned> ForceTargetInstructionCost(
@@ -203,11 +206,17 @@ static cl::opt<bool> EnableCondStoresVectorization(
"enable-cond-stores-vec", cl::init(false), cl::Hidden,
cl::desc("Enable if predication of stores during vectorization."));
+static cl::opt<unsigned> MaxNestedScalarReductionUF(
+ "max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden,
+ cl::desc("The maximum unroll factor to use when unrolling a scalar "
+ "reduction in a nested loop."));
+
namespace {
// Forward declarations.
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
+class LoopVectorizeHints;
/// Optimization analysis message produced during vectorization. Messages inform
/// the user why vectorization did not occur.
@@ -409,6 +418,8 @@ protected:
LoopInfo *LI;
/// Dominator Tree.
DominatorTree *DT;
+ /// Alias Analysis.
+ AliasAnalysis *AA;
/// Data Layout.
const DataLayout *DL;
/// Target Library Info.
@@ -518,6 +529,36 @@ static std::string getDebugLocString(const Loop *L) {
}
#endif
+/// \brief Propagate known metadata from one instruction to another.
+static void propagateMetadata(Instruction *To, const Instruction *From) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
+ From->getAllMetadataOtherThanDebugLoc(Metadata);
+
+ for (auto M : Metadata) {
+ unsigned Kind = M.first;
+
+    // These are safe to transfer. This holds for TBAA even when we if-convert:
+    // had that metadata carried a control dependency on the condition, and
+    // thus actually aliased some other non-speculated memory access when the
+    // condition was false, the runtime overlap checks would catch it.
+ if (Kind != LLVMContext::MD_tbaa &&
+ Kind != LLVMContext::MD_alias_scope &&
+ Kind != LLVMContext::MD_noalias &&
+ Kind != LLVMContext::MD_fpmath)
+ continue;
+
+ To->setMetadata(Kind, M.second);
+ }
+}
+
+/// \brief Propagate known metadata from one instruction to a vector of others.
+static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *From) {
+ for (Value *V : To)
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ propagateMetadata(I, From);
+}
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -539,9 +580,9 @@ public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI,
- Function *F)
+ AliasAnalysis *AA, Function *F)
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), TheFunction(F), Induction(nullptr),
+ DT(DT), TLI(TLI), AA(AA), TheFunction(F), Induction(nullptr),
WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
}
@@ -629,11 +670,12 @@ public:
Ends.clear();
IsWritePtr.clear();
DependencySetId.clear();
+ AliasSetId.clear();
}
/// Insert a pointer and calculate the start and end SCEVs.
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId, ValueToValueMap &Strides);
+ unsigned DepSetId, unsigned ASId, ValueToValueMap &Strides);
/// This flag indicates if we need to add the runtime check.
bool Need;
@@ -648,6 +690,8 @@ public:
/// Holds the id of the set of pointers that could be dependent because of a
/// shared underlying object.
SmallVector<unsigned, 2> DependencySetId;
+ /// Holds the id of the disjoint alias set to which this pointer belongs.
+ SmallVector<unsigned, 2> AliasSetId;
};
/// A struct for saving information about induction variables.
@@ -746,7 +790,7 @@ private:
/// Return true if all of the instructions in the block can be speculatively
/// executed. \p SafePtrs is a list of addresses that are known to be legal
/// and we know that we can read from them without segfault.
- bool blockCanBePredicated(BasicBlock *BB, SmallPtrSet<Value *, 8>& SafePtrs);
+ bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
/// Returns True, if 'Phi' is the kind of reduction variable for type
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
@@ -792,6 +836,8 @@ private:
DominatorTree *DT;
/// Target Library Info.
TargetLibraryInfo *TLI;
+ /// Alias analysis.
+ AliasAnalysis *AA;
/// Parent function
Function *TheFunction;
@@ -839,8 +885,13 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- const DataLayout *DL, const TargetLibraryInfo *TLI)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {}
+ const DataLayout *DL, const TargetLibraryInfo *TLI,
+ AssumptionTracker *AT, const Function *F,
+ const LoopVectorizeHints *Hints)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI),
+ TheFunction(F), Hints(Hints) {
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+ }
/// Information about vectorization costs
struct VectorizationFactor {
@@ -851,9 +902,7 @@ public:
/// This method checks every power of two up to VF. If UserVF is not ZERO
/// then this vectorization factor will be selected if vectorization is
/// possible.
- VectorizationFactor selectVectorizationFactor(bool OptForSize,
- unsigned UserVF,
- bool ForceVectorization);
+ VectorizationFactor selectVectorizationFactor(bool OptForSize);
/// \return The size (in bits) of the widest type in the code that
/// needs to be vectorized. We ignore values that remain scalar such as
@@ -865,8 +914,7 @@ public:
/// based on register pressure and other parameters.
/// VF and LoopCost are the selected vectorization factor and the cost of the
/// selected VF.
- unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF,
- unsigned LoopCost);
+ unsigned selectUnrollFactor(bool OptForSize, unsigned VF, unsigned LoopCost);
/// \brief A struct that represents some properties of the register usage
/// of a loop.
@@ -902,6 +950,19 @@ private:
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
+ /// Report an analysis message to assist the user in diagnosing loops that are
+ /// not vectorized.
+ void emitAnalysis(Report &Message) {
+ DebugLoc DL = TheLoop->getStartLoc();
+ if (Instruction *I = Message.getInstr())
+ DL = I->getDebugLoc();
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
+ *TheFunction, DL, Message.str());
+ }
+
+ /// Values used only by @llvm.assume calls.
+ SmallPtrSet<const Value *, 32> EphValues;
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
@@ -916,11 +977,59 @@ private:
const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
+ const Function *TheFunction;
+ // Loop Vectorize Hint.
+ const LoopVectorizeHints *Hints;
};
/// Utility class for getting and setting loop vectorizer hints in the form
/// of loop metadata.
+/// This class keeps a number of loop annotations locally (as member variables)
+/// and can, upon request, write them back as metadata on the loop. It will
+/// initially scan the loop for existing metadata, and will update the local
+/// values based on information in the loop.
+/// We cannot write all values to metadata, as the mere presence of some info,
+/// for example 'force', means a decision has been made. So, we need to be
+/// careful NOT to add them if the user hasn't specifically asked for them.
class LoopVectorizeHints {
+ enum HintKind {
+ HK_WIDTH,
+ HK_UNROLL,
+ HK_FORCE
+ };
+
+ /// Hint - associates name and validation with the hint value.
+ struct Hint {
+ const char * Name;
+ unsigned Value; // This may have to change for non-numeric values.
+ HintKind Kind;
+
+ Hint(const char * Name, unsigned Value, HintKind Kind)
+ : Name(Name), Value(Value), Kind(Kind) { }
+
+ bool validate(unsigned Val) {
+ switch (Kind) {
+ case HK_WIDTH:
+ return isPowerOf2_32(Val) && Val <= MaxVectorWidth;
+ case HK_UNROLL:
+ return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
+ case HK_FORCE:
+ return (Val <= 1);
+ }
+ return false;
+ }
+ };
+
+ /// Vectorization width.
+ Hint Width;
+ /// Vectorization interleave factor.
+ Hint Interleave;
+ /// Vectorization forced
+ Hint Force;
+
+ /// Return the loop metadata prefix.
+ static StringRef Prefix() { return "llvm.loop."; }
+
public:
enum ForceKind {
FK_Undefined = -1, ///< Not selected.
@@ -928,88 +1037,57 @@ public:
FK_Enabled = 1, ///< Forcing enabled.
};
- LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
- : Width(VectorizationFactor),
- Unroll(DisableUnrolling),
- Force(FK_Undefined),
- LoopID(L->getLoopID()) {
- getHints(L);
- // force-vector-unroll overrides DisableUnrolling.
- if (VectorizationUnroll.getNumOccurrences() > 0)
- Unroll = VectorizationUnroll;
-
- DEBUG(if (DisableUnrolling && Unroll == 1) dbgs()
- << "LV: Unrolling disabled by the pass manager\n");
- }
+ LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
+ : Width("vectorize.width", VectorizationFactor, HK_WIDTH),
+ Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
+ Force("vectorize.enable", FK_Undefined, HK_FORCE),
+ TheLoop(L) {
+ // Populate values with existing loop metadata.
+ getHintsFromMetadata();
- /// Return the loop vectorizer metadata prefix.
- static StringRef Prefix() { return "llvm.loop.vectorize."; }
+ // force-vector-interleave overrides DisableInterleaving.
+ if (VectorizationInterleave.getNumOccurrences() > 0)
+ Interleave.Value = VectorizationInterleave;
- MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) const {
- SmallVector<Value*, 2> Vals;
- Vals.push_back(MDString::get(Context, Name));
- Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V));
- return MDNode::get(Context, Vals);
+ DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
+ << "LV: Interleaving disabled by the pass manager\n");
}
/// Mark the loop L as already vectorized by setting the width to 1.
- void setAlreadyVectorized(Loop *L) {
- LLVMContext &Context = L->getHeader()->getContext();
-
- Width = 1;
-
- // Create a new loop id with one more operand for the already_vectorized
- // hint. If the loop already has a loop id then copy the existing operands.
- SmallVector<Value*, 4> Vals(1);
- if (LoopID)
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i)
- Vals.push_back(LoopID->getOperand(i));
-
- Vals.push_back(createHint(Context, Twine(Prefix(), "width").str(), Width));
- Vals.push_back(createHint(Context, Twine(Prefix(), "unroll").str(), 1));
-
- MDNode *NewLoopID = MDNode::get(Context, Vals);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
-
- L->setLoopID(NewLoopID);
- if (LoopID)
- LoopID->replaceAllUsesWith(NewLoopID);
-
- LoopID = NewLoopID;
+ void setAlreadyVectorized() {
+ Width.Value = Interleave.Value = 1;
+ Hint Hints[] = {Width, Interleave};
+ writeHintsToMetadata(Hints);
}
+ /// Dumps all the hint information.
std::string emitRemark() const {
Report R;
- R << "vectorization ";
- switch (Force) {
- case LoopVectorizeHints::FK_Disabled:
- R << "is explicitly disabled";
- break;
- case LoopVectorizeHints::FK_Enabled:
- R << "is explicitly enabled";
- if (Width != 0 && Unroll != 0)
- R << " with width " << Width << " and interleave count " << Unroll;
- else if (Width != 0)
- R << " with width " << Width;
- else if (Unroll != 0)
- R << " with interleave count " << Unroll;
- break;
- case LoopVectorizeHints::FK_Undefined:
- R << "was not specified";
- break;
+ if (Force.Value == LoopVectorizeHints::FK_Disabled)
+ R << "vectorization is explicitly disabled";
+ else {
+ R << "use -Rpass-analysis=loop-vectorize for more info";
+ if (Force.Value == LoopVectorizeHints::FK_Enabled) {
+ R << " (Force=true";
+ if (Width.Value != 0)
+ R << ", Vector Width=" << Width.Value;
+ if (Interleave.Value != 0)
+ R << ", Interleave Count=" << Interleave.Value;
+ R << ")";
+ }
}
+
return R.str();
}
- unsigned getWidth() const { return Width; }
- unsigned getUnroll() const { return Unroll; }
- enum ForceKind getForce() const { return Force; }
- MDNode *getLoopID() const { return LoopID; }
+ unsigned getWidth() const { return Width.Value; }
+ unsigned getInterleave() const { return Interleave.Value; }
+ enum ForceKind getForce() const { return (ForceKind)Force.Value; }
private:
- /// Find hints specified in the loop metadata.
- void getHints(const Loop *L) {
+ /// Find hints specified in the loop metadata and update local values.
+ void getHintsFromMetadata() {
+ MDNode *LoopID = TheLoop->getLoopID();
if (!LoopID)
return;
@@ -1037,55 +1115,111 @@ private:
if (!S)
continue;
- // Check if the hint starts with the vectorizer prefix.
- StringRef Hint = S->getString();
- if (!Hint.startswith(Prefix()))
- continue;
- // Remove the prefix.
- Hint = Hint.substr(Prefix().size(), StringRef::npos);
-
+ // Check if the hint starts with the loop metadata prefix.
+ StringRef Name = S->getString();
if (Args.size() == 1)
- getHint(Hint, Args[0]);
+ setHint(Name, Args[0]);
}
}
- // Check string hint with one operand.
- void getHint(StringRef Hint, Value *Arg) {
+ /// Checks string hint with one operand and set value if valid.
+ void setHint(StringRef Name, Value *Arg) {
+ if (!Name.startswith(Prefix()))
+ return;
+ Name = Name.substr(Prefix().size(), StringRef::npos);
+
const ConstantInt *C = dyn_cast<ConstantInt>(Arg);
if (!C) return;
unsigned Val = C->getZExtValue();
- if (Hint == "width") {
- if (isPowerOf2_32(Val) && Val <= MaxVectorWidth)
- Width = Val;
- else
- DEBUG(dbgs() << "LV: ignoring invalid width hint metadata\n");
- } else if (Hint == "unroll") {
- if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor)
- Unroll = Val;
- else
- DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
- } else if (Hint == "enable") {
- if (C->getBitWidth() == 1)
- Force = Val == 1 ? LoopVectorizeHints::FK_Enabled
- : LoopVectorizeHints::FK_Disabled;
- else
- DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n");
- } else {
- DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
+ Hint *Hints[] = {&Width, &Interleave, &Force};
+ for (auto H : Hints) {
+ if (Name == H->Name) {
+ if (H->validate(Val))
+ H->Value = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
+ break;
+ }
}
}
- /// Vectorization width.
- unsigned Width;
- /// Vectorization unroll factor.
- unsigned Unroll;
- /// Vectorization forced
- enum ForceKind Force;
+ /// Create a new hint from name / value pair.
+ MDNode *createHintMetadata(StringRef Name, unsigned V) const {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ Value *Vals[] = {MDString::get(Context, Name),
+ ConstantInt::get(Type::getInt32Ty(Context), V)};
+ return MDNode::get(Context, Vals);
+ }
- MDNode *LoopID;
+ /// Matches metadata with hint name.
+ bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
+ MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
+ if (!Name)
+ return false;
+
+ for (auto H : HintTypes)
+ if (Name->getString().endswith(H.Name))
+ return true;
+ return false;
+ }
+
+ /// Sets current hints into loop metadata, keeping other values intact.
+ void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
+ if (HintTypes.size() == 0)
+ return;
+
+ // Reserve the first element to LoopID (see below).
+ SmallVector<Value*, 4> Vals(1);
+ // If the loop already has metadata, then ignore the existing operands.
+ MDNode *LoopID = TheLoop->getLoopID();
+ if (LoopID) {
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
+ // If node in update list, ignore old value.
+ if (!matchesHintMetadataName(Node, HintTypes))
+ Vals.push_back(Node);
+ }
+ }
+
+ // Now, add the missing hints.
+ for (auto H : HintTypes)
+ Vals.push_back(
+ createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
+
+ // Replace current metadata node with new one.
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+ MDNode *NewLoopID = MDNode::get(Context, Vals);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+
+ TheLoop->setLoopID(NewLoopID);
+ if (LoopID)
+ LoopID->replaceAllUsesWith(NewLoopID);
+ LoopID = NewLoopID;
+ }
+
+ /// The loop these hints belong to.
+ const Loop *TheLoop;
};
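
For reference, the hint names assembled above from Prefix() and the per-hint
names ("llvm.loop.vectorize.width", "llvm.loop.interleave.count",
"llvm.loop.vectorize.enable") end up as operands of the loop's llvm.loop
metadata node. A rough sketch of the IR shape after setAlreadyVectorized(), in
the value-based metadata syntax still used at this revision (node numbers are
illustrative):

  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
  ...
  !0 = metadata !{metadata !0, metadata !1, metadata !2}
  !1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
  !2 = metadata !{metadata !"llvm.loop.interleave.count", i32 1}

Operand 0 of !0 refers to the node itself, which keeps the loop id distinct;
writeHintsToMetadata() preserves operands whose names do not match the hints
being written and replaces the ones that do.
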
+static void emitMissedWarning(Function *F, Loop *L,
+ const LoopVectorizeHints &LH) {
+ emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), LH.emitRemark());
+
+ if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
+ if (LH.getWidth() != 1)
+ emitLoopVectorizeWarning(
+ F->getContext(), *F, L->getStartLoc(),
+ "failed explicitly specified loop vectorization");
+ else if (LH.getInterleave() != 1)
+ emitLoopInterleaveWarning(
+ F->getContext(), *F, L->getStartLoc(),
+ "failed explicitly specified loop interleaving");
+ }
+}
+
static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
if (L.empty())
return V.push_back(&L);
@@ -1113,6 +1247,8 @@ struct LoopVectorize : public FunctionPass {
DominatorTree *DT;
BlockFrequencyInfo *BFI;
TargetLibraryInfo *TLI;
+ AliasAnalysis *AA;
+ AssumptionTracker *AT;
bool DisableUnrolling;
bool AlwaysVectorize;
@@ -1127,6 +1263,8 @@ struct LoopVectorize : public FunctionPass {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BFI = &getAnalysis<BlockFrequencyInfo>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ AT = &getAnalysis<AssumptionTracker>();
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
@@ -1183,7 +1321,7 @@ struct LoopVectorize : public FunctionPass {
: (Hints.getForce() == LoopVectorizeHints::FK_Enabled
? "enabled"
: "?")) << " width=" << Hints.getWidth()
- << " unroll=" << Hints.getUnroll() << "\n");
+ << " unroll=" << Hints.getInterleave() << "\n");
// Function containing loop
Function *F = L->getHeader()->getParent();
@@ -1210,7 +1348,7 @@ struct LoopVectorize : public FunctionPass {
return false;
}
- if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) {
+ if (Hints.getWidth() == 1 && Hints.getInterleave() == 1) {
DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
emitOptimizationRemarkAnalysis(
F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
@@ -1221,8 +1359,7 @@ struct LoopVectorize : public FunctionPass {
// Check the loop for a trip count threshold:
// do not vectorize loops with a tiny trip count.
- BasicBlock *Latch = L->getLoopLatch();
- const unsigned TC = SE->getSmallConstantTripCount(L, Latch);
+ const unsigned TC = SE->getSmallConstantTripCount(L);
if (TC > 0u && TC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
<< "This loop is not worth vectorizing.");
@@ -1238,16 +1375,16 @@ struct LoopVectorize : public FunctionPass {
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, F);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
- emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
- L->getStartLoc(), Hints.emitRemark());
+ emitMissedWarning(F, L, Hints);
return false;
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AT, F,
+ &Hints);
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -1276,20 +1413,17 @@ struct LoopVectorize : public FunctionPass {
emitOptimizationRemarkAnalysis(
F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
"loop not vectorized due to NoImplicitFloat attribute");
- emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
- L->getStartLoc(), Hints.emitRemark());
+ emitMissedWarning(F, L, Hints);
return false;
}
// Select the optimal vectorization factor.
const LoopVectorizationCostModel::VectorizationFactor VF =
- CM.selectVectorizationFactor(OptForSize, Hints.getWidth(),
- Hints.getForce() ==
- LoopVectorizeHints::FK_Enabled);
+ CM.selectVectorizationFactor(OptForSize);
// Select the unroll factor.
const unsigned UF =
- CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost);
+ CM.selectUnrollFactor(OptForSize, VF.Width, VF.Cost);
DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
<< DebugLocStr << '\n');
@@ -1330,13 +1464,14 @@ struct LoopVectorize : public FunctionPass {
}
// Mark the loop as already vectorized to avoid vectorizing again.
- Hints.setAlreadyVectorized(L);
+ Hints.setAlreadyVectorized();
DEBUG(verifyFunction(*L->getHeader()->getParent()));
return true;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addRequired<BlockFrequencyInfo>();
@@ -1344,8 +1479,10 @@ struct LoopVectorize : public FunctionPass {
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<AliasAnalysis>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AliasAnalysis>();
}
};
@@ -1401,7 +1538,7 @@ static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
void LoopVectorizationLegality::RuntimePointerCheck::insert(
ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- ValueToValueMap &Strides) {
+ unsigned ASId, ValueToValueMap &Strides) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
@@ -1413,6 +1550,7 @@ void LoopVectorizationLegality::RuntimePointerCheck::insert(
Ends.push_back(ScEnd);
IsWritePtr.push_back(WritePtr);
DependencySetId.push_back(DepSetId);
+ AliasSetId.push_back(ASId);
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
@@ -1719,7 +1857,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Value *VecPtr = Builder.CreateBitCast(PartPtr,
DataTy->getPointerTo(AddressSpace));
- Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
+ StoreInst *NewSI =
+ Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
+ propagateMetadata(NewSI, SI);
}
return;
}
@@ -1740,9 +1880,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Value *VecPtr = Builder.CreateBitCast(PartPtr,
DataTy->getPointerTo(AddressSpace));
- Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
- cast<LoadInst>(LI)->setAlignment(Alignment);
- Entry[Part] = Reverse ? reverseVector(LI) : LI;
+ LoadInst *NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
+ propagateMetadata(NewLI, LI);
+ Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
}
}
@@ -1956,6 +2096,9 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
// Only need to check pointers between two different dependency sets.
if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
continue;
+ // Only need to check pointers in the same alias set.
+ if (PtrRtCheck->AliasSetId[i] != PtrRtCheck->AliasSetId[j])
+ continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
@@ -2424,7 +2567,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
LoopScalarBody = OldBasicBlock;
LoopVectorizeHints Hints(Lp, true);
- Hints.setAlreadyVectorized(Lp);
+ Hints.setAlreadyVectorized();
}
/// This function returns the identity element (or neutral element) for
@@ -2838,7 +2981,7 @@ void InnerLoopVectorizer::fixLCSSAPHIs() {
LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
LoopMiddleBlock);
}
-}
+}
InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
@@ -3105,21 +3248,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
for (unsigned Part = 0; Part < UF; ++Part) {
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
- // Update the NSW, NUW and Exact flags. Notice: V can be an Undef.
- BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V);
- if (VecOp && isa<OverflowingBinaryOperator>(BinOp)) {
- VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
- VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
- }
- if (VecOp && isa<PossiblyExactOperator>(VecOp))
- VecOp->setIsExact(BinOp->isExact());
-
- // Copy the fast-math flags.
- if (VecOp && isa<FPMathOperator>(V))
- VecOp->setFastMathFlags(it->getFastMathFlags());
+ if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
+ VecOp->copyIRFlags(BinOp);
Entry[Part] = V;
}
+
+ propagateMetadata(Entry, it);
break;
}
case Instruction::Select: {
@@ -3147,6 +3282,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Op0[Part],
Op1[Part]);
}
+
+ propagateMetadata(Entry, it);
break;
}
@@ -3166,6 +3303,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
Entry[Part] = C;
}
+
+ propagateMetadata(Entry, it);
break;
}
@@ -3198,6 +3337,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
+ propagateMetadata(Entry, it);
break;
}
/// Vectorize casts.
@@ -3207,6 +3347,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
+ propagateMetadata(Entry, it);
break;
}
@@ -3221,6 +3362,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
assert(ID && "Not an intrinsic call!");
switch (ID) {
+ case Intrinsic::assume:
case Intrinsic::lifetime_end:
case Intrinsic::lifetime_start:
scalarizeInstruction(it);
@@ -3244,6 +3386,8 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Function *F = Intrinsic::getDeclaration(M, ID, Tys);
Entry[Part] = Builder.CreateCall(F, Args);
}
+
+ propagateMetadata(Entry, it);
break;
}
break;
@@ -3284,7 +3428,7 @@ void InnerLoopVectorizer::updateAnalysis() {
DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]);
DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
- DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
+ DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]);
DEBUG(DT->verifyDomTree());
}
@@ -3460,7 +3604,7 @@ static Type* getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
/// \brief Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
- SmallPtrSet<Value *, 4> &Reductions) {
+ SmallPtrSetImpl<Value *> &Reductions) {
// Reduction instructions are allowed to have exit users. All other
// instructions must not have external users.
if (!Reductions.count(Inst))
@@ -3515,8 +3659,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// identified reduction value with an outside user.
if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
continue;
- emitAnalysis(Report(it) << "value that could not be identified as "
- "reduction is used outside the loop");
+ emitAnalysis(Report(it) << "value could not be identified as "
+ "an induction or reduction variable");
return false;
}
@@ -3601,7 +3745,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- emitAnalysis(Report(it) << "unvectorizable operation");
+ emitAnalysis(Report(it) << "value that could not be identified as "
+ "reduction is used outside the loop");
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
}// end of PHI handling
@@ -3858,19 +4003,22 @@ public:
/// \brief Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
- AccessAnalysis(const DataLayout *Dl, DepCandidates &DA) :
- DL(Dl), DepCands(DA), AreAllWritesIdentified(true),
- AreAllReadsIdentified(true), IsRTCheckNeeded(false) {}
+ AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) :
+ DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
/// \brief Register a load and whether it is only read from.
- void addLoad(Value *Ptr, bool IsReadOnly) {
+ void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, false));
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
/// \brief Register a store.
- void addStore(Value *Ptr) {
+ void addStore(AliasAnalysis::Location &Loc) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, true));
}
@@ -3884,10 +4032,7 @@ public:
/// \brief Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
void buildDependenceSets() {
- // Process read-write pointers first.
- processMemAccesses(false);
- // Next, process read pointers.
- processMemAccesses(true);
+ processMemAccesses();
}
bool isRTCheckNeeded() { return IsRTCheckNeeded; }
@@ -3899,40 +4044,31 @@ public:
private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
- typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
- /// \brief Go over all memory access or only the deferred ones if
- /// \p UseDeferred is true and check whether runtime pointer checks are needed
- /// and build sets of dependency check candidates.
- void processMemAccesses(bool UseDeferred);
+ /// \brief Go over all memory accesses and check whether runtime pointer
+ /// checks are needed and build sets of dependency check candidates.
+ void processMemAccesses();
/// Set of all accesses.
PtrAccessSet Accesses;
- /// Set of access to check after all writes have been processed.
- PtrAccessSet DeferredAccesses;
-
- /// Map of pointers to last access encountered.
- UnderlyingObjToAccessMap ObjToLastAccess;
-
/// Set of accesses that need a further dependence check.
MemAccessInfoSet CheckDeps;
/// Set of pointers that are read only.
SmallPtrSet<Value*, 16> ReadOnlyPtr;
- /// Set of underlying objects already written to.
- SmallPtrSet<Value*, 16> WriteObjects;
-
const DataLayout *DL;
+ /// An alias set tracker to partition the access set by underlying object and
+ /// intrinsic property (such as TBAA metadata).
+ AliasSetTracker AST;
+
/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identifies which sets really need a
/// dependence check.
DepCandidates &DepCands;
- bool AreAllWritesIdentified;
- bool AreAllReadsIdentified;
bool IsRTCheckNeeded;
};
@@ -3960,62 +4096,67 @@ bool AccessAnalysis::canCheckPtrAtRT(
ValueToValueMap &StridesMap, bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- unsigned NumReadPtrChecks = 0;
- unsigned NumWritePtrChecks = 0;
bool CanDoRT = true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
- // We assign consecutive id to access from different dependence sets.
- // Accesses within the same set don't need a runtime check.
- unsigned RunningDepId = 1;
- DenseMap<Value *, unsigned> DepSetId;
-
- for (PtrAccessSet::iterator AI = Accesses.begin(), AE = Accesses.end();
- AI != AE; ++AI) {
- const MemAccessInfo &Access = *AI;
- Value *Ptr = Access.getPointer();
- bool IsWrite = Access.getInt();
-
- // Just add write checks if we have both.
- if (!IsWrite && Accesses.count(MemAccessInfo(Ptr, true)))
- continue;
+ NumComparisons = 0;
- if (IsWrite)
- ++NumWritePtrChecks;
- else
- ++NumReadPtrChecks;
-
- if (hasComputableBounds(SE, StridesMap, Ptr) &&
- // When we run after a failing dependency check we have to make sure we
- // don't have wrapping pointers.
- (!ShouldCheckStride ||
- isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
- // The id of the dependence set.
- unsigned DepId;
-
- if (IsDepCheckNeeded) {
- Value *Leader = DepCands.getLeaderValue(Access).getPointer();
- unsigned &LeaderId = DepSetId[Leader];
- if (!LeaderId)
- LeaderId = RunningDepId++;
- DepId = LeaderId;
- } else
- // Each access has its own dependence set.
- DepId = RunningDepId++;
-
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, StridesMap);
-
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
- } else {
- CanDoRT = false;
+ // We assign a consecutive id to accesses from different alias sets.
+ // Accesses between different groups don't need to be checked.
+ unsigned ASId = 1;
+ for (auto &AS : AST) {
+ unsigned NumReadPtrChecks = 0;
+ unsigned NumWritePtrChecks = 0;
+
+ // We assign consecutive ids to accesses from different dependence sets.
+ // Accesses within the same set don't need a runtime check.
+ unsigned RunningDepId = 1;
+ DenseMap<Value *, unsigned> DepSetId;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
+ MemAccessInfo Access(Ptr, IsWrite);
+
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
+ if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ // When we run after a failing dependency check we have to make sure we
+ // don't have wrapping pointers.
+ (!ShouldCheckStride ||
+ isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (IsDepCheckNeeded) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
+ } else {
+ CanDoRT = false;
+ }
}
- }
- if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
- NumComparisons = 0; // Only one dependence set.
- else {
- NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks +
- NumWritePtrChecks - 1));
+ if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
+ NumComparisons += 0; // Only one dependence set.
+ else {
+ NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
+ NumWritePtrChecks - 1));
+ }
+
+ ++ASId;
}
// If the pointers that we would use for the bounds comparison have different
@@ -4029,6 +4170,9 @@ bool AccessAnalysis::canCheckPtrAtRT(
// Only need to check pointers between two different dependency sets.
if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
continue;
+ // Only need to check pointers in the same alias set.
+ if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ continue;
Value *PtrI = RtCheck.Pointers[i];
Value *PtrJ = RtCheck.Pointers[j];
@@ -4046,90 +4190,99 @@ bool AccessAnalysis::canCheckPtrAtRT(
return CanDoRT;
}
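For illustration (this sketch is not part of the patch): the per-alias-set count accumulated above follows NumWritePtrChecks * (NumReadPtrChecks + NumWritePtrChecks - 1), i.e. every write pointer is compared against every other pointer in its set, while read/read pairs never need a check. A minimal standalone C++ sketch reproducing the arithmetic:

#include <cstdio>

// Runtime bound comparisons for one alias set, assuming every pointer ends up
// in its own dependence set (the worst case for the code above).
static unsigned numComparisons(unsigned Writes, unsigned Reads) {
  // Each write is checked against all other writes and against all reads;
  // read/read pairs are never checked.
  return Writes * (Reads + Writes - 1);
}

int main() {
  std::printf("%u\n", numComparisons(2, 3)); // 2 * (3 + 2 - 1) = 8 checks
  std::printf("%u\n", numComparisons(0, 4)); // a read-only set needs 0 checks
  return 0;
}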
-static bool isFunctionScopeIdentifiedObject(Value *Ptr) {
- return isNoAliasArgument(Ptr) || isNoAliasCall(Ptr) || isa<AllocaInst>(Ptr);
-}
-
-void AccessAnalysis::processMemAccesses(bool UseDeferred) {
+void AccessAnalysis::processMemAccesses() {
// We process the set twice: first we process read-write pointers, last we
// process read-only pointers. This allows us to skip dependence tests for
// read-only pointers.
- PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
- for (PtrAccessSet::iterator AI = S.begin(), AE = S.end(); AI != AE; ++AI) {
- const MemAccessInfo &Access = *AI;
- Value *Ptr = Access.getPointer();
- bool IsWrite = Access.getInt();
-
- DepCands.insert(Access);
-
- // Memorize read-only pointers for later processing and skip them in the
- // first round (they need to be checked after we have seen all write
- // pointers). Note: we also mark pointer that are not consecutive as
- // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need the
- // second check for "!IsWrite".
- bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
- if (!UseDeferred && IsReadOnlyPtr) {
- DeferredAccesses.insert(Access);
- continue;
- }
+ DEBUG(dbgs() << "LV: Processing memory accesses...\n");
+ DEBUG(dbgs() << " AST: "; AST.dump());
+ DEBUG(dbgs() << "LV: Accesses:\n");
+ DEBUG({
+ for (auto A : Accesses)
+ dbgs() << "\t" << *A.getPointer() << " (" <<
+ (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
+ "read-only" : "read")) << ")\n";
+ });
+
+ // The AliasSetTracker has nicely partitioned our pointers by metadata
+ // compatibility and potential for underlying-object overlap. As a result, we
+ // only need to check for potential pointer dependencies within each alias
+ // set.
+ for (auto &AS : AST) {
+ // Note that both the alias-set tracker and the alias sets themselves use
+ // linked lists internally and so the iteration order here is deterministic
+ // (matching the original instruction order within each set).
+
+ bool SetHasWrite = false;
+
+ // Map of pointers to last access encountered.
+ typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ UnderlyingObjToAccessMap ObjToLastAccess;
+
+ // Set of accesses to check after all writes have been processed.
+ PtrAccessSet DeferredAccesses;
+
+ // Iterate over each alias set twice, once to process read/write pointers,
+ // and then to process read-only pointers.
+ for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
+ bool UseDeferred = SetIteration > 0;
+ PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = S.count(MemAccessInfo(Ptr, true));
+
+ // If we're using the deferred access set, then it contains only reads.
+ bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+ if (UseDeferred && !IsReadOnlyPtr)
+ continue;
+ // Otherwise, the pointer must be in the PtrAccessSet, either as a read
+ // or a write.
+ assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+ S.count(MemAccessInfo(Ptr, false))) &&
+ "Alias-set pointer not in the access set?");
+
+ MemAccessInfo Access(Ptr, IsWrite);
+ DepCands.insert(Access);
+
+ // Memorize read-only pointers for later processing and skip them in the
+ // first round (they need to be checked after we have seen all write
+ // pointers). Note: we also mark pointers that are not consecutive as
+ // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need
+ // the second check for "!IsWrite".
+ if (!UseDeferred && IsReadOnlyPtr) {
+ DeferredAccesses.insert(Access);
+ continue;
+ }
- bool NeedDepCheck = false;
- // Check whether there is the possibility of dependency because of
- // underlying objects being the same.
- typedef SmallVector<Value*, 16> ValueVector;
- ValueVector TempObjects;
- GetUnderlyingObjects(Ptr, TempObjects, DL);
- for (ValueVector::iterator UI = TempObjects.begin(), UE = TempObjects.end();
- UI != UE; ++UI) {
- Value *UnderlyingObj = *UI;
-
- // If this is a write then it needs to be an identified object. If this a
- // read and all writes (so far) are identified function scope objects we
- // don't need an identified underlying object but only an Argument (the
- // next write is going to invalidate this assumption if it is
- // unidentified).
- // This is a micro-optimization for the case where all writes are
- // identified and we have one argument pointer.
- // Otherwise, we do need a runtime check.
- if ((IsWrite && !isFunctionScopeIdentifiedObject(UnderlyingObj)) ||
- (!IsWrite && (!AreAllWritesIdentified ||
- !isa<Argument>(UnderlyingObj)) &&
- !isIdentifiedObject(UnderlyingObj))) {
- DEBUG(dbgs() << "LV: Found an unidentified " <<
- (IsWrite ? "write" : "read" ) << " ptr: " << *UnderlyingObj <<
- "\n");
- IsRTCheckNeeded = (IsRTCheckNeeded ||
- !isIdentifiedObject(UnderlyingObj) ||
- !AreAllReadsIdentified);
+ // If this is a write, check other reads and writes for conflicts. If
+ // this is a read, only check other writes for conflicts (but only if
+ // there is no other write to the ptr; this is an optimization to
+ // catch "a[i] = a[i] + " without having to do a dependence check).
+ if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+ CheckDeps.insert(Access);
+ IsRTCheckNeeded = true;
+ }
if (IsWrite)
- AreAllWritesIdentified = false;
- if (!IsWrite)
- AreAllReadsIdentified = false;
+ SetHasWrite = true;
+
+ // Create sets of pointers connected by a shared alias set and
+ // underlying object.
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector TempObjects;
+ GetUnderlyingObjects(Ptr, TempObjects, DL);
+ for (Value *UnderlyingObj : TempObjects) {
+ UnderlyingObjToAccessMap::iterator Prev =
+ ObjToLastAccess.find(UnderlyingObj);
+ if (Prev != ObjToLastAccess.end())
+ DepCands.unionSets(Access, Prev->second);
+
+ ObjToLastAccess[UnderlyingObj] = Access;
+ }
}
-
- // If this is a write - check other reads and writes for conflicts. If
- // this is a read only check other writes for conflicts (but only if there
- // is no other write to the ptr - this is an optimization to catch "a[i] =
- // a[i] + " without having to do a dependence check).
- if ((IsWrite || IsReadOnlyPtr) && WriteObjects.count(UnderlyingObj))
- NeedDepCheck = true;
-
- if (IsWrite)
- WriteObjects.insert(UnderlyingObj);
-
- // Create sets of pointers connected by shared underlying objects.
- UnderlyingObjToAccessMap::iterator Prev =
- ObjToLastAccess.find(UnderlyingObj);
- if (Prev != ObjToLastAccess.end())
- DepCands.unionSets(Access, Prev->second);
-
- ObjToLastAccess[UnderlyingObj] = Access;
}
-
- if (NeedDepCheck)
- CheckDeps.insert(Access);
}
}
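To make the deferred-access loop above concrete (illustrative sketch only, with an invented Access type; the real code works on MemAccessInfo and the alias sets themselves): each alias set is walked twice, writes first, with read-only pointers postponed so that a dependence check is only requested once the set is known to contain a write.

#include <string>
#include <vector>

struct Access { std::string Ptr; bool IsWrite; };

// Returns the accesses that need a dependence check: writes are processed in
// the first pass, read-only pointers are deferred to the second, and anything
// seen after the set already contains a write is flagged.
std::vector<Access> collectChecks(const std::vector<Access> &AliasSet) {
  std::vector<Access> NeedCheck, Deferred;
  bool SetHasWrite = false;
  for (int Pass = 0; Pass < 2; ++Pass) {
    const std::vector<Access> *S = Pass ? &Deferred : &AliasSet;
    for (const Access &A : *S) {
      if (Pass == 0 && !A.IsWrite) { // defer read-only pointers
        Deferred.push_back(A);
        continue;
      }
      if (SetHasWrite)               // a conflict with a write is possible
        NeedCheck.push_back(A);
      if (A.IsWrite)
        SetHasWrite = true;
    }
  }
  return NeedCheck;
}

For the set {A: write, B: read, C: write}, this returns C and B: only pointers that can conflict with a write end up in the dependence-check set.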
@@ -4389,6 +4542,11 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (!AIsWrite && !BIsWrite)
return false;
+ // We cannot check pointers in different address spaces.
+ if (APtr->getType()->getPointerAddressSpace() !=
+ BPtr->getType()->getPointerAddressSpace())
+ return true;
+
const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
@@ -4471,7 +4629,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = VectorizationFactor ? VectorizationFactor : 1;
- unsigned ForcedUnroll = VectorizationUnroll ? VectorizationUnroll : 1;
+ unsigned ForcedUnroll = VectorizationInterleave ? VectorizationInterleave : 1;
// The distance must be bigger than the size needed for a vectorized version
// of the operation and the size of the vectorized operation must not be
@@ -4619,7 +4777,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
AccessAnalysis::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(DL, DependentAccesses);
+ AccessAnalysis Accesses(DL, AA, DependentAccesses);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -4643,9 +4801,17 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
- if (Seen.insert(Ptr)) {
+ if (Seen.insert(Ptr).second) {
++NumReadWrites;
- Accesses.addStore(Ptr);
+
+ AliasAnalysis::Location Loc = AA->getLocation(ST);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(ST->getParent()))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addStore(Loc);
}
}
@@ -4668,11 +4834,20 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
+ if (Seen.insert(Ptr).second ||
+ !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
++NumReads;
IsReadOnlyPtr = true;
}
- Accesses.addLoad(Ptr, IsReadOnlyPtr);
+
+ AliasAnalysis::Location Loc = AA->getLocation(LD);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(LD->getParent()))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addLoad(Loc, IsReadOnlyPtr);
}
// If we write (or read-write) to a single destination and there are no
@@ -4773,7 +4948,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
static bool hasMultipleUsesOf(Instruction *I,
- SmallPtrSet<Instruction *, 8> &Insts) {
+ SmallPtrSetImpl<Instruction *> &Insts) {
unsigned NumUses = 0;
for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) {
if (Insts.count(dyn_cast<Instruction>(*Use)))
@@ -4785,7 +4960,7 @@ static bool hasMultipleUsesOf(Instruction *I,
return false;
}
-static bool areAllUsesIn(Instruction *I, SmallPtrSet<Instruction *, 8> &Set) {
+static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set) {
for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
if (!Set.count(dyn_cast<Instruction>(*Use)))
return false;
@@ -4923,7 +5098,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// value must only be used once, except by phi nodes and min/max
// reductions which are represented as a cmp followed by a select.
ReductionInstDesc IgnoredVal(false, nullptr);
- if (VisitedInsts.insert(UI)) {
+ if (VisitedInsts.insert(UI).second) {
if (isa<PHINode>(UI))
PHIs.push_back(UI);
else
@@ -5025,7 +5200,7 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
ReductionKind Kind,
ReductionInstDesc &Prev) {
bool FP = I->getType()->isFloatingPointTy();
- bool FastMath = (FP && I->isCommutative() && I->isAssociative());
+ bool FastMath = FP && I->hasUnsafeAlgebra();
switch (I->getOpcode()) {
default:
return ReductionInstDesc(false, I);
@@ -5047,6 +5222,7 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
return ReductionInstDesc(Kind == RK_IntegerXor, I);
case Instruction::FMul:
return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
+ case Instruction::FSub:
case Instruction::FAdd:
return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
case Instruction::FCmp:
@@ -5090,7 +5266,13 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
return IK_NoInduction;
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
- uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
+ Type *PointerElementType = PhiTy->getPointerElementType();
+ // The pointer stride cannot be determined if the pointer element type is not
+ // sized.
+ if (!PointerElementType->isSized())
+ return IK_NoInduction;
+
+ uint64_t Size = DL->getTypeAllocSize(PointerElementType);
if (C->getValue()->equalsInt(Size))
return IK_PtrInduction;
else if (C->getValue()->equalsInt(0 - Size))
@@ -5117,7 +5299,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
}
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
- SmallPtrSet<Value *, 8>& SafePtrs) {
+ SmallPtrSetImpl<Value *> &SafePtrs) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
// We might be able to hoist the load.
if (it->mayReadFromMemory()) {
@@ -5162,23 +5344,23 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
}
LoopVectorizationCostModel::VectorizationFactor
-LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
- unsigned UserVF,
- bool ForceVectorization) {
+LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// Width 1 means no vectorization.
VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
+ emitAnalysis(Report() << "runtime pointer checks needed. Enable vectorization of this loop with '#pragma clang loop vectorize(enable)' when compiling with -Os");
DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
return Factor;
}
if (!EnableCondStoresVectorization && Legal->NumPredStores) {
+ emitAnalysis(Report() << "store that is conditionally executed prevents vectorization");
DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
return Factor;
}
// Find the trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop);
DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
unsigned WidestType = getWidestType();
@@ -5207,6 +5389,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
if (OptForSize) {
// If we are unable to calculate the trip count then don't try to vectorize.
if (TC < 2) {
+ emitAnalysis(Report() << "unable to calculate the loop count due to complex control flow");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor;
}
@@ -5220,11 +5403,16 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
// If the trip count that we found modulo the vectorization factor is not
// zero then we require a tail.
if (VF < 2) {
+ emitAnalysis(Report() << "cannot optimize for size and vectorize at the "
+ "same time. Enable vectorization of this loop "
+ "with '#pragma clang loop vectorize(enable)' "
+ "when compiling with -Os");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
return Factor;
}
}
+ int UserVF = Hints->getWidth();
if (UserVF != 0) {
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
@@ -5240,6 +5428,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
unsigned Width = 1;
DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
+ bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
// Ignore scalar width, because the user explicitly wants vectorization.
if (ForceVectorization && VF > 1) {
Width = 2;
@@ -5280,6 +5469,10 @@ unsigned LoopVectorizationCostModel::getWidestType() {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
Type *T = it->getType();
+ // Ignore ephemeral values.
+ if (EphValues.count(it))
+ continue;
+
// Only examine Loads, Stores and PHINodes.
if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
continue;
@@ -5309,29 +5502,29 @@ unsigned LoopVectorizationCostModel::getWidestType() {
unsigned
LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
- unsigned UserUF,
unsigned VF,
unsigned LoopCost) {
// -- The unroll heuristics --
// We unroll the loop in order to expose ILP and reduce the loop overhead.
// There are many micro-architectural considerations that we can't predict
- // at this level. For example frontend pressure (on decode or fetch) due to
+ // at this level. For example, frontend pressure (on decode or fetch) due to
// code size, or the number and capabilities of the execution ports.
//
// We use the following heuristics to select the unroll factor:
- // 1. If the code has reductions the we unroll in order to break the cross
+ // 1. If the code has reductions, then we unroll in order to break the cross
// iteration dependency.
- // 2. If the loop is really small then we unroll in order to reduce the loop
+ // 2. If the loop is really small, then we unroll in order to reduce the loop
// overhead.
// 3. We don't unroll if we think that we will spill registers to memory due
// to the increased register pressure.
// Use the user preference, unless 'auto' is selected.
+ int UserUF = Hints->getInterleave();
if (UserUF != 0)
return UserUF;
- // When we optimize for size we don't unroll.
+ // When we optimize for size, we don't unroll.
if (OptForSize)
return 1;
@@ -5340,8 +5533,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
return 1;
// Do not unroll loops with a relatively small trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop,
- TheLoop->getLoopLatch());
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop);
if (TC > 1 && TC < TinyTripCountUnrollThreshold)
return 1;
@@ -5380,15 +5572,15 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
std::max(1U, (R.MaxLocalUsers - 1)));
// Clamp the unroll factor ranges to reasonable factors.
- unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
+ unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor();
// Check if the user has overridden the unroll max.
if (VF == 1) {
- if (ForceTargetMaxScalarUnrollFactor.getNumOccurrences() > 0)
- MaxUnrollSize = ForceTargetMaxScalarUnrollFactor;
+ if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0)
+ MaxInterleaveSize = ForceTargetMaxScalarInterleaveFactor;
} else {
- if (ForceTargetMaxVectorUnrollFactor.getNumOccurrences() > 0)
- MaxUnrollSize = ForceTargetMaxVectorUnrollFactor;
+ if (ForceTargetMaxVectorInterleaveFactor.getNumOccurrences() > 0)
+ MaxInterleaveSize = ForceTargetMaxVectorInterleaveFactor;
}
// If we did not calculate the cost for VF (because the user selected the VF)
@@ -5398,8 +5590,8 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// Clamp the calculated UF to be between the 1 and the max unroll factor
// that the target allows.
- if (UF > MaxUnrollSize)
- UF = MaxUnrollSize;
+ if (UF > MaxInterleaveSize)
+ UF = MaxInterleaveSize;
else if (UF < 1)
UF = 1;
@@ -5430,6 +5622,18 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
unsigned StoresUF = UF / (Legal->NumStores ? Legal->NumStores : 1);
unsigned LoadsUF = UF / (Legal->NumLoads ? Legal->NumLoads : 1);
+ // If we have a scalar reduction (vector reductions are already dealt with
+ // by this point), we can increase the critical path length if the loop
+ // we're unrolling is inside another loop. Limit, by default, to 2 so the
+ // critical path only gets increased by one reduction operation.
+ if (Legal->getReductionVars()->size() &&
+ TheLoop->getLoopDepth() > 1) {
+ unsigned F = static_cast<unsigned>(MaxNestedScalarReductionUF);
+ SmallUF = std::min(SmallUF, F);
+ StoresUF = std::min(StoresUF, F);
+ LoadsUF = std::min(LoadsUF, F);
+ }
+
if (EnableLoadStoreRuntimeUnroll && std::max(StoresUF, LoadsUF) > SmallUF) {
DEBUG(dbgs() << "LV: Unrolling to saturate store or load ports.\n");
return std::max(StoresUF, LoadsUF);
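A quick numeric illustration of the new clamp (invented values, and the surrounding unroll logic simplified): with UF = 8, one store and four loads, the port-saturation heuristic would otherwise pick 8; a scalar reduction inside a nested loop caps everything at MaxNestedScalarReductionUF, whose default is 2.

#include <algorithm>
#include <cstdio>

int main() {
  unsigned UF = 8, NumStores = 1, NumLoads = 4, SmallUF = 4;
  unsigned StoresUF = UF / std::max(1u, NumStores); // 8
  unsigned LoadsUF  = UF / std::max(1u, NumLoads);  // 2

  // Scalar reduction in a nested loop: clamp as the patch does.
  unsigned MaxNestedScalarReductionUF = 2;
  SmallUF  = std::min(SmallUF,  MaxNestedScalarReductionUF);
  StoresUF = std::min(StoresUF, MaxNestedScalarReductionUF);
  LoadsUF  = std::min(LoadsUF,  MaxNestedScalarReductionUF);

  // Without the clamp max(StoresUF, LoadsUF) == 8 would win; with it the
  // saturation path no longer overrides the small-loop unroll factor.
  unsigned Result = std::max(StoresUF, LoadsUF) > SmallUF
                        ? std::max(StoresUF, LoadsUF)
                        : SmallUF;
  std::printf("UF = %u\n", Result); // prints UF = 2
  return 0;
}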
@@ -5531,6 +5735,10 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
// Ignore instructions that are never used within the loop.
if (!Ends.count(I)) continue;
+ // Ignore ephemeral values.
+ if (EphValues.count(I))
+ continue;
+
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
for (unsigned int j=0, e = List.size(); j < e; ++j)
@@ -5571,6 +5779,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
if (isa<DbgInfoIntrinsic>(it))
continue;
+ // Ignore ephemeral values.
+ if (EphValues.count(it))
+ continue;
+
unsigned C = getInstructionCost(it, VF);
// Check if we should override the cost.
@@ -5704,18 +5916,31 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueProperties Op1VP =
+ TargetTransformInfo::OP_None;
+ TargetTransformInfo::OperandValueProperties Op2VP =
+ TargetTransformInfo::OP_None;
Value *Op2 = I->getOperand(1);
// Check for a splat of a constant or for a non uniform vector of constants.
- if (isa<ConstantInt>(Op2))
+ if (isa<ConstantInt>(Op2)) {
+ ConstantInt *CInt = cast<ConstantInt>(Op2);
+ if (CInt && CInt->getValue().isPowerOf2())
+ Op2VP = TargetTransformInfo::OP_PowerOf2;
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
- else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
+ } else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
- if (cast<Constant>(Op2)->getSplatValue() != nullptr)
+ Constant *SplatValue = cast<Constant>(Op2)->getSplatValue();
+ if (SplatValue) {
+ ConstantInt *CInt = dyn_cast<ConstantInt>(SplatValue);
+ if (CInt && CInt->getValue().isPowerOf2())
+ Op2VP = TargetTransformInfo::OP_PowerOf2;
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ }
}
- return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
+ return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK,
+ Op1VP, Op2VP);
}
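The new OP_PowerOf2 operand property gives targets a chance to price power-of-two constants more cheaply (for example, multiplies and divides that can be strength-reduced to shifts). The patch relies on APInt::isPowerOf2(); for a plain integer the equivalent check is the familiar bit trick (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// True when N is a non-zero power of two, mirroring APInt::isPowerOf2().
static bool isPowerOf2(uint64_t N) {
  return N != 0 && (N & (N - 1)) == 0;
}

int main() {
  assert(isPowerOf2(8));
  assert(!isPowerOf2(12));
  assert(!isPowerOf2(0));
  return 0;
}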
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -5857,6 +6082,8 @@ char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 53a43d9..44bfea1 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -19,7 +19,10 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -42,12 +45,15 @@
#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
+#include <memory>
using namespace llvm;
#define SV_NAME "slp-vectorizer"
#define DEBUG_TYPE "SLP"
+STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
+
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
@@ -68,53 +74,6 @@ static const unsigned MinVecRegSize = 128;
static const unsigned RecursionMaxDepth = 12;
-/// A helper class for numbering instructions in multiple blocks.
-/// Numbers start at zero for each basic block.
-struct BlockNumbering {
-
- BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {}
-
- void numberInstructions() {
- unsigned Loc = 0;
- InstrIdx.clear();
- InstrVec.clear();
- // Number the instructions in the block.
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- InstrIdx[it] = Loc++;
- InstrVec.push_back(it);
- assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
- }
- Valid = true;
- }
-
- int getIndex(Instruction *I) {
- assert(I->getParent() == BB && "Invalid instruction");
- if (!Valid)
- numberInstructions();
- assert(InstrIdx.count(I) && "Unknown instruction");
- return InstrIdx[I];
- }
-
- Instruction *getInstruction(unsigned loc) {
- if (!Valid)
- numberInstructions();
- assert(InstrVec.size() > loc && "Invalid Index");
- return InstrVec[loc];
- }
-
- void forget() { Valid = false; }
-
-private:
- /// The block we are numbering.
- BasicBlock *BB;
- /// Is the block numbered.
- bool Valid;
- /// Maps instructions to numbers and back.
- SmallDenseMap<Instruction *, int> InstrIdx;
- /// Maps integers to Instructions.
- SmallVector<Instruction *, 32> InstrVec;
-};
-
/// \returns the parent basic block if all of the instructions in \p VL
/// are in the same block or null otherwise.
static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
@@ -209,6 +168,23 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
return Opcode;
}
+/// Get the intersection (logical and) of all of the potential IR flags
+/// of each scalar operation (VL) that will be converted into a vector (I).
+/// Flag set: NSW, NUW, exact, and all of fast-math.
+static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
+ if (auto *VecOp = dyn_cast<BinaryOperator>(I)) {
+ if (auto *Intersection = dyn_cast<BinaryOperator>(VL[0])) {
+ // Intersection is initialized to the 0th scalar,
+ // so start counting from index '1'.
+ for (int i = 1, e = VL.size(); i < e; ++i) {
+ if (auto *Scalar = dyn_cast<BinaryOperator>(VL[i]))
+ Intersection->andIRFlags(Scalar);
+ }
+ VecOp->copyIRFlags(Intersection);
+ }
+ }
+}
+
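The intersection above means the vector instruction only keeps a flag (nsw, nuw, exact, the fast-math flags) when every scalar in the bundle has it. A small sketch of the same idea outside the LLVM API (hypothetical Flags struct, not the real IR classes):

#include <cstddef>
#include <vector>

struct Flags { bool NSW = false, NUW = false, Exact = false, Fast = false; };

// Logical-and of the flags of every scalar in a bundle, mirroring the
// andIRFlags/copyIRFlags calls above. Assumes a non-empty bundle.
Flags intersectFlags(const std::vector<Flags> &Scalars) {
  Flags R = Scalars.front(); // start from lane 0, as the code above does
  for (std::size_t i = 1; i < Scalars.size(); ++i) {
    R.NSW   = R.NSW   && Scalars[i].NSW;
    R.NUW   = R.NUW   && Scalars[i].NUW;
    R.Exact = R.Exact && Scalars[i].Exact;
    R.Fast  = R.Fast  && Scalars[i].Fast;
  }
  return R;
}

So bundling an 'add nsw' with a plain 'add' drops nsw from the emitted vector add, which is the conservative choice.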
/// \returns \p I after propagating metadata from \p VL.
static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
@@ -230,6 +206,10 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
case LLVMContext::MD_tbaa:
MD = MDNode::getMostGenericTBAA(MD, IMD);
break;
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ MD = MDNode::intersect(MD, IMD);
+ break;
case LLVMContext::MD_fpmath:
MD = MDNode::getMostGenericFPMath(MD, IMD);
break;
@@ -381,6 +361,33 @@ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
}
}
+/// \returns True if the in-tree use also needs an extract. This refers to a
+/// possible scalar operand in a vectorized instruction.
+static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
+ TargetLibraryInfo *TLI) {
+
+ unsigned Opcode = UserInst->getOpcode();
+ switch (Opcode) {
+ case Instruction::Load: {
+ LoadInst *LI = cast<LoadInst>(UserInst);
+ return (LI->getPointerOperand() == Scalar);
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(UserInst);
+ return (SI->getPointerOperand() == Scalar);
+ }
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(UserInst);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ if (hasVectorInstrinsicScalarOpd(ID, 1)) {
+ return (CI->getArgOperand(1) == Scalar);
+ }
+ }
+ default:
+ return false;
+ }
+}
+
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
public:
@@ -391,14 +398,21 @@ public:
BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
- LoopInfo *Li, DominatorTree *Dt)
- : F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
- Builder(Se->getContext()) {}
+ LoopInfo *Li, DominatorTree *Dt, AssumptionTracker *AT)
+ : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0),
+ F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+ Builder(Se->getContext()) {
+ CodeMetrics::collectEphemeralValues(F, AT, EphValues);
+ }
/// \brief Vectorize the tree that starts with the elements in \p VL.
/// Returns the vectorized root.
Value *vectorizeTree();
+ /// \returns the cost incurred by unwanted spills and fills, caused by
+ /// holding live values over call sites.
+ int getSpillCost();
+
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
int getTreeCost();
@@ -414,7 +428,12 @@ public:
ScalarToTreeEntry.clear();
MustGather.clear();
ExternalUses.clear();
- MemBarrierIgnoreList.clear();
+ NumLoadsWantToKeepOrder = 0;
+ NumLoadsWantToChangeOrder = 0;
+ for (auto &Iter : BlocksSchedules) {
+ BlockScheduling *BS = Iter.second.get();
+ BS->clear();
+ }
}
/// \returns true if the memory operations A and B are consecutive.
@@ -423,6 +442,11 @@ public:
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
+ /// \returns true if it is beneficial to reverse the vector order.
+ bool shouldReorder() const {
+ return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder;
+ }
+
private:
struct TreeEntry;
@@ -459,20 +483,6 @@ private:
/// roots. This method calculates the cost of extracting the values.
int getGatherCost(ArrayRef<Value *> VL);
- /// \returns the AA location that is being access by the instruction.
- AliasAnalysis::Location getLocation(Instruction *I);
-
- /// \brief Checks if it is possible to sink an instruction from
- /// \p Src to \p Dst.
- /// \returns the pointer to the barrier instruction if we can't sink.
- Value *getSinkBarrier(Instruction *Src, Instruction *Dst);
-
- /// \returns the index of the last instruction in the BB from \p VL.
- int getLastIndex(ArrayRef<Value *> VL);
-
- /// \returns the Instruction in the bundle \p VL.
- Instruction *getLastInstruction(ArrayRef<Value *> VL);
-
/// \brief Set the Builder insert point to one after the last instruction in
/// the bundle
void setInsertPointAfterBundle(ArrayRef<Value *> VL);
@@ -485,7 +495,7 @@ private:
bool isFullyVectorizableTinyTree();
struct TreeEntry {
- TreeEntry() : Scalars(), VectorizedValue(nullptr), LastScalarIndex(0),
+ TreeEntry() : Scalars(), VectorizedValue(nullptr),
NeedToGather(0) {}
/// \returns true if the scalars in VL are equal to this entry.
@@ -500,9 +510,6 @@ private:
/// The Scalars are vectorized into this value. It is initialized to Null.
Value *VectorizedValue;
- /// The index in the basic block of the last scalar.
- int LastScalarIndex;
-
/// Do we need to gather this sequence ?
bool NeedToGather;
};
@@ -515,18 +522,16 @@ private:
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
Last->NeedToGather = !Vectorized;
if (Vectorized) {
- Last->LastScalarIndex = getLastIndex(VL);
for (int i = 0, e = VL.size(); i != e; ++i) {
assert(!ScalarToTreeEntry.count(VL[i]) && "Scalar already in tree!");
ScalarToTreeEntry[VL[i]] = idx;
}
} else {
- Last->LastScalarIndex = 0;
MustGather.insert(VL.begin(), VL.end());
}
return Last;
}
-
+
/// -- Vectorization State --
/// Holds all of the tree entries.
std::vector<TreeEntry> VectorizableTree;
@@ -554,28 +559,319 @@ private:
/// This list holds pairs of (Internal Scalar : External User).
UserList ExternalUses;
- /// A list of instructions to ignore while sinking
- /// memory instructions. This map must be reset between runs of getCost.
- ValueSet MemBarrierIgnoreList;
+ /// Values used only by @llvm.assume calls.
+ SmallPtrSet<const Value *, 32> EphValues;
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherSeq;
/// A list of blocks that we are going to CSE.
SetVector<BasicBlock *> CSEBlocks;
- /// Numbers instructions in different blocks.
- DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
+ /// Contains all scheduling relevant data for an instruction.
+ /// A ScheduleData either represents a single instruction or a member of an
+ /// instruction bundle (= a group of instructions which is combined into a
+ /// vector instruction).
+ struct ScheduleData {
+
+ // The initial value for the dependency counters. It means that the
+ // dependencies are not calculated yet.
+ enum { InvalidDeps = -1 };
+
+ ScheduleData()
+ : Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr),
+ NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0),
+ Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps),
+ UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false) {}
+
+ void init(int BlockSchedulingRegionID) {
+ FirstInBundle = this;
+ NextInBundle = nullptr;
+ NextLoadStore = nullptr;
+ IsScheduled = false;
+ SchedulingRegionID = BlockSchedulingRegionID;
+ UnscheduledDepsInBundle = UnscheduledDeps;
+ clearDependencies();
+ }
+
+ /// Returns true if the dependency information has been calculated.
+ bool hasValidDependencies() const { return Dependencies != InvalidDeps; }
+
+ /// Returns true for single instructions and for bundle representatives
+ /// (= the head of a bundle).
+ bool isSchedulingEntity() const { return FirstInBundle == this; }
+
+ /// Returns true if it represents an instruction bundle and not only a
+ /// single instruction.
+ bool isPartOfBundle() const {
+ return NextInBundle != nullptr || FirstInBundle != this;
+ }
+
+ /// Returns true if it is ready for scheduling, i.e. it has no more
+ /// unscheduled depending instructions/bundles.
+ bool isReady() const {
+ assert(isSchedulingEntity() &&
+ "can't consider non-scheduling entity for ready list");
+ return UnscheduledDepsInBundle == 0 && !IsScheduled;
+ }
+
+ /// Modifies the number of unscheduled dependencies, also updating it for
+ /// the whole bundle.
+ int incrementUnscheduledDeps(int Incr) {
+ UnscheduledDeps += Incr;
+ return FirstInBundle->UnscheduledDepsInBundle += Incr;
+ }
+
+ /// Sets the number of unscheduled dependencies to the number of
+ /// dependencies.
+ void resetUnscheduledDeps() {
+ incrementUnscheduledDeps(Dependencies - UnscheduledDeps);
+ }
+
+ /// Clears all dependency information.
+ void clearDependencies() {
+ Dependencies = InvalidDeps;
+ resetUnscheduledDeps();
+ MemoryDependencies.clear();
+ }
+
+ void dump(raw_ostream &os) const {
+ if (!isSchedulingEntity()) {
+ os << "/ " << *Inst;
+ } else if (NextInBundle) {
+ os << '[' << *Inst;
+ ScheduleData *SD = NextInBundle;
+ while (SD) {
+ os << ';' << *SD->Inst;
+ SD = SD->NextInBundle;
+ }
+ os << ']';
+ } else {
+ os << *Inst;
+ }
+ }
- /// \brief Get the corresponding instruction numbering list for a given
- /// BasicBlock. The list is allocated lazily.
- BlockNumbering &getBlockNumbering(BasicBlock *BB) {
- auto I = BlocksNumbers.insert(std::make_pair(BB, BlockNumbering(BB)));
- return I.first->second;
- }
+ Instruction *Inst;
+
+ /// Points to the head in an instruction bundle (and always to this for
+ /// single instructions).
+ ScheduleData *FirstInBundle;
+
+ /// Singly linked list of all instructions in a bundle. Null if it is a
+ /// single instruction.
+ ScheduleData *NextInBundle;
+
+ /// Singly linked list of all memory instructions (e.g. load, store, call)
+ /// in the block - until the end of the scheduling region.
+ ScheduleData *NextLoadStore;
+
+ /// The dependent memory instructions.
+ /// This list is derived on demand in calculateDependencies().
+ SmallVector<ScheduleData *, 4> MemoryDependencies;
+
+ /// This ScheduleData is in the current scheduling region if this matches
+ /// the current SchedulingRegionID of BlockScheduling.
+ int SchedulingRegionID;
+
+ /// Used for getting a "good" final ordering of instructions.
+ int SchedulingPriority;
+
+ /// The number of dependencies. It consists of the number of users of the
+ /// instruction plus the number of dependent memory instructions (if any).
+ /// This value is calculated on demand.
+ /// If InvalidDeps, the number of dependencies is not calculated yet.
+ ///
+ int Dependencies;
+
+ /// The number of dependencies minus the number of dependencies of scheduled
+ /// instructions. As soon as this is zero, the instruction/bundle gets ready
+ /// for scheduling.
+ /// Note that this is negative as long as Dependencies is not calculated.
+ int UnscheduledDeps;
+
+ /// The sum of UnscheduledDeps in a bundle. Equals UnscheduledDeps for
+ /// single instructions.
+ int UnscheduledDepsInBundle;
+
+ /// True if this instruction is scheduled (or considered as scheduled in the
+ /// dry-run).
+ bool IsScheduled;
+ };
+
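The counters above are the usual list-scheduling bookkeeping: a node starts with UnscheduledDeps equal to its total number of dependencies, scheduling a node decrements the counters of everything that depends on it, and a node becomes ready once its counter reaches zero. A tiny standalone sketch of that idea on a generic dependence graph (plain integers, not the ScheduleData types):

#include <cstdio>
#include <queue>
#include <vector>

int main() {
  // Users[i] lists the nodes that depend on node i; Deps[i] is the number of
  // still-unscheduled dependencies of node i (cf. UnscheduledDeps above).
  std::vector<std::vector<int>> Users = {{2}, {2}, {3}, {}};
  std::vector<int> Deps = {0, 0, 2, 1};

  std::queue<int> Ready;
  for (int i = 0; i < (int)Deps.size(); ++i)
    if (Deps[i] == 0)
      Ready.push(i); // cf. initialFillReadyList()

  while (!Ready.empty()) {
    int N = Ready.front();
    Ready.pop();
    std::printf("schedule node %d\n", N);
    for (int U : Users[N])
      if (--Deps[U] == 0) // cf. incrementUnscheduledDeps(-1) == 0
        Ready.push(U);    // the dependent node just became ready
  }
  return 0;
}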
+#ifndef NDEBUG
+ friend raw_ostream &operator<<(raw_ostream &os,
+ const BoUpSLP::ScheduleData &SD);
+#endif
+
+ /// Contains all scheduling data for a basic block.
+ ///
+ struct BlockScheduling {
+
+ BlockScheduling(BasicBlock *BB)
+ : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize),
+ ScheduleStart(nullptr), ScheduleEnd(nullptr),
+ FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr),
+ // Make sure that the initial SchedulingRegionID is greater than the
+ // initial SchedulingRegionID in ScheduleData (which is 0).
+ SchedulingRegionID(1) {}
+
+ void clear() {
+ ReadyInsts.clear();
+ ScheduleStart = nullptr;
+ ScheduleEnd = nullptr;
+ FirstLoadStoreInRegion = nullptr;
+ LastLoadStoreInRegion = nullptr;
+
+ // Make a new scheduling region, i.e. all existing ScheduleData is not
+ // in the new region yet.
+ ++SchedulingRegionID;
+ }
+
+ ScheduleData *getScheduleData(Value *V) {
+ ScheduleData *SD = ScheduleDataMap[V];
+ if (SD && SD->SchedulingRegionID == SchedulingRegionID)
+ return SD;
+ return nullptr;
+ }
+
+ bool isInSchedulingRegion(ScheduleData *SD) {
+ return SD->SchedulingRegionID == SchedulingRegionID;
+ }
+
+ /// Marks an instruction as scheduled and puts all dependent ready
+ /// instructions into the ready-list.
+ template <typename ReadyListType>
+ void schedule(ScheduleData *SD, ReadyListType &ReadyList) {
+ SD->IsScheduled = true;
+ DEBUG(dbgs() << "SLP: schedule " << *SD << "\n");
+
+ ScheduleData *BundleMember = SD;
+ while (BundleMember) {
+ // Handle the def-use chain dependencies.
+ for (Use &U : BundleMember->Inst->operands()) {
+ ScheduleData *OpDef = getScheduleData(U.get());
+ if (OpDef && OpDef->hasValidDependencies() &&
+ OpDef->incrementUnscheduledDeps(-1) == 0) {
+ // There are no more unscheduled dependencies after decrementing,
+ // so we can put the dependent instruction into the ready list.
+ ScheduleData *DepBundle = OpDef->FirstInBundle;
+ assert(!DepBundle->IsScheduled &&
+ "already scheduled bundle gets ready");
+ ReadyList.insert(DepBundle);
+ DEBUG(dbgs() << "SLP: gets ready (def): " << *DepBundle << "\n");
+ }
+ }
+ // Handle the memory dependencies.
+ for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
+ if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
+ // There are no more unscheduled dependencies after decrementing,
+ // so we can put the dependent instruction into the ready list.
+ ScheduleData *DepBundle = MemoryDepSD->FirstInBundle;
+ assert(!DepBundle->IsScheduled &&
+ "already scheduled bundle gets ready");
+ ReadyList.insert(DepBundle);
+ DEBUG(dbgs() << "SLP: gets ready (mem): " << *DepBundle << "\n");
+ }
+ }
+ BundleMember = BundleMember->NextInBundle;
+ }
+ }
+
+ /// Put all instructions into the ReadyList which are ready for scheduling.
+ template <typename ReadyListType>
+ void initialFillReadyList(ReadyListType &ReadyList) {
+ for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
+ ScheduleData *SD = getScheduleData(I);
+ if (SD->isSchedulingEntity() && SD->isReady()) {
+ ReadyList.insert(SD);
+ DEBUG(dbgs() << "SLP: initially in ready list: " << *I << "\n");
+ }
+ }
+ }
+
+ /// Checks if a bundle of instructions can be scheduled, i.e. has no
+ /// cyclic dependencies. This is only a dry-run, no instructions are
+ /// actually moved at this stage.
+ bool tryScheduleBundle(ArrayRef<Value *> VL, AliasAnalysis *AA);
+
+ /// Un-bundles a group of instructions.
+ void cancelScheduling(ArrayRef<Value *> VL);
+
+ /// Extends the scheduling region so that V is inside the region.
+ void extendSchedulingRegion(Value *V);
+
+ /// Initialize the ScheduleData structures for new instructions in the
+ /// scheduling region.
+ void initScheduleData(Instruction *FromI, Instruction *ToI,
+ ScheduleData *PrevLoadStore,
+ ScheduleData *NextLoadStore);
+
+ /// Updates the dependency information of a bundle and of all instructions/
+ /// bundles which depend on the original bundle.
+ void calculateDependencies(ScheduleData *SD, bool InsertInReadyList,
+ AliasAnalysis *AA);
+
+ /// Sets all instructions in the scheduling region to un-scheduled.
+ void resetSchedule();
+
+ BasicBlock *BB;
+
+ /// Simple memory allocation for ScheduleData.
+ std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;
+
+ /// The size of a ScheduleData array in ScheduleDataChunks.
+ int ChunkSize;
+
+ /// The allocator position in the current chunk, which is the last entry
+ /// of ScheduleDataChunks.
+ int ChunkPos;
+
+ /// Attaches ScheduleData to Instruction.
+ /// Note that the mapping survives during all vectorization iterations, i.e.
+ /// ScheduleData structures are recycled.
+ DenseMap<Value *, ScheduleData *> ScheduleDataMap;
+
+ struct ReadyList : SmallVector<ScheduleData *, 8> {
+ void insert(ScheduleData *SD) { push_back(SD); }
+ };
+
+ /// The ready-list for scheduling (only used for the dry-run).
+ ReadyList ReadyInsts;
+
+ /// The first instruction of the scheduling region.
+ Instruction *ScheduleStart;
+
+ /// The first instruction _after_ the scheduling region.
+ Instruction *ScheduleEnd;
+
+ /// The first memory accessing instruction in the scheduling region
+ /// (can be null).
+ ScheduleData *FirstLoadStoreInRegion;
+
+ /// The last memory accessing instruction in the scheduling region
+ /// (can be null).
+ ScheduleData *LastLoadStoreInRegion;
+
+ /// The ID of the scheduling region. For a new vectorization iteration this
+ /// is incremented which "removes" all ScheduleData from the region.
+ int SchedulingRegionID;
+ };
+
+ /// Attaches the BlockScheduling structures to basic blocks.
+ DenseMap<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules;
+
+ /// Performs the "real" scheduling. Done before vectorization is actually
+ /// performed in a basic block.
+ void scheduleBlock(BlockScheduling *BS);
/// List of users to ignore during scheduling and that don't need extracting.
ArrayRef<Value *> UserIgnoreList;
+ // Number of load-bundles that contain consecutive loads.
+ int NumLoadsWantToKeepOrder;
+
+ // Number of load-bundles of size 2 that are consecutive loads if reversed.
+ int NumLoadsWantToChangeOrder;
+
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
@@ -589,6 +885,13 @@ private:
IRBuilder<> Builder;
};
+#ifndef NDEBUG
+raw_ostream &operator<<(raw_ostream &os, const BoUpSLP::ScheduleData &SD) {
+ SD.dump(os);
+ return os;
+}
+#endif
+
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst) {
deleteTree();
@@ -612,18 +915,27 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
for (User *U : Scalar->users()) {
DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
- // Skip in-tree scalars that become vectors.
- if (ScalarToTreeEntry.count(U)) {
- DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
- *U << ".\n");
- int Idx = ScalarToTreeEntry[U]; (void) Idx;
- assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
- continue;
- }
Instruction *UserInst = dyn_cast<Instruction>(U);
if (!UserInst)
continue;
+ // Skip in-tree scalars that become vectors
+ if (ScalarToTreeEntry.count(U)) {
+ int Idx = ScalarToTreeEntry[U];
+ TreeEntry *UseEntry = &VectorizableTree[Idx];
+ Value *UseScalar = UseEntry->Scalars[0];
+ // Some in-tree scalars will remain as scalar in vectorized
+ // instructions. If that is the case, the one in Lane 0 will
+ // be used.
+ if (UseScalar != U ||
+ !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
+ DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
+ << ".\n");
+ assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
+ continue;
+ }
+ }
+
// Ignore users in the user ignore list.
if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
UserIgnoreList.end())
@@ -683,6 +995,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// We now know that this is a vector of instructions of the same type from
// the same block.
+ // Don't vectorize ephemeral values.
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ if (EphValues.count(VL[i])) {
+ DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
+ ") is ephemeral.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
// Check if this is a duplicate of another entry.
if (ScalarToTreeEntry.count(VL[0])) {
int Idx = ScalarToTreeEntry[VL[0]];
@@ -722,69 +1044,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// Check that all of the users of the scalars that we want to vectorize are
// schedulable.
Instruction *VL0 = cast<Instruction>(VL[0]);
- int MyLastIndex = getLastIndex(VL);
BasicBlock *BB = cast<Instruction>(VL0)->getParent();
- for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- Instruction *Scalar = cast<Instruction>(VL[i]);
- DEBUG(dbgs() << "SLP: Checking users of " << *Scalar << ". \n");
- for (User *U : Scalar->users()) {
- DEBUG(dbgs() << "SLP: \tUser " << *U << ". \n");
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI) {
- DEBUG(dbgs() << "SLP: Gathering due unknown user. \n");
- newTreeEntry(VL, false);
- return;
- }
-
- // We don't care if the user is in a different basic block.
- BasicBlock *UserBlock = UI->getParent();
- if (UserBlock != BB) {
- DEBUG(dbgs() << "SLP: User from a different basic block "
- << *UI << ". \n");
- continue;
- }
-
- // If this is a PHINode within this basic block then we can place the
- // extract wherever we want.
- if (isa<PHINode>(*UI)) {
- DEBUG(dbgs() << "SLP: \tWe can schedule PHIs:" << *UI << ". \n");
- continue;
- }
-
- // Check if this is a safe in-tree user.
- if (ScalarToTreeEntry.count(UI)) {
- int Idx = ScalarToTreeEntry[UI];
- int VecLocation = VectorizableTree[Idx].LastScalarIndex;
- if (VecLocation <= MyLastIndex) {
- DEBUG(dbgs() << "SLP: Gathering due to unschedulable vector. \n");
- newTreeEntry(VL, false);
- return;
- }
- DEBUG(dbgs() << "SLP: In-tree user (" << *UI << ") at #" <<
- VecLocation << " vector value (" << *Scalar << ") at #"
- << MyLastIndex << ".\n");
- continue;
- }
-
- // Ignore users in the user ignore list.
- if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) !=
- UserIgnoreList.end())
- continue;
-
- // Make sure that we can schedule this unknown user.
- BlockNumbering &BN = getBlockNumbering(BB);
- int UserIndex = BN.getIndex(UI);
- if (UserIndex < MyLastIndex) {
-
- DEBUG(dbgs() << "SLP: Can't schedule extractelement for "
- << *UI << ". \n");
- newTreeEntry(VL, false);
- return;
- }
- }
+ if (!DT->isReachableFromEntry(BB)) {
+ // Don't go into unreachable blocks. They may contain instructions with
+ // dependency cycles which confuse the final scheduling.
+ DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
+ newTreeEntry(VL, false);
+ return;
}
-
+
  // Check that every instruction appears once in this bundle.
for (unsigned i = 0, e = VL.size(); i < e; ++i)
for (unsigned j = i+1; j < e; ++j)
@@ -794,38 +1063,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
- // Check that instructions in this bundle don't reference other instructions.
- // The runtime of this check is O(N * N-1 * uses(N)) and a typical N is 4.
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- for (User *U : VL[i]->users()) {
- for (unsigned j = 0; j < e; ++j) {
- if (i != j && U == VL[j]) {
- DEBUG(dbgs() << "SLP: Intra-bundle dependencies!" << *U << ". \n");
- newTreeEntry(VL, false);
- return;
- }
- }
- }
+ auto &BSRef = BlocksSchedules[BB];
+ if (!BSRef) {
+ BSRef = llvm::make_unique<BlockScheduling>(BB);
}
+ BlockScheduling &BS = *BSRef.get();
- DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
-
- // Check if it is safe to sink the loads or the stores.
- if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
- Instruction *Last = getLastInstruction(VL);
-
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- if (VL[i] == Last)
- continue;
- Value *Barrier = getSinkBarrier(cast<Instruction>(VL[i]), Last);
- if (Barrier) {
- DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << *Last
- << "\n because of " << *Barrier << ". Gathering.\n");
- newTreeEntry(VL, false);
- return;
- }
- }
+ if (!BS.tryScheduleBundle(VL, AA)) {
+ DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
+ BS.cancelScheduling(VL);
+ newTreeEntry(VL, false);
+ return;
}
+ DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
switch (Opcode) {
case Instruction::PHI: {
@@ -838,6 +1088,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i)));
if (Term) {
DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
@@ -861,6 +1112,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
bool Reuse = CanReuseExtract(VL);
if (Reuse) {
DEBUG(dbgs() << "SLP: Reusing extract sequence.\n");
+ } else {
+ BS.cancelScheduling(VL);
}
newTreeEntry(VL, Reuse);
return;
@@ -869,12 +1122,23 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
      // Check if the loads are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
LoadInst *L = cast<LoadInst>(VL[i]);
- if (!L->isSimple() || !isConsecutiveAccess(VL[i], VL[i + 1])) {
+ if (!L->isSimple()) {
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
- DEBUG(dbgs() << "SLP: Need to swizzle loads.\n");
+ DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
+ return;
+ }
+ if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0])) {
+ ++NumLoadsWantToChangeOrder;
+ }
+ BS.cancelScheduling(VL);
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
return;
}
}
+ ++NumLoadsWantToKeepOrder;
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of loads.\n");
return;
@@ -895,6 +1159,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned i = 0; i < VL.size(); ++i) {
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
if (Ty != SrcTy || Ty->isAggregateType() || Ty->isVectorTy()) {
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
return;
@@ -922,6 +1187,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
CmpInst *Cmp = cast<CmpInst>(VL[i]);
if (Cmp->getPredicate() != P0 ||
Cmp->getOperand(0)->getType() != ComparedTy) {
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
return;
@@ -968,20 +1234,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
- BasicBlock *LeftBB = getSameBlock(Left);
- BasicBlock *RightBB = getSameBlock(Right);
- // If we have common uses on separate paths in the tree make sure we
- // process the one with greater common depth first.
- // We can use block numbering to determine the subtree traversal as
- // earler user has to come in between the common use and the later user.
- if (LeftBB && RightBB && LeftBB == RightBB &&
- getLastIndex(Right) > getLastIndex(Left)) {
- buildTree_rec(Right, Depth + 1);
- buildTree_rec(Left, Depth + 1);
- } else {
- buildTree_rec(Left, Depth + 1);
- buildTree_rec(Right, Depth + 1);
- }
+ buildTree_rec(Left, Depth + 1);
+ buildTree_rec(Right, Depth + 1);
return;
}
@@ -1000,6 +1254,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned j = 0; j < VL.size(); ++j) {
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
@@ -1012,6 +1267,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
if (Ty0 != CurTy) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
@@ -1023,6 +1279,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (!isa<ConstantInt>(Op)) {
DEBUG(
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
return;
}
@@ -1044,6 +1301,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
      // Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
@@ -1056,8 +1314,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned j = 0; j < VL.size(); ++j)
Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
- // We can ignore these values because we are sinking them down.
- MemBarrierIgnoreList.insert(VL.begin(), VL.end());
buildTree_rec(Operands, Depth + 1);
return;
}
@@ -1068,6 +1324,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// represented by an intrinsic call
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
if (!isTriviallyVectorizable(ID)) {
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
@@ -1080,6 +1337,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
if (!CI2 || CI2->getCalledFunction() != Int ||
getIntrinsicIDForCall(CI2, TLI) != ID) {
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
<< "\n");
@@ -1090,6 +1348,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (hasVectorInstrinsicScalarOpd(ID, 1)) {
Value *A1J = CI2->getArgOperand(1);
if (A1I != A1J) {
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument "<< A1I<<"!=" << A1J
@@ -1115,6 +1374,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// If this is not an alternate sequence of opcode like add-sub
// then do not vectorize this instruction.
if (!isAltShuffle) {
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
@@ -1132,6 +1392,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
default:
+ BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
@@ -1234,6 +1495,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
+ TargetTransformInfo::OperandValueProperties Op1VP =
+ TargetTransformInfo::OP_None;
+ TargetTransformInfo::OperandValueProperties Op2VP =
+ TargetTransformInfo::OP_None;
// If all operands are exactly the same ConstantInt then set the
// operand kind to OK_UniformConstantValue.
@@ -1255,11 +1520,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
CInt != cast<ConstantInt>(I->getOperand(1)))
Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
}
+      // FIXME: Currently the cost model modification for division by a
+      // power of 2 is handled only for X86. Add support for other targets.
+ if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && CInt &&
+ CInt->getValue().isPowerOf2())
+ Op2VP = TargetTransformInfo::OP_PowerOf2;
- ScalarCost =
- VecTy->getNumElements() *
- TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK);
- VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK);
+ ScalarCost = VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK,
+ Op1VP, Op2VP);
+ VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK,
+ Op1VP, Op2VP);
}
return VecCost - ScalarCost;
}
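
The hunk above only sets OP_PowerOf2 when every scalar in the bundle divides by the same constant and that constant is a power of two, so the target (currently only X86 per the FIXME) can return a cheaper division cost. A minimal standalone sketch of that detection, using toy names rather than LLVM's ConstantInt/APInt API:

#include <cstdint>
#include <cstdio>
#include <vector>

static bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }

// True when every bundle member divides by the same constant and that
// constant is a power of two (illustrative stand-in for the check above).
static bool allDivideBySameUniformPow2(const std::vector<uint64_t> &Divisors) {
  if (Divisors.empty())
    return false;
  for (uint64_t D : Divisors)
    if (D != Divisors.front())
      return false;
  return isPow2(Divisors.front());
}

int main() {
  std::printf("uniform pow2 divisor: %s\n",
              allDivideBySameUniformPow2({8, 8, 8, 8}) ? "yes" : "no");
}
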
@@ -1364,6 +1635,68 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
return true;
}
+int BoUpSLP::getSpillCost() {
+ // Walk from the bottom of the tree to the top, tracking which values are
+ // live. When we see a call instruction that is not part of our tree,
+ // query TTI to see if there is a cost to keeping values live over it
+ // (for example, if spills and fills are required).
+ unsigned BundleWidth = VectorizableTree.front().Scalars.size();
+ int Cost = 0;
+
+ SmallPtrSet<Instruction*, 4> LiveValues;
+ Instruction *PrevInst = nullptr;
+
+ for (unsigned N = 0; N < VectorizableTree.size(); ++N) {
+ Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]);
+ if (!Inst)
+ continue;
+
+ if (!PrevInst) {
+ PrevInst = Inst;
+ continue;
+ }
+
+ DEBUG(
+ dbgs() << "SLP: #LV: " << LiveValues.size();
+ for (auto *X : LiveValues)
+ dbgs() << " " << X->getName();
+ dbgs() << ", Looking at ";
+ Inst->dump();
+ );
+
+ // Update LiveValues.
+ LiveValues.erase(PrevInst);
+ for (auto &J : PrevInst->operands()) {
+ if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
+ LiveValues.insert(cast<Instruction>(&*J));
+ }
+
+ // Now find the sequence of instructions between PrevInst and Inst.
+ BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst);
+ --PrevInstIt;
+ while (InstIt != PrevInstIt) {
+ if (PrevInstIt == PrevInst->getParent()->rend()) {
+ PrevInstIt = Inst->getParent()->rbegin();
+ continue;
+ }
+
+ if (isa<CallInst>(&*PrevInstIt) && &*PrevInstIt != PrevInst) {
+ SmallVector<Type*, 4> V;
+ for (auto *II : LiveValues)
+ V.push_back(VectorType::get(II->getType(), BundleWidth));
+ Cost += TTI->getCostOfKeepingLiveOverCall(V);
+ }
+
+ ++PrevInstIt;
+ }
+
+ PrevInst = Inst;
+ }
+
+ DEBUG(dbgs() << "SLP: SpillCost=" << Cost << "\n");
+ return Cost;
+}
+
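
getSpillCost() above walks the tree entries in order, tracks which tree values are still live, and charges TTI->getCostOfKeepingLiveOverCall() for every call crossed between two consecutive tree instructions. Below is a deliberately simplified, self-contained model of that idea; ToyInst and toySpillCost are illustrative stand-ins and skip the liveness bookkeeping the real code does with PrevInst and the operand scan.

#include <cstdio>
#include <string>
#include <vector>

struct ToyInst {
  std::string Name;
  bool IsCall;  // a call that is not part of the vectorizable tree
  bool InTree;  // produces a value of the vectorizable tree
};

// Charge CostPerLiveValue for every tree value still live when a foreign
// call is crossed (a very rough stand-in for spill/fill cost).
static int toySpillCost(const std::vector<ToyInst> &Block,
                        int CostPerLiveValue) {
  int Cost = 0, LiveValues = 0;
  for (const ToyInst &I : Block) {
    if (I.IsCall && LiveValues > 0)
      Cost += CostPerLiveValue * LiveValues;
    if (I.InTree)
      ++LiveValues; // simplification: tree values stay live to the end
  }
  return Cost;
}

int main() {
  std::vector<ToyInst> Block = {{"a", false, true},
                                {"callee1", true, false},
                                {"b", false, true},
                                {"callee2", true, false},
                                {"sum", false, true}};
  std::printf("toy spill cost = %d\n", toySpillCost(Block, 1));
}
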
int BoUpSLP::getTreeCost() {
int Cost = 0;
DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
@@ -1391,7 +1724,13 @@ int BoUpSLP::getTreeCost() {
for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end();
I != E; ++I) {
// We only add extract cost once for the same scalar.
- if (!ExtractCostCalculated.insert(I->Scalar))
+ if (!ExtractCostCalculated.insert(I->Scalar).second)
+ continue;
+
+ // Uses by ephemeral values are free (because the ephemeral value will be
+ // removed prior to code generation, and so the extraction will be
+ // removed as well).
+ if (EphValues.count(I->User))
continue;
VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth);
@@ -1399,6 +1738,8 @@ int BoUpSLP::getTreeCost() {
I->Lane);
}
+ Cost += getSpillCost();
+
DEBUG(dbgs() << "SLP: Total Cost " << Cost + ExtractCost<< ".\n");
return Cost + ExtractCost;
}
@@ -1420,14 +1761,6 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
return getGatherCost(VecTy);
}
-AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return AA->getLocation(SI);
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return AA->getLocation(LI);
- return AliasAnalysis::Location();
-}
-
Value *BoUpSLP::getPointerOperand(Value *I) {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->getPointerOperand();
@@ -1485,59 +1818,9 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
return X == PtrSCEVB;
}
-Value *BoUpSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) {
- assert(Src->getParent() == Dst->getParent() && "Not the same BB");
- BasicBlock::iterator I = Src, E = Dst;
- /// Scan all of the instruction from SRC to DST and check if
- /// the source may alias.
- for (++I; I != E; ++I) {
- // Ignore store instructions that are marked as 'ignore'.
- if (MemBarrierIgnoreList.count(I))
- continue;
- if (Src->mayWriteToMemory()) /* Write */ {
- if (!I->mayReadOrWriteMemory())
- continue;
- } else /* Read */ {
- if (!I->mayWriteToMemory())
- continue;
- }
- AliasAnalysis::Location A = getLocation(&*I);
- AliasAnalysis::Location B = getLocation(Src);
-
- if (!A.Ptr || !B.Ptr || AA->alias(A, B))
- return I;
- }
- return nullptr;
-}
-
-int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) {
- BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
- assert(BB == getSameBlock(VL) && "Invalid block");
- BlockNumbering &BN = getBlockNumbering(BB);
-
- int MaxIdx = BN.getIndex(BB->getFirstNonPHI());
- for (unsigned i = 0, e = VL.size(); i < e; ++i)
- MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));
- return MaxIdx;
-}
-
-Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) {
- BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
- assert(BB == getSameBlock(VL) && "Invalid block");
- BlockNumbering &BN = getBlockNumbering(BB);
-
- int MaxIdx = BN.getIndex(cast<Instruction>(VL[0]));
- for (unsigned i = 1, e = VL.size(); i < e; ++i)
- MaxIdx = std::max(MaxIdx, BN.getIndex(cast<Instruction>(VL[i])));
- Instruction *I = BN.getInstruction(MaxIdx);
- assert(I && "bad location");
- return I;
-}
-
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
Instruction *VL0 = cast<Instruction>(VL[0]);
- Instruction *LastInst = getLastInstruction(VL);
- BasicBlock::iterator NextInst = LastInst;
+ BasicBlock::iterator NextInst = VL0;
++NextInst;
Builder.SetInsertPoint(VL0->getParent(), NextInst);
Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
@@ -1620,6 +1903,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E->Scalars);
return Gather(E->Scalars, VecTy);
}
+
unsigned Opcode = getSameOpcode(E->Scalars);
switch (Opcode) {
@@ -1638,7 +1922,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ValueList Operands;
BasicBlock *IBB = PH->getIncomingBlock(i);
- if (!VisitedBBs.insert(IBB)) {
+ if (!VisitedBBs.insert(IBB).second) {
NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
continue;
}
@@ -1693,6 +1977,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CastInst *CI = dyn_cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
E->VectorizedValue = V;
+ ++NumVectorInstructions;
return V;
}
case Instruction::FCmp:
@@ -1719,6 +2004,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
V = Builder.CreateICmp(P0, L, R);
E->VectorizedValue = V;
+ ++NumVectorInstructions;
return V;
}
case Instruction::Select: {
@@ -1740,6 +2026,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateSelect(Cond, True, False);
E->VectorizedValue = V;
+ ++NumVectorInstructions;
return V;
}
case Instruction::Add:
@@ -1784,6 +2071,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
E->VectorizedValue = V;
+ propagateIRFlags(E->VectorizedValue, E->Scalars);
+ ++NumVectorInstructions;
if (Instruction *I = dyn_cast<Instruction>(V))
return propagateMetadata(I, E->Scalars);
@@ -1796,16 +2085,26 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E->Scalars);
LoadInst *LI = cast<LoadInst>(VL0);
+ Type *ScalarLoadTy = LI->getType();
unsigned AS = LI->getPointerAddressSpace();
Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
VecTy->getPointerTo(AS));
+
+      // The pointer operand uses an in-tree scalar, so we add the new BitCast
+      // to the ExternalUses list to make sure that an extract will be
+      // generated in the future.
+ if (ScalarToTreeEntry.count(LI->getPointerOperand()))
+ ExternalUses.push_back(
+ ExternalUser(LI->getPointerOperand(), cast<User>(VecPtr), 0));
+
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
if (!Alignment)
- Alignment = DL->getABITypeAlignment(LI->getPointerOperand()->getType());
+ Alignment = DL->getABITypeAlignment(ScalarLoadTy);
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
+ ++NumVectorInstructions;
return propagateMetadata(LI, E->Scalars);
}
case Instruction::Store: {
@@ -1823,10 +2122,19 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
VecTy->getPointerTo(AS));
StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
+
+      // The pointer operand uses an in-tree scalar, so we add the new BitCast
+      // to the ExternalUses list to make sure that an extract will be
+      // generated in the future.
+ if (ScalarToTreeEntry.count(SI->getPointerOperand()))
+ ExternalUses.push_back(
+ ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
+
if (!Alignment)
- Alignment = DL->getABITypeAlignment(SI->getPointerOperand()->getType());
+ Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
S->setAlignment(Alignment);
E->VectorizedValue = S;
+ ++NumVectorInstructions;
return propagateMetadata(S, E->Scalars);
}
case Instruction::GetElementPtr: {
@@ -1851,6 +2159,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateGEP(Op0, OpVecs);
E->VectorizedValue = V;
+ ++NumVectorInstructions;
if (Instruction *I = dyn_cast<Instruction>(V))
return propagateMetadata(I, E->Scalars);
@@ -1862,6 +2171,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E->Scalars);
Function *FI;
Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ Value *ScalarArg = nullptr;
if (CI && (FI = CI->getCalledFunction())) {
IID = (Intrinsic::ID) FI->getIntrinsicID();
}
@@ -1872,6 +2182,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// a scalar. This argument should not be vectorized.
if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
CallInst *CEI = cast<CallInst>(E->Scalars[0]);
+ ScalarArg = CEI->getArgOperand(j);
OpVecs.push_back(CEI->getArgOperand(j));
continue;
}
@@ -1890,7 +2201,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
Value *V = Builder.CreateCall(CF, OpVecs);
+
+      // The scalar argument uses an in-tree scalar, so we add the new
+      // vectorized call to the ExternalUses list to make sure that an extract
+      // will be generated in the future.
+ if (ScalarArg && ScalarToTreeEntry.count(ScalarArg))
+ ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
+
E->VectorizedValue = V;
+ ++NumVectorInstructions;
return V;
}
case Instruction::ShuffleVector: {
@@ -1916,21 +2235,29 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1);
Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS);
- // Create appropriate shuffle to take alternative operations from
- // the vector.
- std::vector<Constant *> Mask(E->Scalars.size());
+ // Create shuffle to take alternate operations from the vector.
+ // Also, gather up odd and even scalar ops to propagate IR flags to
+ // each vector operation.
+ ValueList OddScalars, EvenScalars;
unsigned e = E->Scalars.size();
+ SmallVector<Constant *, 8> Mask(e);
for (unsigned i = 0; i < e; ++i) {
- if (i & 1)
+ if (i & 1) {
Mask[i] = Builder.getInt32(e + i);
- else
+ OddScalars.push_back(E->Scalars[i]);
+ } else {
Mask[i] = Builder.getInt32(i);
+ EvenScalars.push_back(E->Scalars[i]);
+ }
}
Value *ShuffleMask = ConstantVector::get(Mask);
+ propagateIRFlags(V0, EvenScalars);
+ propagateIRFlags(V1, OddScalars);
Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
E->VectorizedValue = V;
+ ++NumVectorInstructions;
if (Instruction *I = dyn_cast<Instruction>(V))
return propagateMetadata(I, E->Scalars);
@@ -1943,6 +2270,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *BoUpSLP::vectorizeTree() {
+
+ // All blocks must be scheduled before any instructions are inserted.
+ for (auto &BSIter : BlocksSchedules) {
+ scheduleBlock(BSIter.second.get());
+ }
+
Builder.SetInsertPoint(F->getEntryBlock().begin());
vectorizeTree(&VectorizableTree[0]);
@@ -2031,9 +2364,6 @@ Value *BoUpSLP::vectorizeTree() {
}
}
- for (auto &BN : BlocksNumbers)
- BN.second.forget();
-
Builder.ClearInsertionPoint();
return VectorizableTree[0].VectorizedValue;
@@ -2127,6 +2457,363 @@ void BoUpSLP::optimizeGatherSequence() {
GatherSeq.clear();
}
+// Groups the instructions into a bundle (which is then a single scheduling
+// entity) and schedules instructions until the bundle gets ready.
+bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
+ AliasAnalysis *AA) {
+ if (isa<PHINode>(VL[0]))
+ return true;
+
+ // Initialize the instruction bundle.
+ Instruction *OldScheduleEnd = ScheduleEnd;
+ ScheduleData *PrevInBundle = nullptr;
+ ScheduleData *Bundle = nullptr;
+ bool ReSchedule = false;
+ DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
+ for (Value *V : VL) {
+ extendSchedulingRegion(V);
+ ScheduleData *BundleMember = getScheduleData(V);
+ assert(BundleMember &&
+ "no ScheduleData for bundle member (maybe not in same basic block)");
+ if (BundleMember->IsScheduled) {
+ // A bundle member was scheduled as single instruction before and now
+ // needs to be scheduled as part of the bundle. We just get rid of the
+ // existing schedule.
+ DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember
+ << " was already scheduled\n");
+ ReSchedule = true;
+ }
+ assert(BundleMember->isSchedulingEntity() &&
+ "bundle member already part of other bundle");
+ if (PrevInBundle) {
+ PrevInBundle->NextInBundle = BundleMember;
+ } else {
+ Bundle = BundleMember;
+ }
+ BundleMember->UnscheduledDepsInBundle = 0;
+ Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
+
+ // Group the instructions to a bundle.
+ BundleMember->FirstInBundle = Bundle;
+ PrevInBundle = BundleMember;
+ }
+ if (ScheduleEnd != OldScheduleEnd) {
+ // The scheduling region got new instructions at the lower end (or it is a
+ // new region for the first bundle). This makes it necessary to
+ // recalculate all dependencies.
+ // It is seldom that this needs to be done a second time after adding the
+ // initial bundle to the region.
+ for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
+ ScheduleData *SD = getScheduleData(I);
+ SD->clearDependencies();
+ }
+ ReSchedule = true;
+ }
+ if (ReSchedule) {
+ resetSchedule();
+ initialFillReadyList(ReadyInsts);
+ }
+
+ DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block "
+ << BB->getName() << "\n");
+
+ calculateDependencies(Bundle, true, AA);
+
+ // Now try to schedule the new bundle. As soon as the bundle is "ready" it
+ // means that there are no cyclic dependencies and we can schedule it.
+  // Note that it's important that we don't "schedule" the bundle yet (see
+  // cancelScheduling).
+ while (!Bundle->isReady() && !ReadyInsts.empty()) {
+
+ ScheduleData *pickedSD = ReadyInsts.back();
+ ReadyInsts.pop_back();
+
+ if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) {
+ schedule(pickedSD, ReadyInsts);
+ }
+ }
+ return Bundle->isReady();
+}
+
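
tryScheduleBundle() only succeeds if the new bundle eventually becomes ready while draining the ready list; if the list runs dry first, some dependence can only be satisfied by the bundle itself, i.e. there is a cycle, and the caller cancels the bundle. A toy, self-contained model of that loop (not LLVM code; ToyBundle is an assumption for illustration):

#include <cstdio>
#include <vector>

struct ToyBundle {
  int UnscheduledDeps; // dependencies of the bundle not yet scheduled
};

// Retire ready predecessors until the bundle has no unscheduled dependencies
// left, or the ready list runs dry (which would indicate a cyclic dependency
// in the full algorithm).
static bool tryScheduleToyBundle(ToyBundle &B, std::vector<int> &Ready) {
  while (B.UnscheduledDeps > 0 && !Ready.empty()) {
    Ready.pop_back();    // "schedule" one ready predecessor...
    --B.UnscheduledDeps; // ...which satisfies one dependency of the bundle
  }
  return B.UnscheduledDeps == 0;
}

int main() {
  ToyBundle B{2};
  std::vector<int> Ready = {1, 2};
  std::printf("bundle became ready: %s\n",
              tryScheduleToyBundle(B, Ready) ? "yes" : "no");
}
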
+void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
+ if (isa<PHINode>(VL[0]))
+ return;
+
+ ScheduleData *Bundle = getScheduleData(VL[0]);
+ DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
+ assert(!Bundle->IsScheduled &&
+ "Can't cancel bundle which is already scheduled");
+ assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() &&
+ "tried to unbundle something which is not a bundle");
+
+ // Un-bundle: make single instructions out of the bundle.
+ ScheduleData *BundleMember = Bundle;
+ while (BundleMember) {
+ assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links");
+ BundleMember->FirstInBundle = BundleMember;
+ ScheduleData *Next = BundleMember->NextInBundle;
+ BundleMember->NextInBundle = nullptr;
+ BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps;
+ if (BundleMember->UnscheduledDepsInBundle == 0) {
+ ReadyInsts.insert(BundleMember);
+ }
+ BundleMember = Next;
+ }
+}
+
+void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
+ if (getScheduleData(V))
+ return;
+ Instruction *I = dyn_cast<Instruction>(V);
+ assert(I && "bundle member must be an instruction");
+ assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
+ if (!ScheduleStart) {
+ // It's the first instruction in the new region.
+ initScheduleData(I, I->getNextNode(), nullptr, nullptr);
+ ScheduleStart = I;
+ ScheduleEnd = I->getNextNode();
+ assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
+ DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
+ return;
+ }
+ // Search up and down at the same time, because we don't know if the new
+ // instruction is above or below the existing scheduling region.
+ BasicBlock::reverse_iterator UpIter(ScheduleStart);
+ BasicBlock::reverse_iterator UpperEnd = BB->rend();
+ BasicBlock::iterator DownIter(ScheduleEnd);
+ BasicBlock::iterator LowerEnd = BB->end();
+ for (;;) {
+ if (UpIter != UpperEnd) {
+ if (&*UpIter == I) {
+ initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
+ ScheduleStart = I;
+ DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
+ return;
+ }
+ UpIter++;
+ }
+ if (DownIter != LowerEnd) {
+ if (&*DownIter == I) {
+ initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
+ nullptr);
+ ScheduleEnd = I->getNextNode();
+ assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
+ DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
+ return;
+ }
+ DownIter++;
+ }
+ assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
+ "instruction not found in block");
+ }
+}
+
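
extendSchedulingRegion() walks upwards and downwards from the current region at the same time, so the work is bounded by the distance of the new instruction from the region rather than the size of the whole block. A self-contained sketch of the same search on a plain vector (extendRegion is an illustrative helper, not part of the pass):

#include <cstdio>
#include <utility>
#include <vector>

// Extend the half-open region [Begin, End) of Block so that it also covers
// the element equal to Target, walking outwards in both directions at once.
static std::pair<int, int> extendRegion(const std::vector<int> &Block,
                                        int Begin, int End, int Target) {
  int Up = Begin - 1, Down = End;
  for (;;) {
    if (Up >= 0) {
      if (Block[Up] == Target)
        return {Up, End};
      --Up;
    }
    if (Down < (int)Block.size()) {
      if (Block[Down] == Target)
        return {Begin, Down + 1};
      ++Down;
    }
    if (Up < 0 && Down >= (int)Block.size())
      return {Begin, End}; // not found; the real code asserts instead
  }
}

int main() {
  std::vector<int> Block = {10, 11, 12, 13, 14, 15};
  std::pair<int, int> R = extendRegion(Block, 2, 4, 15);
  std::printf("new region: [%d, %d)\n", R.first, R.second);
}
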
+void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
+ Instruction *ToI,
+ ScheduleData *PrevLoadStore,
+ ScheduleData *NextLoadStore) {
+ ScheduleData *CurrentLoadStore = PrevLoadStore;
+ for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
+ ScheduleData *SD = ScheduleDataMap[I];
+ if (!SD) {
+ // Allocate a new ScheduleData for the instruction.
+ if (ChunkPos >= ChunkSize) {
+ ScheduleDataChunks.push_back(
+ llvm::make_unique<ScheduleData[]>(ChunkSize));
+ ChunkPos = 0;
+ }
+ SD = &(ScheduleDataChunks.back()[ChunkPos++]);
+ ScheduleDataMap[I] = SD;
+ SD->Inst = I;
+ }
+ assert(!isInSchedulingRegion(SD) &&
+ "new ScheduleData already in scheduling region");
+ SD->init(SchedulingRegionID);
+
+ if (I->mayReadOrWriteMemory()) {
+ // Update the linked list of memory accessing instructions.
+ if (CurrentLoadStore) {
+ CurrentLoadStore->NextLoadStore = SD;
+ } else {
+ FirstLoadStoreInRegion = SD;
+ }
+ CurrentLoadStore = SD;
+ }
+ }
+ if (NextLoadStore) {
+ if (CurrentLoadStore)
+ CurrentLoadStore->NextLoadStore = NextLoadStore;
+ } else {
+ LastLoadStoreInRegion = CurrentLoadStore;
+ }
+}
+
+/// \returns the AA location that is being accessed by the instruction.
+static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return AA->getLocation(SI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return AA->getLocation(LI);
+ return AliasAnalysis::Location();
+}
+
+void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
+ bool InsertInReadyList,
+ AliasAnalysis *AA) {
+ assert(SD->isSchedulingEntity());
+
+ SmallVector<ScheduleData *, 10> WorkList;
+ WorkList.push_back(SD);
+
+ while (!WorkList.empty()) {
+ ScheduleData *SD = WorkList.back();
+ WorkList.pop_back();
+
+ ScheduleData *BundleMember = SD;
+ while (BundleMember) {
+ assert(isInSchedulingRegion(BundleMember));
+ if (!BundleMember->hasValidDependencies()) {
+
+ DEBUG(dbgs() << "SLP: update deps of " << *BundleMember << "\n");
+ BundleMember->Dependencies = 0;
+ BundleMember->resetUnscheduledDeps();
+
+ // Handle def-use chain dependencies.
+ for (User *U : BundleMember->Inst->users()) {
+ if (isa<Instruction>(U)) {
+ ScheduleData *UseSD = getScheduleData(U);
+ if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+ BundleMember->Dependencies++;
+ ScheduleData *DestBundle = UseSD->FirstInBundle;
+ if (!DestBundle->IsScheduled) {
+ BundleMember->incrementUnscheduledDeps(1);
+ }
+ if (!DestBundle->hasValidDependencies()) {
+ WorkList.push_back(DestBundle);
+ }
+ }
+ } else {
+ // I'm not sure if this can ever happen. But we need to be safe.
+          // This prevents the instruction/bundle from ever being scheduled,
+          // which eventually disables vectorization.
+ BundleMember->Dependencies++;
+ BundleMember->incrementUnscheduledDeps(1);
+ }
+ }
+
+ // Handle the memory dependencies.
+ ScheduleData *DepDest = BundleMember->NextLoadStore;
+ if (DepDest) {
+ AliasAnalysis::Location SrcLoc = getLocation(BundleMember->Inst, AA);
+ bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
+
+ while (DepDest) {
+ assert(isInSchedulingRegion(DepDest));
+ if (SrcMayWrite || DepDest->Inst->mayWriteToMemory()) {
+ AliasAnalysis::Location DstLoc = getLocation(DepDest->Inst, AA);
+ if (!SrcLoc.Ptr || !DstLoc.Ptr || AA->alias(SrcLoc, DstLoc)) {
+ DepDest->MemoryDependencies.push_back(BundleMember);
+ BundleMember->Dependencies++;
+ ScheduleData *DestBundle = DepDest->FirstInBundle;
+ if (!DestBundle->IsScheduled) {
+ BundleMember->incrementUnscheduledDeps(1);
+ }
+ if (!DestBundle->hasValidDependencies()) {
+ WorkList.push_back(DestBundle);
+ }
+ }
+ }
+ DepDest = DepDest->NextLoadStore;
+ }
+ }
+ }
+ BundleMember = BundleMember->NextInBundle;
+ }
+ if (InsertInReadyList && SD->isReady()) {
+ ReadyInsts.push_back(SD);
+ DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst << "\n");
+ }
+ }
+}
+
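
The memory-dependence part of calculateDependencies() uses the usual conservative rule: an edge is added whenever at least one of the two accesses may write and alias analysis cannot prove the locations disjoint. A standalone illustration of that rule with a toy byte-range "alias analysis" (ToyAccess, mayAlias and needsDependency are assumptions for illustration only, not LLVM's AliasAnalysis):

#include <cstdio>

struct ToyAccess {
  int Addr;       // start of the accessed byte range
  int Size;       // size of the access in bytes
  bool MayWrite;  // store-like access
  bool AddrKnown; // false models an unknown location (AliasAnalysis::Location with no Ptr)
};

static bool mayAlias(const ToyAccess &A, const ToyAccess &B) {
  if (!A.AddrKnown || !B.AddrKnown)
    return true; // unknown address: must conservatively assume aliasing
  return A.Addr < B.Addr + B.Size && B.Addr < A.Addr + A.Size;
}

static bool needsDependency(const ToyAccess &Src, const ToyAccess &Dst) {
  return (Src.MayWrite || Dst.MayWrite) && mayAlias(Src, Dst);
}

int main() {
  ToyAccess Store{0, 4, true, true}, Load{8, 4, false, true};
  std::printf("dependency needed: %s\n",
              needsDependency(Store, Load) ? "yes" : "no");
}
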
+void BoUpSLP::BlockScheduling::resetSchedule() {
+ assert(ScheduleStart &&
+ "tried to reset schedule on block which has not been scheduled");
+ for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
+ ScheduleData *SD = getScheduleData(I);
+ assert(isInSchedulingRegion(SD));
+ SD->IsScheduled = false;
+ SD->resetUnscheduledDeps();
+ }
+ ReadyInsts.clear();
+}
+
+void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
+
+ if (!BS->ScheduleStart)
+ return;
+
+ DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
+
+ BS->resetSchedule();
+
+ // For the real scheduling we use a more sophisticated ready-list: it is
+ // sorted by the original instruction location. This lets the final schedule
+ // be as close as possible to the original instruction order.
+ struct ScheduleDataCompare {
+ bool operator()(ScheduleData *SD1, ScheduleData *SD2) {
+ return SD2->SchedulingPriority < SD1->SchedulingPriority;
+ }
+ };
+ std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
+
+  // Ensure that all dependency data is updated and fill the ready-list with
+ // initial instructions.
+ int Idx = 0;
+ int NumToSchedule = 0;
+ for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
+ I = I->getNextNode()) {
+ ScheduleData *SD = BS->getScheduleData(I);
+ assert(
+ SD->isPartOfBundle() == (ScalarToTreeEntry.count(SD->Inst) != 0) &&
+        "scheduler and vectorizer have different opinions on what is a bundle");
+ SD->FirstInBundle->SchedulingPriority = Idx++;
+ if (SD->isSchedulingEntity()) {
+ BS->calculateDependencies(SD, false, AA);
+ NumToSchedule++;
+ }
+ }
+ BS->initialFillReadyList(ReadyInsts);
+
+ Instruction *LastScheduledInst = BS->ScheduleEnd;
+
+ // Do the "real" scheduling.
+ while (!ReadyInsts.empty()) {
+ ScheduleData *picked = *ReadyInsts.begin();
+ ReadyInsts.erase(ReadyInsts.begin());
+
+ // Move the scheduled instruction(s) to their dedicated places, if not
+ // there yet.
+ ScheduleData *BundleMember = picked;
+ while (BundleMember) {
+ Instruction *pickedInst = BundleMember->Inst;
+ if (LastScheduledInst->getNextNode() != pickedInst) {
+ BS->BB->getInstList().remove(pickedInst);
+ BS->BB->getInstList().insert(LastScheduledInst, pickedInst);
+ }
+ LastScheduledInst = pickedInst;
+ BundleMember = BundleMember->NextInBundle;
+ }
+
+ BS->schedule(picked, ReadyInsts);
+ NumToSchedule--;
+ }
+ assert(NumToSchedule == 0 && "could not schedule all instructions");
+
+ // Avoid duplicate scheduling of the block.
+ BS->ScheduleStart = nullptr;
+}
+
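
scheduleBlock() keys its ready list on SchedulingPriority, which records each bundle's original position, so picks are deterministic and the final order stays close to the source order. The sketch below only demonstrates the position-keyed std::set idea; ToyNode is an illustrative stand-in and the comparison direction is simplified relative to the pass.

#include <cstdio>
#include <set>

struct ToyNode {
  int OriginalPosition; // index of the instruction in the source order
  const char *Name;
};

struct ByOriginalPosition {
  bool operator()(const ToyNode *A, const ToyNode *B) const {
    return A->OriginalPosition < B->OriginalPosition;
  }
};

int main() {
  ToyNode Load{0, "load"}, Add{1, "add"}, Store{2, "store"};
  std::set<ToyNode *, ByOriginalPosition> Ready = {&Store, &Load, &Add};
  while (!Ready.empty()) {
    ToyNode *Picked = *Ready.begin(); // deterministic, position-based pick
    Ready.erase(Ready.begin());
    std::printf("pick %s (pos %d)\n", Picked->Name, Picked->OriginalPosition);
  }
}
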
/// The SLPVectorizer Pass.
struct SLPVectorizer : public FunctionPass {
typedef SmallVector<StoreInst *, 8> StoreList;
@@ -2146,6 +2833,7 @@ struct SLPVectorizer : public FunctionPass {
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
+ AssumptionTracker *AT;
bool runOnFunction(Function &F) override {
if (skipOptnoneFunction(F))
@@ -2159,6 +2847,7 @@ struct SLPVectorizer : public FunctionPass {
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AT = &getAnalysis<AssumptionTracker>();
StoreRefs.clear();
bool Changed = false;
@@ -2181,7 +2870,7 @@ struct SLPVectorizer : public FunctionPass {
// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.
- BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT);
+ BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AT);
// Scan the blocks in the function in post order.
for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
@@ -2208,6 +2897,7 @@ struct SLPVectorizer : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetTransformInfo>();
@@ -2234,7 +2924,8 @@ private:
/// scheduling and that don't need extracting.
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- ArrayRef<Value *> BuildVector = None);
+ ArrayRef<Value *> BuildVector = None,
+ bool allowReorder = false);
/// \brief Try to vectorize a chain that may start at the operands of \V;
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
@@ -2404,11 +3095,12 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B)
return false;
Value *VL[] = { A, B };
- return tryToVectorizeList(VL, R);
+ return tryToVectorizeList(VL, R, None, true);
}
bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- ArrayRef<Value *> BuildVector) {
+ ArrayRef<Value *> BuildVector,
+ bool allowReorder) {
if (VL.size() < 2)
return false;
@@ -2463,6 +3155,14 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
BuildVectorSlice = BuildVector.slice(i, OpsWidth);
R.buildTree(Ops, BuildVectorSlice);
+    // TODO: Check if we can allow reordering for cases other than
+    // tryToVectorizePair().
+ if (allowReorder && R.shouldReorder()) {
+ assert(Ops.size() == 2);
+ assert(BuildVectorSlice.empty());
+ Value *ReorderedOps[] = { Ops[1], Ops[0] };
+ R.buildTree(ReorderedOps, None);
+ }
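
shouldReorder() itself is not part of this hunk; judging from the NumLoadsWantToKeepOrder / NumLoadsWantToChangeOrder counters introduced earlier in the patch, it presumably asks whether more size-2 load bundles become consecutive after swapping the two operands than are consecutive as-is. The helper below is purely an assumption sketched for illustration, not the pass's implementation:

#include <cstdio>

// Assumed heuristic: prefer the reversed operand order when it would make
// more load bundles consecutive than the original order does.
static bool toyShouldReorder(int NumLoadsWantToKeepOrder,
                             int NumLoadsWantToChangeOrder) {
  return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder;
}

int main() {
  std::printf("reorder operands: %s\n",
              toyShouldReorder(1, 3) ? "yes" : "no");
}
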
int Cost = R.getTreeCost();
if (Cost < -SLPCostThreshold) {
@@ -2514,11 +3214,9 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
if (tryToVectorizePair(A, B0, R)) {
- B->moveBefore(V);
return true;
}
if (tryToVectorizePair(A, B1, R)) {
- B->moveBefore(V);
return true;
}
}
@@ -2528,11 +3226,9 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
if (tryToVectorizePair(A0, B, R)) {
- A->moveBefore(V);
return true;
}
if (tryToVectorizePair(A1, B, R)) {
- A->moveBefore(V);
return true;
}
}
@@ -2728,8 +3424,7 @@ public:
unsigned i = 0;
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
- ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
- V.buildTree(ValsToReduce, ReductionOps);
+ V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps);
// Estimate cost.
int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
@@ -2921,8 +3616,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to vectorize them.
unsigned NumElts = (SameTypeIt - IncIt);
DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
- if (NumElts > 1 &&
- tryToVectorizeList(ArrayRef<Value *>(IncIt, NumElts), R)) {
+ if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) {
// Success start over because instructions might have been changed.
HaveVectorizedPhiNodes = true;
Changed = true;
@@ -2938,7 +3632,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
// We may go through BB multiple times so skip the one we have checked.
- if (!VisitedInstrs.insert(it))
+ if (!VisitedInstrs.insert(it).second)
continue;
if (isa<DbgInfoIntrinsic>(it))
@@ -3002,6 +3696,21 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
}
+ // Try to vectorize horizontal reductions feeding into a return.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
+ if (RI->getNumOperands() != 0)
+ if (BinaryOperator *BinOp =
+ dyn_cast<BinaryOperator>(RI->getOperand(0))) {
+ DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
+ if (tryToVectorizePair(BinOp->getOperand(0),
+ BinOp->getOperand(1), R)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+ }
+
// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
@@ -3014,15 +3723,15 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
for (int i = 0; i < 2; ++i) {
- if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
- if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
- Changed = true;
- // We would like to start over since some instructions are deleted
- // and the iterator may become invalid value.
- it = BB->begin();
- e = BB->end();
- }
- }
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
+ if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid value.
+ it = BB->begin();
+ e = BB->end();
+ }
+ }
}
continue;
}
@@ -3064,8 +3773,8 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
// Process the stores in chunks of 16.
for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
unsigned Len = std::min<unsigned>(CE - CI, 16);
- ArrayRef<StoreInst *> Chunk(&it->second[CI], Len);
- Changed |= vectorizeStores(Chunk, -SLPCostThreshold, R);
+ Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len),
+ -SLPCostThreshold, R);
}
}
return Changed;
@@ -3078,6 +3787,7 @@ static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)